Hi!
I was surprised to find that when I pass an input tensor to svi.step, its values have changed by the time it reaches the guide. For example, the original tensor contained something like [..., 57, 58, 59, 60, 61, ...], while the guide's argument is [..., 57, 58, 19, 59, 60, ...]. The tensor shapes are the same. It only seems to reproduce on large tensors. I'm new to Pyro, so I don't know whether this is a bug or expected behaviour, but it certainly breaks my program.
In other words, with the following code, X in guide is different from the X passed to svi.step, even though it has the same shape.
def model(X):
    ...  # some code with X

def guide(X):
    ...  # some code with X

svi = SVI(model, guide, optimizer, loss=Trace_ELBO())
svi.step(X)
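Concretely, this is roughly how I see the mismatch (a sketch rather than my real code; X_before is just an illustrative name for the snapshot I take before the step):

X_before = X.clone()                     # copy taken before the optimisation step

def guide(X):
    assert X.shape == X_before.shape     # shapes still agree
    print(torch.equal(X, X_before))      # prints False for me on large tensors
    ...  # some code with X

svi = SVI(model, guide, optimizer, loss=Trace_ELBO())
svi.step(X)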
Full Code to Reproduce
import argparse
import os
import numpy as np
import pandas as pd
from torch import nn
import torch
import pyro
from pyro.ops.indexing import Vindex
import pyro.distributions as dist
from pyro.infer import MCMC, NUTS, Predictive
from pyro.infer.reparam import LocScaleReparam
import torch.distributions.constraints as constraints
from pyro.optim import Adam
from pyro.infer import SVI, Trace_ELBO
from tqdm.auto import tqdm, trange
from crowdkit.datasets import load_dataset
DEVICE = 'cpu'
def bcc(worker_pos, tasks_pos, worker_labels):
    n_tasks = torch.max(tasks_pos) + 1
    n_workers = torch.max(worker_pos) + 1
    n_labels = torch.max(worker_labels) + 1
    init = torch.ones(n_labels, n_labels, device=DEVICE) + (torch.eye(n_labels, device=DEVICE) if n_labels == 2 else torch.eye(n_labels, device=DEVICE) * (n_labels - 2)).repeat(n_workers, 1, 1)
    beta = pyro.sample('beta', dist.Dirichlet(init).to_event(2))
    with pyro.plate('items', n_tasks, dim=-1):
        pi = pyro.sample('pi', dist.Dirichlet(torch.ones(n_labels, device=DEVICE)))
        c = pyro.sample('c', dist.Categorical(pi).to_event(1))
        y = pyro.sample('y', dist.Categorical(beta[worker_pos, c[tasks_pos], :]).to_event(1), obs=worker_labels)
def bcc_guide(worker_pos, tasks_pos, worker_labels):
    # !!! This is wrong: the maximum value differs from that of the original tasks_pos, since the tensors are different
    n_tasks = torch.max(tasks_pos) + 1
    # !!!
    torch.save(tasks_pos, 'tasks_pos.pt')
    n_workers = torch.max(worker_pos) + 1
    n_labels = torch.max(worker_labels) + 1
    init = torch.ones(n_labels, n_labels, device=DEVICE) + (torch.eye(n_labels, device=DEVICE) if n_labels == 2 else torch.eye(n_labels, device=DEVICE) * (n_labels - 2)).repeat(n_workers, 1, 1)
    beta_q = pyro.param('beta_q', init)
    beta = pyro.sample('beta', dist.Dirichlet(beta_q).to_event(2))
    pi_q = pyro.param('pi_q', torch.ones(n_labels, device=DEVICE))
    with pyro.plate('items', n_tasks, dim=-1):
        pi = pyro.sample('pi', dist.Dirichlet(pi_q))
        c_q = pyro.param('c_q', torch.ones(n_tasks, n_labels, device=DEVICE) / n_labels, constraint=constraints.simplex)
        c = pyro.sample('c', dist.Categorical(c_q).to_event(1))
df, gt = load_dataset('relevance-2')
tasks_pos = torch.tensor(pd.factorize(df['task'])[0]).to(DEVICE)
worker_pos = torch.tensor(pd.factorize(df['performer'])[0]).to(DEVICE)
worker_labels = torch.tensor(df['label'].values).to(DEVICE)
adam_params = {"lr": 0.01, "betas": (0.90, 0.999)}
optimizer = Adam(adam_params)
model = bcc
svi = SVI(model, bcc_guide, optimizer, loss=Trace_ELBO())
pbar = trange(1)
for step in pbar:
    loss = svi.step(worker_pos, tasks_pos, worker_labels)
    pbar.set_description(f'Trace ELBO: {round(loss, 4)}')
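After the step, I compare the tensor saved by the torch.save call inside bcc_guide with the original tasks_pos (again just a sketch of the check I run):

# Compare the tensor seen inside the guide with the one passed to svi.step
saved = torch.load('tasks_pos.pt')
print(saved.shape == tasks_pos.shape)                       # shapes match
print(torch.equal(saved, tasks_pos))                        # False in my runs
mismatch = (saved != tasks_pos).nonzero(as_tuple=True)[0]   # positions where values differ
print(mismatch[:10], tasks_pos[mismatch[:10]], saved[mismatch[:10]])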
It would be great if someone could provide some insight into what I can do to avoid this behaviour.