Hi,
I’m struggling with a basic inference problem with pyro, here is my question:
Having a probablity graph model with fixed parameter, how to predict the nodes’ conditional prob with SVI for each input data.
As in the simple example below, b depends on a and the parameter has been fixed in weight. Given the value of a, I want to get the conditional prob of b.
The most straight way is to use compute_marginal() to get the precise result, which is obivious in this example. But when graph is dense, the enumeration process will be very slow. So I try to overcome this with SVI, but the result is not quite right.
@config_enumerate
def model(data):
weight = pyro.param("weight", torch.tensor([[0.3,0.7],[0.4,0.6]]), constraints.unit_interval)
with pyro.plate("data", len(data)):
a = pyro.sample("a", dist.Categorical(torch.tensor([0.5,0.5])), obs=data[:,0])
weights_b = Vindex(weight)[a,:]
b = pyro.sample("b", dist.Categorical(weights_b), infer={"enumerate": "parallel"})
@config_enumerate
def guide(data):
poutine.block(model, hide=["weight"])
with pyro.plate("data", len(data)):
weight_guide = pyro.param('weight_guide', torch.tensor([0.5,0.5]),
constraint=constraints.unit_interval)
print(weight_guide)
b = pyro.sample('b', dist.Categorical(weight_guide))
def infer():
pyro.clear_param_store()
optim = pyro.optim.Adam({'lr': 0.2, 'betas': [0.8, 0.99]})
elbo = TraceEnum_ELBO()
svi = SVI(model, guide, optim, loss=elbo)
# Register hooks to monitor gradient norms.
gradient_norms = defaultdict(list)
svi.loss(model, guide, data) # Initializes param store.
for name, value in pyro.get_param_store().named_parameters():
value.register_hook(lambda g, name=name: gradient_norms[name].append(g.norm().item()))
losses = []
for i in range(1000 if not smoke_test else 2):
loss = svi.step(data)
losses.append(loss)
print('.' if i % 100 else '\n', end='')
if __name__ == '__main__':
data = torch.tensor([[0],[1]])
infer()
print('weight_guide = {}'.format(pyro.param('weight_guide')))
print('weight = {}'.format(pyro.param('weight')))
The result is :
weight_guide = tensor([0.582313, 0.814295], grad_fn=<ClampBackward>)
weight = tensor([[0.584472, 0.816690],
[0.593544, 0.829365]], grad_fn=<ClampBackward>)
It seems no matter the value or dimension of the weight_guide are both wrong. And the poutine block seems not working because the value of weight changes, .
So where is the problem of this code?
Thanks!