Simple Mixture Model

Hi,

I am trying to write a simple mixture model but am running into an error:

import torch
import torch.distributions.constraints as constraints
import pyro
import pyro.distributions as dist
from pyro.optim import Adam
from pyro.infer import SVI, Trace_ELBO

data_y = [1.0, 1.0, 2.0, 100.0, 101.0, 120.0, 4.0, 2.0, 1.0]
data_y = [torch.tensor(x) for x in data_y]
p = 2
pyro.enable_validation(True)

def model(data_y):
    mu = pyro.sample("mu", dist.Normal(0 * torch.ones(p), 1 * torch.ones(p)))
    sigma = pyro.sample("sigma", dist.Normal(0 * torch.ones(p), 1 * torch.ones(p)))
    theta = pyro.sample("theta", dist.Dirichlet(torch.ones(p)))
    print(mu)

    cat = pyro.sample("cat", dist.Categorical(theta))
    pyro.sample("obs", dist.Normal(mu[cat], sigma[cat]), obs=data_y)

def guide(data_y):
    mu_mu = pyro.param("mu_q", torch.ones(p))
    mu_sigma = pyro.param("mu_sigma", torch.ones(p), constraint=constraints.positive)
    mu = pyro.sample("mu", dist.Normal(mu_mu, mu_sigma))
    print(mu)

    sigma_mu = pyro.param("sigma_q", torch.ones(p), constraint=constraints.positive)
    sigma_sigma = pyro.param("sigma_sigma", torch.ones(p), constraint=constraints.positive)
    sigma = pyro.sample("sigma", dist.Normal(sigma_mu, sigma_sigma))

    theta_mu = pyro.param("theta_q", torch.ones(p) / p, constraint=constraints.simplex)
    theta = pyro.sample("theta", dist.Dirichlet(theta_mu))

    pyro.sample("cat", dist.Categorical(theta))

optim = Adam({'lr': 0.001})
svi = SVI(model, guide, optim, loss=Trace_ELBO())
for i in range(100):
    loss = svi.step(data_y)
    if i % 100 == 0:
        print(loss / len(data_y))
for name in pyro.get_param_store().get_all_param_names():
    print(name)
    print(pyro.param(name))

It fails with the following error:

tensor([ 1.1412, -1.1431], grad_fn=<ThAddBackward>)
tensor([ 1.1412, -1.1431], grad_fn=<ThAddBackward>)
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/pyro/poutine/trace_struct.py", line 249, in compute_log_prob
    site["log_prob"]
KeyError: 'log_prob'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "test.py", line 44, in <module>
    loss = svi.step(data_y)
  File "/usr/local/lib/python3.6/dist-packages/pyro/infer/svi.py", line 75, in step
    loss = self.loss_and_grads(self.model, self.guide, *args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/pyro/infer/trace_elbo.py", line 107, in loss_and_grads
    for model_trace, guide_trace in self._get_traces(model, guide, *args, **kwargs):
  File "/usr/local/lib/python3.6/dist-packages/pyro/infer/trace_elbo.py", line 68, in _get_traces
    model_trace.compute_log_prob()
  File "/usr/local/lib/python3.6/dist-packages/pyro/poutine/trace_struct.py", line 252, in compute_log_prob
    site_log_p = site["fn"].log_prob(site["value"], *args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/distributions/normal.py", line 63, in log_prob
    self._validate_sample(value)
  File "/usr/local/lib/python3.6/dist-packages/torch/distributions/distribution.py", line 206, in _validate_sample
    raise ValueError('The value argument to log_prob must be a Tensor')
ValueError: The value argument to log_prob must be a Tensor

Am I missing something here? Any suggestions would be appreciated.

I am using Python 3.6 and Pyro 0.2.1.

data_y is a list of tensors, but the observation needs to be a PyTorch tensor itself.

You probably want:

pyro.sample("obs", dist.Normal(mu[cat], sigma[cat]), obs=data_y[cat])
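
For what it's worth, you can reproduce the underlying failure outside Pyro; a minimal sketch, assuming validation is enabled as in your script:

import torch
import torch.distributions as tdist

d = tdist.Normal(torch.tensor(0.0), torch.tensor(1.0), validate_args=True)
print(d.log_prob(torch.tensor(2.0)))  # fine: the value is a tensor
d.log_prob([torch.tensor(2.0)])       # ValueError: The value argument to log_prob must be a Tensor

Alternatively, you could convert the whole list into a single tensor up front with data_y = torch.tensor(data_y), though the model then needs to handle the extra batch dimension.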

OK, I did that. Now I get this error:

tensor([2.2320, 3.2894], grad_fn=<ThAddBackward>)
tensor([2.2320, 3.2894], grad_fn=<ThAddBackward>)
Traceback (most recent call last):
  File "test.py", line 44, in <module>
    loss = svi.step(data_y)
  File "/usr/local/lib/python3.6/dist-packages/pyro/infer/svi.py", line 75, in step
    loss = self.loss_and_grads(self.model, self.guide, *args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/pyro/infer/trace_elbo.py", line 107, in loss_and_grads
    for model_trace, guide_trace in self._get_traces(model, guide, *args, **kwargs):
  File "/usr/local/lib/python3.6/dist-packages/pyro/infer/trace_elbo.py", line 73, in _get_traces
    check_site_shape(site, self.max_iarange_nesting)
  File "/usr/local/lib/python3.6/dist-packages/pyro/util.py", line 230, in check_site_shape
    '- .permute() data dimensions']))
ValueError: at site "mu", invalid log_prob shape
  Expected [], actual [2]
  Try one of the following fixes:
  - enclose the batched tensor in a with iarange(...): context
  - .independent(...) the distribution being sampled
  - .permute() data dimensions

What should be done?

Your mu is a tensor, and Pyro needs to know whether it can treat the tensor elements independently. You can either mark one of the Normal batch dimensions as an event dimension:

mu = pyro.sample("mu", dist.Normal(mu_mu, mu_sigma).independent(1))

or give Pyro license to treat the batch dimensions independently:

with pyro.iarange('components', p):
    mu = pyro.sample("mu", dist.Normal(mu_mu, mu_sigma))
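
To see what these do to shapes, here is a quick check (a sketch for intuition, not from the tutorials):

import torch
import pyro.distributions as dist

d = dist.Normal(torch.zeros(2), torch.ones(2))
x = torch.zeros(2)
print(d.log_prob(x).shape)                 # torch.Size([2]): one log-prob per component
print(d.independent(1).log_prob(x).shape)  # torch.Size([]): a single joint log-prob

.independent(1) reinterprets the rightmost batch dimension as an event dimension, so log_prob sums over it, which is why the shape check passes.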

See the tensor shapes tutorial for details. I also recommend reading the Gaussian mixture model tutorial for a grounding example. Let us know if any part of the tutorials could be clarified.

Thanks for the response @fritzo. The code works now!
I am trying to do this with SVI rather than the maximum-likelihood approach in the Gaussian mixture example, but the results I am getting are not close. I expect the means to be close to 1 and 1000 for this data_y.
This is the current program:

import torch
import torch.nn
import torch.distributions.constraints as constraints
import pyro
import pyro.distributions as dist
from pyro.optim import Adam
from pyro.infer import SVI, Trace_ELBO

data_y = [1.0, 1.0, 2.0, 1000.0, 1001.0, 1200.0, 4.0, 2.0, 1.0,
          1.0, 1.0, 2.0, 1000.0, 1001.0, 1200.0, 4.0, 2.0, 1.0, 1.0]
data_y = torch.tensor(data_y)
p = 2
pyro.enable_validation(True)

def model(data_y):
    mu = pyro.sample("mu", dist.Normal(0 * torch.ones(p), 1 * torch.ones(p)).independent(1))
    # Softplus maps the real-valued sample to a positive noise scale
    sigma = torch.nn.Softplus()(pyro.sample("sigma", dist.Normal(torch.ones(1), torch.ones(1))))
    theta = pyro.sample("theta", dist.Dirichlet(torch.ones(p)))

    for ind in range(len(data_y)):
        cat = pyro.sample("cat_{0}".format(ind), dist.Categorical(theta))
        pyro.sample("obs_{0}".format(ind), dist.Normal(mu[cat], sigma), obs=data_y[ind])

def guide(data_y):
    mu_mu = pyro.param("mu_mu", torch.ones(p))
    mu_sigma = pyro.param("mu_sigma", torch.ones(p), constraint=constraints.positive)
    # note: mu_sigma is defined but unused; the scale below is fixed at torch.ones(p)
    mu = pyro.sample("mu", dist.Normal(mu_mu, torch.ones(p)).independent(1))

    sigma_mu = pyro.param("sigma_mu", 0.5 * torch.ones(1), constraint=constraints.positive)
    sigma_sigma = pyro.param("sigma_sigma", torch.ones(1), constraint=constraints.positive)
    sigma = pyro.sample("sigma", dist.Normal(sigma_mu, sigma_sigma))

    theta_mu = pyro.param("theta_q", torch.ones(p), constraint=constraints.positive)
    theta = pyro.sample("theta", dist.Dirichlet(theta_mu))

    for ind in range(len(data_y)):
        # note: a single shared param "t" is reused for every data point
        t = pyro.param("t", torch.ones(p) / p, constraint=constraints.simplex)
        pyro.sample("cat_{0}".format(ind), dist.Categorical(t))

optim = Adam({'lr': 0.05})
svi = SVI(model, guide, optim, loss=Trace_ELBO())
for i in range(4000):
    loss = svi.step(data_y)
    if i % 1000 == 0:
        print(loss / len(data_y))
for name in pyro.get_param_store().get_all_param_names():
    print(name, pyro.param(name).data.numpy())

But the learned means (mu_mu) stay small, and the inferred noise scale (sigma_mu) is large.

('sigma_sigma', array([0.6202943], dtype=float32))
('sigma_mu', array([41.97691], dtype=float32))
('mu_mu', array([2.8627298, 4.4322577], dtype=float32))
('t', array([0.418196, 0.581804], dtype=float32))
('theta_q', array([11.215491, 12.190287], dtype=float32))
('mu_sigma', array([1., 1.], dtype=float32))

How can I control this? Is there a better approach?

Any thoughts on this?