Multiple observations with same value

I have a dataset with a large number of observations in which many values are repeated. For example, if X ~ Poisson(10) and 100,000 observations are drawn from X, there will only be about 25 distinct values. Instead of passing over a vector of length 100,000 to calculate the likelihood, can Pyro evaluate the log-likelihood of each distinct value once and multiply it by the number of times that value was observed? Model below:

from numpy.random import poisson, seed
from numpy import unique

from torch import tensor
from torch.distributions.constraints import positive

from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam
from pyro import distributions as dist
from pyro import plate

import pyro


# Enable Pyro's validation checks, start from an empty parameter store, and
# seed both Pyro and NumPy's global RNG so the run is repeatable.
pyro.enable_validation(True)
pyro.clear_param_store()
pyro.set_rng_seed(1)
seed(1)

# Number of simulated draws. 100,000 matches the problem statement above
# (the previous value of 1,000,000 did not).
n = 100000
lam = 10

# Simulate the observations, then collapse them into the distinct values and
# the number of times each one occurs.
data = poisson(lam, n)
values, counts = unique(data, axis=0, return_counts=True)

# Convert the NumPy integer arrays to float tensors.
data = tensor(data).float()
values = tensor(values).float()
counts = tensor(counts).float()
print(data.mean())

def model(data):
    """Poisson model with an Exponential prior on the rate.

    Draws the latent site "lam" from Exponential(rate=1/10) and conditions
    the "obs" site, a Poisson with that rate, on ``data`` inside the
    'observe_data' plate.
    """
    # Prior over the Poisson rate.
    rate_prior = dist.Exponential(1. / 10)
    rate = pyro.sample("lam", rate_prior)

    # Likelihood of the observations given the sampled rate.
    likelihood = dist.Poisson(rate)
    with plate('observe_data'):
        pyro.sample("obs", likelihood, obs=data)

def guide(data):
    """Variational guide: an Exponential over "lam" with a learnable rate.

    The rate is the Pyro parameter "rate_q", initialised to 1/10 and
    constrained to be positive. ``data`` is accepted only so the signature
    matches the model; it is not used.
    """
    q_rate = pyro.param("rate_q", tensor(1. / 10), constraint=positive)
    approx_posterior = dist.Exponential(q_rate)
    pyro.sample("lam", approx_posterior)

# Adam settings handed to Pyro's optimizer wrapper.
adam_params = dict(lr=1.0, betas=(0.90, 0.999))
optimizer = Adam(adam_params)

# Stochastic variational inference over the model/guide pair using the ELBO.
svi = SVI(model, guide, optimizer, loss=Trace_ELBO())

# Take 1000 gradient steps on the full data set.
for _ in range(1000):
    svi.step(data)

# Report the fitted guide rate and its reciprocal.
fitted_rate = pyro.param("rate_q").item()
print(fitted_rate)
print(1. / fitted_rate)

It would be nice to pass values and counts instead of data.

You should be able to do something like:

# The per-value counts must be given to poutine.scale through the `scale`
# keyword; a tensor passed positionally is rejected as "not callable".
with plate('observe_data'), pyro.poutine.scale(scale=num_counts):
    pyro.sample("obs", dist.Poisson(unique_lam), obs=unique_data)

where unique_lam.size(-1)==unique_data.size(-1)==num_counts.size(-1)

1 Like

Thanks @martinjankowiak. I modified my code to

# Pack the distinct values and their counts into a single tensor so both can
# be handed to the model as one argument.
# NOTE(review): `stack` is not among the imports shown earlier; presumably
# `from torch import stack` is intended.
unique_data = stack([values, counts])

def model(data):
    """Poisson model over distinct values, intended to be weighted by counts.

    ``data`` is indexed as a two-row tensor: row 0 is read as the distinct
    observed values and row 1 as the number of occurrences of each value.
    """
    # priors
    lam = pyro.sample("lam", dist.Exponential(1./10))

    unique_values = data[0, :]
    unique_counts = data[1, :]

    # NOTE: the counts tensor is passed positionally here, which is what
    # raises the "is not callable" ValueError quoted below. The working form
    # is pyro.poutine.scale(scale=unique_counts).
    with plate('observe_data'), pyro.poutine.scale(unique_counts):
        pyro.sample("obs", dist.Poisson(lam), obs=unique_values)

# (unchanged code omitted by the poster)
...
# Same training loop as before, now fed the stacked values/counts tensor.
for step in range(1000):
    svi.step(unique_data)

But I get the following error:

  File "poisson.py", line 38, in model
    with plate('observe_data'), pyro.poutine.scale(unique_counts):
  File "/Users/jacobcvt12/pyro/venv/lib/python3.7/site-packages/pyro/poutine/handlers.py", line 105, in handler
    "{} is not callable, did you mean to pass it as a keyword arg?".format(fn))
ValueError: tensor([7.0000e+00, 4.7000e+01, 2.2600e+02, 7.8500e+02, 1.8810e+03, 3.7430e+03,
        6.3200e+03, 9.0580e+03, 1.1131e+04, 1.2498e+04, 1.2551e+04, 1.1566e+04,
        9.6870e+03, 7.1540e+03, 5.0760e+03, 3.4680e+03, 2.1590e+03, 1.1980e+03,
        7.4900e+02, 3.4400e+02, 1.8600e+02, 8.7000e+01, 5.1000e+01, 1.5000e+01,
        1.0000e+01, 2.0000e+00, 1.0000e+00]) is not callable, did you mean to pass it as a keyword arg?

It looks like I'm not passing the data correctly. Is there a better way to pass to poutine.scale?

Reading through the documentation, I just needed to change pyro.poutine.scale(unique_counts) to pyro.poutine.scale(scale=unique_counts). Thanks again!