I have a dataset with a large number of observations, but many of the values are repeated. For example, if X ~ Poisson(10) and 100,000 observations are drawn from X, there will only be about 25 distinct values. Instead of passing over a vector of length 100,000 to calculate the likelihood, is there a way in Pyro to evaluate the log-likelihood once per distinct value and multiply it by the number of times that value occurs? The model is below:
from numpy.random import poisson, seed
from numpy import unique
from torch import tensor
from torch.distributions.constraints import positive
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam
from pyro import distributions as dist
from pyro import plate
import pyro
# Turn on Pyro's validation checks and start from an empty parameter store.
pyro.enable_validation(True)
pyro.clear_param_store()

# Seed both Pyro and NumPy so the simulated data and the fit are repeatable.
pyro.set_rng_seed(1)
seed(1)

# Simulate n draws from a Poisson distribution with rate lam.
n = 1000000
lam = 10
data = poisson(lam, n)

# Collapse the sample into its distinct values and how often each one occurs.
values, counts = unique(data, axis=0, return_counts=True)

# Convert the NumPy arrays to float tensors.
data = tensor(data).float()
values = tensor(values).float()
counts = tensor(counts).float()

# Print the sample mean of the simulated data.
print(data.mean())
def model(data, counts=None):
    """Poisson observation model with an Exponential(1/10) prior on the rate.

    Args:
        data: Tensor of observations. When ``counts`` is given, this may hold
            only the distinct observed values.
        counts: Optional tensor, broadcastable against ``data``, giving how
            many times each entry of ``data`` was observed. Every entry must
            be strictly positive. When omitted, each entry of ``data`` is
            counted once, which is the original behaviour.
    """
    # priors
    lam = pyro.sample("lam", dist.Exponential(1. / 10))

    # Observing a value k times contributes k * log p(value | lam) to the
    # log-likelihood, so scaling the site's log-probability by the
    # multiplicities is equivalent to passing every repeated observation.
    # A scale of 1.0 leaves the log-probability unchanged.
    weights = 1.0 if counts is None else counts
    with plate('observe_data'), pyro.poutine.scale(scale=weights):
        pyro.sample("obs", dist.Poisson(lam), obs=data)


def guide(data, counts=None):
    """Variational guide: an Exponential distribution over ``lam``.

    The rate of the Exponential is the learnable parameter ``rate_q``,
    constrained to be positive and initialised at 1/10.

    Args:
        data: Unused; accepted because the guide is called with the same
            arguments as ``model``.
        counts: Unused; accepted for the same reason as ``data``.
    """
    rate_q = pyro.param("rate_q", tensor(1. / 10), constraint=positive)
    pyro.sample("lam", dist.Exponential(rate_q))
# Adam settings: learning rate and the two beta coefficients.
adam_params = dict(lr=1.0, betas=(0.90, 0.999))
optimizer = Adam(adam_params)

# Stochastic variational inference using the Trace_ELBO loss.
svi = SVI(model, guide, optimizer, loss=Trace_ELBO())

# Take 1000 optimisation steps on the data.
for step in range(1000):
    svi.step(data)

# Print the fitted rate of the Exponential guide, then its reciprocal.
fitted_rate_q = pyro.param("rate_q").item()
print(fitted_rate_q)
print(1. / fitted_rate_q)
It would be nice to pass `values` and `counts` instead of `data`.