Hello,
I’m having a hard time running a poisson regression with just one predictor variable. I keep getting a memory error.
Traceback (most recent call last):
File "<ipython-input-15-a0536389f5f9>", line 4, in <module>
elbo = svi.step(x, y)
File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\pyro\infer\svi.py", line 128, in step
loss = self.loss_and_grads(self.model, self.guide, *args, **kwargs)
File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\pyro\infer\trace_elbo.py", line 126, in loss_and_grads
for model_trace, guide_trace in self._get_traces(model, guide, args, kwargs):
File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\pyro\infer\elbo.py", line 170, in _get_traces
yield self._get_trace(model, guide, args, kwargs)
File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\pyro\infer\trace_elbo.py", line 53, in _get_trace
"flat", self.max_plate_nesting, model, guide, args, kwargs)
File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\pyro\infer\enum.py", line 55, in get_importance_trace
model_trace.compute_log_prob()
File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\pyro\poutine\trace_struct.py", line 216, in compute_log_prob
log_p = site["fn"].log_prob(site["value"], *site["args"], **site["kwargs"])
File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\distributions\independent.py", line 88, in log_prob
log_prob = self.base_dist.log_prob(value)
File "C:\Users\JORDAN.HOWELL.GITDIR\AppData\Local\Continuum\anaconda3\envs\torch_env\lib\site-packages\torch\distributions\poisson.py", line 63, in log_prob
return (rate.log() * value) - rate - (value + 1).lgamma()
RuntimeError: [enforce fail at ..\c10\core\CPUAllocator.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 950208322568 bytes. Buy new RAM!
The size of the data is 344,000 rows of 1’ and 0’ for the predictor variable and a long tensor for the y variable.
I basically copied/pasted the code from the regression template example.
def model(x_data, y_data):
a = pyro.sample("mean", dist.Normal(0., 10.))
b = pyro.sample("edit_coeff", dist.Normal(0., 1.))
sigma = pyro.sample("sigma", dist.Uniform(0., 10.))
mean = a + b * x_data
rate = mean.exp()
with pyro.plate("data", len(x)):
pyro.sample("obs", dist.Poisson(rate = rate).independent(1), obs=y_data)
def guide(x_data, y_data):
a_loc = pyro.param('a_mean', torch.tensor(0.))
a_scale = pyro.param('a_std', torch.tensor(1.),
constraint=constraints.positive)
sigma_loc = pyro.param('sigma_loc', torch.tensor(1.),
constraint=constraints.positive)
weights_loc = pyro.param('weights_loc', torch.randn(3))
weights_scale = pyro.param('weights_scale', torch.ones(3),
constraint=constraints.positive)
a = pyro.sample("a", dist.Normal(a_loc, a_scale))
b = pyro.sample("edit_coeff", dist.Normal(weights_loc[0], weights_scale[0]))
sigma = pyro.sample("sigma", dist.Normal(sigma_loc, torch.tensor(0.05)))
mean = a + b * x_data
def summary(samples):
site_stats = {}
for site_name, values in samples.items():
marginal_site = pd.DataFrame(values)
describe = marginal_site.describe(percentiles=[.05, 0.25, 0.5, 0.75, 0.95]).transpose()
site_stats[site_name] = describe[["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
return site_stats
# Utility function to print latent sites' quantile information.
def summary(samples):
site_stats = {}
for site_name, values in samples.items():
marginal_site = pd.DataFrame(values)
describe = marginal_site.describe(percentiles=[.05, 0.25, 0.5, 0.75, 0.95]).transpose()
site_stats[site_name] = describe[["mean", "std", "5%", "25%", "50%", "75%", "95%"]]
return site_stats
svi = SVI(model,
guide,
optim.Adam({"lr": .05}),
loss=Trace_ELBO())
pyro.clear_param_store()
num_iters = 1000
for i in range(num_iters):
elbo = svi.step(x, y)
if i % 500 == 0:
logging.info("Elbo loss: {}".format(elbo))
Should I be doing something else to save on memory? Even my 32GB gpu runs out of memory where I’ve ran pymc3 models bigger than this one fine.