Dear all,

I am new to the Pyro framework and I am trying to learn how to use it for Bayesian inference. I have tried a few examples, but I am now stuck on the following toy example on multiple linear regression:

```
import logging
import numpy as np
import torch
import torch.distributions.constraints as constraints
import pyro
import pyro.infer
import pyro.distributions as dist
from pyro.infer import MCMC, NUTS, Predictive, Trace_ELBO, SVI
pyro.set_rng_seed(101)
logging.basicConfig(format='%(message)s', level=logging.INFO)
intercept_0 = 4
beta_0 = [2, 3] # the _0 represents the true parameter, not to be confused with the intercept
sigma_0 = 1.5
n = 20
NUM_ITERS = 2000
x1 = np.exp(np.random.normal(loc=3, scale=2, size=[n, 1]))
x2 = np.random.binomial(n=1, p=0.4, size=[n, 1])
x = np.hstack((x1, x2))
y = intercept_0 + x@np.array(beta_0) + np.random.normal(loc=0, scale=sigma_0, size=[n,])
x = torch.Tensor(x)
y = torch.Tensor(y)
def regression(x, y):
intercept = pyro.sample("intercept", dist.Normal(loc=0, scale=10))
beta = []
for i in range(x.shape[1]):
beta.append(pyro.sample(f"beta{i}", dist.Normal(loc=0, scale=10)))
sigma = pyro.sample("sigma", dist.InverseGamma(concentration=4, rate=2))
mean = intercept + x.matmul(torch.Tensor(beta))
return pyro.sample("y", dist.Normal(loc=mean, scale=sigma), obs=y)
"""
# code below is too slow... using a guide
# trying to sample from full conditional posterior
nuts_kernel = NUTS(model=regression)
mcmc = MCMC(kernel=nuts_kernel,
num_samples=10000,
num_chains=1,
warmup_steps=1000)
posterior = mcmc.run(x=x, y=y)
"""
def guide(x, y):
intercept_loc = pyro.param("intercept_loc", torch.tensor(0.))
intercept_scale = pyro.param("intercept_scale", torch.tensor(10.), constraint=constraints.positive)
intercept_g = pyro.sample("intercept_g", dist.Normal(loc=intercept_loc, scale=intercept_scale))
beta_g = []
beta_loc = {}
beta_scale = {}
for i in range(x.shape[1]):
beta_loc[i] = pyro.param(f"beta{i}_loc", torch.tensor(0.))
beta_scale[i] = pyro.param(f"beta{i}_scale", torch.tensor(10.), constraint=constraints.positive)
beta_g.append(pyro.sample(f"beta{i}_g", dist.Normal(loc=beta_loc[i], scale=beta_scale[i])))
sigma_concentration = pyro.param("sigma_concentration", torch.tensor(4.), constraint=constraints.positive)
sigma_rate = pyro.param("sigma_rate", torch.tensor(2.), constraint=constraints.positive)
sigma_g = pyro.sample("sigma_g", dist.InverseGamma(concentration=sigma_concentration,
rate=sigma_rate))
mean_g = intercept_g + x.matmul(torch.Tensor(beta_g))
adam_params = {"lr": 0.005, "betas": (0.95, 0.999)}
optimizer = pyro.optim.Adam(adam_params)
pyro.clear_param_store()
svi = SVI(regression,
guide,
pyro.optim.Adam({"lr": .05}),
loss=Trace_ELBO())
for i in range(NUM_ITERS):
elbo = svi.step(x, y)
logging.info("Elbo loss: {}".format(np.log(elbo)))
```

I have two questions:

- Why is the MCMC (in the commented out section) for full conditional posterior sampling really slow? Is this normal, or is there something wrong/highly inefficient in my code?
- I am getting nans when performing the SVI optimization procedure for variational inference. I initially thought that it was because I did not add constraints to my scale parameters, but I still get that error when I do. What is wrong with this code?

Apologies in advance if the solution to my questions are very simple.

Thank you very much,

Larry