- What tutorial are you running?

Bayesian Regression - What version of Pyro are you using?

1.2.1 - Please link or paste relevant code, and steps to reproduce.

Dear Pyro-Team,

I am trying to perform softmax regression on MNIST using mini-batch SGD with a mean-field variational density and multi-variate priors with diagonal covariance over the weight matrix and bias vector. The loss reduces, however accuracy over a training batch is always zero.

The respective code:

```
class SoftmaxRegression(PyroModule):
def __init__(self, in_features, out_features):
super().__init__()
self.linear = PyroModule[nn.Linear](in_features, out_features)
# Multi-variate Normal priors for weight matrix and bias vector
self.linear.weight = PyroSample(
prior=dist.Normal(0., 1.).expand([out_features, in_features]).to_event(self.linear.weight.dim())
)
self.linear.bias = PyroSample(
prior=dist.Normal(0., 10.).expand([out_features]).to_event(self.linear.bias.dim())
)
def forward(self, x, y=None):
# Forward method defines the likelihood function of the statistical model with mean f(x)
mean = self.linear(x)
# Define Categorical likelihood over i.i.d. data set, i.e. for each data point a separate likelihood
with pyro.plate('data', size=x.shape[0]):
likelihood = pyro.sample('likelihood', dist.Categorical(logits=mean), obs=y)
return mean
def train():
pyro.clear_param_store()
num_epochs = 10
train_loader, test_loader, val_loader = get_data_loaders()
data_generator = inf_generator(train_loader)
batches_per_epoch = len(train_loader)
model = SoftmaxRegression(28*28, 10)
variational_density = AutoDiagonalNormal(model=model)
optimizer = optim.Adam({'lr': 1e-2})
svi = SVI(model=model, guide=variational_density, optim=optimizer, loss=Trace_ELBO())
for itr in range(batches_per_epoch * num_epochs):
x, y = data_generator.__next__()
x = x.view(-1, 28*28)
loss = svi.step(x, y)
if itr % batches_per_epoch == 0:
with torch.no_grad():
posterior_predictive = Predictive(model=model, guide=variational_density, num_samples=50,
return_sites=('likelihood', '_RETURN'))
predictive_samples = posterior_predictive(x)
predictive_mean = torch.mean(predictive_samples['_RETURN'], dim=0)
y = one_hot(np.array(y.numpy()), 10)
target = np.argmax(y, axis=1)
pred = np.argmax(predictive_mean, axis=1)
acc = np.sum(pred == target) / 64.
print('Training Batch Accuracy: {} | Loss: {}'.format(acc, loss / len(train_loader)))
```

Thank you for your help.