I’m trying to follow the tutorial found at: Making Your Neural Network Say “I Don’t Know” — Bayesian NNs using Pyro and PyTorch | by Paras Chopra | Towards Data Science
But for some reason my regression model doesn’t seem to be able to improve in accuracy. It trains but sort of stalls around the first accuracy score.
I’ve put the example code below, where in this case it’s a simple linear regression, and it should work out of the box:
import torch
import numpy as np
from torchvision import transforms
import random
import pyro
from pyro.distributions import Normal
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam
class NN(torch.nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(NN, self).__init__()
self.fc1 = torch.nn.Linear(input_size, hidden_size)
self.out = torch.nn.Linear(hidden_size, output_size)
def forward(self, x):
output = self.fc1(x)
return output
net = NN(1, 1, 1)
x = np.random.uniform(0,1,500)
random.shuffle(x)
m = 3
c = 4
y = (m*x) + c
y = y/y.max()
transform = transforms.Compose([transforms.ToTensor()])
x = torch.tensor(x,dtype=torch.float).unsqueeze(-1)
y = torch.tensor(y,dtype=torch.float)
def model(x_data, y_data):
fc1w_prior = Normal(loc=torch.zeros_like(net.fc1.weight), scale=torch.ones_like(net.fc1.weight))
fc1b_prior = Normal(loc=torch.zeros_like(net.fc1.bias), scale=torch.ones_like(net.fc1.bias))
outw_prior = Normal(loc=torch.zeros_like(net.out.weight), scale=torch.ones_like(net.out.weight))
outb_prior = Normal(loc=torch.zeros_like(net.out.bias), scale=torch.ones_like(net.out.bias))
priors = {'fc1.weight': fc1w_prior, 'fc1.bias': fc1b_prior, 'out.weight': outw_prior, 'out.bias': outb_prior}
# lift module parameters to random variables sampled from the priors
lifted_module = pyro.random_module("module", net, priors)
# sample a regressor (which also samples w and b)
lifted_reg_model = lifted_module()
scale = pyro.sample("sigma", Uniform(0., 10.))
lhat = torch.nn.LogSoftmax(lifted_reg_model(x_data))
pyro.sample("obs", pyro.distributions.Normal(lhat, scale), obs=y_data)
softplus = torch.nn.Softplus()
def guide(x_data, y_data):
# First layer weight distribution priors
fc1w_mu = torch.randn_like(net.fc1.weight)
fc1w_sigma = torch.randn_like(net.fc1.weight)
fc1w_mu_param = pyro.param("fc1w_mu", fc1w_mu)
fc1w_sigma_param = softplus(pyro.param("fc1w_sigma", fc1w_sigma))
fc1w_prior = Normal(loc=fc1w_mu_param, scale=fc1w_sigma_param)
# First layer bias distribution priors
fc1b_mu = torch.randn_like(net.fc1.bias)
fc1b_sigma = torch.randn_like(net.fc1.bias)
fc1b_mu_param = pyro.param("fc1b_mu", fc1b_mu)
fc1b_sigma_param = softplus(pyro.param("fc1b_sigma", fc1b_sigma))
fc1b_prior = Normal(loc=fc1b_mu_param, scale=fc1b_sigma_param)
# Output layer weight distribution priors
outw_mu = torch.randn_like(net.out.weight)
outw_sigma = torch.randn_like(net.out.weight)
outw_mu_param = pyro.param("outw_mu", outw_mu)
outw_sigma_param = softplus(pyro.param("outw_sigma", outw_sigma))
outw_prior = Normal(loc=outw_mu_param, scale=outw_sigma_param).independent(1)
# Output layer bias distribution priors
outb_mu = torch.randn_like(net.out.bias)
outb_sigma = torch.randn_like(net.out.bias)
outb_mu_param = pyro.param("outb_mu", outb_mu)
outb_sigma_param = softplus(pyro.param("outb_sigma", outb_sigma))
outb_prior = Normal(loc=outb_mu_param, scale=outb_sigma_param)
priors = {'fc1.weight': fc1w_prior, 'fc1.bias': fc1b_prior, 'out.weight': outw_prior, 'out.bias': outb_prior}
lifted_module = pyro.random_module("module", net, priors)
return lifted_module()
optim = Adam({"lr": 0.01})
svi = SVI(model, guide, optim, loss=Trace_ELBO())
num_iterations = 50
loss = 0
for j in range(num_iterations):
loss = 0
for idx, data in enumerate(x):
# calculate the loss and take a gradient step
loss += svi.step(data, y[idx])
normalizer_train = len(x)
total_epoch_loss_train = loss / normalizer_train
print("Epoch ", j, " Loss ", total_epoch_loss_train)
###############################################################################
num_samples = 10
def predict(x):
sampled_models = [guide(None, None) for _ in range(num_samples)]
yhats = [model(x).data for model in sampled_models]
mean = torch.mean(torch.stack(yhats), 0)
return mean.numpy()
print('Prediction when network is forced to predict')
correct = 0
total = 0
predictions = []
for idx, x_test in enumerate(x):
predicted = predict(x_test)
predictions.append( predict(x_test))
total += 1
correct += np.abs((predicted - y[idx].numpy()))
print("average dist: %.2f " % (correct / total))
predictions = np.hstack(predictions)
print(np.max(predictions), np.min(predictions))
Any help on this matter would be greatly appreciated, thanks in advance!