I am trying to extend the Bayesian Linear Regression tutorial to accommodate more features (and eventually more layers).
In the tutorial, predictions are made by sampling the model several times via guide(None) and averaging the results. However, doing this in my implementation always returns the same prediction for the same input points. Since we are sampling from the (approximate) posterior, shouldn't there at least be some random jitter in the predictions? Otherwise, what is the point of sampling?
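To make my expectation concrete, here is the kind of sanity check I have in mind (a rough sketch using the guide and X_test defined in the code below), where I would expect two independent draws from the guide to give at least slightly different outputs:

# two independent draws from the guide should correspond to two different
# weight/bias samples, so their predictions on the same inputs should differ
reg_a = guide(None, None)
reg_b = guide(None, None)
print(torch.max(torch.abs(reg_a(X_test) - reg_b(X_test))).item())  # I expect this to be > 0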
Here is my implementation:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

import pyro
from pyro.distributions import Normal
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam

# build_linear_dataset is the helper from the Bayesian regression tutorial

def model(X, y):
    # create a wide normal prior over the bias
    bias_loc = torch.zeros(1)
    bias_scale = 100 * torch.ones(1)
    b_prior = Normal(bias_loc, bias_scale).independent(1)
    # create wide normal priors over the weights
    loc = torch.zeros(1, X.shape[1])
    scale = 100 * torch.ones(1, X.shape[1])
    w_prior = Normal(loc, scale).independent(2)
    # place the priors into a dictionary keyed by parameter name
    priors = {'linear.weight': w_prior, 'linear.bias': b_prior}
    # lift module parameters to random variables sampled from the priors
    lifted_module = pyro.random_module("module", regression_model, priors)
    # sample a regressor (which also samples w and b)
    lifted_reg_model = lifted_module()
    with pyro.iarange("map", X.shape[0]):
        # run the regressor forward conditioned on data
        prediction_mean = lifted_reg_model(X).squeeze(-1)
        # condition on the observed data with unit observation noise
        pyro.sample("obs",
                    Normal(prediction_mean, 1.0),
                    obs=y.squeeze())
def guide(X, y):
    # get the shape of the data (N = samples, p = features)
    if X is None:
        p = P
    else:
        N, p = X.size()
    softplus = torch.nn.Softplus()
    # define our variational parameters
    w_loc = torch.randn(1, p, requires_grad=True)
    b_loc = torch.randn(1, requires_grad=True)
    # note that we initialize our scales to be pretty narrow
    w_log_sig = torch.tensor(-3.0 + 0.05 * torch.randn(1, p), requires_grad=True)
    b_log_sig = torch.tensor(-3.0 + 0.05 * torch.randn(1), requires_grad=True)
    # register learnable params in the param store
    mw_param = pyro.param("guide_mean_weight", w_loc)
    sw_param = softplus(pyro.param("guide_log_scale_weight", w_log_sig))
    mb_param = pyro.param("guide_mean_bias", b_loc)
    sb_param = softplus(pyro.param("guide_log_scale_bias", b_log_sig))
    # guide distributions for w and b
    w_dist = Normal(mw_param, sw_param).independent(2)
    b_dist = Normal(mb_param, sb_param).independent(1)
    dists = {'linear.weight': w_dist, 'linear.bias': b_dist}
    # overload the parameters in the module with random samples
    # from the guide distributions
    lifted_module = pyro.random_module("module", regression_model, dists)
    # sample a regressor (which also samples w and b)
    return lifted_module()
P = 5
LR = 0.005
H = 1
N = 10000
num_iterations = 10000
pyro.clear_param_store()
optim = Adam({"lr": LR})
svi = SVI(model, guide, optim, loss=Trace_ELBO())
regression_model = torch.nn.Sequential(torch.nn.Linear(P, 1))
def main():
    losses = []
    pyro.clear_param_store()
    X, y = build_linear_dataset(N, P)
    for j in range(num_iterations):
        # calculate the loss and take a gradient step
        loss = svi.step(X, y)
        losses.append(loss / float(N))
        if (j + 1) % 100 == 0:
            print("[iteration %04d] loss: %.4f" % (j + 1, loss / float(N)))
    # plot the losses
    plt.plot(losses)
    plt.title("ELBO")
    plt.xlabel("step")
    plt.ylabel("loss")

if __name__ == '__main__':
    main()
N = 200
P = 5
X_test, y_test = build_linear_dataset(N, P)
loss = nn.MSELoss()
y_preds = torch.zeros(N, 1)
for i in range(10):
    # the guide does not require the data
    sampled_reg_model = guide(None, None)
    y_preds = y_preds + sampled_reg_model(X_test)
# take the average of the predictions
y_preds = y_preds / 10
print("Validation Loss: ", loss(y_preds, y_test).item())