Manually written guide vs auto guide: TypeError: 'dict' object is not callable

I replaced a manually written guide function (which worked fine) with an auto guide created by AutoDiagonalNormal. I then got the error
TypeError: 'dict' object is not callable
and was not able to figure out why.

The full code is here. At a high level,
The model:

def model(x_data, y_data):
  fc1w_prior = Normal(loc=torch.zeros_like(net.fc1.weight), scale=torch.ones_like(net.fc1.weight))
  ....

Manually defined guide:

def guide(x_data, y_data): 
  # First layer weight distribution priors
  fc1w_mu = torch.randn_like(net.fc1.weight)
  fc1w_sigma = torch.randn_like(net.fc1.weight)
  fc1w_mu_param = pyro.param("fc1w_mu", fc1w_mu)
  fc1w_sigma_param = softplus(pyro.param("fc1w_sigma", fc1w_sigma))
  fc1w_prior = Normal(loc=fc1w_mu_param, scale=fc1w_sigma_param)
  ...

Auto guide:

guide = AutoDiagonalNormal(model)

def predict(x):
  sampled_models = [guide(None, None) for _ in range(num_samples)]
  pdb.set_trace()
  yhats = [model(x).data for model in sampled_models]   # <---- where error occurs with **auto guide**
  mean = torch.mean(torch.stack(yhats), 0)
  return np.argmax(mean.numpy(), axis=1)

correct = 0
total = 0
for j, data in enumerate(test_loader):
  images, labels = data
  predicted = predict(images.view(-1,28*28))
  total += labels.size(0)

Why does replacing a manually defined guide with an auto guide cause this error? The full error message is:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-15-67749cc8efa4> in <module>
     12 for j, data in enumerate(test_loader):
     13     images, labels = data
---> 14     predicted = predict(images.view(-1,28*28))
     15     total += labels.size(0)
     16     #correct += (predicted == labels).sum().item()   # corrected, 12/24/2018

<ipython-input-15-67749cc8efa4> in predict(x)
      3     sampled_models = [guide(None, None) for _ in range(num_samples)]
      4     pdb.set_trace()
----> 5     yhats = [model(x).data for model in sampled_models]
      6     mean = torch.mean(torch.stack(yhats), 0)
      7     return np.argmax(mean.numpy(), axis=1)

<ipython-input-15-67749cc8efa4> in <listcomp>(.0)
      3     sampled_models = [guide(None, None) for _ in range(num_samples)]
      4     pdb.set_trace()
----> 5     yhats = [model(x).data for model in sampled_models]
      6     mean = torch.mean(torch.stack(yhats), 0)
      7     return np.argmax(mean.numpy(), axis=1)

TypeError: 'dict' object is not callable

Thanks.


You’ll need to record a trace of the guide rather than rely on its return value, something like

sampled_traces = [poutine.trace(guide).get_trace(None, None)
                  for _ in range(num_samples)]
yhats = [tr.nodes["name_of_node_you_want"]["value"].data
         for tr in sampled_traces]

I’ll try to answer in more detail later, but take a look at poutine.trace and how it’s used in tutorials and examples.
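A minimal sketch of what such a trace contains (assuming the guide defined above): record one execution of the guide and print its sample-site names to find the node you want.

import pyro.poutine as poutine

# record one execution of the guide and list its sample sites and their shapes
tr = poutine.trace(guide).get_trace(None, None)
for name, node in tr.nodes.items():
    if node["type"] == "sample":
        print(name, tuple(node["value"].shape))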

Thanks fritzo!

What do I need to replace name_of_node_you_want with? I tried a few things, such as "module", but got an error. What am I doing wrong? What I need is something equivalent to yhats = [model(x).data for model in sampled_models]. The detailed code is as follows:

The model:

def model(x_data, y_data):

    fc1w_prior = Normal(loc=torch.zeros_like(net.fc1.weight), scale=torch.ones_like(net.fc1.weight))
    fc1b_prior = Normal(loc=torch.zeros_like(net.fc1.bias), scale=torch.ones_like(net.fc1.bias))
    
    outw_prior = Normal(loc=torch.zeros_like(net.out.weight), scale=torch.ones_like(net.out.weight))
    outb_prior = Normal(loc=torch.zeros_like(net.out.bias), scale=torch.ones_like(net.out.bias))
    
    priors = {'fc1.weight': fc1w_prior, 'fc1.bias': fc1b_prior,  'out.weight': outw_prior, 'out.bias': outb_prior}
    # lift module parameters to random variables sampled from the priors
    lifted_module = pyro.random_module("module", net, priors)
    # sample a regressor (which also samples w and b)
    lifted_reg_model = lifted_module()
    
    lhat = log_softmax(lifted_reg_model(x_data))
    
    pyro.sample("obs", Categorical(logits=lhat), obs=y_data)

The guide:

from pyro.contrib.autoguide import AutoDiagonalNormal
guide = AutoDiagonalNormal(model)

The SVI:

optim = Adam({"lr": 0.01})
svi = SVI(model, guide, optim, loss=Trace_ELBO())

Optimization:

num_iterations = 5
loss = 0

for j in range(num_iterations):
    loss = 0
    for batch_id, data in enumerate(train_loader):
        # calculate the loss and take a gradient step
        loss += svi.step(data[0].view(-1,28*28), data[1])
    normalizer_train = len(train_loader.dataset)
    total_epoch_loss_train = loss / normalizer_train
    
    print("Epoch ", j, " Loss ", total_epoch_loss_train)

Do prediction:

import pyro.poutine as poutine
num_samples = 10
def predict(x):
    sampled_traces = [poutine.trace(guide).get_trace(None, None) for _ in range(num_samples)]
    yhats = [tr.nodes["module"]["value"].data for tr in sampled_traces]   # <---- where error, KeyError: 'module', occurs
    mean = torch.mean(torch.stack(yhats), 0)
    return np.argmax(mean.numpy(), axis=1)

correct = 0
total = 0
for j, data in enumerate(test_loader):
    images, labels = data
    predicted = predict(images.view(-1,28*28))
    total += labels.size(0)
    correct += (predicted == labels.cpu().numpy()).sum().item()
print("accuracy: %d %%" % (100 * correct / total))

In this case "name_of_node" refers to the name of the random variable or param of interest, e.g. "fc1w_mu" in your manually defined guide above. However, your nn output yhat is not being registered in Pyro. If you are using pyro.random_module or pyro.module, you should just return the nn from your guide and run it forward on data to generate a predictive sample, as in the snippet and sketch below.

def guide(x_data, y_data):
    ...
    lifted_module = pyro.random_module(...)
    return lifted_module()

nn = guide(None, None)
yhat = nn(x_data)
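For example, assuming a manual guide that returns the lifted module as above, a predictive mean could be formed roughly like this (a sketch, not code from the thread):

import numpy as np
import torch

num_samples = 10  # number of posterior weight samples to average over

def predict(x_data):
    # each guide call draws a fresh network whose weights are sampled from q(w)
    sampled_nets = [guide(None, None) for _ in range(num_samples)]
    # run every sampled network forward and average the outputs
    yhats = [sampled_net(x_data).detach() for sampled_net in sampled_nets]
    mean = torch.mean(torch.stack(yhats), 0)
    return np.argmax(mean.numpy(), axis=1)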

@FunnyBear The usual pattern to define a predict function in Pyro is to trace the guide and replay the model against it:

def predict(*args, **kwargs):
    tr = poutine.trace(guide).get_trace(*args, **kwargs)
    return poutine.replay(model, guide_trace=tr)(*args, **kwargs)

This pattern will have the same return type as your model. If you want to access a random module, I’d suggest returning it from your model. To avoid observation statements, you could refactor your model into two parts: the part up to pyro.random_module (call this part latent_model) and a part that wraps latent_model and adds some observe statements (call this the full model):

def latent_model():
   ...
   lifted_module = pyro.random_module("module", net, priors)
   return lifted_module()

def model(x_data, y_data):
    lifted_reg_model = latent_model()
    lhat = log_softmax(lifted_reg_model(x_data))
    pyro.sample("obs", Categorical(logits=lhat), obs=y_data)

Then you can predict with either a manual guide or an autoguide using just the latent_model:

def predict(*args, **kwargs):
    tr = poutine.trace(guide).get_trace(*args, **kwargs)
    lifted_reg_model = poutine.replay(latent_model, guide_trace=tr)()
    ...prediction stuff...

That helps. But I found the performance was worse than with the manual guide: the manual guide gives an accuracy of 90% with ELBO loss 86, while the auto guide gives only 78% with ELBO loss 93. What could cause the difference in performance? Since I am sampling the random weights from latent_model, which is part of the model rather than the guide, does that mean the weights aren't drawn from the optimized variational posterior q(w)? (A quick check of this is sketched below.)
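One way to check this (a minimal sketch, using guide and latent_model as defined in the code below) is to compare the values in a guide trace with those in a replayed latent_model trace; replay should force the lifted weight sites to take exactly the guide's sampled values:

import torch
import pyro.poutine as poutine

# trace the guide once, then replay latent_model against that trace
guide_tr = poutine.trace(guide).get_trace(None, None)
model_tr = poutine.trace(poutine.replay(latent_model, trace=guide_tr)).get_trace()

# each lifted weight site in the replayed latent_model should carry
# exactly the value the guide sampled for it
for name, node in model_tr.nodes.items():
    if node["type"] == "sample" and name in guide_tr.nodes:
        assert torch.equal(node["value"], guide_tr.nodes[name]["value"])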

I used the suggested two-part approach to define the model. The prediction is done as follows (note that trace= is used instead of guide_trace=, which raised an error):

        lifted_reg_model = poutine.replay(latent_model, trace=tr)()
        yhats.append(log_softmax(lifted_reg_model(xdata)))

Below is the code:

def latent_model():
     fc1w_prior = Normal(loc=torch.zeros_like(net.fc1.weight),   scale=torch.ones_like(net.fc1.weight))
     fc1b_prior = Normal(loc=torch.zeros_like(net.fc1.bias), scale=torch.ones_like(net.fc1.bias))
     outw_prior = Normal(loc=torch.zeros_like(net.out.weight), scale=torch.ones_like(net.out.weight))
     outb_prior = Normal(loc=torch.zeros_like(net.out.bias), scale=torch.ones_like(net.out.bias)) 
     priors = {'fc1.weight': fc1w_prior, 'fc1.bias': fc1b_prior,  'out.weight': outw_prior, 'out.bias': outb_prior}
     # lift module parameters to random variables sampled from the priors
     lifted_module = pyro.random_module("module", net, priors)
     return lifted_module()

def model(x_data, y_data):
    # sample a regressor (which also samples w and b)
    lifted_reg_model = latent_model()
    lhat = log_softmax(lifted_reg_model(x_data))
    pyro.sample("obs", Categorical(logits=lhat), obs=y_data) 

optim = Adam({"lr": 0.01})
svi = SVI(model, guide, optim, loss=Trace_ELBO())

num_samples = 10

def predict(xdata):
    # draw one guide trace, i.e. one set of weights sampled from q(w)
    tr = poutine.trace(guide).get_trace(xdata, None)
    yhats = []
    for i in range(num_samples):
        # replay latent_model against the guide trace to rebuild the sampled network
        lifted_reg_model = poutine.replay(latent_model, trace=tr)()
        yhats.append(log_softmax(lifted_reg_model(xdata)))
    mean = torch.mean(torch.stack(yhats), 0)
    return np.argmax(mean.detach().numpy(), axis=1)

print('Prediction when network is forced to predict')
correct = 0
total = 0
for j, data in enumerate(test_loader):
    images, labels = data
    predicted = predict(images.view(-1,28*28))
    total += labels.size(0)
    correct += (predicted == labels.cpu().numpy()).sum().item()
    
print("accuracy: %d %%" % (100 * correct / total))