Hello,
I am trying to make a Pyro neural network model that processes multiple-choice questions. Each question has 4 options, and hence each multiple-choice question represents a classification task with 4 possible class outcomes.
I have shown below the portion of my code that is used to convert the original frequentist neural network model to a Bayesian Pyro model and to make predictions after training. Could someone look at my code and see if it is OK? In particular, I am wondering if the forward
function under the class MyModel
is correctly specified — especially using y = None
as one of the parameters, as well as the PyroSample
statement. Thank you.
# Load the original frequentist (point-estimate) model.
model = RobertaForMultipleChoice.from_pretrained('roberta-large')

# Convert `model` in place so its submodules become PyroModules.
module.to_pyro_module_(model)

# Add a dummy parameter so `model.roberta` keeps at least one concrete
# nn.Parameter after the real weights are replaced by PyroSample sites
# below (preserves dtype/device bookkeeping).
model.roberta._dummy_param = nn.Parameter(
    torch.tensor(0.).to(dtype=model.dtype, device=model.device)
)

# Replace every remaining parameter with a standard-normal prior of the
# matching shape; to_event(value.dim()) declares every dimension as an
# event dimension, so each weight tensor is one multivariate sample site.
# list(...) snapshots the parameters because setattr mutates the module
# while we iterate.
for m in model.modules():
    for name, value in list(m.named_parameters(recurse=False)):
        if name != "_dummy_param":
            setattr(m, name, module.PyroSample(
                prior=dist.Normal(0, 1)
                          .expand(value.shape)
                          .to_event(value.dim())))
# define likelihood function for our Bayesian layer.
# Likelihood wrapper for the Bayesian layer.
class MyModel(PyroModule):
    """Wrap the converted Pyro model and attach a likelihood over the
    4 answer options at the sample site "y"."""

    def __init__(self, model, name=""):
        # super().__init__ must run FIRST: PyroModule.__init__ sets up
        # _pyro_name / _pyro_context itself.  The original hand-assigned
        # those attributes beforehand, stored an exhausted parameters()
        # generator in _pyro_params, and referenced the undefined name
        # `modell` (NameError) -- all removed here.
        super().__init__(name)
        # Register the network as a submodule so its sample sites are
        # visible to Pyro's tracing machinery.
        self.model = model

    def forward(self, model, input, yLabel=None, y=None):
        # `model(input)` is assumed to return a length-4 tensor of
        # classification probabilities, one per answer option --
        # TODO(review): confirm it is post-softmax, not raw logits.
        softmax_tensor = model(input)
        # Multinomial(1, probs) is a one-hot categorical: with
        # obs=yLabel it conditions on the observed label during
        # training; with yLabel=None it samples a prediction (the
        # Predictive use case).  The unused `y` parameter is kept only
        # for backward-compatible call signatures.
        return pyro.sample("y",
                           dist.Multinomial(1, probs=softmax_tensor),
                           obs=yLabel)
### ERROR OCCURS HERE
my_model = MyModel(model)

# Guide: mean-field (diagonal) normal over every latent site except the
# likelihood site "y", which must be hidden from the guide.
guide = guides.AutoDiagonalNormal(poutine.block(my_model, hide=['y']))

# Optimization setup.
# NOTE(review): the original referenced an undefined name `optimizer_3`
# (NameError).  pyro.optim.StepLR also expects the *torch optimizer
# class* under 'optimizer' (not an instance) plus a 'step_size' for the
# underlying torch.optim.lr_scheduler.StepLR -- verify these values.
optimizer = Adam({"lr": 5.2e-6})
scheduler = pyro.optim.StepLR({'optimizer': torch.optim.Adam,
                               'optim_args': {'lr': 5.2e-6},
                               'step_size': 1})

# max_plate_nesting=0 because the model declares no pyro.plate.
svi = SVI(my_model, guide, optimizer, loss=TraceEnum_ELBO(max_plate_nesting=0))

# Train `my_model`.
train(my_model, svi, num_epoch)

# Posterior-predictive sampling: 100 draws from the fitted guide.
pred_obj = Predictive(my_model, guide=guide, num_samples=100)
predictions = pred_obj.forward(model, input)