I’m now trying to add the calculation of a loss metric on a validation set during training.
The model definition is essentially the same as the one we’ve already discussed throughout my posts. I’ve added an `evaluate`
function. For now, I’m keeping it simple by just calculating the MAE based on the average of the posterior predictive samples.
def evaluate(model, criterion, val_dataloader_iter, validation_steps, device, metric_agg_fn=None):
    """Compute the MAE of the posterior-predictive mean over validation minibatches.

    For each of ``validation_steps`` batches pulled from ``val_dataloader_iter``,
    100 samples of the ``obs`` site are drawn with ``Predictive``, averaged over
    the sample dimension, and compared with the labels via mean absolute error.

    Args:
        model: Model to evaluate; it is switched to eval mode.
        criterion: Not used by this function; kept so existing callers still work.
        val_dataloader_iter: Iterator yielding batches indexable by column name.
        validation_steps: Number of minibatches to evaluate (must be non-zero).
        device: Device the inputs and labels are moved to.
        metric_agg_fn: Accepted for interface compatibility (intended for
            aggregating metrics across workers); not applied here.

    Returns:
        float: The per-batch MAE averaged over ``validation_steps`` batches.
    """
    model.eval()  # Set model to evaluate mode
    # NOTE: `guide`, `x_feat` and `y_name` are read from module scope.
    predictive_obs = Predictive(model, guide=guide, num_samples=100, return_sites=['obs'])

    running_loss = 0.0
    # Evaluation needs no gradients; disabling them avoids building autograd graphs.
    with torch.no_grad():
        for _ in range(validation_steps):
            pd_batch = next(val_dataloader_iter)
            pd_batch['features'] = torch.transpose(
                torch.stack([pd_batch[x] for x in x_feat]), 0, 1
            ).double()
            inputs = pd_batch['features'].to(device)
            # Cast labels to double, matching what the training loop does.
            labels = pd_batch[y_name].double().to(device)

            samples_obs = predictive_obs(inputs)
            # Average over the sample dimension (dim 0) to get a point prediction.
            # NOTE(review): assumes the averaged `obs` samples and `labels` have
            # matching shapes; otherwise broadcasting would distort the error.
            predicted_mean = torch.mean(samples_obs['obs'], dim=0)

            # torch.abs keeps the computation in torch (np.absolute breaks for
            # CUDA tensors); .item() makes the accumulator a plain Python float.
            running_loss += torch.abs(predicted_mean - labels).mean().item()

    # The losses are averaged across observations for each minibatch.
    epoch_loss = running_loss / validation_steps
    print('Validation Loss: {:.4f} '.format(epoch_loss))
    return epoch_loss
def train_one_epoch_SVI(svi,
                        train_dataloader_iter, steps_per_epoch, epoch,
                        device):
    """Run one epoch of SVI updates and return the mean loss per step.

    Pulls ``steps_per_epoch`` batches from ``train_dataloader_iter``, builds a
    double-precision feature matrix from the columns named in the module-level
    ``x_feat``, and calls ``svi.step`` with the features and the ``y_name``
    labels. On every 10th epoch (1-based) the averaged loss is printed.

    Args:
        svi: Object exposing ``step(inputs, labels)`` that returns the step loss.
        train_dataloader_iter: Iterator yielding batches indexable by column name.
        steps_per_epoch: Number of batches consumed in this epoch.
        epoch: Zero-based epoch index, used only to decide whether to print.
        device: Device the inputs and labels are moved to.

    Returns:
        The sum of the step losses divided by ``steps_per_epoch``.
    """
    total_loss = 0.0

    for _ in range(steps_per_epoch):
        batch = next(train_dataloader_iter)

        # Stack the feature columns, then swap the first two axes.
        feature_columns = [batch[name] for name in x_feat]
        batch['features'] = torch.stack(feature_columns).transpose(0, 1).double()

        inputs = batch['features'].to(device)
        labels = batch[y_name].double().to(device)

        total_loss += svi.step(inputs, labels)

    epoch_loss = total_loss / steps_per_epoch

    is_reporting_epoch = (epoch + 1) % 10 == 0
    if is_reporting_epoch:
        print('Train Loss: {:.4f}'.format(epoch_loss))

    return epoch_loss
def train_and_evaluate_SVI(svi, lr=0.001):
    """Train with SVI for ``NUM_EPOCHS`` epochs, validating on every 10th epoch.

    Opens the training and validation dataloaders from the module-level
    ``data`` converters, runs ``train_one_epoch_SVI`` once per epoch, and on
    every 10th epoch (1-based) prints a header and calls ``evaluate``.

    Args:
        svi: SVI object passed through to ``train_one_epoch_SVI``.
        lr: Not used by this function; kept so existing callers still work.

    Returns:
        None.
    """
    # Passed to `evaluate` to satisfy its signature.
    criterion = Trace_ELBO()

    with data.converter_train.make_torch_dataloader(batch_size=BATCH_SIZE) as train_dataloader, \
            data.converter_val.make_torch_dataloader(batch_size=BATCH_SIZE) as val_dataloader:
        train_dataloader_iter = iter(train_dataloader)
        # Guard against zero steps when the training set is smaller than one
        # batch; otherwise the per-epoch loss average divides by zero. This
        # mirrors the guard already applied to validation_steps.
        steps_per_epoch = max(1, len(data.converter_train) // BATCH_SIZE)

        val_dataloader_iter = iter(val_dataloader)
        validation_steps = max(1, len(data.converter_val) // BATCH_SIZE)

        for epoch in range(NUM_EPOCHS):
            # Report and validate only on every 10th epoch to keep output short.
            is_reporting_epoch = (epoch + 1) % 10 == 0

            if is_reporting_epoch:
                print('Epoch {}/{}'.format(epoch + 1, NUM_EPOCHS))
                print('-' * 10)

            train_one_epoch_SVI(svi, train_dataloader_iter, steps_per_epoch, epoch, device)

            if is_reporting_epoch:
                evaluate(model, criterion, val_dataloader_iter, validation_steps, device)
However, while training, the `evaluate` function raises an error related to the distribution parameters:
`ValueError: The parameter rate has invalid values`.
The error occurs in my `evaluate` function, not in the training function.
My questions are then: is `Predictive` the right function to use?
And, if so, am I missing a function parameter that constrains the model's fitted parameters for evaluation?