Gamma distribution modeling concentration and rate - Predictive error

I’m working on modeling a gamma distribution where both parameters are functions of the data. I’m currently using just a simple two-feature model, after generating random data following this example.

I am mostly following the Bayesian Regression - Introduction (Part 1) tutorial, except that I changed the distribution and model both gamma parameters as functions of the data. First question: is the following model definition correct for that?

class BayesianRegression_LogGamma_shape(PyroModule):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.linear = PyroModule[nn.Linear](in_features, out_features)
        self.linear.weight = PyroSample(dist.Normal(0., 2.).expand([out_features, in_features]).to_event(2))
        self.linear.bias = PyroSample(dist.Normal(0., 2.).expand([out_features]).to_event(1))
        self.linear_shape = PyroModule[nn.Linear](in_features, out_features)
        self.linear_shape.weight = PyroSample(dist.Normal(0., 2.).expand([out_features, in_features]).to_event(2))
        self.linear_shape.bias = PyroSample(dist.Normal(0., 2.).expand([out_features]).to_event(1))

    def forward(self, x, y=None):
        mu = self.linear(x).squeeze(-1).exp()
        shape = self.linear_shape(x).squeeze(-1).exp()

        with pyro.plate("data", x.shape[0]):
            obs = pyro.sample("obs", dist.Gamma(concentration=shape, rate=shape / mu), obs=y)
        return mu, shape
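For reference on this parameterization: a Gamma(concentration, rate) has mean concentration / rate and variance concentration / rate**2, so with concentration = shape and rate = shape / mu

    E[y]   = shape / (shape / mu)    = mu
    Var[y] = shape / (shape / mu)**2 = mu**2 / shape

i.e. the first linear layer models the mean and the second one controls the dispersion around it.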

The Trace_ELBO loss looks reasonable compared to a simpler version, so I think it’s working correctly, and the model trains without error. However, I get an error when trying to run the Predictive class.

def summary(samples):
    site_stats = {}
    for k, v in samples.items():
        site_stats[k] = {
            "mean": torch.mean(v, 0),
            "std": torch.std(v, 0),
            "median": torch.median(v, 0).values,
            "5%": v.kthvalue(int(len(v) * 0.05), dim=0)[0],
            "95%": v.kthvalue(int(len(v) * 0.95), dim=0)[0],
        }
    return site_stats


predictive = Predictive(model, guide=guide, num_samples=1000, return_sites=("linear.weight", "obs", "_RETURN", 'linear.bias', 'mu', 'shape'))

with data.converter_train.make_torch_dataloader(batch_size=BATCH_SIZE*10) as train_dataloader:
    train_dataloader_iter = iter(train_dataloader)
    pd_batch = next(train_dataloader_iter)
    pd_batch['features'] = torch.transpose(torch.stack([pd_batch[x] for x in x_feat]), 0, 1).float()    
    inputs = pd_batch['features'].to(device)
    labels = pd_batch[y_name].to(device)
    samples = predictive(inputs)
    pred_summary = summary(samples)

The error is “TypeError: expected Tensor as element 0 in argument 0, but got tuple”

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-34-d6e380deacde> in <module>
     22     print(inputs)
     23     labels = pd_batch[y_name].to(device)
---> 24     samples = predictive(inputs)
     25     pred_summary = summary(samples)

C:\ProgramData\Anaconda3\envs\CurvGH_202010\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\predictive.py in forward(self, *args, **kwargs)
    204                                             parallel=self.parallel, model_args=args, model_kwargs=kwargs)
    205         return _predictive(self.model, posterior_samples, self.num_samples, return_sites=return_sites,
--> 206                            parallel=self.parallel, model_args=args, model_kwargs=kwargs)
    207 
    208     def get_samples(self, *args, **kwargs):

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\predictive.py in _predictive(model, posterior_samples, num_samples, return_sites, return_trace, parallel, model_args, model_kwargs)
     91     if not parallel:
     92         return _predictive_sequential(model, posterior_samples, model_args, model_kwargs, num_samples,
---> 93                                       return_site_shapes, return_trace=False)
     94 
     95     trace = poutine.trace(poutine.condition(vectorize(model), reshaped_samples))\

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\predictive.py in _predictive_sequential(model, posterior_samples, model_args, model_kwargs, num_samples, return_site_shapes, return_trace)
     46     else:
     47         return {site: torch.stack([s[site] for s in collected]).reshape(shape)
---> 48                 for site, shape in return_site_shapes.items()}
     49 
     50 

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\predictive.py in <dictcomp>(.0)
     46     else:
     47         return {site: torch.stack([s[site] for s in collected]).reshape(shape)
---> 48                 for site, shape in return_site_shapes.items()}
     49 
     50 

TypeError: expected Tensor as element 0 in argument 0, but got tuple

The forward method was the issue: it needs to return a single tensor containing mu and shape, not a tuple.

def forward(self, x, y=None):
    mu = self.linear(x).squeeze(-1).exp()
    shape = self.linear_shape(x).squeeze(-1).exp()

    with pyro.plate("data", x.shape[0]):
        obs = pyro.sample("obs", dist.Gamma(concentration=shape, rate=shape / mu), obs=y)
    return torch.cat((mu, shape), 0)

Gosh… I had been thinking about this issue for a while. You are right: because you include _RETURN in the return_sites of Predictive, you need to make sure that the return value of the model is a tensor.
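As an alternative to concatenating mu and shape along the batch dimension, you could record them as deterministic sites so that Predictive can return them by name. A rough sketch:

def forward(self, x, y=None):
    mu = pyro.deterministic("mu", self.linear(x).squeeze(-1).exp())
    shape = pyro.deterministic("shape", self.linear_shape(x).squeeze(-1).exp())

    with pyro.plate("data", x.shape[0]):
        obs = pyro.sample("obs", dist.Gamma(concentration=shape, rate=shape / mu), obs=y)
    # stacking on a new trailing dim keeps the two quantities separable:
    # result[..., 0] is mu and result[..., 1] is shape
    return torch.stack((mu, shape), dim=-1)

With those sites in place, the 'mu' and 'shape' entries in your return_sites should be populated as well.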

I am now working on extending this model to allow for zeros in the outcome. I thought dist.ZeroInflatedDistribution would allow for this.

class BayesianRegression_LogGamma_shape_zeroInf(PyroModule):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.linear = PyroModule[nn.Linear](in_features, out_features)
        self.linear.weight = PyroSample(dist.Normal(0., 2.).expand([out_features, in_features]).to_event(2))
        self.linear.bias = PyroSample(dist.Normal(0., 2.).expand([out_features]).to_event(1))
        self.linear_shape = PyroModule[nn.Linear](in_features, out_features)
        self.linear_shape.weight = PyroSample(dist.Normal(0., 2.).expand([out_features, in_features]).to_event(2))
        self.linear_shape.bias = PyroSample(dist.Normal(0., 2.).expand([out_features]).to_event(1))

    def forward(self, x, y=None):
        mu = self.linear(x).squeeze(-1).exp()
        shape = self.linear_shape(x).squeeze(-1).exp()
        gate = pyro.sample("gate", dist.Uniform(.0001, 1.0))

        with pyro.plate("data", x.shape[0]):
            obs = pyro.sample("obs", dist.ZeroInflatedDistribution(dist.Gamma(concentration=shape, rate=shape / mu), gate=gate), obs=y)
        return torch.cat((mu, shape), 0)

However, I’m getting an error: ValueError: The value argument must be within the support. If I add a small positive amount wherever y = 0, it works again, so it’s definitely the zeros causing the error. How can I allow for zeros?

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~\AppData\Roaming\Python\Python37\site-packages\pyro\poutine\trace_struct.py in compute_log_prob(self, site_filter)
215                     try:
--> 216                         log_p = site["fn"].log_prob(site["value"], *site["args"], **site["kwargs"])
217                     except ValueError as e:

~\AppData\Roaming\Python\Python37\site-packages\pyro\distributions\zero_inflated.py in log_prob(self, value)
 64         if self._validate_args:
---> 65             self._validate_sample(value)
 66 

C:\ProgramData\Anaconda3\envs\lib\site-packages\torch\distributions\distribution.py in _validate_sample(self, value)
276         if not support.check(value).all():
--> 277             raise ValueError('The value argument must be within the support')
278 

ValueError: The value argument must be within the support

The above exception was the direct cause of the following exception:

ValueError                                Traceback (most recent call last)
<timed eval> in <module>

<ipython-input-261-05cb1fa637bb> in train_and_evaluate_SVI(svi, lr)
 46             if (epoch + 1) % 100 == 0: print('-' * 10)
 47 
---> 48             train_loss = train_one_epoch_SVI(svi, train_dataloader_iter, steps_per_epoch, epoch, device)
 49 
 50             #val_loss = evaluate(model, criterion, val_dataloader_iter, validation_steps, device)

<ipython-input-261-05cb1fa637bb> in train_one_epoch_SVI(svi, train_dataloader_iter, steps_per_epoch, epoch, device)
 14 
 15         # Track history in training
---> 16         loss = svi.step(inputs, labels)
 17 
 18         # statistics

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\svi.py in step(self, *args, **kwargs)
126         # get loss and compute gradients
127         with poutine.trace(param_only=True) as param_capture:
--> 128             loss = self.loss_and_grads(self.model, self.guide, *args, **kwargs)
129 
130         params = set(site["value"].unconstrained()

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\trace_elbo.py in loss_and_grads(self, model, guide, *args, **kwargs)
129         loss = 0.0
130         # grab a trace from the generator
--> 131         for model_trace, guide_trace in self._get_traces(model, guide, args, kwargs):
132             loss_particle, surrogate_loss_particle = self._differentiable_loss_particle(model_trace, guide_trace)
133             loss += loss_particle / self.num_particles

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\elbo.py in _get_traces(self, model, guide, args, kwargs)
168         else:
169             for i in range(self.num_particles):
--> 170                 yield self._get_trace(model, guide, args, kwargs)

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\trace_elbo.py in _get_trace(self, model, guide, args, kwargs)
 56         """
 57         model_trace, guide_trace = get_importance_trace(
---> 58             "flat", self.max_plate_nesting, model, guide, args, kwargs)
 59         if is_validation_enabled():
 60             check_if_enumerated(guide_trace)

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\enum.py in get_importance_trace(graph_type, max_plate_nesting, model, guide, args, kwargs, detach)
 53     model_trace = prune_subsample_sites(model_trace)
 54 
---> 55     model_trace.compute_log_prob()
 56     guide_trace.compute_score_parts()
 57     if is_validation_enabled():

~\AppData\Roaming\Python\Python37\site-packages\pyro\poutine\trace_struct.py in compute_log_prob(self, site_filter)
219                         shapes = self.format_shapes(last_site=site["name"])
220                         raise ValueError("Error while computing log_prob at site '{}':\n{}\n{}"
--> 221                                          .format(name, exc_value, shapes)).with_traceback(traceback) from e
222                     site["unscaled_log_prob"] = log_p
223                     log_p = scale_and_mask(log_p, site["scale"], site["mask"])

~\AppData\Roaming\Python\Python37\site-packages\pyro\poutine\trace_struct.py in compute_log_prob(self, site_filter)
214                 if "log_prob" not in site:
215                     try:
--> 216                         log_p = site["fn"].log_prob(site["value"], *site["args"], **site["kwargs"])
217                     except ValueError as e:
218                         _, exc_value, traceback = sys.exc_info()

~\AppData\Roaming\Python\Python37\site-packages\pyro\distributions\zero_inflated.py in log_prob(self, value)
 63     def log_prob(self, value):
 64         if self._validate_args:
---> 65             self._validate_sample(value)
 66 
 67         if 'gate' in self.__dict__:

C:\ProgramData\Anaconda3\envs\lib\site-packages\torch\distributions\distribution.py in _validate_sample(self, value)
275         assert support is not None
276         if not support.check(value).all():
--> 277             raise ValueError('The value argument must be within the support')
278 
279     def _get_checked_instance(self, cls, _instance=None):

ValueError: Error while computing log_prob at site 'obs':
The value argument must be within the support
       Trace Shapes:           
        Param Sites:           
       Sample Sites:           
  linear.weight dist      | 1 2
               value      | 1 2
            log_prob      |    
    linear.bias dist      | 1  
               value      | 1  
            log_prob      |    
linear_shape.weight dist      | 1 2
               value      | 1 2
            log_prob      |    
  linear_shape.bias dist      | 1  
               value      | 1  
            log_prob      |    
           gate dist      |    
               value      |    
            log_prob      |    
            obs dist 1028 |    
               value 1028 |
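For what it’s worth, the support check can be reproduced outside the model; a minimal sketch (with validation turned on explicitly):

base = dist.Gamma(torch.tensor(2.0), torch.tensor(1.0))
zi = dist.ZeroInflatedDistribution(base, gate=torch.tensor(0.3), validate_args=True)
zi.log_prob(torch.tensor(0.0))  # ValueError: the zero falls outside the base Gamma's positive support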

My understanding now, after reading the documentation and source code some more, is that the zero-inflated distribution class only works for base distributions whose support already includes zero. I’m now working on writing a custom distribution class.

# imports assumed by this snippet
from numbers import Number
from torch.distributions.utils import broadcast_all

class GammaHurdle(dist.torch_distribution.TorchDistribution):
    
    # theta: probability of a zero
    arg_constraints = {'concentration': torch.distributions.constraints.positive, 
                       'rate': torch.distributions.constraints.positive, 
                       'theta': torch.distributions.constraints.interval(0., 1.)}
    support = torch.distributions.constraints._Interval(0, float("inf"))
    has_rsample = True
    
    def __init__(self, concentration, rate, theta, validate_args=None):
        self.concentration, self.rate, self.theta = broadcast_all(concentration, rate, theta)
        if isinstance(concentration, Number) and isinstance(rate, Number):
            batch_shape = torch.Size()
        else:
            batch_shape = self.concentration.size()
        super(GammaHurdle, self).__init__(batch_shape, validate_args=validate_args)
            
    def log_prob(self, value):
        value = torch.as_tensor(value, dtype=self.rate.dtype, device=self.rate.device)
        if self._validate_args:
            self._validate_sample(value)
        
        return torch.where(value > 0, 
                           (torch.log(1 - self.theta) + self.concentration * torch.log(self.rate) +
                            (self.concentration - 1) * torch.log(value) -
                            self.rate * value - torch.lgamma(self.concentration)), 
                           torch.log(self.theta))
    
    def rsample(self, sample_shape=torch.Size()):
        shape = self._extended_shape(sample_shape)
        mask = torch.rand(shape) < self.theta.expand(shape)        
        value = torch._standard_gamma(self.concentration.expand(shape)) / self.rate.expand(shape)
        value[mask] = 0
        value.detach().clamp_(min=torch.finfo(value.dtype).tiny)  # do not record in autograd graph
        return value
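The log_prob above is meant to implement the usual hurdle density: a point mass of theta at zero and the Gamma density scaled by (1 - theta) on the positive part,

    log p(y) = log(theta)                                                                       if y = 0
    log p(y) = log(1 - theta) + alpha*log(beta) + (alpha - 1)*log(y) - beta*y - lgamma(alpha)   if y > 0

with alpha = concentration and beta = rate.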

The new PyroModule class:

class BayesianRegression_LogGamma_shape_zeroInf(PyroModule):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.linear = PyroModule[nn.Linear](in_features, out_features)
        self.linear.weight = PyroSample(dist.Normal(0., 2.).expand([out_features, in_features]).to_event(2))
        self.linear.bias = PyroSample(dist.Normal(0., 2.).expand([out_features]).to_event(1))

    def forward(self, x, y=None):
        mu = self.linear(x).squeeze(-1).exp()
        shape = pyro.sample("shape", dist.Gamma(.01, .01))  # was: self.linear_shape(x).squeeze(-1).exp()
        theta = pyro.sample("theta", dist.Uniform(.000001, 1.0))

        with pyro.plate("data", x.shape[0]):
            obs = pyro.sample("obs", GammaHurdle(concentration=shape, rate=shape / mu, theta=theta), obs=y)
        return mu  # was: torch.cat((mu, shape), 0)

I’m back to getting an error: ValueError: The parameter loc has invalid values. Again, adding a small positive amount to the zeros in y lets the model train. I am currently stuck figuring out why loc is invalid, though.

Full error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~\AppData\Roaming\Python\Python37\site-packages\pyro\poutine\trace_messenger.py in __call__(self, *args, **kwargs)
    164             try:
--> 165                 ret = self.fn(*args, **kwargs)
    166             except (ValueError, RuntimeError) as e:

~\AppData\Roaming\Python\Python37\site-packages\pyro\nn\module.py in __call__(self, *args, **kwargs)
    412         with self._pyro_context:
--> 413             return super().__call__(*args, **kwargs)
    414 

C:\ProgramData\Anaconda3\envs\ff\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\autoguide\guides.py in forward(self, *args, **kwargs)
    725 
--> 726         latent = self.sample_latent(*args, **kwargs)
    727         plates = self._create_plates(*args, **kwargs)

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\autoguide\guides.py in sample_latent(self, *args, **kwargs)
    687         """
--> 688         pos_dist = self.get_posterior(*args, **kwargs)
    689         return pyro.sample("_{}_latent".format(self._pyro_name), pos_dist, infer={"is_auxiliary": True})

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\autoguide\guides.py in get_posterior(self, *args, **kwargs)
    903         """
--> 904         return dist.Normal(self.loc, self.scale).to_event(1)
    905 

~\AppData\Roaming\Python\Python37\site-packages\pyro\distributions\distribution.py in __call__(cls, *args, **kwargs)
     17                 return result
---> 18         return super().__call__(*args, **kwargs)
     19 

C:\ProgramData\Anaconda3\envs\ff\lib\site-packages\torch\distributions\normal.py in __init__(self, loc, scale, validate_args)
     49             batch_shape = self.loc.size()
---> 50         super(Normal, self).__init__(batch_shape, validate_args=validate_args)
     51 

C:\ProgramData\Anaconda3\envs\ff\lib\site-packages\torch\distributions\distribution.py in __init__(self, batch_shape, event_shape, validate_args)
     52                 if not constraint.check(getattr(self, param)).all():
---> 53                     raise ValueError("The parameter {} has invalid values".format(param))
     54         super(Distribution, self).__init__()

ValueError: The parameter loc has invalid values

The above exception was the direct cause of the following exception:

ValueError                                Traceback (most recent call last)
<timed eval> in <module>

<ipython-input-26-ca50fa2dbbbe> in train_and_evaluate_SVI(svi, lr)
     46             if (epoch + 1) % 100 == 0: print('-' * 10)
     47 
---> 48             train_loss = train_one_epoch_SVI(svi, train_dataloader_iter, steps_per_epoch, epoch, device)
     49 
     50             #val_loss = evaluate(model, criterion, val_dataloader_iter, validation_steps, device)

<ipython-input-26-ca50fa2dbbbe> in train_one_epoch_SVI(svi, train_dataloader_iter, steps_per_epoch, epoch, device)
     14 
     15         # Track history in training
---> 16         loss = svi.step(inputs, labels)
     17 
     18         # statistics

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\svi.py in step(self, *args, **kwargs)
    126         # get loss and compute gradients
    127         with poutine.trace(param_only=True) as param_capture:
--> 128             loss = self.loss_and_grads(self.model, self.guide, *args, **kwargs)
    129 
    130         params = set(site["value"].unconstrained()

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\trace_elbo.py in loss_and_grads(self, model, guide, *args, **kwargs)
    129         loss = 0.0
    130         # grab a trace from the generator
--> 131         for model_trace, guide_trace in self._get_traces(model, guide, args, kwargs):
    132             loss_particle, surrogate_loss_particle = self._differentiable_loss_particle(model_trace, guide_trace)
    133             loss += loss_particle / self.num_particles

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\elbo.py in _get_traces(self, model, guide, args, kwargs)
    168         else:
    169             for i in range(self.num_particles):
--> 170                 yield self._get_trace(model, guide, args, kwargs)

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\trace_elbo.py in _get_trace(self, model, guide, args, kwargs)
     56         """
     57         model_trace, guide_trace = get_importance_trace(
---> 58             "flat", self.max_plate_nesting, model, guide, args, kwargs)
     59         if is_validation_enabled():
     60             check_if_enumerated(guide_trace)

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\enum.py in get_importance_trace(graph_type, max_plate_nesting, model, guide, args, kwargs, detach)
     42     and the model that is run against it.
     43     """
---> 44     guide_trace = poutine.trace(guide, graph_type=graph_type).get_trace(*args, **kwargs)
     45     if detach:
     46         guide_trace.detach_()

~\AppData\Roaming\Python\Python37\site-packages\pyro\poutine\trace_messenger.py in get_trace(self, *args, **kwargs)
    185         Calls this poutine and returns its trace instead of the function's return value.
    186         """
--> 187         self(*args, **kwargs)
    188         return self.msngr.get_trace()

~\AppData\Roaming\Python\Python37\site-packages\pyro\poutine\trace_messenger.py in __call__(self, *args, **kwargs)
    169                 exc = exc_type(u"{}\n{}".format(exc_value, shapes))
    170                 exc = exc.with_traceback(traceback)
--> 171                 raise exc from e
    172             self.msngr.trace.add_node("_RETURN", name="_RETURN", type="return", value=ret)
    173         return ret

~\AppData\Roaming\Python\Python37\site-packages\pyro\poutine\trace_messenger.py in __call__(self, *args, **kwargs)
    163                                       args=args, kwargs=kwargs)
    164             try:
--> 165                 ret = self.fn(*args, **kwargs)
    166             except (ValueError, RuntimeError) as e:
    167                 exc_type, exc_value, traceback = sys.exc_info()

~\AppData\Roaming\Python\Python37\site-packages\pyro\nn\module.py in __call__(self, *args, **kwargs)
    411     def __call__(self, *args, **kwargs):
    412         with self._pyro_context:
--> 413             return super().__call__(*args, **kwargs)
    414 
    415     def __getattr__(self, name):

C:\ProgramData\Anaconda3\envs\ff\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\autoguide\guides.py in forward(self, *args, **kwargs)
    724             self._setup_prototype(*args, **kwargs)
    725 
--> 726         latent = self.sample_latent(*args, **kwargs)
    727         plates = self._create_plates(*args, **kwargs)
    728 

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\autoguide\guides.py in sample_latent(self, *args, **kwargs)
    686         base ``model``.
    687         """
--> 688         pos_dist = self.get_posterior(*args, **kwargs)
    689         return pyro.sample("_{}_latent".format(self._pyro_name), pos_dist, infer={"is_auxiliary": True})
    690 

~\AppData\Roaming\Python\Python37\site-packages\pyro\infer\autoguide\guides.py in get_posterior(self, *args, **kwargs)
    902         Returns a diagonal Normal posterior distribution.
    903         """
--> 904         return dist.Normal(self.loc, self.scale).to_event(1)
    905 
    906     def _loc_scale(self, *args, **kwargs):

~\AppData\Roaming\Python\Python37\site-packages\pyro\distributions\distribution.py in __call__(cls, *args, **kwargs)
     16             if result is not None:
     17                 return result
---> 18         return super().__call__(*args, **kwargs)
     19 
     20     @property

C:\ProgramData\Anaconda3\envs\ff\lib\site-packages\torch\distributions\normal.py in __init__(self, loc, scale, validate_args)
     48         else:
     49             batch_shape = self.loc.size()
---> 50         super(Normal, self).__init__(batch_shape, validate_args=validate_args)
     51 
     52     def expand(self, batch_shape, _instance=None):

C:\ProgramData\Anaconda3\envs\ff\lib\site-packages\torch\distributions\distribution.py in __init__(self, batch_shape, event_shape, validate_args)
     51                     continue  # skip checking lazily-constructed args
     52                 if not constraint.check(getattr(self, param)).all():
---> 53                     raise ValueError("The parameter {} has invalid values".format(param))
     54         super(Distribution, self).__init__()
     55 

ValueError: The parameter loc has invalid values
           Trace Shapes:  
            Param Sites:  
  AutoDiagonalNormal.loc 5
AutoDiagonalNormal.scale 5
           Sample Sites:

I guess what you need is support = constraints.greater_than_eq(0.) rather than torch.distributions.constraints._Interval(0, float("inf"))? Your explanation of the issues you faced seems reasonable to me. Gamma’s support is positive, so it does not work with ZeroInflatedDistribution. You could probably make a wrapper for Gamma to bypass that issue:

class RelaxedGamma(Gamma):
    support = constraints.greater_than_eq(0.)
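A sketch of how the wrapper might then be used, keeping your mean parameterization (hypothetical, not tested):

# the relaxed support lets the ZeroInflatedDistribution validation accept exact zeros in y
base = RelaxedGamma(concentration=shape, rate=shape / mu)
obs = pyro.sample("obs", dist.ZeroInflatedDistribution(base, gate=gate), obs=y)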

Thanks for your responses. I wrote a class that inherits from Gamma, but I’m still getting the same error. Is there a way to get more info on the loc estimates in the middle of training?

class GammaHurdle2(dist.Gamma):
    
    # theta: probability of a zero
    arg_constraints = {'concentration': torch.distributions.constraints.positive, 
                       'rate': torch.distributions.constraints.positive, 
                       'theta': torch.distributions.constraints.interval(0., .99)}
    #support = torch.distributions.constraints._Interval(0, float("inf"))
    support = torch.distributions.constraints.greater_than_eq(0.)
    has_rsample = True
    
    def __init__(self, concentration, rate, theta, validate_args=None):
        self.concentration, self.rate, self.theta = broadcast_all(concentration, rate, theta)
        if isinstance(concentration, Number) and isinstance(rate, Number):
            batch_shape = torch.Size()
        else:
            batch_shape = self.concentration.size()
        super(GammaHurdle2, self).__init__(concentration = concentration, rate = rate, validate_args=validate_args)
            
    def log_prob(self, value):
        value = torch.as_tensor(value, dtype=self.rate.dtype, device=self.rate.device)

        if self._validate_args:
            self._validate_sample(value)
        
        ret = torch.where(value > 0, 
                           (torch.log(1 - self.theta) + self.concentration * torch.log(self.rate) +
                            (self.concentration - 1) * torch.log(value) -
                            self.rate * value - torch.lgamma(self.concentration)), 
                           torch.log(self.theta))
        return ret
    
    def rsample(self, sample_shape=torch.Size()):
        shape = self._extended_shape(sample_shape)
        mask = torch.rand(shape) < self.theta.expand(shape)        
        value = torch._standard_gamma(self.concentration.expand(shape)) / self.rate.expand(shape)
        value[mask] = 0
        value.detach().clamp_(min=torch.finfo(value.dtype).tiny)  # do not record in autograd graph
        return value
    
    def expand(self, batch_shape, _instance=None):
        new = self._get_checked_instance(GammaHurdle2, _instance)
        batch_shape = torch.Size(batch_shape)
        new.concentration = self.concentration.expand(batch_shape)
        new.rate = self.rate.expand(batch_shape)
        new.theta = self.theta.expand(batch_shape)
        super(GammaHurdle2, new).__init__(concentration = new.concentration, rate = new.rate, validate_args=False)
        new._validate_args = self._validate_args
        return new

I switched to a batch size of 1 to see if there’s any particular input or label value that’s messing things up, but nothing stands out. I am also printing out the self.loc and self.scale values at each step. My model has 2 features, so my understanding is that the 5 entries of loc and scale correspond to the 2 input feature weights, the bias term, shape, and theta.
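One way to dump those values after each svi.step() is through the param store (a sketch, assuming an AutoDiagonalNormal guide as in the trace above, which registers its parameters under these names):

# the guide's variational parameters live in Pyro's param store
loc = pyro.param("AutoDiagonalNormal.loc").detach().cpu().numpy()
scale = pyro.param("AutoDiagonalNormal.scale").detach().cpu().numpy()
print(loc, scale)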

Initial values:

,loc,scale
0,-0.21769094,0.099999994
1,-0.76152873,0.099999994
2,0.7582586,0.099999994
3,1.0986123,0.099999994
4,-0.9077194,0.099999994

What I’m seeing is that eventually the loc for shape becomes NaN (NaN gets written out as blank):

,loc,scale
0,-0.0009057,0.10472898
1,-0.59713113,0.09781907
2,0.98354787,0.0903359
3,,
4,-0.5649243,0.1520296

What should I look for as the cause of that NaN?

I usually look at places like the log_prob implementation to see if there is any trouble. You might want to add some print statements before

        ret = torch.where(value > 0, 
                           (torch.log(1 - self.theta) + self.concentration * torch.log(self.rate) +
                            (self.concentration - 1) * torch.log(value) -
                            self.rate * value - torch.lgamma(self.concentration)), 
                           torch.log(self.theta))

to see which values of value/theta/concentration are causing NaN. Then you can use clamp to avoid such extreme values.
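For example, something along these lines inside log_prob, just before the torch.where above (a sketch, with an arbitrary eps):

# print the raw inputs to spot which one goes bad
print(value, self.theta, self.concentration, self.rate)

# clamp away from the boundaries before taking logs
eps = torch.finfo(self.rate.dtype).eps
theta = self.theta.clamp(min=eps, max=1. - eps)
concentration = self.concentration.clamp(min=eps)
rate = self.rate.clamp(min=eps)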

Here’s the last output for (value, concentration, rate, theta, theta.dtype):
(tensor([0.]), tensor([2.5642], grad_fn=<ExpandBackward>), tensor([5.2943], grad_fn=<ExpandBackward>), tensor([0.3997], grad_fn=<ExpandBackward>), torch.float64).

All of those are reasonable values, and the log_prob function returned -0.9170, again the expected calculation. I saw another thread where setting the dtype to float64 fixed the issue, so I added torch.set_default_tensor_type(torch.DoubleTensor) and also added .double() for mu, shape, theta, obs=y.double(), and ret. Still the same error. That other thread is very similar to my issue (gamma, torch.exp(), and NaN), but that fix doesn’t seem to be working for me.

I also clamped log_prob with a min of -5 to see if that would work, but that does not help either.

Is there a function after log_prob I should look into next?

Here’s another twist. I added .01 to all y so that the new minimum is .01. I then rewrote log_prob to be calculated as:

torch.where(value > 0.01,
            (torch.log1p(-self.theta) + self.concentration * torch.log(self.rate) +
             (self.concentration - 1) * torch.log(value) -
             self.rate * value - torch.lgamma(self.concentration)),
            torch.log(self.theta))

This results in a log probability that is exactly the same as before, since I used value > 0.01 as the condition. However, now the model trains successfully and the parameter estimates are close to what I expected. But why would those changes allow the model to train successfully? Where else are the y observations being used in training?

Just my guess: I think there is no gradient at value=0, so the AD system gets confused:

import torch

value = torch.tensor(0., requires_grad=True)
y = torch.where(value > 0., torch.log(value), torch.log1p(value))
y.backward()
value.grad  # nan: torch.where still backpropagates through the untaken log(value) branch

Interesting. That makes sense. Do you recommend a different solution? Or is my current solution correct?

Hi @yoshy, I believe you can always guard against the bad value in the non-executed branch. For example:

import torch

value = torch.tensor(0., requires_grad=True)
safe_value = torch.where(value > 0., value, torch.tensor(1.))
y = torch.where(value > 0., torch.log(safe_value), torch.log1p(value))
y.backward()
value.grad  # y is the same as before, but AD is happy now

See this note from the TFP team.
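Applied to your GammaHurdle log_prob, the guard might look something like this (untested sketch):

def log_prob(self, value):
    value = torch.as_tensor(value, dtype=self.rate.dtype, device=self.rate.device)
    if self._validate_args:
        self._validate_sample(value)

    # replace zeros with a harmless dummy so log(value) in the untaken
    # branch no longer produces inf/nan gradients
    safe_value = torch.where(value > 0, value, torch.ones_like(value))
    return torch.where(value > 0,
                       (torch.log(1 - self.theta) + self.concentration * torch.log(self.rate) +
                        (self.concentration - 1) * torch.log(safe_value) -
                        self.rate * safe_value - torch.lgamma(self.concentration)),
                       torch.log(self.theta))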

This seems to have fixed the issue! Thank you for all your help.

I’m now trying to add a validation-set loss metric computed during training.

The model definition is essentially the same as discussed throughout my posts. I’ve added an evaluate function; for now I’m keeping it simple by just calculating the MAE based on the average of the predictive samples.

def evaluate(model, criterion, val_dataloader_iter, validation_steps, device, metric_agg_fn=None):
    model.eval()  # Set model to evaluate mode

    predictive_obs = Predictive(model, guide=guide, num_samples=100, return_sites=['obs'])

    # statistics
    running_loss = 0.0

    # Iterate over all the validation data.
    for step in range(validation_steps):
        pd_batch = next(val_dataloader_iter)
        pd_batch['features'] = torch.transpose(torch.stack([pd_batch[x] for x in x_feat]), 0, 1).double()
        inputs = pd_batch['features'].to(device)
        labels = pd_batch[y_name].to(device)
        samples_obs = predictive_obs(inputs)
        loss = np.absolute(torch.mean(samples_obs['obs'], dim=0) - labels).mean()
        running_loss += loss

    # The losses are averaged across observations for each minibatch.
    epoch_loss = running_loss / validation_steps

    # metric_agg_fn is used in distributed training to aggregate the metrics across workers
    print('Validation Loss: {:.4f}'.format(epoch_loss))
    return epoch_loss

def train_one_epoch_SVI(svi, train_dataloader_iter, steps_per_epoch, epoch, device):
    running_loss = 0.0
    # Iterate over the data for one epoch.
    for step in range(steps_per_epoch):
        pd_batch = next(train_dataloader_iter)
        pd_batch['features'] = torch.transpose(torch.stack([pd_batch[x] for x in x_feat]), 0, 1).double()
        inputs = pd_batch['features'].to(device)
        labels = pd_batch[y_name].double()  # + .01
        labels = labels.to(device)
        loss = svi.step(inputs, labels)

        # statistics
        running_loss += loss

    epoch_loss = running_loss / steps_per_epoch

    if (epoch + 1) % 10 == 0: print('Train Loss: {:.4f}'.format(epoch_loss))
    return epoch_loss

def train_and_evaluate_SVI(svi, lr=0.001):
    criterion = Trace_ELBO()

    with data.converter_train.make_torch_dataloader(batch_size=BATCH_SIZE) as train_dataloader, \
         data.converter_val.make_torch_dataloader(batch_size=BATCH_SIZE) as val_dataloader:

        train_dataloader_iter = iter(train_dataloader)
        steps_per_epoch = len(data.converter_train) // BATCH_SIZE

        val_dataloader_iter = iter(val_dataloader)
        validation_steps = max(1, len(data.converter_val) // BATCH_SIZE)

        for epoch in range(NUM_EPOCHS):
            if (epoch + 1) % 10 == 0: print('Epoch {}/{}'.format(epoch + 1, NUM_EPOCHS))
            if (epoch + 1) % 10 == 0: print('-' * 10)

            train_loss = train_one_epoch_SVI(svi, train_dataloader_iter, steps_per_epoch, epoch, device)

            if (epoch + 1) % 10 == 0: val_loss = evaluate(model, criterion, val_dataloader_iter, validation_steps, device)

However, during training the evaluate function raises errors about the parameters: ValueError: The parameter rate has invalid values. The error occurs in my evaluate function, not in the training function. My questions, then: is Predictive the right function to use? And, if so, am I missing an argument that constrains the fitted model parameters for evaluation?

I think Predictive is a good choice for evaluation. Could you check why the rate parameter has invalid values? In Pyro, I think you don’t need to worry about the unconstrained values; most of that happens under the hood.
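For example, you could check the quantities that feed into the rate inside the model’s forward, just before the observation site, to see whether extreme guide samples are producing non-finite or non-positive values; a sketch of the kind of check I mean:

rate = shape / mu
# debugging check: extreme posterior samples of the weights can make
# mu overflow/underflow after .exp(), giving inf, 0 or nan rates
if not torch.all(torch.isfinite(rate) & (rate > 0)):
    print("bad rate:", rate.min().item(), rate.max().item(),
          "mu:", mu.min().item(), mu.max().item())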

Great! Thanks for sharing, this is helpful.