Forecaster unexpectedly returns multiple sample dimensions

I'm trying to create a multivariate forecast (linear regression plus Gaussian noise). I can create and fit my model, but when I generate samples, the returned tensor unexpectedly has two sample dimensions instead of one. I feel like I'm missing something fundamental here; can anyone point me in the right direction?

import numpy as np
import torch
import pyro
import pyro.distributions as dist
from pyro.contrib.forecast import ForecastingModel, Forecaster

assert pyro.__version__.startswith('1.8.2')
pyro.set_rng_seed(20200305)

n_time_points = 100
n_series = 4
n_covariates = 2

data = torch.tensor(np.random.randn(n_time_points, n_series)).double()                     # [time, series] = [100, 4]
covariates = torch.tensor(np.random.randn(n_covariates, n_time_points, n_series)).double() # [features, time, series] = [2, 100, 4]

class Model(ForecastingModel):

    # covariates dims: [features, time, data_dims]
    def model(self, zero_data, covariates):
        n_series = zero_data.size(-1)
        n_covariates = covariates.size(-3)  # (the last 'covariate' is a missing-value flag that we don't need to regress on)
        n_time_points = covariates.size(-2)
        
        weight_prior_mu = torch.tensor([0, 0]).double()        # prior mean of the regression weights
        weight_scale = torch.tensor(np.diag([1, 1])).double()  # prior covariance of the regression weights

        noise_loc = torch.tensor(-5).double()   # LogNormal location for the observation noise scale
        noise_scale = torch.tensor(5).double()  # LogNormal scale for the observation noise scale

        with pyro.plate("series", n_series):
            # one weight vector (per covariate) and one noise scale per series
            weights = pyro.sample("weights", dist.MultivariateNormal(weight_prior_mu, weight_scale)).double().unsqueeze(-2)
            series_noise_scale = pyro.sample("noise_scale", dist.LogNormal(noise_loc, noise_scale)).unsqueeze(-2)

        # regression: [series, time, features] @ [series, features, 1] -> [series, time, 1],
        # then transpose dims -3 and -1 back to [1, time, series]
        weights = weights.transpose(-2, -1)
        covars = covariates.transpose(-3, -1)
        prediction = torch.matmul(covars, weights).transpose(-3, -1)
        noise_dist = dist.Normal(0, series_noise_scale).to_event(1)

        self.predict(noise_dist, prediction)


pyro.set_rng_seed(1)
pyro.clear_param_store()

T0 = 0    # start of training window
T1 = 80   # end of training window / start of forecast
T2 = 100  # end of forecast window (horizon = T2 - T1 = 20 steps)
forecaster = Forecaster(Model(), data[T0:T1, :], covariates[:, T0:T1, :],
                        learning_rate=0.1, num_steps=500, log_every=50)

samples = forecaster(data[T0:T1, :], covariates[:, T0:T2, :], num_samples=123)
print(samples.shape)

This is the output I see. I was expecting a rank-3 tensor of shape [123, 20, 4], i.e. [num_samples, forecast steps, series] (see the shape breakdown after the log below), but instead there is an extra sample dimension of size 123.

INFO 	 step    0 loss = 1.66088e+08
INFO 	 step   50 loss = 10481.4
INFO 	 step  100 loss = 15.5462
INFO 	 step  150 loss = 1.56399
INFO 	 step  200 loss = 1.53875
INFO 	 step  250 loss = 1.54792
INFO 	 step  300 loss = 1.52453
INFO 	 step  350 loss = 1.53405
INFO 	 step  400 loss = 1.53494
INFO 	 step  450 loss = 1.53047
torch.Size([123, 123, 20, 4])
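
For reference, here's how I arrive at the expected shape (just restating the constants defined above):

# expected: one index per posterior sample, per forecast step, per series
expected_shape = (123, T2 - T1, n_series)  # (num_samples, horizon, series) = (123, 20, 4)
print(expected_shape)          # (123, 20, 4)
print(tuple(samples.shape))    # (123, 123, 20, 4) -- an extra sample dimension of size num_samples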