Hello everyone,
I’m currently working on my master’s thesis, which focuses on robust regression and uncertainty modeling for a manufacturing process. I’ve trained a BNN using SVI, and its performance is nearly on par with our benchmark model, CatBoost.
My main interest is in understanding the uncertainty of the input parameters. Ideally, I would like to model both the “real” values coming from the sensors and their noise:
input_variable = gaussian_noise_function(noise_parameter * real_variable)
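For concreteness, here is a minimal Pyro sketch of one way I read this (additive Gaussian noise with a latent noise scale; the HalfNormal prior on `noise_scale` and the standard-normal prior on the “real” values are just placeholders, not my actual setup):

import torch
import pyro
import pyro.distributions as dist

def measurement_model(x_obs):
    # Latent per-sensor noise parameter (placeholder prior)
    noise_scale = pyro.sample("noise_scale", dist.HalfNormal(1.))
    with pyro.plate("data", x_obs.shape[0]):
        # Latent "real" sensor value (placeholder prior)
        x_real = pyro.sample("x_real", dist.Normal(0., 1.))
        # Observed reading = real value corrupted by Gaussian noise
        pyro.sample("x_obs", dist.Normal(x_real, noise_scale), obs=x_obs)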
I’d like to incorporate discrete latent variables for certain input parameters, representing either the “real” values or, better, the “noise parameters”.
Is my current BNN model even suitable for adding discrete latent variables?
How can I start implementing this? Are there specific approaches or Pyro tutorials that do this?
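To make the second question concrete: I understand that Pyro can marginalize discrete latent variables via enumeration (TraceEnum_ELBO together with config_enumerate). Here is a rough sketch of the kind of two-regime noise model I imagine for a single sensor column; the regime values, priors, and names are all placeholder guesses:

import torch
import pyro
import pyro.distributions as dist
from pyro import poutine
from pyro.infer import SVI, TraceEnum_ELBO, config_enumerate
from pyro.infer.autoguide import AutoNormal
from pyro.optim import Adam

@config_enumerate
def sensor_model(x_obs):
    # Two hypothetical noise regimes ("low"/"high"); values are guesses
    noise_levels = torch.tensor([0.05, 0.5])
    # Discrete latent: which regime the sensor is in
    regime = pyro.sample("regime", dist.Categorical(torch.ones(2) / 2))
    with pyro.plate("data", x_obs.shape[0]):
        # Latent "real" value (placeholder prior)
        x_real = pyro.sample("x_real", dist.Normal(0., 1.))
        pyro.sample("x", dist.Normal(x_real, noise_levels[regime]), obs=x_obs)

# Enumeration marginalizes "regime", so the guide covers continuous sites only
guide = AutoNormal(poutine.block(sensor_model, hide=["regime"]))
svi = SVI(sensor_model, guide, Adam({"lr": 0.01}),
          loss=TraceEnum_ELBO(max_plate_nesting=1))

Is something along these lines the right direction, and can it be combined with the BNN below?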
Thank you in advance
import numpy as np
import torch
import torch.nn as nn

import pyro
import pyro.distributions as dist
from pyro.nn import PyroModule, PyroSample


class BNN(PyroModule):
    def __init__(self, in_dim=15, out_dim=1, hid_dim=1, n_hid_layers=5, prior_scale=5.):
        super().__init__()
        self.activation = nn.Tanh()  # could also be nn.ReLU or nn.LeakyReLU
        assert in_dim > 0 and out_dim > 0 and hid_dim > 0 and n_hid_layers > 0  # make sure the dimensions are valid

        # Define the layer sizes and the PyroModule layer list
        self.layer_sizes = [in_dim] + n_hid_layers * [hid_dim] + [out_dim]
        layer_list = [
            PyroModule[nn.Linear](self.layer_sizes[idx - 1], self.layer_sizes[idx])
            for idx in range(1, len(self.layer_sizes))
        ]
        self.layers = PyroModule[torch.nn.ModuleList](layer_list)

        # Gaussian priors on all weights and biases; the weight scale follows
        # a Kaiming-style sqrt(2 / fan_in) scheme
        for layer_idx, layer in enumerate(self.layers):
            layer.weight = PyroSample(
                dist.Normal(0., prior_scale * np.sqrt(2 / self.layer_sizes[layer_idx]))
                .expand([self.layer_sizes[layer_idx + 1], self.layer_sizes[layer_idx]])
                .to_event(2)
            )
            layer.bias = PyroSample(
                dist.Normal(0., prior_scale).expand([self.layer_sizes[layer_idx + 1]]).to_event(1)
            )
    def forward(self, x, y=None):
        x = x.reshape(-1, self.layer_sizes[0])  # ensure shape (batch, in_dim), e.g. in_dim=15

        # Pass through the network layers
        x = self.activation(self.layers[0](x))  # input  --> hidden
        for layer in self.layers[1:-1]:
            x = self.activation(layer(x))        # hidden --> hidden
        mu = self.layers[-1](x).squeeze(-1)      # hidden --> output, squeezed to shape (batch,)

        # Infer the response noise; note the observation scale is sigma**2
        sigma = pyro.sample("sigma", dist.Gamma(0.5, 1))

        # Model the observations
        with pyro.plate("data", x.shape[0]):
            obs = pyro.sample("obs", dist.Normal(mu, sigma * sigma), obs=y)
        return mu
from pyro.infer import SVI, Trace_ELBO
from pyro.infer.autoguide import AutoLowRankMultivariateNormal
from pyro.optim import Adam

pyro.clear_param_store()
model = BNN(in_dim=15, out_dim=1, hid_dim=64, n_hid_layers=5, prior_scale=5.)

# Define the guide with a low-rank multivariate normal approximation
guide = AutoLowRankMultivariateNormal(model, rank=20)

# Define optimizer and SVI
optimizer = Adam({"lr": 0.01})
svi = SVI(model, guide, optimizer, loss=Trace_ELBO())

# Training loop (X_train_tensor / y_train_tensor are my prepared torch tensors)
for epoch in range(12000):
    loss = svi.step(X_train_tensor, y_train_tensor)
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")