Hello everyone,
I’m currently working on my master’s thesis, which focuses on robust regression and uncertainty modeling for a manufacturing process. I’ve trained a BNN using SVI, and its performance is nearly on par with our benchmark model, CatBoost.
My main interest is in understanding the uncertainty of the input parameters. Ideally, I would like to model both the “real” values coming from the sensors and their noise:
input_variable = gaussian_noise_function(noise_parameter * real_variable)
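For concreteness, here is a minimal Pyro sketch of one way I read this (additive Gaussian noise with a latent noise scale; the HalfNormal prior on `noise_scale` and the standard-normal prior on the “real” values are just placeholders, not my actual setup):

import torch
import pyro
import pyro.distributions as dist

def measurement_model(x_obs):
    # Latent per-sensor noise parameter (placeholder prior)
    noise_scale = pyro.sample("noise_scale", dist.HalfNormal(1.))
    with pyro.plate("data", x_obs.shape[0]):
        # Latent "real" sensor value (placeholder prior)
        x_real = pyro.sample("x_real", dist.Normal(0., 1.))
        # Observed reading = real value corrupted by Gaussian noise
        pyro.sample("x_obs", dist.Normal(x_real, noise_scale), obs=x_obs)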
I’d like to incorporate discrete latent variables for certain input parameters, representing either the “real” values or, better, the “noise parameters”.
Is my current BNN model even suitable for adding discrete latent variables?
How can I start implementing this? Are there specific approaches or Pyro tutorials that do this?
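To make the second question concrete: I understand that Pyro can marginalize discrete latent variables via enumeration (TraceEnum_ELBO together with config_enumerate). Here is a rough sketch of the kind of two-regime noise model I imagine for a single sensor column; the regime values, priors, and names are all placeholder guesses:

import torch
import pyro
import pyro.distributions as dist
from pyro import poutine
from pyro.infer import SVI, TraceEnum_ELBO, config_enumerate
from pyro.infer.autoguide import AutoNormal
from pyro.optim import Adam

@config_enumerate
def sensor_model(x_obs):
    # Two hypothetical noise regimes ("low"/"high"); values are guesses
    noise_levels = torch.tensor([0.05, 0.5])
    # Discrete latent: which regime the sensor is in
    regime = pyro.sample("regime", dist.Categorical(torch.ones(2) / 2))
    with pyro.plate("data", x_obs.shape[0]):
        # Latent "real" value (placeholder prior)
        x_real = pyro.sample("x_real", dist.Normal(0., 1.))
        pyro.sample("x", dist.Normal(x_real, noise_levels[regime]), obs=x_obs)

# Enumeration marginalizes "regime", so the guide covers continuous sites only
guide = AutoNormal(poutine.block(sensor_model, hide=["regime"]))
svi = SVI(sensor_model, guide, Adam({"lr": 0.01}),
          loss=TraceEnum_ELBO(max_plate_nesting=1))

Is something along these lines the right direction, and can it be combined with the BNN below?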
Thank you in advance
import numpy as np
import torch
import torch.nn as nn

import pyro
import pyro.distributions as dist
from pyro.nn import PyroModule, PyroSample


class BNN(PyroModule):
    def __init__(self, in_dim=15, out_dim=1, hid_dim=1, n_hid_layers=5, prior_scale=5.):
        super().__init__()
        self.activation = nn.Tanh()  # could also be nn.ReLU or nn.LeakyReLU
        assert in_dim > 0 and out_dim > 0 and hid_dim > 0 and n_hid_layers > 0  # make sure the dimensions are valid

        # Define the layer sizes and the PyroModule layer list
        self.layer_sizes = [in_dim] + n_hid_layers * [hid_dim] + [out_dim]
        layer_list = [
            PyroModule[nn.Linear](self.layer_sizes[idx - 1], self.layer_sizes[idx])
            for idx in range(1, len(self.layer_sizes))
        ]
        self.layers = PyroModule[torch.nn.ModuleList](layer_list)

        # Gaussian priors on all weights and biases; the weight scale follows
        # a Kaiming-style sqrt(2 / fan_in) scheme
        for layer_idx, layer in enumerate(self.layers):
            layer.weight = PyroSample(
                dist.Normal(0., prior_scale * np.sqrt(2 / self.layer_sizes[layer_idx]))
                .expand([self.layer_sizes[layer_idx + 1], self.layer_sizes[layer_idx]])
                .to_event(2)
            )
            layer.bias = PyroSample(
                dist.Normal(0., prior_scale).expand([self.layer_sizes[layer_idx + 1]]).to_event(1)
            )
    def forward(self, x, y=None):
        x = x.reshape(-1, self.layer_sizes[0])  # ensure shape (batch, in_dim), e.g. in_dim=15

        # Pass through the network layers
        x = self.activation(self.layers[0](x))  # input  --> hidden
        for layer in self.layers[1:-1]:
            x = self.activation(layer(x))        # hidden --> hidden
        mu = self.layers[-1](x).squeeze(-1)      # hidden --> output, squeezed to shape (batch,)

        # Infer the response noise; note the observation scale is sigma**2
        sigma = pyro.sample("sigma", dist.Gamma(0.5, 1))

        # Model the observations
        with pyro.plate("data", x.shape[0]):
            obs = pyro.sample("obs", dist.Normal(mu, sigma * sigma), obs=y)
        return mu
from pyro.infer import SVI, Trace_ELBO
from pyro.infer.autoguide import AutoLowRankMultivariateNormal
from pyro.optim import Adam

pyro.clear_param_store()
model = BNN(in_dim=15, out_dim=1, hid_dim=64, n_hid_layers=5, prior_scale=5.)

# Define the guide with a low-rank multivariate normal approximation
guide = AutoLowRankMultivariateNormal(model, rank=20)

# Define optimizer and SVI
optimizer = Adam({"lr": 0.01})
svi = SVI(model, guide, optimizer, loss=Trace_ELBO())

# Training loop (X_train_tensor / y_train_tensor are my prepared torch tensors)
for epoch in range(12000):
    loss = svi.step(X_train_tensor, y_train_tensor)
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")