How to differentiate parameters for identical sub-models?

THeede · February 13, 2025, 9:48am

Hi!

I am unable to keep the parameters of two Gaussian processes separate when I combine them into a multi-output GP model.

I would like to have a lengthscale and variance for each GP, but Pyro seems to refer to the same lengthscale and variance for each GP.

Do you have any suggestions for how to solve this?

Thanks in advance!

Example Code:

import torch
import pyro
import pyro.distributions as dist
from pyro.contrib.gp.kernels import RBF
from pyro.contrib.gp.models import VariationalSparseGP

# Inputs: 100 two-dimensional points sampled with torch.rand.
train_x = torch.rand(100, 2)

# Outputs: four noisy sinusoidal signals of the two input columns, stacked
# along the last axis and then used as logits for binary draws.
x0, x1 = train_x[:, 0], train_x[:, 1]
angular_scale = 2 * 3.14
noise_scale = 0.2
train_y = torch.stack(
    [
        torch.sin(x0 * angular_scale) + torch.randn(x0.size()) * noise_scale,
        torch.cos(x1 * angular_scale) + torch.randn(x1.size()) * noise_scale,
        torch.sin(x0 * angular_scale) + 2 * torch.cos(x1 * angular_scale) + torch.randn(x0.size()) * noise_scale,
        -torch.cos(x1 * angular_scale) + torch.randn(x1.size()) * noise_scale,
    ],
    dim=-1,
)
# Replace the real-valued signals with Bernoulli samples drawn from them as logits.
train_y = dist.Bernoulli(logits=train_y).sample()

# 20 random inducing points, taken from the training inputs without repetition
num_inducing = 20
idx = torch.randperm(train_x.shape[0])[:num_inducing]
Xu_init = train_x[idx].clone()

# Number of GPs
n_latents = 2

# One RBF kernel and one sparse variational GP per latent function.
# - Both hyperparameters are float tensors; the lengthscale used to be the
#   integer tensor `torch.tensor(1)`, which mixed dtypes with `variance`.
# - Every GP receives its own copy of the inducing points so that no two GPs
#   can end up aliasing the same underlying tensor.
# NOTE(review): keeping the GPs in a plain Python list leaves their parameter
# names identical (the printed param-store keys further down show a single,
# un-prefixed set). Registering each GP as an attribute of a parent module
# gives them distinct names -- confirm against the Pyro PyroModule docs.
kernels = [
    RBF(
        input_dim=train_x.shape[1],
        variance=torch.tensor(5.),
        lengthscale=torch.tensor(1.),
    )
    for _ in range(n_latents)
]
gps = [
    VariationalSparseGP(X=train_x, y=None, kernel=kernel, Xu=Xu_init.clone(), likelihood=None)
    for kernel in kernels
]

def model(X, Y):
    """Bernoulli multi-output model whose logits mix the latent GPs linearly.

    Every GP in the module-level ``gps`` list contributes the matrix product
    of its latent function (as a column) with one column of the learnable
    weight matrix ``w`` (as a row); the summed contributions are the logits
    of the observed site ``"y"``.

    Args:
        X: inputs; only ``X.shape[0]`` is read here, to size the sample plate.
        Y: observations; ``Y.shape[1]`` gives the number of outputs
            ("species") and ``Y`` itself is the observed value of ``"y"``.
    """
    num_outputs = Y.shape[1]
    num_rows = X.shape[0]

    # The plates are built up front and only entered around the likelihood.
    rows = pyro.plate(name="samples_plate", size=num_rows, dim=-2)
    outputs = pyro.plate(name="species_plate", size=num_outputs, dim=-1)

    w = pyro.param("w", torch.zeros(num_outputs, n_latents))

    contributions = []
    for i, gp in enumerate(gps):
        # Run each GP's model under its own "gp{i}" autoname scope.
        with pyro.contrib.autoname.scope(prefix=f"gp{i}"):
            f_loc, _ = gp.model()
        # Column vector times row vector -- assumes f_loc is 1-D, TODO confirm.
        contributions.append(f_loc.unsqueeze(1) @ w[:, i].unsqueeze(0))

    # Start from zeros shaped like Y and add the contributions in GP order.
    logits = sum(contributions, torch.zeros_like(Y))

    with rows:
        with outputs:
            pyro.sample("y", dist.Bernoulli(logits=logits), obs=Y)
        

def guide(X, Y):
    """Variational guide: run each latent GP's own guide under its ``gp{i}`` scope.

    ``X`` and ``Y`` are not used in the body; the signature mirrors ``model``
    because SVI calls both with the same arguments.
    """
    for index, gp in enumerate(gps):
        scope_name = f"gp{index}"
        with pyro.contrib.autoname.scope(prefix=scope_name):
            gp.guide()

# Reset Pyro's global parameter store before training.
pyro.clear_param_store()

modelGP = model
guideGP = guide

# Stochastic variational inference with Adam (lr = 0.1) and the Trace ELBO loss.
optimizer = pyro.optim.Adam({"lr": 0.1})
elbo = pyro.infer.Trace_ELBO()
svi = pyro.infer.SVI(modelGP, guideGP, optimizer, loss=elbo)

# Training loop: 50 SVI steps on the full data set; `loss` holds the value
# returned by the most recent step.
num_steps = 50
for step in range(num_steps):
    loss = svi.step(train_x, train_y)

# Show the names of all parameters now held in the parameter store.
param_names = pyro.get_param_store().keys()
print(param_names)
>>> dict_keys(['u_loc', 'u_scale_tril', 'w', 'Xu', 'kernel.lengthscale', 'kernel.variance'])

fehiepsi · February 16, 2025, 11:40am

As with PyTorch's nn.Module, you can turn those GPs into submodules of a parent module; see e.g. "Inferences for Deep Gaussian Process models in Pyro" on fehiepsi's blog.

THeede · February 16, 2025, 3:46pm

Thank you for your reply!

One of my main problems was that I had not assigned each GP to its own attribute: just storing the GPs in a list made their parameter names overlap.

I managed to set the different GPs in a loop like so:

# Register every latent GP as its own attribute (GP0, GP1, ...) on `self`
# instead of keeping them in a list.
for i in range(self.n_latents):
    # RBF requires `input_dim`; a bare `RBF()` raises TypeError.
    # Assumes X is (n_samples, n_features) -- TODO confirm.
    kernel = RBF(input_dim=X.shape[1])
    setattr(self, f"GP{i}", VariationalSparseGP(X=X, y=None, kernel=kernel, Xu=Xu, likelihood=None))