I have been trying to produce a horseshoe prior logistic regression model in pyro.
I have some code that runs but the results don’t look right so I assume I am missing something in specifying the model or in how I set some parameters for inference.
The example I have is
import numpy as np
from scipy.special import expit
from scipy.stats import binom
import torch
torch.set_default_tensor_type(torch.DoubleTensor)
import pyro
from pyro.distributions import Normal, HalfCauchy, Bernoulli
from pyro.infer.mcmc import MCMC, NUTS
def build_toy_dataset(N, D=50, noise_std=0.1):
np.random.seed(1234)
X = np.random.random((N, D))-0.5
W = np.random.random((D, 1))-0.5
b = np.random.random()-0.5
zero_ind = np.arange(D//4, D)
W[zero_ind, :] = 0
y = expit(X.dot(W) + b)
#y = binom.rvs(1, y)
y[y < 0.5] = 0
y[y >= 0.5] = 1
y = y.flatten()
return X, y, W
N, D = 200, 50
X, y, W = build_toy_dataset(N, D)
def model(X, yob):
onevec = torch.ones(X.size(1), 1)
tau = pyro.sample("tau", HalfCauchy(torch.ones(1)))
lam = pyro.sample("lambda", HalfCauchy(onevec))
sig = (lam**2)*(tau**2)
ws = pyro.sample('ws', Normal(torch.zeros_like(sig), sig))
b = pyro.sample('b', Normal(0., 1.))
logit = X.matmul(ws) + b.view(-1, 1)
y = pyro.sample('y', Bernoulli(logits=logit), obs=yob)
return y
Xs, ys = torch.tensor(X), torch.tensor(y)
nuts_kernel = NUTS(model, adapt_step_size=True)
hmc_posterior = MCMC(nuts_kernel, num_samples=200, warmup_steps=200).run(Xs, ys)
wshat = traces.marginal(sites).empirical[sites].mean.detach().cpu().numpy()
In comparison I run a similar model in PyMC3 using the same toy data as
import pymc3 as pm
import theano as T
# set up a placeholder to input data to the pymc3 model
Xᵢ = T.shared(X)
yᵢ = T.shared(y)
# define the model using formulation 1
with pm.Model() as logreg:
λ = pm.HalfCauchy('lambda', beta=1, shape=D)
τ = pm.HalfCauchy('tau', beta=1)
σ = pm.Deterministic('horseshoe', τ*τ*λ*λ)
β = pm.Normal('beta', mu=0, sd=σ, shape=D)
βₒ = pm.Normal('b', mu=0, sd=1)
μ = pm.math.invlogit(Xᵢ.dot(β) + βₒ)
yₒ = pm.Bernoulli('obs', p=μ, observed=yᵢ)
# performance the inference
SEED = 20090425
warmup = 200
num_samples = 200
with logreg:
step = pm.NUTS(target_accept=.8)
logreg_posterior = pm.sample(num_samples, step=step, njobs=2, tune=warmup, random_seed=SEED)
βₕ = logreg_posterior['beta']
βₕ = βₕ[βₕ.shape[0]//2:, :].mean(axis=0)
this seems to provide mean weights that are relatively more sensible.
Am I specifying the model incorrectly?
I understand that I can specify the model differently in a non-centred way (using Inverse Gamma distributions). Although this would speed up sampling I would expect the non-centred version to still provide a reasonable answer.
If the model is specified correctly is there some setting for NUTS and MCMC I should be using?