Horseshoe prior logistic regression

I have been trying to build a horseshoe prior logistic regression model in Pyro.
I have some code that runs, but the results don't look right, so I assume I am missing something either in how I specify the model or in how I set the inference parameters.

The example I have is

import numpy as np
from scipy.special import expit
from scipy.stats import binom
import torch
torch.set_default_tensor_type(torch.DoubleTensor)
import pyro
from pyro.distributions import Normal, HalfCauchy, Bernoulli
from pyro.infer.mcmc import MCMC, NUTS

def build_toy_dataset(N, D=50, noise_std=0.1):
    np.random.seed(1234)
    X = np.random.random((N, D)) - 0.5
    W = np.random.random((D, 1)) - 0.5
    b = np.random.random() - 0.5
    # make the true weights sparse: only the first D//4 coefficients are non-zero
    zero_ind = np.arange(D//4, D)
    W[zero_ind, :] = 0
    # deterministic labels by thresholding the probabilities at 0.5
    y = expit(X.dot(W) + b)
    #y = binom.rvs(1, y)
    y[y < 0.5] = 0
    y[y >= 0.5] = 1
    y = y.flatten()
    return X, y, W

N, D = 200, 50
X, y, W = build_toy_dataset(N, D)


def model(X, yob):
    onevec = torch.ones(X.size(1), 1)
    tau = pyro.sample("tau", HalfCauchy(torch.ones(1)))
    lam = pyro.sample("lambda", HalfCauchy(onevec))
    sig = (lam**2)*(tau**2)
    ws = pyro.sample('ws', Normal(torch.zeros_like(sig), sig))
    b = pyro.sample('b', Normal(0., 1.))
    logit = X.matmul(ws) + b.view(-1, 1)
    y = pyro.sample('y', Bernoulli(logits=logit), obs=yob)
    return y

Xs, ys = torch.tensor(X), torch.tensor(y)
nuts_kernel = NUTS(model, adapt_step_size=True)
hmc_posterior = MCMC(nuts_kernel, num_samples=200, warmup_steps=200).run(Xs, ys)
wshat = hmc_posterior.marginal(['ws']).empirical['ws'].mean.detach().cpu().numpy()

For comparison, I run a similar model in PyMC3 on the same toy data:



import pymc3 as pm
import theano as T

# set up theano shared variables to feed the data into the PyMC3 model
Xᵢ = T.shared(X)
yᵢ = T.shared(y)


# define the model using formulation 1
with pm.Model() as logreg:
    λ = pm.HalfCauchy('lambda', beta=1, shape=D)
    τ = pm.HalfCauchy('tau', beta=1)
    σ = pm.Deterministic('horseshoe', τ*τ*λ*λ)
    β = pm.Normal('beta', mu=0, sd=σ, shape=D)
    βₒ = pm.Normal('b', mu=0, sd=1)
    μ = pm.math.invlogit(Xᵢ.dot(β) + βₒ)
    yₒ = pm.Bernoulli('obs', p=μ, observed=yᵢ)
    
# perform the inference
SEED = 20090425
warmup = 200
num_samples = 200
with logreg:
    step = pm.NUTS(target_accept=.8)
    logreg_posterior = pm.sample(num_samples, step=step, njobs=2, tune=warmup, random_seed=SEED)
    βₕ = logreg_posterior['beta']
    βₕ = βₕ[βₕ.shape[0]//2:, :].mean(axis=0)

This seems to give posterior mean weights that look considerably more sensible than the Pyro result.

Am I specifying the model incorrectly?
I understand that I could specify the model in a non-centred way instead (using Inverse Gamma distributions). Although that would speed up sampling, I would expect the centred version above to still give a reasonable answer.
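
For reference, the kind of non-centred version I have in mind is roughly the following. This is just a sketch reusing the imports above; it scales standard-normal weights by lambda and tau directly rather than going through the Inverse Gamma decomposition:

def model_noncentred(X, yob):
    D = X.size(1)
    tau = pyro.sample("tau", HalfCauchy(torch.ones(1)))
    lam = pyro.sample("lambda", HalfCauchy(torch.ones(D)).to_event(1))
    # non-centred: draw standard-normal weights, then scale them deterministically
    ws_raw = pyro.sample("ws_raw", Normal(torch.zeros(D), torch.ones(D)).to_event(1))
    ws = ws_raw * lam * tau
    b = pyro.sample("b", Normal(0., 1.))
    logit = X.matmul(ws) + b
    return pyro.sample("y", Bernoulli(logits=logit).to_event(1), obs=yob)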

If the model is specified correctly, is there some setting for NUTS or MCMC that I should be using?

I have corrected some points that I noticed in your model below. You can also specify num_chains=2 in MCMC, jit_compile=True in NUTS, and decrease max_tree_depth (if you use the Pyro dev version) to make sampling faster; a sketch of these settings follows the model.

def model(X, yob):
    onevec = torch.ones(X.size(1))
    tau = pyro.sample("tau", HalfCauchy(1))
    # .to_event() folds the batch dimension into the event shape, so each of these
    # sites returns a single D-dimensional draw
    lam = pyro.sample("lambda", HalfCauchy(onevec).to_event())
    sig = (lam**2)*(tau**2)
    ws = pyro.sample('ws', Normal(0, sig).to_event())
    b = pyro.sample('b', Normal(0, 1))
    logit = X.matmul(ws) + b
    y = pyro.sample('y', Bernoulli(logits=logit).to_event(), obs=yob)
    return y
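
Roughly, the settings I mentioned would be passed like this (max_tree_depth is only exposed on the dev version, so drop that argument if your installed release does not accept it):

nuts_kernel = NUTS(model, adapt_step_size=True, jit_compile=True, max_tree_depth=6)
hmc_posterior = MCMC(nuts_kernel, num_samples=200, warmup_steps=200, num_chains=2).run(Xs, ys)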

Another (more intuitive) way is to use pyro.plate to declare the batch dimensions and eliminate these to_event calls; both ways are equivalent, so it is better to get the above model working first.
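
For concreteness, the plate version might look something like this (an untested sketch along the same lines as the model above):

def model_plate(X, yob):
    N, D = X.size(0), X.size(1)
    tau = pyro.sample("tau", HalfCauchy(1.))
    with pyro.plate("coefs", D):
        # the plate broadcasts these scalar distributions to D independent draws
        lam = pyro.sample("lambda", HalfCauchy(1.))
        ws = pyro.sample("ws", Normal(0., (lam**2) * (tau**2)))
    b = pyro.sample("b", Normal(0., 1.))
    logit = X.matmul(ws) + b
    with pyro.plate("data", N):
        return pyro.sample("y", Bernoulli(logits=logit), obs=yob)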

Great, thank you for this.