Horseshoe prior logistic regression

I have been trying to produce a horseshoe prior logistic regression model in pyro.
I have some code that runs but the results don’t look right so I assume I am missing something in specifying the model or in how I set some parameters for inference.

The example I have is

import numpy as np
from scipy.special import expit
from scipy.stats import binom
import torch
import pyro
from pyro.distributions import Normal, HalfCauchy, Bernoulli
from pyro.infer.mcmc import MCMC, NUTS

def build_toy_dataset(N, D=50, noise_std=0.1):
    """Generate a sparse logistic-regression toy dataset.

    N: number of observations; D: number of features. Three quarters of the
    true weights are zeroed out, so a horseshoe prior should recover sparsity.
    `noise_std` is kept for interface compatibility but is currently unused.

    Returns (X, y, W): features (N, D), binary labels (N,), true weights (D, 1).
    """
    X = np.random.random((N, D))-0.5
    W = np.random.random((D, 1))-0.5
    b = np.random.random()-0.5
    # Make the true weight vector sparse: only the first D//4 entries are nonzero.
    zero_ind = np.arange(D//4, D)
    W[zero_ind, :] = 0
    # Bug fix: the linear predictor X.dot(W) was missing (the line read
    # `expit( + b)`), so every label depended only on the intercept b.
    y = expit(X.dot(W) + b)
    #y = binom.rvs(1, y)  # stochastic labels; the thresholding below is deterministic
    y[y < 0.5] = 0
    y[y >= 0.5] = 1
    y = y.flatten()
    return X, y, W

# Generate the toy data: 200 observations, 50 features (3/4 of true weights zero).
N, D = 200, 50
X, y, W = build_toy_dataset(N, D)

def model(X, yob):
    """Horseshoe-prior logistic regression model (centred parameterisation).

    X: (N, D) feature tensor; yob: observed binary labels
    (presumably shape (N,) — confirm against how the data is built).
    """
    # Column vector of ones, shape (D, 1).
    onevec = torch.ones(X.size(1), 1)
    # Global shrinkage scale.
    tau = pyro.sample("tau", HalfCauchy(torch.ones(1)))
    # Per-feature local shrinkage scales; shape (D, 1).
    lam = pyro.sample("lambda", HalfCauchy(onevec))
    # NOTE(review): this uses lam**2 * tau**2 as the Normal scale; the classic
    # horseshoe uses the product of the scales, lam * tau — confirm which
    # parameterisation is intended (the PyMC3 version below does the same).
    sig = (lam**2)*(tau**2)
    # NOTE(review): sig has shape (D, 1), and without .to_event() Pyro treats
    # these as batch dims rather than one joint event — a likely cause of the
    # odd posterior; see the corrected model in the answer.
    ws = pyro.sample('ws', Normal(torch.zeros_like(sig), sig))
    b = pyro.sample('b', Normal(0., 1.))
    # (N, 1) logits; b.view(-1, 1) reshapes the scalar intercept for broadcasting.
    logit = X.matmul(ws) + b.view(-1, 1)
    y = pyro.sample('y', Bernoulli(logits=logit), obs=yob)
    return y

# Convert the NumPy toy data to torch tensors for Pyro.
Xs, ys = torch.tensor(X), torch.tensor(y)
nuts_kernel = NUTS(model, adapt_step_size=True)
hmc_posterior = MCMC(nuts_kernel, num_samples=200, warmup_steps=200).run(Xs, ys)
# Bug fix: `traces` and `sites` were undefined names. The posterior returned by
# .run() is `hmc_posterior`, and the sample site holding the weights is 'ws'.
sites = "ws"
wshat = hmc_posterior.marginal([sites]).empirical[sites].mean.detach().cpu().numpy()

In comparison I run a similar model in PyMC3 using the same toy data as

import pymc3 as pm
import theano as T

# Set up Theano shared variables as data placeholders for the PyMC3 model.
Xᵢ = T.shared(X)
yᵢ = T.shared(y)

# Define the reference model (centred horseshoe, mirroring the Pyro version).
with pm.Model() as logreg:
    # Local (per-feature) shrinkage scales.
    λ = pm.HalfCauchy('lambda', beta=1, shape=D)
    # Global shrinkage scale.
    τ = pm.HalfCauchy('tau', beta=1)
    # Horseshoe scale τ²λ², used as the sd of the coefficient prior.
    σ = pm.Deterministic('horseshoe', τ*τ*λ*λ)
    β = pm.Normal('beta', mu=0, sd=σ, shape=D)
    # Intercept.
    βₒ = pm.Normal('b', mu=0, sd=1)
    μ = pm.math.invlogit(Xᵢ.dot(β) + βₒ)
    yₒ = pm.Bernoulli('obs', p=μ, observed=yᵢ)
# Perform the inference.
SEED = 20090425
warmup = 200
num_samples = 200
with logreg:
    step = pm.NUTS(target_accept=.8)
    logreg_posterior = pm.sample(num_samples, step=step, njobs=2, tune=warmup, random_seed=SEED)
    # Posterior mean of the weights, discarding the first half of the draws.
    βₕ = logreg_posterior['beta']
    βₕ = βₕ[βₕ.shape[0]//2:, :].mean(axis=0)

this seems to provide mean weights that are relatively more sensible.

Am I specifying the model incorrectly?
I understand that I can specify the model differently in a non-centred way (using Inverse Gamma distributions). Although this would speed up sampling, I would expect the non-centred version to still provide a reasonable answer.

If the model is specified correctly is there some setting for NUTS and MCMC I should be using?

I have corrected some points that I noticed in your model. You can specify num_chains=2 in MCMC, set jit_compile=True in NUTS, and decrease max_tree_depth (if you use the Pyro dev version) to make it faster.

def model(X, yob):
    """Corrected horseshoe model: 1-D sample sites with explicit event dims."""
    # 1-D vector of ones, shape (D,), instead of a (D, 1) column.
    onevec = torch.ones(X.size(1))
    # Scalar global shrinkage scale.
    tau = pyro.sample("tau", HalfCauchy(1))
    # .to_event() reinterprets the batch dims as event dims, so the D local
    # scales form a single joint sample site rather than D batch draws.
    lam = pyro.sample("lambda", HalfCauchy(onevec).to_event())
    sig = (lam**2)*(tau**2)
    ws = pyro.sample('ws', Normal(0, sig).to_event())
    b = pyro.sample('b', Normal(0, 1))
    # (N,) logits; the scalar intercept broadcasts without reshaping.
    logit = X.matmul(ws) + b
    y = pyro.sample('y', Bernoulli(logits=logit).to_event(), obs=yob)
    return y

Another (more intuitive) way is to use pyro.plate to specify the batch_shape and eliminate these to_event calls; both ways are equivalent. So it is better to get the above model working first.

Great, thank you for this.