Hey guys.
I want to try bayesian logistic regression on the titanic dataset (using only 6 numeric features) but i found it to be extremely slow. Using NUTS for sampling the posterior (num_samples = 500, warmup_steps = 100) can take 20 minutes! but SOMETIMES it can take 1-2 minutes. That is still relatively slow, but how can this difference be so big?
Is the way I set up my model not optimal?
Here’s the code I used.
class LogisticRegressionV2(object):
def __init__(self, C = 1.0):
self.C = C # Inverse regularization strength (C = 1/lambda)
self.n_samples = None
self.n_features = None #Dimension of the data (number of features)
self.hmc_posterior = None
self.coefficients_summary = None
self.marginal = None # Shape (num_mcmc_samples, n_features + 1)
self.coefficients_MAP = None # Shape (1, n_features + 1)
def prep_data(self, X):
"""
Parameters
X: numpy array, shape (n_samples, n_features)
Returns
X_plus_intercept: numpy array, shape (n_features +1, n_samples) (transposed and added ones for the intercept terms)
"""
self.n_samples = X.shape[0]
self.n_features = X.shape[1]
self.coefficients_names = ["beta_%s"%i for i in range(self.n_features + 1)]
# Add intercept
X_plus_intercept = np.ones((self.n_features +1, self.n_samples))
X_plus_intercept[-self.n_features:, :] = X.T.squeeze()
return X_plus_intercept
def logit_model(self, X_tensor = None, Y_tensor = None):
"""
Parameters
X_tensor: torch tensor, shape (n_features +1, n_samples) (includes intercept term. Result of prep_data(X))
Y_tensor: torch tensor, shape (1, n_samples )
"""
gamma = np.sqrt(self.C)
beta_prior = dist.Normal(torch.zeros(1, self.n_features + 1), torch.ones(1, self.n_features + 1) * gamma)
beta = pyro.sample("beta", beta_prior)
z = torch.mm(beta, X_tensor)
p = torch.sigmoid(z)
likelihood = dist.Bernoulli(p)
pyro.sample("obs", likelihood, obs = Y_tensor)
def fit(self, X, Y, num_samples = 100, warmup_steps = 50):
"""
Parameters
X: numpy array, shape (n_samples, n_features)
Y: numpy array, shape (n_samples, ) or shape(n_samples, 1)
"""
# X gets transformed into shape (n_features +1, n_samples).
# The extra dimension comes from the intercept terms, and the transpose allows matrix multiplication later.
X = self.prep_data(X)
Y = Y.reshape(-1, 1)
X_tensor = torch.tensor(X, dtype = torch.float)
Y_tensor = torch.tensor(Y, dtype = torch.float)
Y_tensor = Y_tensor.transpose(0, 1)
nuts_kernel = NUTS(self.logit_model, adapt_step_size = True, jit_compile = False, adapt_mass_matrix=False)
hmc_posterior = MCMC(nuts_kernel, num_samples = num_samples, warmup_steps = warmup_steps, num_chains = 1)\
.run(X_tensor, Y_tensor)
To load the dataset
df = pd.read_csv("http://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv")
df['sex'] = df.Sex.apply(lambda s: 1 if s=='female' else 0)
df['has siblings or spouses aboard'] = (df['Siblings/Spouses Aboard'] > 0).astype(int)
df['has parents or children aboard'] = (df['Parents/Children Aboard'] > 0).astype(int)
df['has family aboard'] = (df['Siblings/Spouses Aboard'] + df['Parents/Children Aboard'] > 0).astype(int)
df_train = df.sample(frac=0.5, replace=False, random_state=42)
df_test = df.loc[df.index.difference(df_train.index)]
features = ['Pclass', 'Age', 'Fare', 'Siblings/Spouses Aboard',
'Parents/Children Aboard', 'sex']
target = 'Survived'
X_train = df_train[features].values
y_train = df_train[target].values
X_test = df_test[features].values
y_test = df_test[target].values
Running it:
lr_v2 = LogisticRegressionV2()
lr_v2.fit(X_train, y_train, num_samples = 500, warmup_steps = 100)
Is there anything wrong/suboptimal with my implementation? Help would be greatly appreciated!