I want to try Bayesian logistic regression on the Titanic dataset (using only 6 numeric features), but I found it to be extremely slow. Sampling the posterior with NUTS (num_samples = 500, warmup_steps = 100) can take 20 minutes, yet sometimes the same run finishes in 1-2 minutes. Even 1-2 minutes seems slow for such a small problem, but how can the run-to-run difference be so large?
Is the way I set up my model not optimal?
Here’s the code I used.
```python
import numpy as np
import torch
import pyro
import pyro.distributions as dist
from pyro.infer import MCMC, NUTS  # Pyro 1.x imports


class LogisticRegressionV2(object):

    def __init__(self, C=1.0):
        self.C = C                    # inverse regularization strength (C = 1/lambda)
        self.n_samples = None
        self.n_features = None        # dimension of the data (number of features)
        self.hmc_posterior = None
        self.coefficients_summary = None
        self.marginal = None          # shape (num_mcmc_samples, n_features + 1)
        self.coefficients_MAP = None  # shape (1, n_features + 1)

    def prep_data(self, X):
        """
        Parameters
        X: numpy array, shape (n_samples, n_features)

        Returns
        X_plus_intercept: numpy array, shape (n_features + 1, n_samples)
            (transposed, with a row of ones prepended for the intercept term)
        """
        self.n_samples = X.shape[0]
        self.n_features = X.shape[1]
        self.coefficients_names = ["beta_%s" % i for i in range(self.n_features + 1)]

        # Add the intercept row, then fill the remaining rows with the features.
        X_plus_intercept = np.ones((self.n_features + 1, self.n_samples))
        X_plus_intercept[-self.n_features:, :] = X.T
        return X_plus_intercept

    def logit_model(self, X_tensor=None, Y_tensor=None):
        """
        Parameters
        X_tensor: torch tensor, shape (n_features + 1, n_samples)
            (includes the intercept row; result of prep_data(X))
        Y_tensor: torch tensor, shape (1, n_samples)
        """
        gamma = np.sqrt(self.C)
        beta_prior = dist.Normal(torch.zeros(1, self.n_features + 1),
                                 torch.ones(1, self.n_features + 1) * gamma)
        beta = pyro.sample("beta", beta_prior)
        z = torch.mm(beta, X_tensor)  # shape (1, n_samples)
        p = torch.sigmoid(z)
        likelihood = dist.Bernoulli(p)
        pyro.sample("obs", likelihood, obs=Y_tensor)

    def fit(self, X, Y, num_samples=100, warmup_steps=50):
        """
        Parameters
        X: numpy array, shape (n_samples, n_features)
        Y: numpy array, shape (n_samples,) or (n_samples, 1)
        """
        # X gets transformed into shape (n_features + 1, n_samples): the extra
        # row holds the intercept terms, and the transpose allows the matrix
        # multiplication in logit_model.
        X = self.prep_data(X)
        Y = Y.reshape(-1, 1)
        X_tensor = torch.tensor(X, dtype=torch.float)
        Y_tensor = torch.tensor(Y, dtype=torch.float).transpose(0, 1)

        nuts_kernel = NUTS(self.logit_model, adapt_step_size=True,
                           jit_compile=False, adapt_mass_matrix=False)
        # In Pyro 1.x, MCMC.run() returns None, so keep the MCMC object
        # around to pull samples from later.
        self.hmc_posterior = MCMC(nuts_kernel, num_samples=num_samples,
                                  warmup_steps=warmup_steps, num_chains=1)
        self.hmc_posterior.run(X_tensor, Y_tensor)
```
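For reference, this is what I understand the more idiomatic Pyro formulation to look like (a sketch I have not benchmarked: `to_event(1)` makes `beta` a single vector-valued site, `pyro.plate` marks the observations as conditionally independent, and `Bernoulli(logits=...)` folds the sigmoid into the log-density; the `gamma` argument plays the role of `sqrt(C)` above):

```python
def logit_model_plated(X_tensor, Y_tensor, gamma=1.0):
    # Sketch of an alternative, more idiomatic formulation. Assumes the same
    # (n_features + 1, n_samples) layout that prep_data() produces;
    # gamma corresponds to sqrt(C) in the class above.
    n_coefs, n_samples = X_tensor.shape
    beta = pyro.sample(
        "beta",
        dist.Normal(torch.zeros(n_coefs), gamma * torch.ones(n_coefs)).to_event(1),
    )  # one vector-valued site of shape (n_features + 1,)
    logits = beta @ X_tensor  # shape (n_samples,)
    with pyro.plate("data", n_samples):
        # Bernoulli(logits=...) is numerically more stable than
        # computing sigmoid(z) and passing probs explicitly.
        pyro.sample("obs", dist.Bernoulli(logits=logits), obs=Y_tensor.squeeze(0))
```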
To load the dataset:
```python
import pandas as pd

df = pd.read_csv("http://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv")
df['sex'] = df.Sex.apply(lambda s: 1 if s == 'female' else 0)
df['has siblings or spouses aboard'] = (df['Siblings/Spouses Aboard'] > 0).astype(int)
df['has parents or children aboard'] = (df['Parents/Children Aboard'] > 0).astype(int)
df['has family aboard'] = (df['Siblings/Spouses Aboard'] + df['Parents/Children Aboard'] > 0).astype(int)

# 50/50 train/test split
df_train = df.sample(frac=0.5, replace=False, random_state=42)
df_test = df.loc[df.index.difference(df_train.index)]

features = ['Pclass', 'Age', 'Fare', 'Siblings/Spouses Aboard', 'Parents/Children Aboard', 'sex']
target = 'Survived'

X_train = df_train[features].values
y_train = df_train[target].values
X_test = df_test[features].values
y_test = df_test[target].values
```
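Before fitting, I sanity-check the arrays (a quick sketch; NaNs in the feature matrix would break the sampler):

```python
# Quick sanity check before fitting (sketch): confirm shapes and that the
# feature matrix has no missing values.
print(X_train.shape, y_train.shape)             # expect (n_train, 6) and (n_train,)
assert not np.isnan(X_train.astype(float)).any()  # cast defensively in case of object dtype
```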
```python
lr_v2 = LogisticRegressionV2()
lr_v2.fit(X_train, y_train, num_samples=500, warmup_steps=100)
```
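To quantify the run-to-run difference, I time the fit like this (a minimal sketch assuming Pyro 1.x: `pyro.set_rng_seed` pins the RNG so a given run is reproducible, and `get_samples()`/`summary()` are the standard accessors on the stored MCMC object):

```python
import time

pyro.set_rng_seed(0)  # pin the RNG so a given run is reproducible

t0 = time.perf_counter()
lr_v2 = LogisticRegressionV2()
lr_v2.fit(X_train, y_train, num_samples=500, warmup_steps=100)
print("fit took %.1f s" % (time.perf_counter() - t0))

# Inspect the stored posterior (Pyro 1.x MCMC API).
samples = lr_v2.hmc_posterior.get_samples()    # {"beta": tensor of shape (500, 1, 7)}
print(samples["beta"].squeeze(1).mean(dim=0))  # posterior mean of intercept + coefficients
lr_v2.hmc_posterior.summary()                  # per-site mean, sd, n_eff, r_hat
```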
Is there anything wrong or suboptimal in my implementation? Any help would be greatly appreciated!