I'm trying to run a bayesian logistic regression on the wine dataset provided from the sklearn package. As variables, I decided to use alcohol, color_intensity, flavanoids, hue and magnesium where alcohol is my response variable and the rest the predictors. To do so, I'm using pyro and torch packages:
import pyro
import torch
import pyro.distributions as dist
import pyro.optim as optim
from pyro.infer import SVI, Trace_ELBO
import pandas as pd
import numpy as np
from pyro.infer import Predictive
import torch.distributions.constraints as constraints
from sklearn import datasets
pyro.set_rng_seed(0)
#loading data and prepearing dataframe
wine = datasets.load_wine()
data = pd.DataFrame(columns = wine['feature_names'], data=wine['data'] )
#choosiing variables: response and predictors
variables = data[['alcohol', 'color_intensity', 'flavanoids', 'hue', 'magnesium']]
#standardization
variables = (variables-variables.min())/(variables.max()-variables.min())
#tensorizing
alcohol = torch.tensor(variables['alcohol'].values, dtype=torch.float)
predictors = torch.stack([torch.tensor(variables[column].values, dtype=torch.float)
for column in ['alcohol', 'color_intensity', 'flavanoids', 'hue', 'magnesium']], 1)
#splitting data
k = int(0.8 * len(variables))
x_train, y_train = predictors[:k], alcohol[:k]
x_test, y_test = predictors[k:], alcohol[k:]
#modelling
def model_alcohol(predictors, alcohol):
n_observations, n_predictors = predictors.shape
#weights
w = pyro.sample('w', dist.Normal(torch.zeros(n_predictors), torch.ones(n_predictors)))
epsilon = pyro.sample('epsilon', dist.Normal(0.,1.))
#non-linearity
y_hat = torch.sigmoid((w*predictors).sum(dim=1) + epsilon)
sigma = pyro.sample("sigma", dist.Uniform(0.,3.))
with pyro.plate('alcohol', len(alcohol)):
y=pyro.sample('y', dist.Normal(y_hat, sigma), obs=alcohol)
def guide_alcohol(predictors, alcohol=None):
n_observations, n_predictors = predictors.shape
w_loc = pyro.param('w_loc', torch.rand(n_predictors))
w_scale = pyro.param('w_scale', torch.rand(n_predictors), constraint=constraints.positive)
w = pyro.sample('w', dist.Normal(w_loc, w_scale))
epsilon_loc = pyro.param('b_loc', torch.rand(1))
epsilon_scale = pyro.param('b_scale', torch.rand(1), constraint=constraints.positive)
epsilon = pyro.sample('epsilon', dist.Normal(epsilon_loc, epsilon_scale))
sigma_loc = pyro.param('sigma_loc', torch.rand(n_predictors))
sigma_scale = pyro.param('sigma_scale', torch.rand(n_predictors),
constraint=constraints.positive)
sigma = pyro.sample('sigma', dist.Normal(sigma_loc, sigma_scale))
alcohol_svi = SVI(model=model_alcohol, guide=guide_alcohol, optim=optim.ClippedAdam({'lr' : 0.0002}),
loss=Trace_ELBO())
losses = []
for step in range(10000):
loss = alcohol_svi.step(x_train, y_train)/len(x_train)
losses.append(loss)
As I have to use Stochastic Variational Inference, I have defined both the model and the guide. My problem is now at matching tensor sizes, as I now I get the error:
RuntimeError: The size of tensor a (142) must match the size of tensor b (5) at non-singleton
dimension 0
Trace Shapes:
Param Sites:
Sample Sites:
w dist 5 |
value 5 |
epsilon dist |
value 1 |
sigma dist |
value 5 |
alcohol dist |
value 142 |
I'm kinda new to the idea of modelling on my own, so clearly there are mistakes around the code (hopefully not on the theory behind it). Still, I see I should adjust dimension on the guide maybe? I'm not entirely sure on how to honestly.
Your main problem is that w
is not declared as a single event (.to_event(1)
), and your variance (sigma
) should have the same dim as your observations (()
). The model and guide below fix this; I suggest you look at auto-generated guides in Pyro, and a different prior on sigma
.
def model_alcohol(predictors, alcohol):
n_observations, n_predictors = predictors.shape
# weights
# w is a single event
w = pyro.sample('w', dist.Normal(torch.zeros(n_predictors), torch.ones(n_predictors)).to_event(1))
epsilon = pyro.sample('epsilon', dist.Normal(0., 1.))
# non-linearity
y_hat = torch.sigmoid(predictors @ w + epsilon) # (predictors * weight).sum(1) == predictors @ w
sigma = pyro.sample("sigma", dist.Uniform(0., 3.))
with pyro.plate('alcohol', len(alcohol)):
pyro.sample('y', dist.Normal(y_hat, sigma), obs=alcohol)
def guide_alcohol(predictors, alcohol=None):
n_observations, n_predictors = predictors.shape
w_loc = pyro.param('w_loc', torch.rand(n_predictors))
w_scale = pyro.param('w_scale', torch.rand(n_predictors), constraint=constraints.positive)
pyro.sample('w', dist.Normal(w_loc, w_scale).to_event(1))
epsilon_loc = pyro.param('b_loc', torch.rand(1))
epsilon_scale = pyro.param('b_scale', torch.rand(1), constraint=constraints.positive)
epsilon = pyro.sample('epsilon', dist.Normal(epsilon_loc, epsilon_scale))
sigma_loc = pyro.param('sigma_loc', torch.rand(1))
sigma_scale = pyro.param('sigma_scale', torch.rand(1),
constraint=constraints.positive)
pyro.sample('sigma', dist.HalfNormal(sigma_loc, sigma_scale)) # MUST BE POSITIVE