My LDA implementation does not work

Hello

I implemented an LDA model, but it has some problems.

Generate the dataset:

import torch
import pyro
import pyro.distributions as dist
from torch.distributions import constraints

D = 2          # number of documents
N = [15, 8]    # words per document
K = 5          # number of topics
V = 15         # vocabulary size

z = [[0]*n for n in N]
data = [[0]*n for n in N]

theta = dist.Dirichlet(torch.zeros([D, K])+0.1).sample()
phi = dist.Dirichlet(torch.zeros([K, V])+0.05).sample()

for d in range(D):
  for n in range(N[d]):
    z[d][n] = dist.Categorical(theta[d, :]).sample()
    data[d][n] = dist.Categorical(phi[z[d][n], :]).sample()

Define the model and guide:


def model(data):
  theta = pyro.sample("theta",dist.Dirichlet(torch.zeros([D, K])+0.1))
  phi = pyro.sample("phi",dist.Dirichlet(torch.zeros([K, V])+0.05))
  
  for d in pyro.irange("D", D):
    for n in pyro.irange("N_%s"%(d), N[d]):
      z = pyro.sample("z_%s_%s"%(d,n), dist.Categorical(theta[d, :]))
      pyro.sample("w_%s_%s"%(d,n), dist.Categorical(phi[z, :]), obs=data[d][n])

def guide(data):
  alpha_q = pyro.param("alpha_q", torch.zeros([D, K])+1,
                       constraint=constraints.positive)
  theta_q = pyro.sample("theta",dist.Dirichlet(alpha_q))
  beta_q = pyro.param("beta_q", torch.zeros([K, V])+1,
                       constraint=constraints.positive)
  phi_q = pyro.sample("phi",dist.Dirichlet(beta_q))
  
  for d in pyro.irange("D", D):
    for n in pyro.irange("N_%s"%(d), N[d]):
      q_i = pyro.param("q_%s_%s"%(d,n), torch.ones(1, K) / K,
                                      constraint=constraints.unit_interval)
      pyro.sample("z_%s_%s"%(d,n), dist.Categorical(q_i))

Do variational inference:


from pyro.optim import Adam
from pyro.infer import SVI, Trace_ELBO

optimizer = Adam({"lr": 0.01})
svi = SVI(model, guide, optimizer, loss=Trace_ELBO())
svi.step(data)

RuntimeError                              Traceback (most recent call last)
<ipython-input-11-7a63806d0468> in <module>()
----> 1 svi.step(data)

/usr/local/lib/python2.7/dist-packages/pyro/infer/svi.pyc in step(self, *args, **kwargs)
     73         # get loss and compute gradients
     74         with poutine.trace(param_only=True) as param_capture:
---> 75             loss = self.loss_and_grads(self.model, self.guide, *args, **kwargs)
     76 
     77         params = set(site["value"].unconstrained()

/usr/local/lib/python2.7/dist-packages/pyro/infer/trace_elbo.pyc in loss_and_grads(self, model, guide, *args, **kwargs)
    127                     if not is_identically_zero(score_function_term):
    128                         if log_r is None:
--> 129                             log_r = _compute_log_r(model_trace, guide_trace)
    130                         site = log_r.sum_to(site["cond_indep_stack"])
    131                         surrogate_elbo_particle = surrogate_elbo_particle + (site * score_function_term).sum()

/usr/local/lib/python2.7/dist-packages/pyro/infer/trace_elbo.pyc in _compute_log_r(model_trace, guide_trace)
     22             if not model_site["is_observed"]:
     23                 log_r_term = log_r_term - guide_trace.nodes[name]["log_prob"]
---> 24             log_r.add((stacks[name], log_r_term.detach()))
     25     return log_r
     26 

/usr/local/lib/python2.7/dist-packages/pyro/infer/util.pyc in add(self, *items)
     95             assert all(f.dim < 0 and -len(value.shape) <= f.dim for f in frames)
     96             if frames in self:
---> 97                 self[frames] = self[frames] + value
     98             else:
     99                 self[frames] = value

RuntimeError: The size of tensor a (2) must match the size of tensor b (5) at non-singleton dimension 0

This code only works when there is a single document. Is the nested loop wrong? Or do you have other suggestions for implementing LDA?

Thanks!!

Hi Odie, that's a nice-looking model. Two observations:

  1. You can vectorize things a bit more, and then
  2. You should use constraints.simplex and random init for the q_i.

Here’s an attempt. First vectorize the generated data:

z = [torch.zeros(n, dtype=torch.long) for n in N]
data = [torch.zeros(n) for n in N]
for d in range(D):
  for n in range(N[d]):
    z[d][n] = dist.Categorical(theta[d, :]).sample()
    data[d][n] = dist.Categorical(phi[z[d][n], :]).sample()

Then vectorize the model and guide:

@pyro.poutine.broadcast
def model(data):
  with pyro.iarange("documents_iarange", D):
    theta = pyro.sample("theta", dist.Dirichlet(torch.zeros([D, K])+0.1))
  with pyro.iarange("topics_iarange", K):
    phi = pyro.sample("phi", dist.Dirichlet(torch.zeros([K, V])+0.05))
  
  for d in pyro.irange("documents", D):
    with pyro.iarange("words_%d"%d, N[d]):
      z = pyro.sample("z_%d"%d, dist.Categorical(theta[d]))
      pyro.sample("w_%d"%d, dist.Categorical(phi[z]), obs=data[d])

def guide(data):
  alpha_q = pyro.param("alpha_q", torch.ones([D, K]),
                       constraint=constraints.positive)
  theta_q = pyro.sample("theta", dist.Dirichlet(alpha_q))
  beta_q = pyro.param("beta_q", torch.ones([K, V]),
                      constraint=constraints.positive)
  phi_q = pyro.sample("phi", dist.Dirichlet(beta_q))

  for d in pyro.irange("documents_irange", D):
    with pyro.iarange("words_%d"%d, N[d]):
      q_i = pyro.param("q_%d"%d, torch.randn([N[d], K]).exp(),
                       constraint=constraints.simplex)
      pyro.sample("z_%d"%d, dist.Categorical(q_i))

Does that look like what you want?
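
For completeness, here is a minimal sketch of running SVI with this pair (the optimizer settings and step count are placeholders, not prescriptions):

from pyro.optim import Adam
from pyro.infer import SVI, Trace_ELBO

pyro.clear_param_store()
svi = SVI(model, guide, Adam({"lr": 0.01}), loss=Trace_ELBO())
for step in range(1000):
  loss = svi.step(data)  # data is the list of per-document word tensors above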

Thanks a lot!!! That's what I was looking for.
The code is cleaner and more readable than before, but it still had some bugs.
I changed the code in the model and guide, and added .independent(1) inside.
But I am not sure why pyro.iarange("documents_iarange", D) can't be used here; I thought it would be the same as using .independent(1) (see the note after the code below).


@pyro.poutine.broadcast
def model(data):
    
  theta = pyro.sample("theta",dist.Dirichlet(torch.ones([D, K])).independent(1))
  phi = pyro.sample("phi",dist.Dirichlet(torch.ones([K, V])).independent(1))
  
  for d in pyro.irange("documents", D):
    with pyro.iarange("words_%d"%d, N[d]):
      z = pyro.sample("z_%d"%d, dist.Categorical(theta[d]))
      pyro.sample("w_%d"%d, dist.Categorical(phi[z]), obs=data[d])

def guide(data):
  alpha_q = pyro.param("alpha_q", torch.ones([D, K]),
                       constraint=constraints.positive)
  theta_q = pyro.sample("theta", dist.Dirichlet(alpha_q).independent(1))
  beta_q = pyro.param("beta_q", torch.ones([K, V]),
                      constraint=constraints.positive)
  phi_q = pyro.sample("phi", dist.Dirichlet(beta_q).independent(1))

  for d in pyro.irange("documents", D):
    with pyro.iarange("words_%d"%d, N[d]):
      q_i = pyro.param("q_%d"%d, torch.randn([N[d], K]).exp(),
                       constraint=constraints.simplex)
      pyro.sample("z_%d"%d, dist.Categorical(q_i))

I put the code in a Colab notebook.

I have a question about this model. From the structure of the guide and the model, it seems that theta (topic proportions for the given documents) is kept as a global variable. Shouldn’t it be a local variable?

I believe theta[d] is the local variable for document d. The given model is equivalent to:

@pyro.poutine.broadcast
def model(data):
  phi = pyro.sample("phi",dist.Dirichlet(torch.ones([K, V])).independent(1))
  for d in pyro.irange("documents", D):
    theta_d = pyro.sample("theta_%d"%d, dist.Dirichlet(torch.ones([K])))
    with pyro.iarange("words_%d"%d, N[d]):
      z = pyro.sample("z_%d"%d, dist.Categorical(theta_d))
      pyro.sample("w_%d"%d, dist.Categorical(phi[z]), obs=data[d])

Thanks, fritzo, for the clarification.

I’m new to this so sorry if this is a simple question:

How would we actually get the phi and theta matrices from this?

You can see my Colab; you can use pyro.param('q_1') to get parameters from the guide.
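
For the global matrices specifically, one hedged sketch (using the alpha_q and beta_q parameter names from the guide above; the mean of Dirichlet(alpha) is alpha normalized along its last dimension):

# Read the variational parameters from the param store after training.
alpha_q = pyro.param("alpha_q")  # shape [D, K]
beta_q = pyro.param("beta_q")    # shape [K, V]

# Posterior mean estimates of the per-document topic proportions (theta)
# and the per-topic word distributions (phi).
theta_est = alpha_q / alpha_q.sum(-1, keepdim=True)  # [D, K]
phi_est = beta_q / beta_q.sum(-1, keepdim=True)      # [K, V]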

I am also new here and happy to find such a clear and simple LDA example.

While training with the above model and guide pair, I have a question about subsample_size: is there a way to train LDA with mini-batches? (One possible sketch appears after the training code below.)

Here is my training code:

from pyro.optim import Adam
from pyro.infer import SVI, TraceEnum_ELBO
import matplotlib.pyplot as plt

adam_params = {"lr": 0.01, "betas": (0.90, 0.999)}
optimizer = Adam(adam_params)

pyro.clear_param_store()

svi = SVI(model, guide, optimizer, loss=TraceEnum_ELBO(max_iarange_nesting=1))

losses = []

for _ in range(3000):
    loss = svi.step(data) # all the data here
    losses.append(loss)
    
plt.plot(list(range(3000)), losses)
plt.title('ELBO')
plt.xlabel('step')
plt.show()
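
Regarding mini-batches: one hedged sketch is to pass subsample_size to the documents irange in both the model and the guide. This assumes irange's subsample_size argument, which rescales the mini-batch's contribution to the ELBO by D / B, and that a same-named irange in the model reuses the guide's subsample indices:

B = 1  # documents per mini-batch (hypothetical size for this tiny corpus)

@pyro.poutine.broadcast
def model(data):
  theta = pyro.sample("theta", dist.Dirichlet(torch.ones([D, K])).independent(1))
  phi = pyro.sample("phi", dist.Dirichlet(torch.ones([K, V])).independent(1))
  # Only a random subset of B documents contributes each step.
  for d in pyro.irange("documents", D, subsample_size=B):
    with pyro.iarange("words_%d" % d, N[d]):
      z = pyro.sample("z_%d" % d, dist.Categorical(theta[d]))
      pyro.sample("w_%d" % d, dist.Categorical(phi[z]), obs=data[d])

def guide(data):
  alpha_q = pyro.param("alpha_q", torch.ones([D, K]),
                       constraint=constraints.positive)
  pyro.sample("theta", dist.Dirichlet(alpha_q).independent(1))
  beta_q = pyro.param("beta_q", torch.ones([K, V]),
                      constraint=constraints.positive)
  pyro.sample("phi", dist.Dirichlet(beta_q).independent(1))
  for d in pyro.irange("documents", D, subsample_size=B):
    with pyro.iarange("words_%d" % d, N[d]):
      q_i = pyro.param("q_%d" % d, torch.randn([N[d], K]).exp(),
                       constraint=constraints.simplex)
      pyro.sample("z_%d" % d, dist.Categorical(q_i))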