Hello,
I implemented an LDA model, but it has a problem.
Generate the dataset:
# Hyperparameters of the synthetic corpus.
D = 2          # number of documents
N = [15, 8]    # words per document
K = 5          # number of topics
V = 15         # vocabulary size

# Per-word topic assignments and observed word ids, one list per document.
z = [[0] * n for n in N]
data = [[0] * n for n in N]

# Sample the global latents: document-topic weights (D, K) and
# topic-word weights (K, V), from sparse Dirichlet priors.
theta = dist.Dirichlet(torch.zeros([D, K]) + 0.1).sample()
phi = dist.Dirichlet(torch.zeros([K, V]) + 0.05).sample()

# Generative process: for each word slot, draw a topic from the document's
# topic distribution, then draw the word from that topic's word distribution.
# (The paste had lost the loop indentation; restored here.)
for d in range(D):
    for n in range(N[d]):
        z[d][n] = dist.Categorical(theta[d, :]).sample()
        data[d][n] = dist.Categorical(phi[z[d][n], :]).sample()
Define the model and the guide:
def model(data):
    """LDA generative model.

    Samples global Dirichlet latents (document-topic weights ``theta`` of
    shape (D, K) and topic-word weights ``phi`` of shape (K, V)), then for
    every word slot draws a topic assignment and conditions the word draw
    on the observed word id.

    data: list of D lists; data[d][n] is the observed word id for word n
    of document d.  NOTE(review): ``obs`` should be a tensor — confirm the
    caller passes tensors rather than plain ints.
    """
    theta = pyro.sample("theta", dist.Dirichlet(torch.zeros([D, K]) + 0.1))
    phi = pyro.sample("phi", dist.Dirichlet(torch.zeros([K, V]) + 0.05))
    # (The paste had lost the loop indentation; restored here.)
    for d in pyro.irange("D", D):
        for n in pyro.irange("N_%s" % (d), N[d]):
            # Topic assignment for word n of document d, then the word itself.
            z = pyro.sample("z_%s_%s" % (d, n), dist.Categorical(theta[d, :]))
            pyro.sample("w_%s_%s" % (d, n), dist.Categorical(phi[z, :]),
                        obs=data[d][n])
def guide(data):
    """Mean-field guide for the LDA model.

    Variational Dirichlets over the global latents ``theta`` and ``phi``,
    and an independent Categorical over each per-word topic assignment.
    Sample-site names mirror the model exactly.
    """
    # Dirichlet concentration params must stay positive under optimization.
    alpha_q = pyro.param("alpha_q", torch.zeros([D, K]) + 1,
                         constraint=constraints.positive)
    theta_q = pyro.sample("theta", dist.Dirichlet(alpha_q))
    beta_q = pyro.param("beta_q", torch.zeros([K, V]) + 1,
                        constraint=constraints.positive)
    phi_q = pyro.sample("phi", dist.Dirichlet(beta_q))
    for d in pyro.irange("D", D):
        for n in pyro.irange("N_%s" % (d), N[d]):
            # BUG FIX: the original used torch.ones(1, K) with
            # constraints.unit_interval.  The (1, K) shape gives the
            # Categorical a batch shape of (1,), so the guide's "z" sample
            # has shape (1,) while the model's "z" is a scalar — that shape
            # mismatch is what surfaced as the RuntimeError inside
            # Trace_ELBO.  A 1-D parameter of shape (K,) matches the model.
            # Also, unit_interval only keeps entries in [0, 1]; simplex
            # keeps q_i a proper probability vector (non-negative, sums
            # to 1) after gradient updates.
            q_i = pyro.param("q_%s_%s" % (d, n), torch.ones(K) / K,
                             constraint=constraints.simplex)
            pyro.sample("z_%s_%s" % (d, n), dist.Categorical(q_i))
Run variational inference:
# Stochastic variational inference with the standard (non-enumerating) ELBO.
# NOTE(review): `optimizer` must be defined earlier (e.g. pyro.optim.Adam) —
# its definition is not visible in this snippet.
svi = SVI(model, guide, optimizer, loss=Trace_ELBO())
# Single gradient step; in practice this would run inside a training loop.
svi.step(data)
RuntimeErrorTraceback (most recent call last)
<ipython-input-11-7a63806d0468> in <module>()
----> 1 svi.step(data)
/usr/local/lib/python2.7/dist-packages/pyro/infer/svi.pyc in step(self, *args, **kwargs)
73 # get loss and compute gradients
74 with poutine.trace(param_only=True) as param_capture:
---> 75 loss = self.loss_and_grads(self.model, self.guide, *args, **kwargs)
76
77 params = set(site["value"].unconstrained()
/usr/local/lib/python2.7/dist-packages/pyro/infer/trace_elbo.pyc in loss_and_grads(self, model, guide, *args, **kwargs)
127 if not is_identically_zero(score_function_term):
128 if log_r is None:
--> 129 log_r = _compute_log_r(model_trace, guide_trace)
130 site = log_r.sum_to(site["cond_indep_stack"])
131 surrogate_elbo_particle = surrogate_elbo_particle + (site * score_function_term).sum()
/usr/local/lib/python2.7/dist-packages/pyro/infer/trace_elbo.pyc in _compute_log_r(model_trace, guide_trace)
22 if not model_site["is_observed"]:
23 log_r_term = log_r_term - guide_trace.nodes[name]["log_prob"]
---> 24 log_r.add((stacks[name], log_r_term.detach()))
25 return log_r
26
/usr/local/lib/python2.7/dist-packages/pyro/infer/util.pyc in add(self, *items)
95 assert all(f.dim < 0 and -len(value.shape) <= f.dim for f in frames)
96 if frames in self:
---> 97 self[frames] = self[frames] + value
98 else:
99 self[frames] = value
RuntimeError: The size of tensor a (2) must match the size of tensor b (5) at non-singleton dimension 0
This code only works when there is a single document. Is the nested loop wrong? Or do you have other suggestions for implementing LDA?
Thanks!