```
def model_0(sequences, lengths, args, batch_size=None, include_prior=True):
    """Hidden Markov model that iterates over sequences and time steps in Python.

    Two global parameters are sampled first: a transition matrix ``probs_x``
    and an emission matrix ``probs_y``. Then, for every sequence yielded by
    the "sequences" plate, the model walks the time steps, sampling a
    discrete hidden state ``x`` and conditioning a Bernoulli over the tones
    on the observed frame ``sequence[t]``.

    Args:
        sequences: Tensor whose shape unpacks as
            ``(num_sequences, max_length, data_dim)``.
        lengths: Indexable by sequence index; only the first ``lengths[i]``
            time steps of ``sequences[i]`` are used.
        args: Object exposing ``hidden_dim``, the number of hidden states.
        batch_size: Forwarded unchanged to the "sequences" plate (the
            subsample size in Pyro's ``plate`` API).
        include_prior: Mask passed to ``poutine.mask`` around the
            ``probs_x`` and ``probs_y`` sample sites.
    """
    # Refuse to run while torch reports an active tracing state.
    assert not torch._C._get_tracing_state()
    num_sequences, max_length, data_dim = sequences.shape
    with poutine.mask(mask=include_prior):
        # Our prior on transition probabilities will be:
        # stay in the same state with 90% probability; uniformly jump to another
        # state with 10% probability.
        probs_x = pyro.sample(
            "probs_x",
            dist.Dirichlet(0.9 * torch.eye(args.hidden_dim) + 0.1).to_event(1),
        )
        # We put a weak prior on the conditional probability of a tone sounding.
        # We know that on average about 4 of 88 tones are active, so we'll set a
        # rough weak prior of 10% of the notes being active at any one time.
        #
        # NOTE: .to_event(2) does not make the entries dependent. Per Pyro's
        # tensor-shape conventions it only declares both dims of the
        # [hidden_dim, data_dim] parameter as event dims, so the whole matrix
        # is a single sample site that needs no enclosing plate; each entry
        # still has its own independent Beta prior.
        probs_y = pyro.sample(
            "probs_y",
            dist.Beta(0.1, 0.9).expand([args.hidden_dim, data_dim]).to_event(2),
        )
    # In this first model we'll sequentially iterate over sequences in a
    # minibatch; this will make it easy to reason about tensor shapes.
    tones_plate = pyro.plate("tones", data_dim, dim=-1)
    for i in pyro.plate("sequences", len(sequences), batch_size):
        length = lengths[i]
        sequence = sequences[i, :length]
        # Every sequence starts from hidden state 0, so the first transition
        # is drawn from probs_x[0].
        x = 0
        for t in pyro.markov(range(length)):
            # On the next line, we'll overwrite the value of x with an updated
            # value. If we wanted to record all x values, we could instead
            # write x[t] = pyro.sample(...x[t-1]...).
            x = pyro.sample(
                "x_{}_{}".format(i, t),
                dist.Categorical(probs_x[x]),
                infer={"enumerate": "parallel"},
            )
            with tones_plate:
                # NOTE(review): under parallel enumeration x presumably carries
                # a trailing singleton dim; squeeze(-1) drops it so that
                # probs_y[...] ends in data_dim and lines up with the tones
                # plate at dim=-1 -- confirm against the inference setup.
                pyro.sample(
                    "y_{}_{}".format(i, t),
                    dist.Bernoulli(probs_y[x.squeeze(-1)]),
                    obs=sequence[t],
                )
```

This is the model from the Pyro HMM tutorial. Why does it use `to_event(2)` in

```
# Excerpt from model_0 above: a Beta(0.1, 0.9) prior expanded to shape
# [args.hidden_dim, data_dim], with both of those dims marked as event dims.
probs_y = pyro.sample(
"probs_y",
dist.Beta(0.1, 0.9).expand([args.hidden_dim, data_dim]).to_event(2),
)
```

Aren’t the emissions independent?