Hello, Pyro and PyTorch beginner here. When I ran the code from the Pyro SVI tutorial:

```
import time

import numpy as np
import torch
from torch.autograd import Variable

import pyro
import pyro.distributions as dist
from pyro.infer import SVI
from pyro.optim import Adam

# clear the param store in case we're in a REPL
pyro.clear_param_store()

# create some data with 6 observed heads and 4 observed tails
data = []
for _ in range(6):
    data.append(Variable(torch.ones(1)))
for _ in range(4):
    data.append(Variable(torch.zeros(1)))

def model(data):
    # define the hyperparameters that control the beta prior
    alpha0 = Variable(torch.Tensor([10.0]))
    beta0 = Variable(torch.Tensor([10.0]))
    # sample f from the beta prior
    f = pyro.sample("latent_fairness", dist.beta, alpha0, beta0)
    # loop over the observed data
    for i in range(len(data)):
        # observe datapoint i using the bernoulli likelihood
        pyro.observe("obs_{}".format(i), dist.bernoulli, data[i], f)

def guide(data):
    # define the initial values of the two variational parameters
    # we initialize the guide near the model prior (except a bit sharper)
    log_alpha_q_0 = Variable(torch.Tensor([np.log(15.0)]), requires_grad=True)
    log_beta_q_0 = Variable(torch.Tensor([np.log(15.0)]), requires_grad=True)
    # register the two variational parameters with Pyro
    log_alpha_q = pyro.param("log_alpha_q", log_alpha_q_0)
    log_beta_q = pyro.param("log_beta_q", log_beta_q_0)
    alpha_q, beta_q = torch.exp(log_alpha_q), torch.exp(log_beta_q)
    # sample latent_fairness from Beta(alpha_q, beta_q)
    pyro.sample("latent_fairness", dist.beta, alpha_q, beta_q)

# set up the optimizer
adam_params = {"lr": 0.0005, "betas": (0.90, 0.999)}
optimizer = Adam(adam_params)

# set up the inference algorithm
svi = SVI(model, guide, optimizer, loss="ELBO", num_particles=7)

# do gradient steps
n_steps = 4000
start = time.time()
for step in range(n_steps):
    svi.step(data)
    if step % 100 == 0:
        print('.', end='')
end = time.time()
print(end - start)  # elapsed wall-clock time in seconds

# grab the learned variational parameters
alpha_q = torch.exp(pyro.param("log_alpha_q")).data.numpy()[0]
beta_q = torch.exp(pyro.param("log_beta_q")).data.numpy()[0]

# here we use some facts about the beta distribution
# compute the inferred mean of the coin's fairness
inferred_mean = alpha_q / (alpha_q + beta_q)
# compute the inferred standard deviation
factor = beta_q / (alpha_q * (1.0 + alpha_q + beta_q))
inferred_std = inferred_mean * np.sqrt(factor)
print("\nbased on the data and our prior belief, the fairness " +
      "of the coin is %.3f +- %.3f" % (inferred_mean, inferred_std))
```
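As a sanity check on the SVI result (this part is my addition, not from the tutorial): the Beta prior is conjugate to the Bernoulli likelihood, so the exact posterior is available in closed form, and the script's output can be compared against it:

```
import numpy as np

# Exact Beta-Bernoulli conjugate update: a Beta(10, 10) prior updated with
# 6 heads and 4 tails gives a Beta(10 + 6, 10 + 4) = Beta(16, 14) posterior.
alpha_post, beta_post = 10.0 + 6.0, 10.0 + 4.0
exact_mean = alpha_post / (alpha_post + beta_post)  # 16/30 ~= 0.533
exact_var = (alpha_post * beta_post /
             ((alpha_post + beta_post) ** 2 * (alpha_post + beta_post + 1.0)))
print("exact posterior: %.3f +- %.3f" % (exact_mean, exact_var ** 0.5))
```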

I compared it with the equivalent model in Edward:

```
import edward as ed
from edward import models as mod
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

N_TRIAL = 10
P = 0.6
data = np.random.binomial(1, P, N_TRIAL)

graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("input"):
        X = tf.constant(data, dtype=tf.int32)
    with tf.name_scope("model"):
        theta = mod.Beta(1.0, 1.0)
        model = mod.Bernoulli(probs=tf.ones(N_TRIAL) * theta)
    with tf.name_scope("posterior"):
        alpha = tf.Variable(tf.ones(theta.shape))
        beta = tf.Variable(tf.ones(theta.shape))
        qtheta = mod.Beta(alpha, beta)

with tf.Session(graph=graph) as sess:
    n_iter = 3000
    loss = np.zeros(n_iter)
    inference = ed.KLqp({theta: qtheta}, data={model: X})
    optimizer = tf.train.AdamOptimizer(0.0005, 0.90, 0.999)
    inference.initialize(
        n_iter=n_iter, optimizer=optimizer, n_samples=1, n_print=n_iter // 20)
    sess.run(tf.global_variables_initializer())
    for i in range(inference.n_iter):
        info_dict = inference.update()
        inference.print_progress(info_dict)
        loss[i] = info_dict["loss"]
        if i % inference.n_print == 0:
            print("=")
    inference.finalize()
    alpha_result = alpha.eval()
    beta_result = beta.eval()
    # draw many samples so the mean below is a Monte Carlo estimate of the
    # posterior mean (a bare qtheta.eval() would be just one draw)
    theta_sample = qtheta.sample(1000).eval()
    # inference.run(n_iter=10000, optimizer=optimizer)

plt.plot(range(n_iter), loss)
plt.show()
print(np.mean(theta_sample))
```
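The same kind of check applies here (again my addition): since `data` is generated randomly, the exact posterior under the uniform Beta(1, 1) prior depends on the observed counts:

```
# Exact posterior under a Beta(1, 1) prior: Beta(1 + heads, 1 + tails),
# so the exact posterior mean is (1 + heads) / (2 + N_TRIAL).
heads = data.sum()
print("exact posterior mean: %.3f" % ((1.0 + heads) / (2.0 + N_TRIAL)))
```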

The execution time for the Pyro code is about 10 minutes, but the Edward code takes about 13 seconds. Why is the Pyro code so much slower than the comparable Edward code? I ran both on a non-GPU laptop with an 8-core CPU. (I realize the two runs are not doing exactly the same amount of work: Pyro takes 4000 steps with num_particles=7 while Edward takes 3000 steps with n_samples=1, but that is roughly a 9x difference in gradient samples, which doesn't seem to explain a ~45x gap in wall-clock time.)
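In case it helps with diagnosing this, here is a small timing sketch (my addition) that can run right after the Pyro training loop above; it only uses the `svi` and `data` objects already defined, and just measures the per-step cost, since the total runtime should be roughly `n_steps` times that:

```
import time

# Micro-benchmark: time a handful of individual SVI steps.
# With n_steps = 4000, total runtime ~ 4000 * (average per-step time).
per_step = []
for _ in range(10):
    t0 = time.time()
    svi.step(data)
    per_step.append(time.time() - t0)
avg = sum(per_step) / len(per_step)
print("avg per-step time: %.4f s" % avg)
print("projected total for 4000 steps: %.1f s" % (4000 * avg))
```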