I have a model which is trying to simulate a Generalized Linear Model where the target distribution is a Gamma distribution with censored data. I have the model defined as follows:

```
from scipy.stats import gamma as scipygamma
def model_gamma_cen(X, y, censored_label):
linear_combination = pyro.sample(f"beta_0", dist.Normal(0.0, 1.0))
for i in range(0, X.shape[1]):
beta_i = pyro.sample(f"beta_{i}", dist.Normal(0.0, 1.0))
linear_combination = linear_combination + (beta_i * X[:, i])
mean = torch.exp(linear_combination)
rate = pyro.sample("rate", dist.HalfCauchy(scale=10.0))
shape = mean * rate
with pyro.plate("data", y.shape[0]):
# non-censored data
outcome_dist = dist.Gamma(shape, rate)
with pyro.poutine.mask(mask = (censored_label == 0.0)):
observation = pyro.sample("obs", outcome_dist, obs=y)
with pyro.poutine.mask(mask = (censored_label == 1.0)):
truncation_prob = torch.tensor(1 - scipygamma(shape.detach(), rate.detach()).cdf(y).astype(np.float32))
censored_observation = pyro.sample("censorship",
dist.Bernoulli(truncation_prob),
obs=torch.tensor(1.0))
```

And I have my guide defined as follows:

```
def guide_gamma_cen(X, y, censored_label):
mu_intercept = pyro.param("mu_intercept", torch.tensor(0.0))
sigma_intercept = pyro.param("sigma_intercept", torch.tensor(1.0))
linear_combination = pyro.sample(f"beta_0",
dist.Normal(mu_intercept, sigma_intercept))
for i in range(0, X.shape[1]):
mu_coef = pyro.param(f"mu_{i}", torch.tensor(0.0))
sigma_coef = pyro.param(f"sigma_{i}", torch.tensor(1.0))
beta_i = pyro.sample(f"beta_{i}", dist.Normal(mu_coef, sigma_coef))
scale_rate = pyro.param("scale_rate", torch.tensor(10.0))
rate = pyro.sample("rate", dist.HalfCauchy(scale=scale_rate))
```

When I run these with an SVI that uses Trace_ELBO, and a ClippedAdam optimizer, after a few iterations, the vale for scale_rate becomes nan.

However, if instead of my own guide I use an autoguide (e.g. AutoDiagonalNormal), things work out. Why is my guide failing?