I am getting the following error: "RuntimeError: CUDA error: device-side assert triggered". The images and labels are moved onto the GPU, but there are about 1.7 million parameters to be learnt in the Bayesian inference. Is the error caused by the GPU not having enough memory to store this many parameters?
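For context, here is a simplified sketch of what my guide does. The param name `pcn1_param`, the simplex constraint, and the Categorical sampling are taken from the trace below; the uniform initialisation and the standalone (non-method) guide are just for illustration:

```python
import torch
import pyro
import pyro.distributions as dist
from torch.distributions import constraints

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# One simplex-constrained probability table per layer:
# (number_of_weights, 3) categorical probabilities for the ternary weights.
pcn1_init = torch.full((800, 3), 1.0 / 3.0, device=device)

def guide(images, labels):
    # Learnable categorical probabilities for the first conv layer's weights.
    pcn1_param = pyro.param("pcn1_param", pcn1_init, constraint=constraints.simplex)
    cn1_dist = dist.Categorical(pcn1_param)
    print(cn1_dist.sample())  # this is the line where the assert is triggered
    # ... the other layers (pcn2, pfp1, pfp2) are set up the same way ...
```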
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing…
Done!
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:70: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:71: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:72: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:73: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
RuntimeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/pyro/poutine/trace_messenger.py in __call__(self, *args, **kwargs)
    146             try:
---> 147                 ret = self.fn(*args, **kwargs)
    148             except (ValueError, RuntimeError):
in guide(self, images, labels)
     74         cn1_dist=dist.Categorical(pcn1_param)
---> 75         print(cn1_dist.sample())
     76         cn2_dist=dist.Categorical(pcn2_param)
/usr/local/lib/python3.6/dist-packages/torch/tensor.py in __repr__(self)
     65         if sys.version_info > (3,):
---> 66             return torch._tensor_str._str(self)
     67         else:
/usr/local/lib/python3.6/dist-packages/torch/_tensor_str.py in _str(self)
    276         suffixes.append('dtype=' + str(self.dtype))
---> 277     tensor_str = _tensor_str(self, indent)
    278
/usr/local/lib/python3.6/dist-packages/torch/_tensor_str.py in _tensor_str(self, indent)
    194         self = self.float()
---> 195     formatter = _Formatter(get_summarized_data(self) if summarize else self)
    196     return _tensor_str_with_formatter(self, indent, formatter, summarize)
/usr/local/lib/python3.6/dist-packages/torch/_tensor_str.py in __init__(self, tensor)
     79             for value in tensor_view:
---> 80                 value_str = '{}'.format(value)
     81                 self.max_width = max(self.max_width, len(value_str))
/usr/local/lib/python3.6/dist-packages/torch/tensor.py in __format__(self, format_spec)
    377         if self.dim() == 0:
---> 378             return self.item().__format__(format_spec)
    379         return object.__format__(self, format_spec)
RuntimeError: CUDA error: device-side assert triggered
During handling of the above exception, another exception occurred:
RuntimeError Traceback (most recent call last)
in ()
1 instance=Ternary_bayesian_neural_network(prior_cn1,prior_cn2,prior_fp1,prior_fp2,pcn1,pcn2,pfp1,pfp2)
----> 2 instance.do_inference()
in do_inference(self)
115 images=images.to(device)
116 labels=labels.to(device)
---> 117             elbo=svi.step(images,labels)
    118             if i%50==0:
    119                 logging.info("ELBO Loss{}".format(elbo))
/usr/local/lib/python3.6/dist-packages/pyro/infer/svi.py in step(self, *args, **kwargs)
     97         # get loss and compute gradients
     98         with poutine.trace(param_only=True) as param_capture:
---> 99             loss = self.loss_and_grads(self.model, self.guide, *args, **kwargs)
    100
    101         params = set(site["value"].unconstrained()
/usr/local/lib/python3.6/dist-packages/pyro/infer/trace_elbo.py in loss_and_grads(self, model, guide, *args, **kwargs)
    123         loss = 0.0
    124         # grab a trace from the generator
---> 125         for model_trace, guide_trace in self._get_traces(model, guide, *args, **kwargs):
    126             loss_particle, surrogate_loss_particle = self._differentiable_loss_particle(model_trace, guide_trace)
    127             loss += loss_particle / self.num_particles
/usr/local/lib/python3.6/dist-packages/pyro/infer/elbo.py in _get_traces(self, model, guide, *args, **kwargs)
    162         else:
    163             for i in range(self.num_particles):
---> 164                 yield self._get_trace(model, guide, *args, **kwargs)
/usr/local/lib/python3.6/dist-packages/pyro/infer/trace_elbo.py in _get_trace(self, model, guide, *args, **kwargs)
     50         """
     51         model_trace, guide_trace = get_importance_trace(
---> 52             "flat", self.max_plate_nesting, model, guide, *args, **kwargs)
     53         if is_validation_enabled():
     54             check_if_enumerated(guide_trace)
/usr/local/lib/python3.6/dist-packages/pyro/infer/enum.py in get_importance_trace(graph_type, max_plate_nesting, model, guide, *args, **kwargs)
     40     against it.
     41     """
---> 42     guide_trace = poutine.trace(guide, graph_type=graph_type).get_trace(*args, **kwargs)
     43     model_trace = poutine.trace(poutine.replay(model, trace=guide_trace),
     44                                 graph_type=graph_type).get_trace(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/pyro/poutine/trace_messenger.py in get_trace(self, *args, **kwargs)
    167         Calls this poutine and returns its trace instead of the function's return value.
    168         """
---> 169         self(*args, **kwargs)
    170         return self.msngr.get_trace()
/usr/local/lib/python3.6/dist-packages/pyro/poutine/trace_messenger.py in __call__(self, *args, **kwargs)
    151                 six.reraise(exc_type,
    152                             exc_type(u"{}\n{}".format(exc_value, shapes)),
---> 153                             traceback)
    154             self.msngr.trace.add_node("_RETURN", name="_RETURN", type="return", value=ret)
    155             return ret
/usr/local/lib/python3.6/dist-packages/six.py in reraise(tp, value, tb)
    690                 value = tp()
    691             if value.__traceback__ is not tb:
---> 692                 raise value.with_traceback(tb)
    693             raise value
    694         finally:
/usr/local/lib/python3.6/dist-packages/pyro/poutine/trace_messenger.py in __call__(self, *args, **kwargs)
    145                                       args=args, kwargs=kwargs)
    146             try:
---> 147                 ret = self.fn(*args, **kwargs)
    148             except (ValueError, RuntimeError):
    149                 exc_type, exc_value, traceback = sys.exc_info()
in guide(self, images, labels)
     73         pfp2_param=pyro.param("fp2_param",torch.tensor(self.pfp2),constraint=constraints.simplex)
     74         cn1_dist=dist.Categorical(pcn1_param)
---> 75         print(cn1_dist.sample())
     76         cn2_dist=dist.Categorical(pcn2_param)
     77         fp1_dist=dist.Categorical(pfp1_param)
/usr/local/lib/python3.6/dist-packages/torch/tensor.py in __repr__(self)
     64         # characters to replace unicode characters with.
     65         if sys.version_info > (3,):
---> 66             return torch._tensor_str._str(self)
     67         else:
     68             if hasattr(sys.stdout, 'encoding'):
/usr/local/lib/python3.6/dist-packages/torch/_tensor_str.py in _str(self)
    275     if not has_default_dtype:
    276         suffixes.append('dtype=' + str(self.dtype))
---> 277     tensor_str = _tensor_str(self, indent)
    278
    279     if self.layout != torch.strided:
/usr/local/lib/python3.6/dist-packages/torch/_tensor_str.py in _tensor_str(self, indent)
    193     if self.dtype is torch.float16:
    194         self = self.float()
---> 195     formatter = _Formatter(get_summarized_data(self) if summarize else self)
    196     return _tensor_str_with_formatter(self, indent, formatter, summarize)
    197
/usr/local/lib/python3.6/dist-packages/torch/_tensor_str.py in __init__(self, tensor)
     78         if not self.floating_dtype:
     79             for value in tensor_view:
---> 80                 value_str = '{}'.format(value)
     81                 self.max_width = max(self.max_width, len(value_str))
     82
/usr/local/lib/python3.6/dist-packages/torch/tensor.py in __format__(self, format_spec)
    376     def __format__(self, format_spec):
    377         if self.dim() == 0:
---> 378             return self.item().__format__(format_spec)
    379         return object.__format__(self, format_spec)
    380
RuntimeError: CUDA error: device-side assert triggered
Trace Shapes:
Param Sites:
pcn1_param 800 3
pcn2_param 51200 3
pfp1_param 524288 3
fp2_param 5120 3
Sample Sites:
The numbers on the left (800, 51200, 524288, 5120) are the number of weights in each layer, and 3 is the number of parameters (of a multinomial distribution) per weight, which is where the roughly 1.7 million figure comes from (see the quick check below). Or have I made some other mistake?
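Here is that check, plain arithmetic from the shapes shown in the trace above (nothing else is assumed):

```python
# Number of weights per layer, taken from the "Trace Shapes" output above.
weights_per_layer = [800, 51200, 524288, 5120]
categories = 3  # each ternary weight gets 3 categorical (multinomial) probabilities

total_params = sum(weights_per_layer) * categories
print(total_params)  # 1744224 -> about 1.7 million learnable parameters
```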