Exception has occurred: RuntimeError
one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.LongTensor [35, 1]] is at version 2; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
loss = csis.step()
I keep getting this error. When I enabled torch.autograd.set_detect_anomaly(True) I got this trace:
char_class_samples, _ = sample_from_transformer(self.guide_format, X, "char_format", len(X))
all_prob = model.forward(input, trg)
output = self.transformer.forward(src.unsqueeze(1), trg.unsqueeze(1), src_mask, trg_mask)
embedded_src = self.enc_embed(src)
result = self.forward(*input, **kwargs)
self.norm_type, self.scale_grad_by_freq, self.sparse)
return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
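For reference, this is roughly how I turned anomaly detection on (a minimal sketch; csis is the pyro.infer.CSIS object from my setup, everything else omitted):

import torch

# Record forward-pass stack traces so autograd can point at the op
# whose backward fails.
torch.autograd.set_detect_anomaly(True)

loss = csis.step()  # raises the RuntimeError above, now with the longer trace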
Here’s my Transformer handler
import torch
from const import *
from model.Transformer import Transformer
class TransformerHandler():
    def __init__(self, encoder_vocab: list, decoder_vocab: list, decoder_sos_idx: int):
        super(TransformerHandler, self).__init__()
        self.encoder_dim = len(encoder_vocab)
        self.decoder_dim = len(decoder_vocab)
        self.decoder_sos_idx = decoder_sos_idx
        self.decoder_pad_idx = decoder_vocab.index(PAD)
        self.encoder_pad_idx = encoder_vocab.index(PAD)
        self.decoder_eos_idx = decoder_vocab.index(EOS)
        self.transformer = Transformer(self.encoder_dim, self.decoder_dim,
                                       self.encoder_pad_idx, self.decoder_pad_idx)

    def forward(self, src: torch.Tensor, trg: torch.Tensor):
        src_mask = self.get_pad_mask(src, self.encoder_pad_idx)
        trg_mask = self.get_pad_mask(trg, self.decoder_pad_idx)
        # src/trg are 1-D index tensors; unsqueeze adds the batch dimension.
        output = self.transformer.forward(src.unsqueeze(1), trg.unsqueeze(1), src_mask, trg_mask)
        return output

    def get_pad_mask(self, seq, pad_idx):
        # Boolean mask, True at non-padding positions.
        return (seq != pad_idx)
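For context, I build and call the handler roughly like this (a simplified, hypothetical sketch: the toy vocab lists and index values stand in for my real data, and PAD/SOS/EOS/DEVICE are the constants from my const module):

import torch
from const import PAD, SOS, EOS, DEVICE

encoder_vocab = [PAD, SOS, EOS] + list("abc")        # toy input vocab
decoder_vocab = [PAD, SOS, EOS, "digit", "letter"]   # toy output vocab

handler = TransformerHandler(encoder_vocab, decoder_vocab,
                             decoder_sos_idx=decoder_vocab.index(SOS))

# Both inputs are 1-D LongTensors of vocab indices; forward unsqueezes them
# to [seq_len, 1], which matches the [35, 1] LongTensor named in the error.
src = torch.tensor([1, 3, 4, 5, 2], device=DEVICE)
trg = torch.tensor([1, 3, 4, 2], device=DEVICE)
probs = handler.forward(src, trg)  # [len(trg), 1, decoder_dim] probabilities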
And here’s my transformer
import math
import torch
from torch import nn
from const import *
class Transformer(nn.Module):
    def __init__(self, input_dim: int, output_dim: int, input_pad_idx: int,
                 output_pad_idx: int, d_model: int = 512, num_head: int = 8,
                 num_e_layer: int = 6, num_d_layer: int = 6, ff_dim: int = 2048,
                 drop_out: float = 0.1):
        '''
        Args:
            input_dim: Size of the input vocab
            output_dim: Size of the output vocab
            num_head: Number of heads in the multi-headed attention models
            num_e_layer: Number of sub-encoder layers
            num_d_layer: Number of sub-decoder layers
            ff_dim: Dimension of the feedforward network in the multi-head models
            d_model: The dimension to embed input and output features into
            drop_out: The dropout percentage
        '''
        super(Transformer, self).__init__()
        self.transformer = nn.Transformer(d_model, num_head, num_e_layer,
                                          num_d_layer, ff_dim, drop_out)
        self.dec_embed = nn.Embedding(output_dim, d_model, padding_idx=output_pad_idx)
        self.enc_embed = nn.Embedding(input_dim, d_model, padding_idx=input_pad_idx)
        self.fc1 = nn.Linear(d_model, output_dim)
        self.softmax = nn.Softmax(dim=2)
        self.to(DEVICE)

    def forward(self, src: torch.Tensor, trg: torch.Tensor,
                src_mask: torch.Tensor = None, trg_mask: torch.Tensor = None):
        embedded_src = self.enc_embed(src)
        embedded_trg = self.dec_embed(trg)
        output = self.transformer.forward(embedded_src, embedded_trg, src_mask, trg_mask)
        return self.softmax(self.fc1(output))
When I remove the nn.Embedding layers it works. Why is this a problem for Pyro?
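One thing I'm considering (an untested sketch on my part, prompted by the trace ending in torch.embedding): cloning the LongTensor indices before they reach the embeddings, so the tensors autograd saves for the embedding backward can never have their version bumped by a later in-place write:

def forward(self, src: torch.Tensor, trg: torch.Tensor,
            src_mask: torch.Tensor = None, trg_mask: torch.Tensor = None):
    # Embedding backward saves its input indices; cloning means later
    # in-place writes to src/trg elsewhere in the program can't
    # invalidate the saved versions.
    embedded_src = self.enc_embed(src.clone())
    embedded_trg = self.dec_embed(trg.clone())
    output = self.transformer.forward(embedded_src, embedded_trg, src_mask, trg_mask)
    return self.softmax(self.fc1(output))

Is cloning the right direction here, or is something about how Pyro replays the guide fundamentally at odds with nn.Embedding?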