I’m working on porting a PyTorch model from the CPU (where it works) to the GPU (where, so far, it doesn’t). The error message (clipped to the important bits) is as follows:
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-12-a7bb230c924c> in <module>
      1 model = FeedforwardTabularModel()
      2 model.cuda()
----> 3 model.fit(X_train_sample.values, y_train_sample.values)

<ipython-input-11-40b1edae7417> in fit(self, X, y)
    100         for epoch in range(self.n_epochs):
    101             for i, (X_batch, y_batch) in enumerate(batches):
--> 102                 y_pred = model(X_batch).squeeze()
    103                 # scheduler.batch_step()  # Disabled due to a bug, see above.
    104                 loss = self.loss_fn(y_pred, y_batch)

[...]

/opt/conda/lib/python3.6/site-packages/torch/nn/functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   1482         # remove once script supports set_grad_enabled
   1483         _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1484     return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
   1485
   1486

RuntimeError: Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_index_select
Here is the full model definition:
import torch
from torch import nn
import torch.utils.data
# ^ https://discuss.pytorch.org/t/attributeerror-module-torch-utils-has-no-attribute-data/1666


class FeedforwardTabularModel(nn.Module):
    def __init__(self):
        super().__init__()

        self.batch_size = 512
        self.base_lr, self.max_lr = 0.001, 0.003
        self.n_epochs = 5
        self.cat_vars_embedding_vector_lengths = [
            (1115, 80), (7, 4), (3, 3), (12, 6), (31, 10), (2, 2), (25, 10),
            (26, 10), (4, 3), (3, 3), (4, 3), (23, 9), (8, 4), (12, 6),
            (52, 15), (22, 9), (6, 4), (6, 4), (3, 3), (3, 3), (8, 4), (8, 4)
        ]
        self.loss_fn = torch.nn.MSELoss()
        self.score_fn = torch.nn.MSELoss()

        # Layer 1: embeddings.
        self.embeddings = []
        for (in_size, out_size) in self.cat_vars_embedding_vector_lengths:
            emb = nn.Embedding(in_size, out_size)
            self.embeddings.append(emb)

        # Layer 1: dropout.
        self.embedding_dropout = nn.Dropout(0.04)

        # Layer 1: batch normalization (of the continuous variables).
        self.cont_batch_norm = nn.BatchNorm1d(16, eps=1e-05, momentum=0.1)

        # Layers 2 through 9: sequential feedforward model.
        self.seq_model = nn.Sequential(*[
            nn.Linear(in_features=215, out_features=1000, bias=True),
            nn.ReLU(),
            nn.BatchNorm1d(1000, eps=1e-05, momentum=0.1),
            nn.Dropout(p=0.001),
            nn.Linear(in_features=1000, out_features=500, bias=True),
            nn.ReLU(),
            nn.BatchNorm1d(500, eps=1e-05, momentum=0.1),
            nn.Dropout(p=0.01),
            nn.Linear(in_features=500, out_features=1, bias=True)
        ])

    def forward(self, x):
        # Layer 1: embeddings.
        inp_offset = 0
        embedding_subvectors = []
        for emb in self.embeddings:
            index = torch.tensor(inp_offset, dtype=torch.int64).cuda()
            inp = torch.index_select(x, dim=1, index=index).long().cuda()
            out = emb(inp)
            out = out.view(out.shape[2], out.shape[0], 1).squeeze()
            embedding_subvectors.append(out)
            inp_offset += 1
        out_cat = torch.cat(embedding_subvectors)
        out_cat = out_cat.view(out_cat.shape[::-1])

        # Layer 1: dropout.
        out_cat = self.embedding_dropout(out_cat)

        # Layer 1: batch normalization (of the continuous variables).
        out_cont = self.cont_batch_norm(x[:, inp_offset:])

        out = torch.cat((out_cat, out_cont), dim=1)

        # Layers 2 through 9: sequential feedforward model.
        out = self.seq_model(out)
        return out

    def fit(self, X, y):
        self.train()

        # TODO: set a random seed to invoke determinism.
        # cf. https://github.com/pytorch/pytorch/issues/11278

        X = torch.tensor(X, dtype=torch.float32).cuda()
        y = torch.tensor(y, dtype=torch.float32).cuda()

        # The build of PyTorch on Kaggle has a bug that prevents us from using
        # CyclicLR with ADAM. Cf. GH#19003.
        # optimizer = torch.optim.Adam(model.parameters(), lr=max_lr)
        # scheduler = torch.optim.lr_scheduler.CyclicLR(
        #     optimizer, base_lr=base_lr, max_lr=max_lr,
        #     step_size_up=300, step_size_down=300,
        #     mode='exp_range', gamma=0.99994
        # )
        optimizer = torch.optim.Adam(model.parameters(), lr=(self.base_lr + self.max_lr) / 2)

        batches = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(X, y),
            batch_size=self.batch_size, shuffle=True
        )

        for epoch in range(self.n_epochs):
            for i, (X_batch, y_batch) in enumerate(batches):
                y_pred = model(X_batch).squeeze()
                # scheduler.batch_step()  # Disabled due to a bug, see above.
                loss = self.loss_fn(y_pred, y_batch)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            print(
                f"Epoch {epoch + 1}/{self.n_epochs}, Loss {loss.detach().numpy()}"
            )

    def predict(self, X):
        self.eval()
        with torch.no_grad():
            y_pred = model(torch.tensor(X, dtype=torch.float32).cuda())
        return y_pred.squeeze()

    def score(self, X, y):
        y_pred = self.predict(X)
        y = torch.tensor(y, dtype=torch.float32).cuda()
        return self.score_fn(y, y_pred)


model = FeedforwardTabularModel()
model.cuda()
model.fit(X_train_sample.values, y_train_sample.values)
This type of error typically occurs when a tensor that the model needs on the GPU is still on the CPU. But as far as I can tell, I've already placed .cuda() calls at all of the necessary places: every time a torch.tensor is declared, and by running model.cuda() before model.fit.
What is causing this error?
Answer
Someone on a separate forum provided the solution:
PyTorch requires you to do self.module_name = module for things to work correctly. It's okay to keep them in a list. Just do something like setattr(self, 'emb_{}'.format(i), emb) for each step in that loop.
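Concretely, in my __init__ that amounts to registering each embedding on self as it is created. Something along these lines (a sketch of the quoted advice, not tested against the original notebook; the emb_{i} attribute names are arbitrary):

    # Layer 1: embeddings.
    self.embeddings = []
    for i, (in_size, out_size) in enumerate(self.cat_vars_embedding_vector_lengths):
        emb = nn.Embedding(in_size, out_size)
        # Assigning the module to an attribute registers it with nn.Module,
        # so model.cuda() and model.parameters() can see its weights.
        setattr(self, 'emb_{}'.format(i), emb)
        self.embeddings.append(emb)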
Because I was managing my embedding layers in a plain Python list, instead of registering each one as an attribute on the model object as PyTorch requires, they were never picked up as submodules and therefore were not moved over to GPU memory when model.cuda() was called. Tricky!
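For what it's worth, the more idiomatic fix is to wrap the list in nn.ModuleList, which registers every element as a submodule; the loop over self.embeddings in forward then works unchanged. A minimal sketch of the constructor change:

    # Layer 1: embeddings. nn.ModuleList registers each nn.Embedding as a
    # submodule, so model.cuda() moves all of their weights to GPU memory.
    self.embeddings = nn.ModuleList([
        nn.Embedding(in_size, out_size)
        for (in_size, out_size) in self.cat_vars_embedding_vector_lengths
    ])

Either way, the embedding weights show up in model.parameters() and state_dict() as well, so they also get optimized and saved correctly.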