I trained a BertClassifier model using PyTorch. After creating my best.pt, I would like to put my model into production and use it to predict and classify, starting from a sample, so I resume it from the checkpoint. Then, after putting it in evaluation mode and freezing the model, I call .predict to run it on my sample, but I'm encountering this AttributeError. I also initialized the model before loading the checkpoint. Where am I wrong? Thank you for your help!
import shutil
import time
import logging

import numpy as np
import torch
import torch.nn as nn
from transformers import AdamW, get_linear_schedule_with_warmup


def save_ckp(state, is_best, checkpoint_path, best_model_path):
    """
    Function created to save a checkpoint, the latest one and the best one.
    This creates flexibility: either you are interested in the state of the
    latest checkpoint or the best checkpoint.
    state: checkpoint we want to save
    is_best: is this the best checkpoint; min validation loss
    checkpoint_path: path to save checkpoint
    best_model_path: path to save best model
    """
    f_path = checkpoint_path
    # save checkpoint data to the path given, checkpoint_path
    torch.save(state, f_path)
    # if it is the best model, min validation loss
    if is_best:
        best_fpath = best_model_path
        # copy that checkpoint file to the best path given, best_model_path
        shutil.copyfile(f_path, best_fpath)


def load_ckp(checkpoint_fpath, model, optimizer):
    """
    checkpoint_fpath: path of the checkpoint to load
    model: model that we want to load checkpoint parameters into
    optimizer: optimizer we defined in previous training
    """
    # load checkpoint
    checkpoint = torch.load(checkpoint_fpath)
    # initialize state_dict from checkpoint to model
    model.load_state_dict(checkpoint['state_dict'])
    # initialize optimizer from checkpoint to optimizer
    optimizer.load_state_dict(checkpoint['optimizer'])
    # initialize valid_loss_min from checkpoint to valid_loss_min
    valid_loss_min = checkpoint['valid_loss_min']
    # return model, optimizer, epoch value, min validation loss
    return model, optimizer, checkpoint['epoch'], valid_loss_min.item()


# Create the BertClassifier class
class BertClassifier(nn.Module):
    """Bert Model for Classification Tasks."""

    def __init__(self, freeze_bert=True):
        """
        @param bert: a BertModel object
        @param classifier: a torch.nn.Module classifier
        @param freeze_bert (bool): Set `False` to fine-tune the BERT model
        """
        super(BertClassifier, self).__init__()
        .......

    def forward(self, input_ids, attention_mask):
        '''
        Feed input to BERT and the classifier to compute logits.
        @param input_ids (torch.Tensor): an input tensor with shape (batch_size, max_length)
        @param attention_mask (torch.Tensor): a tensor that holds attention mask information with shape (batch_size, max_length)
        @return logits (torch.Tensor): an output tensor with shape (batch_size, num_labels)
        '''
        # Feed input to BERT
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Extract the last hidden state of the token `[CLS]` for the classification task
        last_hidden_state_cls = outputs[0][:, 0, :]
        # Feed input to the classifier to compute logits
        logits = self.classifier(last_hidden_state_cls)
        return logits


def initialize_model(epochs):
    """Initialize the Bert Classifier, the optimizer and the learning rate scheduler."""
    # Instantiate the Bert Classifier
    bert_classifier = BertClassifier(freeze_bert=False)
    # Tell PyTorch to run the model on the GPU
    bert_classifier = bert_classifier.to(device)
    # Create the optimizer
    optimizer = AdamW(bert_classifier.parameters(),
                      lr=lr,    # Default learning rate
                      eps=1e-8  # Default epsilon value
                      )
    # Total number of training steps
    total_steps = len(train_dataloader) * epochs
    # Set up the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,  # Default value
                                                num_training_steps=total_steps)
    return bert_classifier, optimizer, scheduler


def train(model, train_dataloader, val_dataloader, valid_loss_min_input,
          checkpoint_path, best_model_path, start_epochs, epochs, evaluation=True):
    """Train the BertClassifier model."""
    # Start training loop
    logging.info("--Start training...\n")
    # Initialize tracker for minimum validation loss
    valid_loss_min = valid_loss_min_input
    for epoch_i in range(start_epochs, epochs):
        # =======================================
        #               Training
        # =======================================
        # Print the header of the result table
        logging.info(f"{'Epoch':^7} | {'Batch':^7} | {'Train Loss':^12} | {'Val Loss':^10} | {'Val Acc':^9} | {'Elapsed':^9}")
        # Measure the elapsed time of each epoch
        t0_epoch, t0_batch = time.time(), time.time()
        # Reset tracking variables at the beginning of each epoch
        total_loss, batch_loss, batch_counts = 0, 0, 0
        # Put the model into training mode
        model.train()
        # For each batch of training data...
        for step, batch in enumerate(train_dataloader):
            batch_counts += 1
            # Load batch to GPU
            b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)
            # Zero out any previously calculated gradients
            model.zero_grad()
            # Perform a forward pass. This will return logits.
            logits = model(b_input_ids, b_attn_mask)
            # Compute loss and accumulate the loss values
            loss = loss_fn(logits, b_labels)
            batch_loss += loss.item()
            total_loss += loss.item()
            # Perform a backward pass to calculate gradients
            loss.backward()
            # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            # Update parameters and the learning rate
            optimizer.step()
            scheduler.step()
            # Print the loss values and time elapsed every 500 batches
            if (step % 500 == 0 and step != 0) or (step == len(train_dataloader) - 1):
                # Calculate time elapsed since the last report
                time_elapsed = time.time() - t0_batch
                # Print training results
                logging.info(f"{epoch_i + 1:^7} | {step:^7} | {batch_loss / batch_counts:^12.6f} | {'-':^10} | {'-':^9} | {time_elapsed:^9.2f}")
                # Reset batch tracking variables
                batch_loss, batch_counts = 0, 0
                t0_batch = time.time()
        # Calculate the average loss over the entire training data
        avg_train_loss = total_loss / len(train_dataloader)
        logging.info("-" * 70)
        # =======================================
        #               Evaluation
        # =======================================
        if evaluation:
            # After the completion of each training epoch, measure the model's
            # performance on our validation set.
            val_loss, val_accuracy = evaluate(model, val_dataloader)
            # Print performance over the entire training data
            time_elapsed = time.time() - t0_epoch
            logging.info(f"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^10.6f} | {time_elapsed:^9.2f}")
            logging.info("-" * 70)
            logging.info("\n")
        # create the checkpoint variable and add important data
        checkpoint = {
            'epoch': epoch_i + 1,
            'valid_loss_min': val_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        # save checkpoint
        save_ckp(checkpoint, False, checkpoint_path, best_model_path)
        # save the model if validation loss has decreased
        if val_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min, val_loss))
            # save checkpoint as the best model
            save_ckp(checkpoint, True, checkpoint_path, best_model_path)
            valid_loss_min = val_loss
    logging.info("-----------------Training complete--------------------------")


def evaluate(model, val_dataloader):
    """After the completion of each training epoch, measure the model's
    performance on our validation set."""
    # Put the model into evaluation mode. Dropout layers are disabled at test time.
    model.eval()
    # Tracking variables
    val_accuracy = []
    val_loss = []
    # For each batch in our validation set...
    for batch in val_dataloader:
        # Load batch to GPU
        b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)
        # Compute logits
        with torch.no_grad():
            logits = model(b_input_ids, b_attn_mask)
        # Compute loss
        loss = loss_fn(logits, b_labels)
        val_loss.append(loss.item())
        # Get the predictions
        preds = torch.argmax(logits, dim=1).flatten()
        # Calculate the accuracy rate
        accuracy = (preds == b_labels).cpu().numpy().mean() * 100
        val_accuracy.append(accuracy)
    # Compute the average accuracy and loss over the validation set.
    val_loss = np.mean(val_loss)
    val_accuracy = np.mean(val_accuracy)
    return val_loss, val_accuracy


bert_classifier, optimizer, scheduler = initialize_model(epochs=n_epochs)
train(model=bert_classifier ......)
bert_classifier, optimizer, scheduler = initialize_model(epochs=n_epochs)
model, optimizer, start_epoch, valid_loss_min = load_ckp(r"./best_model/best_model.pt", bert_classifier, optimizer)
model.eval()
model.freeze()

sample = {
    "seq": "ABCDE",
}

predictions = model.predict(sample)
AttributeError: 'BertClassifier' object has no attribute 'predict'
Answer
Generally, a framework writes the prediction function for you. When it doesn't, as with a plain nn.Module like this BertClassifier, you need to handle the low-level steps yourself. After this line, you have loaded the trained parameters:

model, optimizer, start_epoch, valid_loss_min = load_ckp(r"./best_model/best_model.pt", bert_classifier, optimizer)
After that, call the model itself, i.e. model(input_ids, attention_mask), which dispatches to the forward method you can see in the model: def forward(self, input_ids, attention_mask). There is no predict attribute because nn.Module does not define one. (Note that model.freeze() is likewise not an nn.Module method; it comes from frameworks such as PyTorch Lightning. To freeze parameters in plain PyTorch, set requires_grad = False on them.)
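A minimal sketch of such a predict helper, assuming the inputs were tokenized with a Hugging Face BertTokenizer ("bert-base-uncased") and padded to a fixed max_length during training; the tokenizer name, the max_length value, and the helper itself are illustrative assumptions, not code from the question:

import torch
from transformers import BertTokenizer

# Assumption: the model was trained on bert-base-uncased inputs with a fixed
# max_length; change both to match your training setup.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def predict(model, text, max_length=64):
    """Tokenize one string and return the predicted class index."""
    encoding = tokenizer(text,
                         padding="max_length",
                         truncation=True,
                         max_length=max_length,
                         return_tensors="pt")
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    model.eval()
    with torch.no_grad():  # no gradients needed at inference time
        logits = model(input_ids, attention_mask)
    # argmax over the class dimension gives the predicted label index
    return torch.argmax(logits, dim=1).item()

prediction = predict(model, "ABCDE")

Calling model(...) rather than model.forward(...) directly is the idiomatic route, since nn.Module.__call__ runs any registered hooks before dispatching to forward.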