I am trying to implement bidirectional LSTM on time series data. The main file calls the dataloader to load the data for the model.
Main.py
import copy
import time
import argparse

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from sklearn import metrics
import ujson as json
from ipdb import set_trace

import utils
import models
from models import rits_i
from models import brits_i
from models import rits
from models import brits
import data_loader

parser = argparse.ArgumentParser()
parser.add_argument('--epochs', type=int, default=1000)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--model', type=str)
args = parser.parse_args()


def train(model):
    """Train `model` for args.epochs epochs, evaluating after every epoch.

    Uses Adam (lr=1e-3) and the batches produced by data_loader.get_loader.
    """
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    data_iter = data_loader.get_loader(batch_size=args.batch_size)

    for epoch in range(args.epochs):
        model.train()
        run_loss = 0.0

        for idx, data in enumerate(data_iter):
            data = utils.to_var(data)
            ret = model.run_on_batch(data, optimizer)

            # .item() replaces the old `.data[0]` indexing, which was
            # removed for 0-dim tensors in PyTorch >= 0.5.
            run_loss += ret['loss'].item()

            # '\r' + end='' rewrites the progress line in place instead of
            # printing one line per batch.
            print('\r Progress epoch {}, {:.2f}%, average loss {}'.format(
                epoch,
                (idx + 1) * 100.0 / len(data_iter),
                run_loss / (idx + 1.0)), end='')

        print()
        # Original gated this on `epoch % 1 == 0`, which is always true.
        evaluate(model, data_iter)


def evaluate(model, val_iter):
    """Evaluate classification AUC and imputation MAE/MRE on `val_iter`.

    Only records with is_train == 0 contribute to AUC; only positions with
    eval_masks == 1 contribute to the imputation metrics.
    """
    model.eval()

    labels = []
    preds = []
    evals = []
    imputations = []

    for idx, data in enumerate(val_iter):
        data = utils.to_var(data)
        ret = model.run_on_batch(data, None)

        pred = ret['predictions'].data.cpu().numpy()
        label = ret['labels'].data.cpu().numpy()
        is_train = ret['is_train'].data.cpu().numpy()
        eval_masks = ret['eval_masks'].data.cpu().numpy()
        eval_ = ret['evals'].data.cpu().numpy()
        imputation = ret['imputations'].data.cpu().numpy()

        evals += eval_[np.where(eval_masks == 1)].tolist()
        imputations += imputation[np.where(eval_masks == 1)].tolist()

        # collect test label & prediction
        pred = pred[np.where(is_train == 0)]
        label = label[np.where(is_train == 0)]
        labels += label.tolist()
        preds += pred.tolist()

    labels = np.asarray(labels).astype('int32')
    preds = np.asarray(preds)
    print('AUC {}'.format(metrics.roc_auc_score(labels, preds)))

    evals = np.asarray(evals)
    imputations = np.asarray(imputations)
    print('MAE', np.abs(evals - imputations).mean())
    print('MRE', np.abs(evals - imputations).sum() / np.abs(evals).sum())


def run():
    """Instantiate the model named by --model and train it (on GPU if available)."""
    model = getattr(models, args.model).Model()
    if torch.cuda.is_available():
        model = model.cuda()
    train(model)


if __name__ == '__main__':
    run()
data_loader.py
import os
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# ujson is a faster drop-in replacement for the stdlib json module;
# fall back to the stdlib so the loader still works without it installed.
try:
    import ujson as json
except ImportError:
    import json


class MySet(Dataset):
    """Dataset reading one JSON record per line from './json/json'.

    Roughly 20% of the records are marked as validation (is_train == 0);
    the rest are marked as training records (is_train == 1).
    """

    def __init__(self):
        super(MySet, self).__init__()
        self.content = open('./json/json').readlines()

        indices = np.arange(len(self.content))
        # replace=False so the validation split really is ~20% distinct
        # records; sampling with replacement would pick duplicates and
        # silently shrink the validation set.
        val_indices = np.random.choice(indices, len(self.content) // 5,
                                       replace=False)
        self.val_indices = set(val_indices.tolist())

    def __len__(self):
        return len(self.content)

    def __getitem__(self, idx):
        rec = json.loads(self.content[idx])
        rec['is_train'] = 0 if idx in self.val_indices else 1
        return rec


def collate_fn(recs):
    """Collate a list of record dicts into batched FloatTensors.

    Fix for Python 3: map() returns a lazy iterator, not a list, and
    torch.FloatTensor requires a (nested) sequence — so every level is
    materialized as a plain list (nested list comprehensions) before the
    tensor is built. This resolves
    `TypeError: new(): data must be a sequence (got map)`.
    """
    forward = [x['forward'] for x in recs]
    backward = [x['backward'] for x in recs]

    def to_tensor_dict(recs):
        # Each field becomes a [batch][time][feature] nested list,
        # then a FloatTensor of shape (batch, time, feature).
        def stack(key):
            return torch.FloatTensor(
                [[step[key] for step in rec] for rec in recs])

        return {'values': stack('values'),
                'forwards': stack('forwards'),
                'masks': stack('masks'),
                'deltas': stack('deltas'),
                'evals': stack('evals'),
                'eval_masks': stack('eval_masks')}

    ret_dict = {'forward': to_tensor_dict(forward),
                'backward': to_tensor_dict(backward)}

    ret_dict['labels'] = torch.FloatTensor([x['label'] for x in recs])
    ret_dict['is_train'] = torch.FloatTensor([x['is_train'] for x in recs])

    return ret_dict


def get_loader(batch_size=64, shuffle=True):
    """Build a DataLoader over MySet using the custom collate_fn."""
    data_set = MySet()
    data_iter = DataLoader(dataset=data_set,
                           batch_size=batch_size,
                           num_workers=4,
                           shuffle=shuffle,
                           pin_memory=True,
                           collate_fn=collate_fn)
    return data_iter
but I am unable to resolve the error TypeError: new(): data must be a sequence (got map)
Following message is being received in the terminal:
C:Usersankitanaconda3python.exe "C:Program FilesJetBrainsPyCharm Community Edition 2021.2pluginspython-cehelperspydevpydevd.py" --multiproc --qt-support=auto --client 127.0.0.1 --port 61292 --file C:/Users/ankit/PycharmProjects/BRITS/main.py --epochs 1000 --batch_size 32 --model brits Connected to pydev debugger (build 212.4746.96) Traceback (most recent call last): File "C:Usersankitanaconda3libsite-packagestorchutilsdatadataloader.py", line 521, in __next__ data = self._next_data() File "C:Usersankitanaconda3libsite-packagestorchutilsdatadataloader.py", line 1203, in _next_data return self._process_data(data) File "C:Usersankitanaconda3libsite-packagestorchutilsdatadataloader.py", line 1229, in _process_data data.reraise() File "C:Usersankitanaconda3libsite-packagestorch_utils.py", line 425, in reraise raise self.exc_type(msg) TypeError: Caught TypeError in DataLoader worker process 0. Original Traceback (most recent call last): File "C:Usersankitanaconda3libsite-packagestorchutilsdata_utilsworker.py", line 287, in _worker_loop data = fetcher.fetch(index) File "C:Usersankitanaconda3libsite-packagestorchutilsdata_utilsfetch.py", line 47, in fetch return self.collate_fn(data) File "C:UsersankitPycharmProjectsBRITSdata_loader.py", line 48, in collate_fn ret_dict = {'forward': to_tensor_dict(forward), 'backward': to_tensor_dict(backward)} File "C:UsersankitPycharmProjectsBRITSdata_loader.py", line 38, in to_tensor_dict values = torch.FloatTensor(map(lambda r: map(lambda x: x['values'], r), recs)) TypeError: new(): data must be a sequence (got map)
The input data is in JSON format (below is partial data):
{"forward":[{"evals":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-0.6644796629,0.0,0.0,0.0,0.0,0.0,0.2233221525,0.9872348884,1.5032897407,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,-1.2017103673,0.0,0.0,2.16039189,0.0],"deltas":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"forwards":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-0.6644796629,0.0,0.0,0.0,0.0,0.0,0.2233221525,0.9872348884,1.5032897407,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,-1.2017103673,0.0,0.0,2.16039189,0.0],"masks":[0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0],"values":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-0.6644796629,0.0,0.0,0.0,0.0,0.0,0.2233221525,0.9872348884,1.5032897407,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,-1.2017103673,0.0,0.0,2.16039189,0.0],"eval_masks":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},{"evals":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.5117099666,0.0,0.0,0.0,0.0,0.0,0.2569139277,0.6703649195,0.8156657086,0.0,0.0,0.0,0.0,-0.3034796866,0.0,0.0,0.0,0.0,0.0,-0.5174302535,0.0],"deltas":[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0],"forwards":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-1.5117099666,0.0,0.0,0.0,0.0,0.0,0.2569139277,0.6703649195,0.8156657086,0.0,0.0,0.0,0.0,-0.3034796866,0.0,0.0,-1.2017103673,0.0,0.0,-0.5174302535,0.0],"masks":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0],"values":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.5117099666,0.0,0.0,0.0,0.0,0.0,0.2569139277,0.6703649195,0.8156657086,0.0,0.0,0.0,0.0,-0.3034796866,0.0,0.0,0.0,0.0,0.0,-0.5174302535,0.0],"eval_masks":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},{"evals":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.3987459261,0.0,0.0,0.0,0.0,0.0,-0.41
49215779,-0.0992249514,0.1738832786,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,0.0,0.0,0.0,0.3156699689,0.0],"deltas":[2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,0.0,2.0]
I researched and found that it might be an issue with Python 3, but I also ran the code on Python 2 and am still facing the same issue. Please help me resolve it.
Advertisement
Answer
I haven’t looked or tried running all of your code, but at a glance, this line is clearly wrong
torch.FloatTensor(map(lambda r: map(lambda x: x['values'], r), recs))
In python3, map()
returns a map
object, not a list. FloatTensor(..)
expects the later, i.e. a list. Just cast all your map objects with list()
torch.FloatTensor(list(map(lambda r: list(map(lambda x: x['values'], r)), recs)))
Note that the inner map() must be converted too — wrapping only the outer one still hands FloatTensor a list of map objects, which fails the same way. Apply this to every torch.FloatTensor(map(...)) call in collate_fn, including the labels and is_train lines.