I am trying to implement bidirectional LSTM on time series data. The main file calls the dataloader to load the data for the model.
Main.py
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import numpy as np
import time
import utils
import models
from models import rits_i
from models import brits_i
from models import rits
from models import brits
import argparse
import data_loader
import pandas as pd
import ujson as json
from sklearn import metrics
from ipdb import set_trace
parser = argparse.ArgumentParser()
parser.add_argument('--epochs', type = int, default = 1000)
parser.add_argument('--batch_size', type = int, default = 32)
parser.add_argument('--model', type = str)
args = parser.parse_args()
def train(model):
optimizer = optim.Adam(model.parameters(), lr = 1e-3)
data_iter = data_loader.get_loader(batch_size = args.batch_size)
for epoch in range(args.epochs):
model.train()
run_loss = 0.0
for idx, data in enumerate(data_iter):
data = utils.to_var(data)
ret = model.run_on_batch(data, optimizer)
run_loss += ret['loss'].data[0]
print('r Progress epoch {}, {:.2f}%, average loss {}'.format(epoch, (idx + 1) * 100.0 / len(data_iter), run_loss / (idx + 1.0)),)
if epoch % 1 == 0:
evaluate(model, data_iter)
def evaluate(model, val_iter):
model.eval()
labels = []
preds = []
evals = []
imputations = []
for idx, data in enumerate(val_iter):
data = utils.to_var(data)
ret = model.run_on_batch(data, None)
pred = ret['predictions'].data.cpu().numpy()
label = ret['labels'].data.cpu().numpy()
is_train = ret['is_train'].data.cpu().numpy()
eval_masks = ret['eval_masks'].data.cpu().numpy()
eval_ = ret['evals'].data.cpu().numpy()
imputation = ret['imputations'].data.cpu().numpy()
evals += eval_[np.where(eval_masks == 1)].tolist()
imputations += imputation[np.where(eval_masks == 1)].tolist()
# collect test label & prediction
pred = pred[np.where(is_train == 0)]
label = label[np.where(is_train == 0)]
labels += label.tolist()
preds += pred.tolist()
labels = np.asarray(labels).astype('int32')
preds = np.asarray(preds)
print('AUC {}'.format(metrics.roc_auc_score(labels, preds)))
evals = np.asarray(evals)
imputations = np.asarray(imputations)
print('MAE', np.abs(evals - imputations).mean())
print('MRE', np.abs(evals - imputations).sum() / np.abs(evals).sum())
def run():
model = getattr(models, args.model).Model()
if torch.cuda.is_available():
model = model.cuda()
train(model)
if __name__ == '__main__':
run()
data_loader.py
import os
import time
import ujson as json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
class MySet(Dataset):
def __init__(self):
super(MySet, self).__init__()
self.content = open('./json/json').readlines()
indices = np.arange(len(self.content))
val_indices = np.random.choice(indices, len(self.content) // 5)
self.val_indices = set(val_indices.tolist())
def __len__(self):
return len(self.content)
def __getitem__(self, idx):
rec = json.loads(self.content[idx])
if idx in self.val_indices:
rec['is_train'] = 0
else:
rec['is_train'] = 1
return rec
def collate_fn(recs):
forward = map(lambda x: x['forward'], recs)
backward = map(lambda x: x['backward'], recs)
def to_tensor_dict(recs):
values = torch.FloatTensor(map(lambda r: map(lambda x: x['values'], r), recs))
masks = torch.FloatTensor(map(lambda r: map(lambda x: x['masks'], r), recs))
deltas = torch.FloatTensor(map(lambda r: map(lambda x: x['deltas'], r), recs))
forwards = torch.FloatTensor(map(lambda r: map(lambda x: x['forwards'], r), recs))
evals = torch.FloatTensor(map(lambda r: map(lambda x: x['evals'], r), recs))
eval_masks = torch.FloatTensor(map(lambda r: map(lambda x: x['eval_masks'], r), recs))
return {'values': values, 'forwards': forwards, 'masks': masks, 'deltas': deltas, 'evals': evals, 'eval_masks': eval_masks}
ret_dict = {'forward': to_tensor_dict(forward), 'backward': to_tensor_dict(backward)}
ret_dict['labels'] = torch.FloatTensor(map(lambda x: x['label'], recs))
ret_dict['is_train'] = torch.FloatTensor(map(lambda x: x['is_train'], recs))
return ret_dict
def get_loader(batch_size = 64, shuffle = True):
data_set = MySet()
data_iter = DataLoader(dataset = data_set,
batch_size = batch_size,
num_workers = 4,
shuffle = shuffle,
pin_memory = True,
collate_fn = collate_fn
)
return data_iter
but I am unable to resolve the error TypeError: new(): data must be a sequence (got map)
Following message is being received in the terminal:
C:Usersankitanaconda3python.exe "C:Program FilesJetBrainsPyCharm Community Edition 2021.2pluginspython-cehelperspydevpydevd.py" --multiproc --qt-support=auto --client 127.0.0.1 --port 61292 --file C:/Users/ankit/PycharmProjects/BRITS/main.py --epochs 1000 --batch_size 32 --model brits
Connected to pydev debugger (build 212.4746.96)
Traceback (most recent call last):
File "C:Usersankitanaconda3libsite-packagestorchutilsdatadataloader.py", line 521, in __next__
data = self._next_data()
File "C:Usersankitanaconda3libsite-packagestorchutilsdatadataloader.py", line 1203, in _next_data
return self._process_data(data)
File "C:Usersankitanaconda3libsite-packagestorchutilsdatadataloader.py", line 1229, in _process_data
data.reraise()
File "C:Usersankitanaconda3libsite-packagestorch_utils.py", line 425, in reraise
raise self.exc_type(msg)
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "C:Usersankitanaconda3libsite-packagestorchutilsdata_utilsworker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "C:Usersankitanaconda3libsite-packagestorchutilsdata_utilsfetch.py", line 47, in fetch
return self.collate_fn(data)
File "C:UsersankitPycharmProjectsBRITSdata_loader.py", line 48, in collate_fn
ret_dict = {'forward': to_tensor_dict(forward), 'backward': to_tensor_dict(backward)}
File "C:UsersankitPycharmProjectsBRITSdata_loader.py", line 38, in to_tensor_dict
values = torch.FloatTensor(map(lambda r: map(lambda x: x['values'], r), recs))
TypeError: new(): data must be a sequence (got map)
The input data is in the jason format (below is the partial data):
{"forward":[{"evals":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-0.6644796629,0.0,0.0,0.0,0.0,0.0,0.2233221525,0.9872348884,1.5032897407,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,-1.2017103673,0.0,0.0,2.16039189,0.0],"deltas":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"forwards":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-0.6644796629,0.0,0.0,0.0,0.0,0.0,0.2233221525,0.9872348884,1.5032897407,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,-1.2017103673,0.0,0.0,2.16039189,0.0],"masks":[0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0],"values":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-0.6644796629,0.0,0.0,0.0,0.0,0.0,0.2233221525,0.9872348884,1.5032897407,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,-1.2017103673,0.0,0.0,2.16039189,0.0],"eval_masks":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},{"evals":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.5117099666,0.0,0.0,0.0,0.0,0.0,0.2569139277,0.6703649195,0.8156657086,0.0,0.0,0.0,0.0,-0.3034796866,0.0,0.0,0.0,0.0,0.0,-0.5174302535,0.0],"deltas":[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0],"forwards":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-1.5117099666,0.0,0.0,0.0,0.0,0.0,0.2569139277,0.6703649195,0.8156657086,0.0,0.0,0.0,0.0,-0.3034796866,0.0,0.0,-1.2017103673,0.0,0.0,-0.5174302535,0.0],"masks":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0],"values":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.5117099666,0.0,0.0,0.0,0.0,0.0,0.2569139277,0.6703649195,0.8156657086,0.0,0.0,0.0,0.0,-0.3034796866,0.0,0.0,0.0,0.0,0.0,-0.5174302535,0.0],"eval_masks":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},{"evals":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.3987459261,0.0,0.0,0.0,0.0,0.0,-0.4149215779,-0.0992249514,0.1738832786,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,0.0,0.0,0.0,0.3156699689,0.0],"deltas":[2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,0.0,2.0]
I researched and got that it might be issue with python 3 but I ran on python 2 but still facing the same issue. Please help me resolve the issue.
Advertisement
Answer
I haven’t looked or tried running all of your code, but at a glance, this line is clearly wrong
torch.FloatTensor(map(lambda r: map(lambda x: x['values'], r), recs))
In python3, map() returns a map object, not a list. FloatTensor(..) expects the later, i.e. a list. Just cast all your map objects with list()
torch.FloatTensor(list(map(lambda r: map(lambda x: x['values'], r), recs)))