I am trying to implement bidirectional LSTM on time series data. The main file calls the dataloader to load the data for the model.
Main.py
JavaScript
x
106
106
1
import copy
2
import torch
3
import torch.nn as nn
4
import torch.nn.functional as F
5
import torch.optim as optim
6
from torch.optim.lr_scheduler import StepLR
7
8
import numpy as np
9
10
import time
11
import utils
12
import models
13
from models import rits_i
14
from models import brits_i
15
from models import rits
16
from models import brits
17
18
import argparse
19
import data_loader
20
import pandas as pd
21
import ujson as json
22
23
from sklearn import metrics
24
25
from ipdb import set_trace
26
27
parser = argparse.ArgumentParser()
28
parser.add_argument('--epochs', type = int, default = 1000)
29
parser.add_argument('--batch_size', type = int, default = 32)
30
parser.add_argument('--model', type = str)
31
args = parser.parse_args()
32
33
def train(model):
34
optimizer = optim.Adam(model.parameters(), lr = 1e-3)
35
36
data_iter = data_loader.get_loader(batch_size = args.batch_size)
37
38
for epoch in range(args.epochs):
39
model.train()
40
41
run_loss = 0.0
42
43
for idx, data in enumerate(data_iter):
44
data = utils.to_var(data)
45
ret = model.run_on_batch(data, optimizer)
46
47
run_loss += ret['loss'].data[0]
48
49
print('r Progress epoch {}, {:.2f}%, average loss {}'.format(epoch, (idx + 1) * 100.0 / len(data_iter), run_loss / (idx + 1.0)),)
50
51
if epoch % 1 == 0:
52
evaluate(model, data_iter)
53
54
def evaluate(model, val_iter):
55
model.eval()
56
57
labels = []
58
preds = []
59
60
evals = []
61
imputations = []
62
63
for idx, data in enumerate(val_iter):
64
data = utils.to_var(data)
65
ret = model.run_on_batch(data, None)
66
67
pred = ret['predictions'].data.cpu().numpy()
68
label = ret['labels'].data.cpu().numpy()
69
is_train = ret['is_train'].data.cpu().numpy()
70
71
eval_masks = ret['eval_masks'].data.cpu().numpy()
72
eval_ = ret['evals'].data.cpu().numpy()
73
imputation = ret['imputations'].data.cpu().numpy()
74
75
evals += eval_[np.where(eval_masks == 1)].tolist()
76
imputations += imputation[np.where(eval_masks == 1)].tolist()
77
78
# collect test label & prediction
79
pred = pred[np.where(is_train == 0)]
80
label = label[np.where(is_train == 0)]
81
82
labels += label.tolist()
83
preds += pred.tolist()
84
85
labels = np.asarray(labels).astype('int32')
86
preds = np.asarray(preds)
87
88
print('AUC {}'.format(metrics.roc_auc_score(labels, preds)))
89
90
evals = np.asarray(evals)
91
imputations = np.asarray(imputations)
92
93
print('MAE', np.abs(evals - imputations).mean())
94
print('MRE', np.abs(evals - imputations).sum() / np.abs(evals).sum())
95
96
def run():
97
model = getattr(models, args.model).Model()
98
99
if torch.cuda.is_available():
100
model = model.cuda()
101
102
train(model)
103
104
if __name__ == '__main__':
105
run()
106
data_loader.py
JavaScript
1
66
66
1
import os
2
import time
3
4
import ujson as json
5
import numpy as np
6
import pandas as pd
7
8
import torch
9
import torch.nn as nn
10
from torch.utils.data import Dataset, DataLoader
11
12
class MySet(Dataset):
13
def __init__(self):
14
super(MySet, self).__init__()
15
self.content = open('./json/json').readlines()
16
17
indices = np.arange(len(self.content))
18
val_indices = np.random.choice(indices, len(self.content) // 5)
19
20
self.val_indices = set(val_indices.tolist())
21
22
def __len__(self):
23
return len(self.content)
24
25
def __getitem__(self, idx):
26
rec = json.loads(self.content[idx])
27
if idx in self.val_indices:
28
rec['is_train'] = 0
29
else:
30
rec['is_train'] = 1
31
return rec
32
33
def collate_fn(recs):
34
forward = map(lambda x: x['forward'], recs)
35
backward = map(lambda x: x['backward'], recs)
36
37
def to_tensor_dict(recs):
38
values = torch.FloatTensor(map(lambda r: map(lambda x: x['values'], r), recs))
39
masks = torch.FloatTensor(map(lambda r: map(lambda x: x['masks'], r), recs))
40
deltas = torch.FloatTensor(map(lambda r: map(lambda x: x['deltas'], r), recs))
41
forwards = torch.FloatTensor(map(lambda r: map(lambda x: x['forwards'], r), recs))
42
43
evals = torch.FloatTensor(map(lambda r: map(lambda x: x['evals'], r), recs))
44
eval_masks = torch.FloatTensor(map(lambda r: map(lambda x: x['eval_masks'], r), recs))
45
46
return {'values': values, 'forwards': forwards, 'masks': masks, 'deltas': deltas, 'evals': evals, 'eval_masks': eval_masks}
47
48
ret_dict = {'forward': to_tensor_dict(forward), 'backward': to_tensor_dict(backward)}
49
50
ret_dict['labels'] = torch.FloatTensor(map(lambda x: x['label'], recs))
51
ret_dict['is_train'] = torch.FloatTensor(map(lambda x: x['is_train'], recs))
52
53
return ret_dict
54
55
def get_loader(batch_size = 64, shuffle = True):
56
data_set = MySet()
57
data_iter = DataLoader(dataset = data_set,
58
batch_size = batch_size,
59
num_workers = 4,
60
shuffle = shuffle,
61
pin_memory = True,
62
collate_fn = collate_fn
63
)
64
65
return data_iter
66
but I am unable to resolve the error TypeError: new(): data must be a sequence (got map)
Following message is being received in the terminal:
JavaScript
1
23
23
1
C:Usersankitanaconda3python.exe "C:Program FilesJetBrainsPyCharm Community Edition 2021.2pluginspython-cehelperspydevpydevd.py" --multiproc --qt-support=auto --client 127.0.0.1 --port 61292 --file C:/Users/ankit/PycharmProjects/BRITS/main.py --epochs 1000 --batch_size 32 --model brits
2
Connected to pydev debugger (build 212.4746.96)
3
Traceback (most recent call last):
4
File "C:Usersankitanaconda3libsite-packagestorchutilsdatadataloader.py", line 521, in __next__
5
data = self._next_data()
6
File "C:Usersankitanaconda3libsite-packagestorchutilsdatadataloader.py", line 1203, in _next_data
7
return self._process_data(data)
8
File "C:Usersankitanaconda3libsite-packagestorchutilsdatadataloader.py", line 1229, in _process_data
9
data.reraise()
10
File "C:Usersankitanaconda3libsite-packagestorch_utils.py", line 425, in reraise
11
raise self.exc_type(msg)
12
TypeError: Caught TypeError in DataLoader worker process 0.
13
Original Traceback (most recent call last):
14
File "C:Usersankitanaconda3libsite-packagestorchutilsdata_utilsworker.py", line 287, in _worker_loop
15
data = fetcher.fetch(index)
16
File "C:Usersankitanaconda3libsite-packagestorchutilsdata_utilsfetch.py", line 47, in fetch
17
return self.collate_fn(data)
18
File "C:UsersankitPycharmProjectsBRITSdata_loader.py", line 48, in collate_fn
19
ret_dict = {'forward': to_tensor_dict(forward), 'backward': to_tensor_dict(backward)}
20
File "C:UsersankitPycharmProjectsBRITSdata_loader.py", line 38, in to_tensor_dict
21
values = torch.FloatTensor(map(lambda r: map(lambda x: x['values'], r), recs))
22
TypeError: new(): data must be a sequence (got map)
23
The input data is in the jason format (below is the partial data):
JavaScript
1
2
1
{"forward":[{"evals":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-0.6644796629,0.0,0.0,0.0,0.0,0.0,0.2233221525,0.9872348884,1.5032897407,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,-1.2017103673,0.0,0.0,2.16039189,0.0],"deltas":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"forwards":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-0.6644796629,0.0,0.0,0.0,0.0,0.0,0.2233221525,0.9872348884,1.5032897407,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,-1.2017103673,0.0,0.0,2.16039189,0.0],"masks":[0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0],"values":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-0.6644796629,0.0,0.0,0.0,0.0,0.0,0.2233221525,0.9872348884,1.5032897407,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,-1.2017103673,0.0,0.0,2.16039189,0.0],"eval_masks":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},{"evals":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.5117099666,0.0,0.0,0.0,0.0,0.0,0.2569139277,0.6703649195,0.8156657086,0.0,0.0,0.0,0.0,-0.3034796866,0.0,0.0,0.0,0.0,0.0,-0.5174302535,0.0],"deltas":[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0],"forwards":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-1.5117099666,0.0,0.0,0.0,0.0,0.0,0.2569139277,0.6703649195,0.8156657086,0.0,0.0,0.0,0.0,-0.3034796866,0.0,0.0,-1.2017103673,0.0,0.0,-0.5174302535,0.0],"masks":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0],"values":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.5117099666,0.0,0.0,0.0,0.0,0.0,0.2569139277,0.6703649195,0.8156657086,0.0,0.0,0.0,0.0,-0.3034796866,0.0,0.0,0.0,0.0,0.0,-0.5174302535,0.0],"eval_masks":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},{"evals":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.3987459261,0.0,0.0,0.0,0.0,0.0,-0.4149215779,-0.0992249514,0.1738832786,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,0.0,0.0,0.0,0.3156699689,0.0],"deltas":[2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,0.0,2.0]
2
I researched and got that it might be issue with python 3 but I ran on python 2 but still facing the same issue. Please help me resolve the issue.
Advertisement
Answer
I haven’t looked or tried running all of your code, but at a glance, this line is clearly wrong
JavaScript
1
2
1
torch.FloatTensor(map(lambda r: map(lambda x: x['values'], r), recs))
2
In python3, map()
returns a map
object, not a list. FloatTensor(..)
expects the later, i.e. a list. Just cast all your map objects with list()
JavaScript
1
2
1
torch.FloatTensor(list(map(lambda r: map(lambda x: x['values'], r), recs)))
2