I am receiving the following error when importing pandas in a Python program:
JavaScript
x
13
13
1
monas-mbp:book mona$ sudo pip install python-dateutil
2
Requirement already satisfied (use --upgrade to upgrade): python-dateutil in /System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python
3
Cleaning up
4
monas-mbp:book mona$ python t1.py
5
No module named dateutil.parser
6
Traceback (most recent call last):
7
File "t1.py", line 4, in <module>
8
import pandas as pd
9
File "/Library/Python/2.7/site-packages/pandas/__init__.py", line 6, in <module>
10
from . import hashtable, tslib, lib
11
File "tslib.pyx", line 31, in init pandas.tslib (pandas/tslib.c:48782)
12
ImportError: No module named dateutil.parser
13
Also here’s the program:
JavaScript
1
256
256
1
import codecs
2
from math import sqrt
3
import numpy as np
4
import pandas as pd
5
6
# Sample ratings table used to construct the recommender:
# maps each user name to a dict of {artist name: rating}.
users = {
    "Angelica": {"Blues Traveler": 3.5, "Broken Bells": 2.0,
                 "Norah Jones": 4.5, "Phoenix": 5.0,
                 "Slightly Stoopid": 1.5,
                 "The Strokes": 2.5, "Vampire Weekend": 2.0},

    "Bill": {"Blues Traveler": 2.0, "Broken Bells": 3.5,
             "Deadmau5": 4.0, "Phoenix": 2.0,
             "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0},

    "Chan": {"Blues Traveler": 5.0, "Broken Bells": 1.0,
             "Deadmau5": 1.0, "Norah Jones": 3.0, "Phoenix": 5,
             "Slightly Stoopid": 1.0},

    "Dan": {"Blues Traveler": 3.0, "Broken Bells": 4.0,
            "Deadmau5": 4.5, "Phoenix": 3.0,
            "Slightly Stoopid": 4.5, "The Strokes": 4.0,
            "Vampire Weekend": 2.0},

    "Hailey": {"Broken Bells": 4.0, "Deadmau5": 1.0,
               "Norah Jones": 4.0, "The Strokes": 4.0,
               "Vampire Weekend": 1.0},

    "Jordyn": {"Broken Bells": 4.5, "Deadmau5": 4.0,
               "Norah Jones": 5.0, "Phoenix": 5.0,
               "Slightly Stoopid": 4.5, "The Strokes": 4.0,
               "Vampire Weekend": 4.0},

    "Sam": {"Blues Traveler": 5.0, "Broken Bells": 2.0,
            "Norah Jones": 3.0, "Phoenix": 5.0,
            "Slightly Stoopid": 4.0, "The Strokes": 5.0},

    "Veronica": {"Blues Traveler": 3.0, "Norah Jones": 5.0,
                 "Phoenix": 4.0, "Slightly Stoopid": 2.5,
                 "The Strokes": 3.0},
}
41
42
43
44
class recommender:
    """k-nearest-neighbour recommender over a {user: {item: rating}} table.

    The ratings table lives in ``self.data``. It is either supplied to the
    constructor as a dict or filled in by ``loadBookDB``.
    """

    def __init__(self, data, k=1, metric='pearson', n=5):
        """Initialize the recommender.

        data   -- if it is a dict it becomes the ratings table; for any
                  other type the table starts out empty
        k      -- the k value for k nearest neighbor
        metric -- which similarity formula to use; only 'pearson' is
                  wired up, any other value leaves ``self.fn`` unset
        n      -- the maximum number of recommendations to make
        """
        self.k = k
        self.n = n
        self.username2id = {}
        self.userid2name = {}
        self.productid2name = {}
        # keep the metric name next to the function it selects
        self.metric = metric
        if self.metric == 'pearson':
            self.fn = self.pearson
        # Always define self.data so later calls fail with a KeyError for
        # an unknown user rather than an AttributeError.
        self.data = data if isinstance(data, dict) else {}

    def convertProductID2name(self, id):
        """Given product id number return product name.

        Falls back to returning the id itself when no name is known.
        """
        return self.productid2name.get(id, id)

    def userRatings(self, id, n):
        """Print the n top ratings for the user with the given id.

        Prints a header line, the total number of ratings the user has,
        then one tab-separated "name<TAB>rating" line per rating.
        """
        print("Ratings for " + self.userid2name[id])
        ratings = self.data[id]
        print(len(ratings))
        ratings = [(self.convertProductID2name(k), v)
                   for (k, v) in ratings.items()]
        # highest rating first
        ratings.sort(key=lambda pair: pair[1], reverse=True)
        for rating in ratings[:n]:
            print("%s\t%i" % (rating[0], rating[1]))

    def loadBookDB(self, path=''):
        """Load the BX book dataset, replacing any current ratings.

        path is the prefix under which BX-Book-Ratings.csv, BX-Books.csv
        and BX-Users.csv are located; it is concatenated directly with the
        file name, so it must end with a path separator.

        Prints the total number of lines read across the three files.
        """
        self.data = {}
        i = 0
        #
        # First load book ratings into self.data
        #
        with codecs.open(path + "BX-Book-Ratings.csv", 'r', 'utf8') as f:
            for line in f:
                i += 1
                # separate line into fields
                fields = line.split(';')
                user = fields[0].strip('"')
                book = fields[1].strip('"')
                rating = int(fields[2].strip().strip('"'))
                self.data.setdefault(user, {})[book] = rating
        #
        # Now load books into self.productid2name
        # Books contains isbn, title, and author among other fields
        #
        with codecs.open(path + "BX-Books.csv", 'r', 'utf8') as f:
            for line in f:
                i += 1
                fields = line.split(';')
                isbn = fields[0].strip('"')
                title = fields[1].strip('"')
                author = fields[2].strip().strip('"')
                self.productid2name[isbn] = title + ' by ' + author
        #
        # Now load user info into both self.userid2name and
        # self.username2id
        #
        with codecs.open(path + "BX-Users.csv", 'r', 'utf8') as f:
            for line in f:
                i += 1
                fields = line.split(';')
                userid = fields[0].strip('"')
                location = fields[1].strip('"')
                if len(fields) > 3:
                    age = fields[2].strip().strip('"')
                else:
                    age = 'NULL'
                if age != 'NULL':
                    value = location + ' (age: ' + age + ')'
                else:
                    value = location
                self.userid2name[userid] = value
                self.username2id[location] = userid
        print(i)

    def pearson(self, rating1, rating2):
        """Return the Pearson correlation of two {item: rating} dicts.

        Only items present in both dicts are used. Returns 0 when the
        dicts share no items or when either set of shared ratings has no
        variance.
        """
        sum_xy = 0
        sum_x = 0
        sum_y = 0
        sum_x2 = 0
        sum_y2 = 0
        n = 0
        for key in rating1:
            if key in rating2:
                n += 1
                x = rating1[key]
                y = rating2[key]
                sum_xy += x * y
                sum_x += x
                sum_y += y
                sum_x2 += pow(x, 2)
                sum_y2 += pow(y, 2)
        if n == 0:
            return 0
        # Force true division: with integer ratings, "/" truncates on
        # Python 2 and silently skews the result.
        n = float(n)
        var_x = sum_x2 - pow(sum_x, 2) / n
        var_y = sum_y2 - pow(sum_y, 2) / n
        # Rounding can push a zero variance slightly negative, which would
        # make sqrt() raise; treat that the same as zero variance.
        if var_x <= 0 or var_y <= 0:
            return 0
        return (sum_xy - (sum_x * sum_y) / n) / (sqrt(var_x) * sqrt(var_y))

    def computeNearestNeighbor(self, username):
        """Return (user, score) pairs for every other user.

        The score is ``self.fn`` applied to username's ratings and the
        other user's ratings; the list is sorted by score, highest first.
        """
        distances = [(instance,
                      self.fn(self.data[username], self.data[instance]))
                     for instance in self.data
                     if instance != username]
        distances.sort(key=lambda pair: pair[1], reverse=True)
        return distances

    def recommend(self, user):
        """Give list of recommendations as (item name, score) pairs.

        Takes the first k entries from computeNearestNeighbor, weights each
        neighbour by its share of their total score, and sums the weighted
        ratings of items the user has not rated. Returns at most n pairs,
        highest score first. Returns an empty list when there are no
        neighbours or their scores sum to zero.
        """
        recommendations = {}
        # first get list of users ordered by nearness
        nearest = self.computeNearestNeighbor(user)
        # now get the ratings for the user
        userRatings = self.data[user]
        # never index past the neighbours that actually exist
        neighbours = nearest[:max(self.k, 0)]
        # determine the total distance
        totalDistance = 0.0
        for _, score in neighbours:
            totalDistance += score
        if totalDistance == 0:
            # no neighbours, or no usable weighting: nothing to recommend
            return []
        # now iterate through the k nearest neighbors
        # accumulating their ratings
        for name, score in neighbours:
            # compute slice of pie
            weight = score / totalDistance
            neighborRatings = self.data[name]
            # now find bands neighbor rated that user didn't
            for artist in neighborRatings:
                if artist not in userRatings:
                    if artist not in recommendations:
                        recommendations[artist] = (neighborRatings[artist]
                                                   * weight)
                    else:
                        recommendations[artist] = (recommendations[artist]
                                                   + neighborRatings[artist]
                                                   * weight)
        # now make list from dictionary
        recommendations = [(self.convertProductID2name(k), v)
                           for (k, v) in recommendations.items()]
        # finally sort and return
        recommendations.sort(key=lambda pair: pair[1], reverse=True)
        # Return the first n items
        return recommendations[:self.n]
244
245
# Build the recommender from the in-memory sample ratings, then replace its
# ratings table with the BX dump (loadBookDB resets self.data before loading).
r = recommender(users)
# The author implementation
r.loadBookDB('/Users/mona/Downloads/BX-Dump/')

# Load the three BX tables with pandas. The files are ';'-separated with
# double-quoted fields and backslash escapes.
# NOTE: `users` is rebound here from the sample dict to a DataFrame.
ratings = pd.read_csv('/Users/danialt/BX-CSV-Dump/BX-Book-Ratings.csv',
                      sep=";", quotechar='"', escapechar="\\")
books = pd.read_csv('/Users/danialt/BX-CSV-Dump/BX-Books.csv',
                    sep=";", quotechar='"', escapechar="\\")
users = pd.read_csv('/Users/danialt/BX-CSV-Dump/BX-Users.csv',
                    sep=";", quotechar='"', escapechar="\\")

# One row per user, one column per ISBN, cells holding the rating.
pivot_rating = ratings.pivot(index='User-ID', columns='ISBN', values='Book-Rating')
256
Advertisement
Answer
On Ubuntu you may need to install the package manager pip first:
JavaScript
1
2
1
sudo apt-get install python-pip
2
Then install the python-dateutil package with:
JavaScript
1
2
1
sudo pip install python-dateutil
2