I have a pandas dataframe like this:
   User-Id  Training-Id  TrainingTaken
0  4327024           25             10
1  6662572            3             10
2  3757520           26             10
and I need to convert it to a Matrix like they do here: https://github.com/tr1ten/Anime-Recommender-System/blob/main/HybridRecommenderSystem.ipynb Cell 13.
So I did the following:
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas_profiling
from scipy.sparse import csr_matrix
from lightfm.evaluation import auc_score
from lightfm.data import Dataset

user_training_interaction = pd.pivot_table(trainingtaken, index='User-Id', columns='Training-Id', values='TrainingTaken')
user_training_interaction.fillna(0,inplace=True)
user_training_csr = csr_matrix(user_training_interaction.values)
But I get this error:
---------------------------------------------------------------------------
DataError                                 Traceback (most recent call last)
<ipython-input-96-5a2c7ba28976> in <module>
     10 from lightfm.data import Dataset
     11
---> 12 user_training_interaction = pd.pivot_table(trainingtaken, index='User-Id', columns='Training-Id', values='TrainingTaken')
     13 user_training_interaction.fillna(0,inplace=True)
     14 user_training_csr = csr_matrix(user_training_interaction.values)

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/reshape/pivot.py in pivot_table(data, values, index, columns, aggfunc, fill_value, margins, dropna, margins_name, observed)
    110
    111 grouped = data.groupby(keys, observed=observed)
--> 112 agged = grouped.agg(aggfunc)
    113 if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
    114 agged = agged.dropna(how="all")

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/groupby/generic.py in aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
    949 func = maybe_mangle_lambdas(func)
    950
--> 951 result, how = self._aggregate(func, *args, **kwargs)
    952 if how is None:
    953 return result

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/base.py in _aggregate(self, arg, *args, **kwargs)
    305
    306 if isinstance(arg, str):
--> 307 return self._try_aggregate_string_function(arg, *args, **kwargs), None
    308
    309 if isinstance(arg, dict):

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/base.py in _try_aggregate_string_function(self, arg, *args, **kwargs)
    261 if f is not None:
    262 if callable(f):
--> 263 return f(*args, **kwargs)
    264
    265 # people may try to aggregate on a non-callable attribute

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/groupby/groupby.py in mean(self, numeric_only)
   1396 "mean",
   1397 alt=lambda x, axis: Series(x).mean(numeric_only=numeric_only),
-> 1398 numeric_only=numeric_only,
   1399 )
   1400

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/groupby/generic.py in _cython_agg_general(self, how, alt, numeric_only, min_count)
   1020 ) -> DataFrame:
   1021 agg_blocks, agg_items = self._cython_agg_blocks(
-> 1022 how, alt=alt, numeric_only=numeric_only, min_count=min_count
   1023 )
   1024 return self._wrap_agged_blocks(agg_blocks, items=agg_items)

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/pandas/core/groupby/generic.py in _cython_agg_blocks(self, how, alt, numeric_only, min_count)
   1128
   1129 if not (agg_blocks or split_frames):
-> 1130 raise DataError("No numeric types to aggregate")
   1131
   1132 if split_items:

DataError: No numeric types to aggregate
What am I missing?
Advertisement
Answer
The Pandas Documentation states:
While pivot() provides general purpose pivoting with various data types (strings, numerics, etc.), pandas also provides pivot_table() for pivoting with aggregation of numeric data
Make sure the column is numeric (you can check its dtype with trainingtaken.dtypes). Without seeing how you create trainingtaken, I can’t provide more specific guidance. However, the following may help:
- Make sure you handle “empty” values in that column. The Pandas guide is a very good place to start. Pandas points out that “a column of integers with even one missing values is cast to floating-point dtype”.
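- If working with a dataframe, a column can be cast to a specific type with astype. Note that astype returns a new Series rather than modifying the dataframe in place, so assign the result back:
your_df['your_col'] = your_df['your_col'].astype(int)
or, for your example, trainingtaken['TrainingTaken'] = trainingtaken['TrainingTaken'].astype(int)
If some values cannot be parsed as integers, pd.to_numeric(trainingtaken['TrainingTaken'], errors='coerce') converts them to NaN instead of raising.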