Here is my code.
JavaScript
x
75
75
1
import pandas as pd
2
import numpy as np
3
import json
4
from xgboost import XGBRegressor
5
from sklearn.model_selection import train_test_split
6
from sklearn.metrics import r2_score, mean_squared_error
7
from sklearn.preprocessing import StandardScaler
8
9
training_data = pd.read_csv('/Users/aus10/Desktop/MLB_Data/Test_Training_Data/MLB_Training_Data.csv')
10
df_model = training_data.copy()
11
scaler = StandardScaler()
12
13
features = [['OBS', 'Runs']]
14
for feature in features:
15
df_model[feature] = scaler.fit_transform(df_model[feature])
16
17
test_data = pd.read_csv('/Users/aus10/Desktop/MLB_Data/Test_Training_Data/Test_Data.csv')
18
X = training_data.iloc[:,1] #independent columns
19
y = training_data.iloc[:,-1] #target column
20
X = X.values.reshape(-1,1)
21
22
results = []
23
24
# fit final model
25
model = XGBRegressor(objective="reg:squarederror", random_state=42)
26
model.fit(X, y)
27
28
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=4)
29
30
y_train_pred = model.predict(X_train)
31
y_test_pred = model.predict(X_test)
32
33
model.fit(X_train, y_train)
34
35
y_pred = model.predict(X_test)
36
37
print('MSE train: %.3f, test: %.3f' % (
38
round(mean_squared_error(y_train, y_train_pred),2),
39
round(mean_squared_error(y_test, y_test_pred),2)
40
))
41
42
print('R^2 train: %.3f, test: %.3f' % (r2_score(y_train, y_train_pred), r2_score(y_test, y_test_pred)))
43
44
# define one new data instance
45
46
index = 0
47
count = 0
48
49
while count < len(test_data):
50
team = test_data.loc[index].at['Team']
51
OBS = test_data.loc[index].at['OBS']
52
53
Xnew = [[ OBS ]]
54
# make a prediction
55
ynew = model.predict(Xnew)
56
# show the inputs and predicted outputs
57
results.append(
58
{
59
'Team': team,
60
'Runs': (round(ynew[0],2))
61
})
62
index += 1
63
count += 1
64
65
sorted_results = sorted(results, key=lambda k: k['Runs'], reverse=True)
66
67
df = pd.DataFrame(sorted_results, columns=[
68
'Team', 'Runs'])
69
writer = pd.ExcelWriter('/Users/aus10/Desktop/MLB_Data/ML/Results/Projected_Runs_XGBoost.xlsx', engine='xlsxwriter') # pylint: disable=abstract-class-instantiated
70
df.to_excel(writer, sheet_name='Sheet1', index=False)
71
df.style.set_properties(**{'text-align': 'center'})
72
pd.set_option('display.max_colwidth', 100)
73
pd.set_option('display.width', 1000)
74
writer.save()
75
and the error I’m getting is TypeError: Input data can not be a list.
The data coming from test_data
is a csv with a team name and obs which is a float
like this NYY 0.324
Every way to solve it I’ve seen is just to put it in a 2d array like I did – Xnew = [[ OBS ]]
,
but I’m still getting the error.
Is there something else I need to do to the test_data coming in? I tried using values.reshape
, but that didn’t fix it either.
Advertisement
Answer
You need to transform your Xnew
:
JavaScript
1
2
1
Xnew = np.array(Xnew).reshape((1,-1))
2