Skip to content
Advertisement

LSTM neural network test to predict SPY prices giving me this error after training

Error is as follows:

Traceback (most recent call last):
  File "/Users/myname/PycharmProjects/LSTM1P/venv/lib/python3.7/site-packages/pandas/core/indexes/base.py", line 3080, in get_loc
    return self._engine.get_loc(casted_key)
  File "pandas/_libs/index.pyx", line 70, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 101, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 4554, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 4562, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Open'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/Users/myname/PycharmProjects/LSTM1P/matplottest.py", line 47, in <module>
    dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis = 0)
  File "/Users/myname/PycharmProjects/LSTM1P/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 3024, in __getitem__
    indexer = self.columns.get_loc(key)
  File "/Users/myname/PycharmProjects/LSTM1P/venv/lib/python3.7/site-packages/pandas/core/indexes/base.py", line 3082, in get_loc
    raise KeyError(key) from err
KeyError: 'Open'

My Code is as follows:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import keras

url = 'https://raw.githubusercontent.com/khammerberg53/MLPROJ1/main/SP500.csv'
dataset_train = pd.read_csv(url)
training_set = dataset_train.iloc[:, 1:2].values

dataset_train.head()
print(dataset_train.head())

from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0,1))
training_set_scaled = sc.fit_transform(training_set)

X_train = []
y_train = []
for i in range(60, 2000):
    X_train.append(training_set_scaled[i-60:i, 0])
    y_train.append(training_set_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Dense

model = Sequential()
model.add(LSTM(units=50,return_sequences=True,input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50,return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50,return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam',loss='mean_squared_error')
model.fit(X_train,y_train,epochs=100,batch_size=32)

url = 'https://raw.githubusercontent.com/khammerberg53/MLPROJ1/main/SP500%20test%20setcsv.csv'
dataset_train = pd.read_csv(url)
training_set = dataset_train.iloc[:, 1:2].values

dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis = 0)
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1,1)
inputs = sc.transform(inputs)
X_test = []
for i in range(3, 100):
    X_test.append(inputs[i-60:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = model.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

plt.plot(real_stock_price, color = 'black', label = 'TATA Stock Price')
plt.plot(predicted_stock_price, color = 'green', label = 'Predicted TATA Stock Price')
plt.title('TATA Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('TATA Stock Price')
plt.legend()
plt.show()

print(plt.show())

Not sure what’s going on….

Advertisement

Answer

Just check you train dataset, there is no Open column there, so dataset_train['Open'] fails:

url = 'https://raw.githubusercontent.com/khammerberg53/MLPROJ1/main/SP500%20test%20setcsv.csv'
dataset_train = pd.read_csv(url)
dataset_train.head()

Output:

         Date   Value
0  1994-09-02  470.99
1  1994-09-09  468.18
2  1994-09-16  471.19
3  1994-09-23  459.67
4  1994-09-30  462.71

Maybe you want to use dataset_train['Value'] instead

1 People found this is helpful
Advertisement