I built an LSTM-based model and trained it to predict intraday stock price changes, where the unit of time is one second. On the test data the predictions are comparable to the real values, but I need a forecast for the future, beyond the range of the data I already have. What am I doing wrong?
# Closing prices as an (n_samples, 1) column, the 2-D layout handed to the scaler.
close = df['close']
values = close.values.reshape(-1, 1)

# Scale the prices into [0, 1]; the scaler is fitted on this very series.
training_scaler = MinMaxScaler(feature_range=(0, 1))
testing_input = training_scaler.fit_transform(values)

# NOTE(review): `[0]` keeps only the FIRST row of each 50-row slice, so every
# sample is a single time step rather than a 50-step window, and all samples
# come from rows that already exist in `df`.
testing = np.array(
    [testing_input[start:start + 50][0] for start in range(len(testing_input))]
)
testing = testing.reshape(testing.shape[0], testing.shape[1], 1)

# Run the model on those samples and map its output back to the price scale.
predict = training_scaler.inverse_transform(model.predict(testing))

# Overlay the real series (blue) and the model output (red).
plt.plot(values, color='blue', label='Stock Price')
plt.plot(predict, color='red', label='Predicted Stock Price')
plt.xlabel('Timeline')
plt.ylabel('Stock Price')
plt.title('Changes')
plt.legend()
plt.show()
It turns out that the model only predicts values for time points I already have data for. How can I predict future data, beyond the end of the dataset?
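I found a solution. The problem was that I had trained the model incorrectly, so it was unable to predict data outside the set. The code below works correctly: it trains the model on pairs of a look-back window and the forecast window that immediately follows it, so the last window of the data can be used to forecast beyond its end: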
def learn(self, dataset_path: str) -> Sequential:
    """Train an LSTM that maps a look-back window of closes to a forecast window.

    The ``close`` column of the CSV is forward-filled, min-max scaled to
    [0, 1] and cut into training pairs: each input is ``n_lookback``
    consecutive values and its target is the ``n_forecast`` values that
    immediately follow. Both window lengths are fixed fractions of the
    series length (24 % and 12 %).

    Args:
        dataset_path: Path to a CSV file that has a ``close`` column.

    Returns:
        The fitted model. It takes inputs of shape ``(n_lookback, 1)`` and
        outputs ``n_forecast`` scaled values.

    Raises:
        ValueError: If the series is too short to give a non-empty
            look-back or forecast window.
    """
    df = pd.read_csv(dataset_path)

    # Series.ffill() is the supported spelling; fillna(method='ffill') is
    # deprecated in pandas.
    y = df['close'].ffill().values.reshape(-1, 1)

    # The scaler stays local to this call; predict() fits its own scaler on
    # the frame it is given.
    scaler = MinMaxScaler(feature_range=(0, 1))
    y = scaler.fit_transform(y)

    n_lookback = int(len(y) * 0.24)
    n_forecast = int(len(y) * 0.12)
    if n_lookback < 1 or n_forecast < 1:
        # Fail here with a readable message instead of deep inside Keras
        # with a zero-length time axis or a zero-unit Dense layer.
        raise ValueError(
            f"dataset has {len(y)} rows, too few to build look-back/forecast "
            f"windows (n_lookback={n_lookback}, n_forecast={n_forecast})"
        )

    # Every index that has a full look-back window before it and a full
    # forecast window starting at it yields one (input, target) pair.
    starts = range(n_lookback, len(y) - n_forecast + 1)
    # float16 kept from the original; presumably to limit memory, since the
    # input tensor holds roughly (0.64 * N) windows of (0.24 * N) steps each.
    X = np.array([y[i - n_lookback: i] for i in starts], dtype=np.float16)
    Y = np.array([y[i: i + n_forecast] for i in starts], dtype=np.float16)

    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(n_lookback, 1)))
    model.add(LSTM(units=50))
    # One output unit per forecast step.
    model.add(Dense(n_forecast))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(X, Y, epochs=30, batch_size=128)
    return model
Then call the predict method to get the forecast:
def predict(self, model: Sequential, df: pd.DataFrame) -> pd.DataFrame:
    """Forecast the closes that follow the last row of ``df``.

    The ``close`` column is forward-filled and min-max scaled with a scaler
    fitted on ``df`` itself. The most recent look-back window is fed to
    ``model`` and the output is mapped back to the price scale.

    Args:
        model: A trained model that takes ``(1, n_lookback, 1)`` input and
            returns one row of forecast values.
        df: Frame with ``close`` and ``unix`` columns, oldest row first.

    Returns:
        A frame with columns ``unix`` and ``Forecast``, one row per forecast
        step, starting one step after the last timestamp in ``df``. Rows
        whose forecast is NaN are dropped.

    Raises:
        ValueError: If ``df`` has fewer rows than the look-back window.
    """
    # Series.ffill() is the supported spelling; fillna(method='ffill') is
    # deprecated in pandas.
    y = df['close'].ffill().values.reshape(-1, 1)
    scaler = MinMaxScaler(feature_range=(0, 1))
    y = scaler.fit_transform(y)

    # Prefer the window length the model was built with, so a frame whose
    # length differs from the training set still gives a correctly shaped
    # input. Fall back to the 24 % ratio that learn() uses.
    input_shape = getattr(model, 'input_shape', None)
    if input_shape and input_shape[1]:
        n_lookback = int(input_shape[1])
    else:
        n_lookback = int(len(y) * 0.24)
    if n_lookback < 1 or len(y) < n_lookback:
        raise ValueError(
            f"need at least {max(n_lookback, 1)} rows to build the look-back "
            f"window, got {len(y)}"
        )

    X_ = y[-n_lookback:].reshape(1, n_lookback, 1)
    Y_ = scaler.inverse_transform(model.predict(X_).reshape(-1, 1))
    forecast = Y_.flatten()

    # NOTE(review): 'unix' is divided by 1_000_000 before use, presumably
    # microseconds -> seconds; confirm against the data source.
    timestamp_step = 1_000_000
    last_unix = int(df['unix'].iloc[-1] / timestamp_step)

    # learn() builds each target from the values right AFTER its input
    # window, so forecast[0] belongs to last_unix + 1. Emit exactly one
    # timestamp per forecast value: the previous range started at last_unix
    # and was one element short, which dropped the final forecast.
    future_unix = np.arange(last_unix + 1, last_unix + 1 + len(forecast))
    df_future = pd.DataFrame({'unix': future_unix, 'Forecast': forecast})
    return df_future[df_future['Forecast'].notna()]