python pandas dataframe tensorflow keras

How can I predict values beyond the end of a pandas DataFrame?

I built an LSTM-based model and trained it to predict intraday stock price changes, where the unit of time is one second. On the test data the predictions are close to the real values, but I need a forecast for the future, beyond the end of the existing data. What am I doing wrong?


        # Take the 'close' column as a single-feature column vector of shape (n, 1).
        close = df['close']
        values = close.values
        values = values.reshape(-1, 1)

        # Scale the prices into [0, 1]; the scaler is fitted on the very data
        # that is about to be predicted.
        training_scaler = MinMaxScaler(feature_range=(0, 1))

        testing_input = values
        testing_input = training_scaler.fit_transform(testing_input)
        testing = []
        # NOTE: `[i - 50:i][0]` keeps only the FIRST row of each 50-row slice, so
        # every sample is the single known value testing_input[i - 50]. With i
        # running from 50 to len + 49, the loop walks the existing series once,
        # one point per sample, and never produces an input past its end.
        for i in range(50, len(testing_input) + 50):
            testing.append(testing_input[i - 50:i][0])

        # Resulting shape is (n, 1, 1): n samples, one time step, one feature.
        testing = np.array(testing)
        testing = np.reshape(testing, (testing.shape[0], testing.shape[1], 1))
        predict = model.predict(testing)
        # Map the scaled predictions back to price units.
        predict = training_scaler.inverse_transform(predict)

        # Plot the real series and the predictions on the same axes.
        plt.plot(values, color='blue', label='Stock Price')
        plt.plot(predict, color='red', label='Predicted Stock Price')
        plt.title('Changes')
        plt.xlabel('Timeline')
        plt.ylabel('Stock Price')
        plt.legend()
        plt.show()

My results

It turns out that the model predicts data that I already know. How can I predict future data?

Solution

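I found a solution. The problem was that I had trained the model incorrectly: it was trained to reproduce single known values rather than to map a window of past values onto a window of future values, so it could not predict anything beyond the end of the data set. The code below works correctly: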

def learn(self, dataset_path: str) -> Sequential:
    """Train an LSTM that maps a window of past closes to a window of future closes.

    The CSV at ``dataset_path`` must contain a ``close`` column. Missing values
    are forward-filled and the series is min-max scaled to [0, 1]. The look-back
    window is 24% of the series length and the forecast horizon is 12% of it.

    Note that the fitted scaler is local to this method and is not returned;
    only the trained model is.

    Args:
        dataset_path: Path to the CSV file holding the training data.

    Returns:
        The fitted model. It takes input of shape ``(n_lookback, 1)`` and
        outputs ``n_forecast`` scaled values.

    Raises:
        ValueError: If the series is too short for either window to contain
            at least one sample.
    """
    df = pd.read_csv(dataset_path)
    # `fillna(method='ffill')` is deprecated in pandas; `ffill()` is the
    # supported spelling of the same forward fill.
    y = df['close'].ffill().values.reshape(-1, 1)

    scaler = MinMaxScaler(feature_range=(0, 1))
    y = scaler.fit_transform(y)

    n_lookback = int(len(y) * 0.24)
    n_forecast = int(len(y) * 0.12)
    if n_lookback < 1 or n_forecast < 1:
        # int() truncation yields 0 for very short series, which would
        # otherwise surface as an obscure layer-shape error in Keras.
        raise ValueError(
            f"dataset has {len(y)} rows, too few to derive a look-back "
            f"window ({n_lookback}) and forecast horizon ({n_forecast})"
        )

    # Each sample pairs the n_lookback values before position i with the
    # n_forecast values starting at position i.
    split_points = range(n_lookback, len(y) - n_forecast + 1)
    # float16 is kept from the original; presumably chosen to limit the
    # memory taken by the heavily overlapping windows.
    X = np.array([y[i - n_lookback:i] for i in split_points], dtype=np.float16)
    Y = np.array([y[i:i + n_forecast] for i in split_points], dtype=np.float16)

    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(n_lookback, 1)))
    model.add(LSTM(units=50))
    # One output unit per forecast step: the whole horizon is predicted at once.
    model.add(Dense(n_forecast))

    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(X, Y, epochs=30, batch_size=128)

    return model

Then call the predict method with the trained model to get the forecast:

def predict(self, model: Sequential, df: pd.DataFrame) -> pd.DataFrame:
    """Forecast the closes that follow the last row of ``df``.

    The most recent look-back window of ``df['close']`` is forward-filled,
    min-max scaled, and fed to ``model``. The model's output is mapped back to
    price units and stamped with consecutive timestamps after the last one.

    The scaler is re-fitted on ``df`` here, so the forecast is only on the
    training scale when ``df`` spans the same price range as the training data.

    Args:
        model: A trained model with input shape ``(n_lookback, 1)`` that
            outputs one value per forecast step.
        df: Frame with a ``close`` column and a ``unix`` timestamp column.

    Returns:
        A frame with columns ``unix`` and ``Forecast``. ``unix`` is
        ``df['unix']`` divided by 1_000_000, one row per forecast step.

    Raises:
        ValueError: If ``df`` has fewer rows than the look-back window.
    """
    # `fillna(method='ffill')` is deprecated in pandas; `ffill()` is equivalent.
    y = df['close'].ffill().values.reshape(-1, 1)

    scaler = MinMaxScaler(feature_range=(0, 1))
    y = scaler.fit_transform(y)

    # Use the window length the model was built with. Re-deriving it from
    # len(df) only matches when df has the same length as the training set.
    n_lookback = model.input_shape[1]
    if n_lookback is None:
        # Variable-length input: fall back to the original 24% heuristic.
        n_lookback = int(len(y) * 0.24)
    if not 0 < n_lookback <= len(y):
        raise ValueError(
            f"need at least {n_lookback} rows (and a positive window) to "
            f"build the look-back window, got {len(y)}"
        )

    X_ = y[-n_lookback:].reshape(1, n_lookback, 1)

    Y_ = model.predict(X_).reshape(-1, 1)
    Y_ = scaler.inverse_transform(Y_)
    # The horizon is whatever the model emitted, so timestamps and values
    # always have the same length.
    n_forecast = len(Y_)

    # presumably converts microsecond timestamps to seconds; confirm the unit
    timestamp_step = 1_000_000
    last_unix = int(df['unix'].iloc[-1] / timestamp_step)

    # The first forecast value belongs to the step after the last observation.
    # The original range started on the last observed timestamp and was one
    # entry short, so index alignment dropped the final forecast value.
    df_future = pd.DataFrame({
        'unix': np.arange(last_unix + 1, last_unix + 1 + n_forecast),
        'Forecast': Y_.flatten(),
    })

    return df_future[df_future['Forecast'].notna()]