Spaces:
Sleeping
Sleeping
File size: 6,423 Bytes
1ed042b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from datetime import timedelta
# Page header shown at the top of the Streamlit app
st.title("Stock Price Prediction with LSTM")
st.write("This application uses LSTM (Long Short-Term Memory) neural networks to predict stock prices.")

# Read the price history from disk (replace 'AAPL_dataset_copied.csv' with your actual file path)
raw_prices = pd.read_csv('AAPL_dataset_copied.csv')

# Parse the 'date' column, index the frame by it and keep only the 'close' column
raw_prices['date'] = pd.to_datetime(raw_prices['date'])
data = raw_prices.set_index('date')[['close']]

# Render the first rows so the user can check what was loaded
st.subheader("Dataset Preview")
st.write(data.head())
# Split boundaries: first 70% of rows for training, next 15% for validation,
# remainder for testing (rows are taken in file order, no shuffling)
train_size = int(len(data) * 0.7)
val_size = int(len(data) * 0.15)

# Normalize the data for faster convergence.
# The scaler is fitted on the training rows ONLY, so the minimum/maximum of the
# validation and test periods cannot leak into training. Values outside the
# training range are therefore mapped outside [0, 1], which is expected.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data[['close']].iloc[:train_size])
data['close_scaled'] = scaler.transform(data[['close']])

# Column vectors of shape (n_rows, 1) for each split
train_data = data['close_scaled'][:train_size].values.reshape(-1, 1)
val_data = data['close_scaled'][train_size:train_size + val_size].values.reshape(-1, 1)
test_data = data['close_scaled'][train_size + val_size:].values.reshape(-1, 1)
# Function to create sequences for LSTM
def create_sequences(dataset, time_step=60):
    """Turn a column of values into sliding-window samples and next-step targets.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_columns); only column 0
            is read.
        time_step: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays. ``X[i]`` holds rows
        ``i .. i + time_step - 1`` of column 0 and ``Y[i]`` is the value at
        row ``i + time_step``. With ``n_rows - time_step`` samples, ``X`` has
        shape ``(n_samples, time_step)`` and ``Y`` has shape ``(n_samples,)``.
        When the input has ``time_step`` rows or fewer, correctly shaped
        empty arrays ``(0, time_step)`` and ``(0,)`` are returned so callers
        can still reshape by ``X.shape[1]``.
    """
    values = np.asarray(dataset)
    n_samples = len(values) - time_step
    if n_samples <= 0:
        # Not enough rows for a single window: keep the 2-D layout anyway.
        return np.empty((0, time_step)), np.empty((0,))
    X = np.array([values[start:start + time_step, 0] for start in range(n_samples)])
    # Targets are the values immediately following each window.
    Y = np.array(values[time_step:, 0])
    return X, Y
# Window length: each sample is built from this many consecutive rows (e.g., 60 days)
time_step = 60

# Build (inputs, targets) for every split in one pass
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    create_sequences(split, time_step)
    for split in (train_data, val_data, test_data)
)

# LSTM layers expect [samples, time steps, features]; add the single feature axis
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
# Build and train the LSTM model with Dropout for regularization.
# Streamlit re-executes the whole script on every widget interaction, so the
# build + fit step is wrapped in a cached function: the network is trained once
# per distinct set of input arrays and reused on later reruns (for example when
# the forecast-horizon slider further down is moved).
@st.cache_resource
def _build_and_train_model(X_train, y_train, X_val, y_val):
    """Create, compile and fit the stacked LSTM regressor.

    Args:
        X_train: Training inputs of shape (samples, time steps, 1).
        y_train: Training targets, one value per sample.
        X_val: Validation inputs, same layout as ``X_train``.
        y_val: Validation targets.

    Returns:
        A tuple ``(model, history)`` with the fitted Keras model and the
        object returned by ``model.fit``.
    """
    model = Sequential([
        LSTM(100, return_sequences=True, input_shape=(X_train.shape[1], 1)),
        Dropout(0.2),
        LSTM(50, return_sequences=True),
        Dropout(0.2),
        LSTM(50, return_sequences=False),
        Dropout(0.2),
        Dense(25),
        Dense(1)
    ])
    # Compile the model with Adam optimizer and mean squared error loss
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])
    # Train the model without EarlyStopping
    history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                        epochs=50, batch_size=64, verbose=1)
    return model, history


st.write("Training the LSTM model...")
model, history = _build_and_train_model(X_train, y_train, X_val, y_val)

# Evaluate on the test data (loss and MAE here are in scaled units;
# test_mae is recomputed in price units further down)
test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)

# Make predictions on the train, validation and test inputs
train_predict = model.predict(X_train)
val_predict = model.predict(X_val)
test_predict = model.predict(X_test)
# Inverse transform the predictions back to the original price scale
train_predict = scaler.inverse_transform(train_predict)
val_predict = scaler.inverse_transform(val_predict)
test_predict = scaler.inverse_transform(test_predict)

# The scaler was fitted on a single feature column, so the targets are passed
# as (n_samples, 1) as its API expects, then laid out as (1, n_samples) again
# because the code below (and the plotting code) indexes them with [0].
y_train = scaler.inverse_transform(y_train.reshape(-1, 1)).reshape(1, -1)
y_val = scaler.inverse_transform(y_val.reshape(-1, 1)).reshape(1, -1)
y_test = scaler.inverse_transform(y_test.reshape(-1, 1)).reshape(1, -1)

# Calculate evaluation metrics in price units
train_rmse = np.sqrt(mean_squared_error(y_train[0], train_predict[:, 0]))
val_rmse = np.sqrt(mean_squared_error(y_val[0], val_predict[:, 0]))
test_rmse = np.sqrt(mean_squared_error(y_test[0], test_predict[:, 0]))
train_mae = mean_absolute_error(y_train[0], train_predict[:, 0])
val_mae = mean_absolute_error(y_val[0], val_predict[:, 0])
test_mae = mean_absolute_error(y_test[0], test_predict[:, 0])

# Mean Absolute Percentage Error (MAPE) on the test split; the reported
# "accuracy" is simply 100 - MAPE, not a classification accuracy
mape = np.mean(np.abs((y_test[0] - test_predict[:, 0]) / y_test[0])) * 100
accuracy = 100 - mape

st.write(f"LSTM Model - Train RMSE: {train_rmse:.2f}, Train MAE: {train_mae:.2f}")
st.write(f"LSTM Model - Validation RMSE: {val_rmse:.2f}, Validation MAE: {val_mae:.2f}")
st.write(f"LSTM Model - Test RMSE: {test_rmse:.2f}, Test MAE: {test_mae:.2f}")
st.write(f"LSTM Model - Test Accuracy: {accuracy:.2f}%")
# Plot the results on an explicit Figure/Axes instead of the implicit global
# pyplot figure, so the exact figure handed to Streamlit is known and can be
# released afterwards.
st.subheader("Prediction Results")
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(data.index[:train_size], scaler.inverse_transform(train_data), label='Training Data')
# Predictions start time_step rows into each split because the first window
# of a split is consumed as model input.
ax.plot(data.index[train_size + time_step:train_size + time_step + len(val_predict)], val_predict, label='Validation Predictions')
ax.plot(data.index[train_size + val_size + time_step:], test_predict, label='Test Predictions')
ax.plot(data.index[train_size + val_size + time_step:], y_test[0], label='Actual Test Data')
ax.set_xlabel('Date')
ax.set_ylabel('Stock Price')
# Legend entries come from the label= arguments above
ax.legend(loc='upper left')
st.pyplot(fig)
# Close the figure so figures do not accumulate across script reruns
plt.close(fig)
# Forecast horizon chosen by the user (1 to 30 days, default 10)
num_days_to_predict = st.slider("Select the number of days to predict into the future", min_value=1, max_value=30, value=10)

# Recursive forecast: seed with the last time_step scaled test values, then
# repeatedly predict one step ahead and feed the prediction back as input
temp_input = np.array(test_data[-time_step:]).reshape(-1).tolist()
lst_output = []
for _ in range(num_days_to_predict):
    # Always use the most recent time_step values as the model input window
    window = np.array(temp_input[-time_step:]).reshape((1, time_step, 1))
    next_scaled = model.predict(window, verbose=0)[0][0]
    temp_input.append(next_scaled)
    lst_output.append(next_scaled)
# Bring the forecast values back to the original price scale
scaled_forecast = np.reshape(lst_output, (-1, 1))
future_predictions = scaler.inverse_transform(scaled_forecast)

# Dates for the forecast: consecutive calendar days after the last row of the
# dataset (NOTE: calendar days, so weekends are included)
last_date = data.index[-1]
future_dates = []
for offset in range(1, num_days_to_predict + 1):
    future_dates.append(last_date + timedelta(days=offset))

# Show the forecast as a date/price table
st.subheader(f"Future Predictions for the next {num_days_to_predict} days:")
future_df = pd.DataFrame({
    'Date': future_dates,
    'Predicted Price (LSTM)': future_predictions[:, 0],
})
st.write(future_df)
|