Rahul-Crudcook committed on
Commit
1ed042b
·
verified ·
1 Parent(s): a1c4c8a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -155
app.py CHANGED
@@ -1,155 +1,155 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- import matplotlib.pyplot as plt
5
- from sklearn.preprocessing import MinMaxScaler
6
- from sklearn.metrics import mean_squared_error, mean_absolute_error
7
- from tensorflow.keras.models import Sequential
8
- from tensorflow.keras.layers import Dense, LSTM, Dropout
9
- from datetime import timedelta
10
-
11
- # Title and description
12
- st.title("Stock Price Prediction with LSTM")
13
- st.write("This application uses LSTM (Long Short-Term Memory) neural networks to predict stock prices.")
14
-
15
- # Load the data directly (replace 'AAPL_dataset_copied.csv' with your actual file path)
16
- data = pd.read_csv('AAPL_dataset_copied.csv')
17
-
18
- # Convert 'date' column to datetime and set as index
19
- data['date'] = pd.to_datetime(data['date'])
20
- data.set_index('date', inplace=True)
21
-
22
- # Select only the 'Close' column
23
- data = data[['close']]
24
-
25
- # Show the first few rows of the dataset
26
- st.subheader("Dataset Preview")
27
- st.write(data.head())
28
-
29
- # Normalize the data for faster convergence
30
- scaler = MinMaxScaler(feature_range=(0, 1))
31
- data['close_scaled'] = scaler.fit_transform(data[['close']])
32
-
33
- # Split data into training (70%), validation (15%), and testing (15%) sets
34
- train_size = int(len(data) * 0.7)
35
- val_size = int(len(data) * 0.15)
36
- train_data = data['close_scaled'][:train_size].values.reshape(-1, 1)
37
- val_data = data['close_scaled'][train_size:train_size + val_size].values.reshape(-1, 1)
38
- test_data = data['close_scaled'][train_size + val_size:].values.reshape(-1, 1)
39
-
40
- # Function to create sequences for LSTM
41
- def create_sequences(dataset, time_step=60):
42
- X, Y = [], []
43
- for i in range(len(dataset) - time_step):
44
- X.append(dataset[i:(i + time_step), 0])
45
- Y.append(dataset[i + time_step, 0])
46
- return np.array(X), np.array(Y)
47
-
48
- # Define time step (e.g., 60 days)
49
- time_step = 60
50
- X_train, y_train = create_sequences(train_data, time_step)
51
- X_val, y_val = create_sequences(val_data, time_step)
52
- X_test, y_test = create_sequences(test_data, time_step)
53
-
54
- # Reshape input to be [samples, time steps, features] for LSTM
55
- X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
56
- X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
57
- X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
58
-
59
- # Build the LSTM model with Dropout for regularization
60
- model = Sequential([
61
- LSTM(100, return_sequences=True, input_shape=(X_train.shape[1], 1)),
62
- Dropout(0.2),
63
- LSTM(50, return_sequences=True),
64
- Dropout(0.2),
65
- LSTM(50, return_sequences=False),
66
- Dropout(0.2),
67
- Dense(25),
68
- Dense(1)
69
- ])
70
-
71
- # Compile the model with Adam optimizer and mean squared error loss
72
- model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])
73
-
74
- # Train the model without EarlyStopping
75
- st.write("Training the LSTM model...")
76
- history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
77
- epochs=100, batch_size=64, verbose=1)
78
-
79
- # Evaluate on the test data
80
- test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)
81
-
82
- # Make predictions on the test data
83
- train_predict = model.predict(X_train)
84
- val_predict = model.predict(X_val)
85
- test_predict = model.predict(X_test)
86
-
87
- # Inverse transform the predictions and actual values to original scale
88
- train_predict = scaler.inverse_transform(train_predict)
89
- val_predict = scaler.inverse_transform(val_predict)
90
- test_predict = scaler.inverse_transform(test_predict)
91
- y_train = scaler.inverse_transform([y_train])
92
- y_val = scaler.inverse_transform([y_val])
93
- y_test = scaler.inverse_transform([y_test])
94
-
95
- # Calculate evaluation metrics
96
- train_rmse = np.sqrt(mean_squared_error(y_train[0], train_predict[:,0]))
97
- val_rmse = np.sqrt(mean_squared_error(y_val[0], val_predict[:,0]))
98
- test_rmse = np.sqrt(mean_squared_error(y_test[0], test_predict[:,0]))
99
-
100
- train_mae = mean_absolute_error(y_train[0], train_predict[:,0])
101
- val_mae = mean_absolute_error(y_val[0], val_predict[:,0])
102
- test_mae = mean_absolute_error(y_test[0], test_predict[:,0])
103
-
104
- # Mean Absolute Percentage Error (MAPE) as accuracy
105
- mape = np.mean(np.abs((y_test[0] - test_predict[:, 0]) / y_test[0])) * 100
106
- accuracy = 100 - mape
107
-
108
- st.write(f"LSTM Model - Train RMSE: {train_rmse:.2f}, Train MAE: {train_mae:.2f}")
109
- st.write(f"LSTM Model - Validation RMSE: {val_rmse:.2f}, Validation MAE: {val_mae:.2f}")
110
- st.write(f"LSTM Model - Test RMSE: {test_rmse:.2f}, Test MAE: {test_mae:.2f}")
111
- st.write(f"LSTM Model - Test Accuracy: {accuracy:.2f}%")
112
-
113
- # Plot the results
114
- st.subheader("Prediction Results")
115
- plt.figure(figsize=(14,6))
116
- plt.plot(data.index[:train_size], scaler.inverse_transform(train_data), label='Training Data')
117
- plt.plot(data.index[train_size + time_step:train_size + time_step + len(val_predict)], val_predict, label='Validation Predictions')
118
- plt.plot(data.index[train_size + val_size + time_step:], test_predict, label='Test Predictions')
119
- plt.plot(data.index[train_size + val_size + time_step:], y_test[0], label='Actual Test Data')
120
- plt.xlabel('Date')
121
- plt.ylabel('Stock Price')
122
- plt.legend(['Training Data', 'Validation Predictions', 'Test Predictions', 'Actual Test Data'], loc='upper left')
123
- st.pyplot(plt)
124
-
125
- # User-defined future prediction days
126
- num_days_to_predict = st.slider("Select the number of days to predict into the future", min_value=1, max_value=30, value=10)
127
-
128
- # Predict future prices for the next 'num_days_to_predict' days
129
- temp_input = np.array(test_data[-time_step:]).reshape(-1).tolist()
130
- lst_output = []
131
-
132
- for i in range(num_days_to_predict):
133
- if len(temp_input) > time_step:
134
- x_input = np.array(temp_input[-time_step:])
135
- x_input = x_input.reshape((1, time_step, 1))
136
- yhat = model.predict(x_input, verbose=0)
137
- temp_input.append(yhat[0][0])
138
- lst_output.append(yhat[0][0])
139
- else:
140
- x_input = np.array(temp_input).reshape((1, time_step, 1))
141
- yhat = model.predict(x_input, verbose=0)
142
- temp_input.append(yhat[0][0])
143
- lst_output.append(yhat[0][0])
144
-
145
- # Inverse transform future predictions to the original scale
146
- future_predictions = scaler.inverse_transform(np.array(lst_output).reshape(-1, 1))
147
-
148
- # Generate dates for future predictions
149
- last_date = data.index[-1]
150
- future_dates = [last_date + timedelta(days=i) for i in range(1, num_days_to_predict + 1)]
151
-
152
- # Display future predictions with dates
153
- st.subheader(f"Future Predictions for the next {num_days_to_predict} days:")
154
- future_df = pd.DataFrame({'Date': future_dates, 'Predicted Price (LSTM)': future_predictions.flatten()})
155
- st.write(future_df)
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ from sklearn.preprocessing import MinMaxScaler
6
+ from sklearn.metrics import mean_squared_error, mean_absolute_error
7
+ from tensorflow.keras.models import Sequential
8
+ from tensorflow.keras.layers import Dense, LSTM, Dropout
9
+ from datetime import timedelta
10
+
11
# Title and description
st.title("Stock Price Prediction with LSTM")
st.write("This application uses LSTM (Long Short-Term Memory) neural networks to predict stock prices.")

# Load the data directly (replace 'AAPL_dataset_copied.csv' with your actual file path)
data = pd.read_csv('AAPL_dataset_copied.csv')

# Convert 'date' column to datetime and set as index
data['date'] = pd.to_datetime(data['date'])
data.set_index('date', inplace=True)

# Keep only the 'close' column. Copy it so that adding 'close_scaled' below
# writes to an independent frame rather than to a selection of the loaded one.
data = data[['close']].copy()

# Show the first few rows of the dataset
st.subheader("Dataset Preview")
st.write(data.head())

# Chronological split sizes: training (70%), validation (15%), testing (remainder, ~15%)
train_size = int(len(data) * 0.7)
val_size = int(len(data) * 0.15)

# Fit the scaler on the training rows only, then apply it to the whole series.
# Fitting on every row would leak the validation/test price range into training.
# Values outside the training range may therefore scale outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data[['close']].iloc[:train_size])
data['close_scaled'] = scaler.transform(data[['close']]).ravel()

# Column vectors of scaled prices for each split
scaled_close = data['close_scaled'].to_numpy().reshape(-1, 1)
train_data = scaled_close[:train_size]
val_data = scaled_close[train_size:train_size + val_size]
test_data = scaled_close[train_size + val_size:]
39
+
40
def create_sequences(dataset, time_step=60):
    """Slice a series into supervised (window, next value) pairs for the LSTM.

    Args:
        dataset: Array-like of shape (n_samples, n_features) or (n_samples,).
            For 2-D input only the first column is used.
        time_step: Number of consecutive observations in each input window.

    Returns:
        A tuple ``(X, Y)`` where ``X`` has shape ``(n_windows, time_step)`` and
        ``Y`` has shape ``(n_windows,)``, with
        ``n_windows = max(len(dataset) - time_step, 0)``. ``Y[i]`` is the value
        that immediately follows window ``X[i]``.

    Raises:
        ValueError: If ``time_step`` is smaller than 1.
    """
    if time_step < 1:
        raise ValueError("time_step must be a positive integer")

    values = np.asarray(dataset)
    series = values[:, 0] if values.ndim > 1 else values

    n_windows = len(series) - time_step
    if n_windows <= 0:
        # Keep X two-dimensional even when empty so callers can still read
        # X.shape[1] without an IndexError.
        return (np.empty((0, time_step), dtype=series.dtype),
                np.empty(0, dtype=series.dtype))

    X = np.stack([series[start:start + time_step] for start in range(n_windows)])
    Y = series[time_step:].copy()
    return X, Y
47
+
48
# Define time step (e.g., 60 days): each sample is a window of this many
# consecutive scaled closes, and the target is the close that follows it.
time_step = 60
X_train, y_train = create_sequences(train_data, time_step)
X_val, y_val = create_sequences(val_data, time_step)
X_test, y_test = create_sequences(test_data, time_step)

# A split that is not longer than the window produces no samples. Stop with a
# readable message instead of failing later on an out-of-range shape lookup.
for split_name, split_X in (("training", X_train), ("validation", X_val), ("test", X_test)):
    if len(split_X) == 0:
        st.error(
            f"The {split_name} split is too short to build a {time_step}-step window; "
            "provide more data or reduce the time step."
        )
        st.stop()

# Reshape input to be [samples, time steps, features] for LSTM
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
58
+
59
# Streamlit re-executes this script on every widget interaction (for example,
# moving the forecast-horizon slider). Caching the trained model keeps those
# reruns from repeating the full training; it retrains only when the inputs change.
@st.cache_resource(show_spinner=False)
def _train_lstm(X_train, y_train, X_val, y_val, epochs=50, batch_size=64):
    """Build, compile and fit the stacked LSTM regressor.

    Args:
        X_train, y_train: Training windows [samples, time steps, 1] and targets.
        X_val, y_val: Validation windows and targets, monitored during fitting.
        epochs: Number of passes over the training data.
        batch_size: Mini-batch size used by ``fit``.

    Returns:
        Tuple ``(model, history)``: the fitted Keras model and the ``History``
        object returned by ``fit``.
    """
    # Three stacked LSTM layers with Dropout for regularization, then a small dense head
    network = Sequential([
        LSTM(100, return_sequences=True, input_shape=(X_train.shape[1], 1)),
        Dropout(0.2),
        LSTM(50, return_sequences=True),
        Dropout(0.2),
        LSTM(50, return_sequences=False),
        Dropout(0.2),
        Dense(25),
        Dense(1),
    ])

    # Adam optimizer with mean squared error loss; MAE is tracked as a metric
    network.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

    # Fixed number of epochs, no early stopping
    fit_history = network.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs, batch_size=batch_size, verbose=1,
    )
    return network, fit_history


st.write("Training the LSTM model...")
model, history = _train_lstm(X_train, y_train, X_val, y_val)
78
+
79
# Evaluate on the test data. Both numbers are in the scaled space the model was
# trained on; test_mae is recomputed in price units further down.
test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)

# Make predictions on the training, validation and test windows
train_predict = model.predict(X_train)
val_predict = model.predict(X_val)
test_predict = model.predict(X_test)

# Inverse transform the predictions back to the original price scale
train_predict = scaler.inverse_transform(train_predict)
val_predict = scaler.inverse_transform(val_predict)
test_predict = scaler.inverse_transform(test_predict)

# The scaler was fitted on a single feature, so pass the targets as column
# vectors rather than as one wide row. Reshape back to (1, n) afterwards so the
# actual values are still read as y_*[0].
y_train = scaler.inverse_transform(y_train.reshape(-1, 1)).reshape(1, -1)
y_val = scaler.inverse_transform(y_val.reshape(-1, 1)).reshape(1, -1)
y_test = scaler.inverse_transform(y_test.reshape(-1, 1)).reshape(1, -1)

# Calculate evaluation metrics in price units
train_rmse = np.sqrt(mean_squared_error(y_train[0], train_predict[:, 0]))
val_rmse = np.sqrt(mean_squared_error(y_val[0], val_predict[:, 0]))
test_rmse = np.sqrt(mean_squared_error(y_test[0], test_predict[:, 0]))

train_mae = mean_absolute_error(y_train[0], train_predict[:, 0])
val_mae = mean_absolute_error(y_val[0], val_predict[:, 0])
test_mae = mean_absolute_error(y_test[0], test_predict[:, 0])

# "Accuracy" here is simply 100 minus the Mean Absolute Percentage Error (MAPE)
# on the test set; it is not a classification accuracy.
mape = np.mean(np.abs((y_test[0] - test_predict[:, 0]) / y_test[0])) * 100
accuracy = 100 - mape

st.write(f"LSTM Model - Train RMSE: {train_rmse:.2f}, Train MAE: {train_mae:.2f}")
st.write(f"LSTM Model - Validation RMSE: {val_rmse:.2f}, Validation MAE: {val_mae:.2f}")
st.write(f"LSTM Model - Test RMSE: {test_rmse:.2f}, Test MAE: {test_mae:.2f}")
st.write(f"LSTM Model - Test Accuracy: {accuracy:.2f}%")
112
+
113
# Plot the results on an explicit figure instead of pyplot's global state
st.subheader("Prediction Results")
fig, ax = plt.subplots(figsize=(14, 6))

# Actual prices of the training split
ax.plot(data.index[:train_size], scaler.inverse_transform(train_data), label='Training Data')

# Predictions start time_step rows into each split, because the first window
# has to be filled before the first target exists.
val_start = train_size + time_step
ax.plot(data.index[val_start:val_start + len(val_predict)], val_predict, label='Validation Predictions')

test_start = train_size + val_size + time_step
ax.plot(data.index[test_start:], test_predict, label='Test Predictions')
ax.plot(data.index[test_start:], y_test[0], label='Actual Test Data')

ax.set_xlabel('Date')
ax.set_ylabel('Stock Price')
ax.legend(loc='upper left')  # labels come from the plot calls above

st.pyplot(fig)
plt.close(fig)  # release the figure so script reruns do not accumulate open figures
124
+
125
# User-defined future prediction days
num_days_to_predict = st.slider("Select the number of days to predict into the future", min_value=1, max_value=30, value=10)

# Recursive multi-step forecast: seed the window with the last time_step scaled
# closes, predict one step, append the prediction, and slide the window forward.
temp_input = np.array(test_data[-time_step:]).reshape(-1).tolist()
lst_output = []

for _ in range(num_days_to_predict):
    # Always feed the most recent time_step values (observed, then predicted)
    x_input = np.array(temp_input[-time_step:]).reshape((1, time_step, 1))
    yhat = model.predict(x_input, verbose=0)
    next_scaled = yhat[0][0]
    temp_input.append(next_scaled)
    lst_output.append(next_scaled)

# Inverse transform future predictions to the original scale
future_predictions = scaler.inverse_transform(np.array(lst_output).reshape(-1, 1))

# Generate dates for future predictions. Each step stands for the next trading
# session, so use business days rather than calendar days to skip weekends.
# Exchange holidays are not accounted for.
last_date = data.index[-1]
future_dates = pd.bdate_range(start=last_date + pd.offsets.BDay(1), periods=num_days_to_predict)

# Display future predictions with dates
st.subheader(f"Future Predictions for the next {num_days_to_predict} days:")
future_df = pd.DataFrame({'Date': future_dates, 'Predicted Price (LSTM)': future_predictions.flatten()})
st.write(future_df)