Okcan committed on
Commit
318e02f
·
verified ·
1 Parent(s): 6569de8

Upload Bitcoin_Price_Prediction_Model_(LSTM).ipynb

Browse files
Bitcoin_Price_Prediction_Model_(LSTM).ipynb ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": null,
20
+ "metadata": {
21
+ "id": "Z6OeRBuqH7cJ"
22
+ },
23
+ "outputs": [],
24
+ "source": [
25
+ "#@title Step 1: Installing and Importing Necessary Libraries\n",
26
+ "# We are installing the necessary libraries in the Google Colab environment.\n",
27
+ "# yfinance: To fetch financial data from Yahoo Finance.\n",
28
+ "# tensorflow: To build and train the neural network.\n",
29
+ "# scikit-learn: For data preprocessing (normalization).\n",
30
+ "!pip install yfinance tensorflow scikit-learn pandas matplotlib -q\n",
31
+ "\n",
32
+ "import numpy as np\n",
33
+ "import pandas as pd\n",
34
+ "import matplotlib.pyplot as plt\n",
35
+ "import yfinance as yf\n",
36
+ "from sklearn.preprocessing import MinMaxScaler\n",
37
+ "from tensorflow.keras.models import Sequential\n",
38
+ "from tensorflow.keras.layers import LSTM, Dense, Dropout\n",
39
+ "import datetime\n",
40
+ "\n",
41
+ "print(\"Libraries have been successfully installed and imported!\")\n",
42
+ "\n",
43
+ "\n",
44
+ "#@title Step 2: Fetching and Visualizing Bitcoin Data\n",
45
+ "# Let's fetch the BTC-USD (Bitcoin/US Dollar) data for the last few years.\n",
46
+ "start_date = '2019-01-01'\n",
47
+ "# We set the end date to today's date.\n",
48
+ "end_date = datetime.date.today().strftime(\"%Y-%m-%d\")\n",
49
+ "\n",
50
+ "try:\n",
51
+ " btc_data = yf.download('BTC-USD', start=start_date, end=end_date)\n",
52
+ " print(f\"Bitcoin data between {start_date} and {end_date} has been fetched.\")\n",
53
+ " print(\"First 5 rows of the dataset:\")\n",
54
+ " print(btc_data.head())\n",
55
+ "\n",
56
+ " # Let's plot the 'Close' prices of the dataset in a graph.\n",
57
+ " plt.figure(figsize=(14, 7))\n",
58
+ " plt.style.use('seaborn-v0_8-darkgrid')\n",
59
+ " plt.plot(btc_data['Close'], color='orange')\n",
60
+ " plt.title('Bitcoin Closing Prices (BTC-USD)', fontsize=16)\n",
61
+ " plt.xlabel('Date', fontsize=12)\n",
62
+ " plt.ylabel('Price (USD)', fontsize=12)\n",
63
+ " plt.legend(['Closing Price'])\n",
64
+ " plt.show()\n",
65
+ "\n",
66
+ "except Exception as e:\n",
67
+ " print(f\"An error occurred while fetching data: {e}\")\n",
68
+ "\n",
69
+ "\n",
70
+ "#@title Step 3: Data Preprocessing\n",
71
+ "# We are preparing the data to train our model.\n",
72
+ "\n",
73
+ "# We will only use the 'Close' column.\n",
74
+ "close_data = btc_data['Close'].values.reshape(-1, 1)\n",
75
+ "\n",
76
+ "# Scale the closing prices to the [0, 1] range (min-max normalization). Note: the scaler is fitted on the whole series, before the train/test split.\n",
77
+ "# Neural networks work more efficiently with data in this range.\n",
78
+ "scaler = MinMaxScaler(feature_range=(0, 1))\n",
79
+ "scaled_data = scaler.fit_transform(close_data)\n",
80
+ "\n",
81
+ "# We are splitting the dataset: 80% for training, 20% for testing.\n",
82
+ "training_data_len = int(np.ceil(len(scaled_data) * 0.8))\n",
83
+ "\n",
84
+ "# Let's create the training data.\n",
85
+ "train_data = scaled_data[0:int(training_data_len), :]\n",
86
+ "\n",
87
+ "# Let's prepare the x_train and y_train sets for training.\n",
88
+ "# The model will predict the next day's price by looking at the past 60 days' prices.\n",
89
+ "prediction_days = 60\n",
90
+ "x_train = []\n",
91
+ "y_train = []\n",
92
+ "\n",
93
+ "for i in range(prediction_days, len(train_data)):\n",
94
+ " x_train.append(train_data[i-prediction_days:i, 0])\n",
95
+ " y_train.append(train_data[i, 0])\n",
96
+ "\n",
97
+ "# Converting the lists to numpy arrays.\n",
98
+ "x_train, y_train = np.array(x_train), np.array(y_train)\n",
99
+ "\n",
100
+ "# Reshaping the data into a 3D format suitable for the LSTM model: [number of samples, time steps, number of features]\n",
101
+ "x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))\n",
102
+ "print(f\"Training data prepared. x_train shape: {x_train.shape}\")\n",
103
+ "\n",
104
+ "\n",
105
+ "#@title Step 4: Building the LSTM Model\n",
106
+ "# We are designing our neural network model using Keras.\n",
107
+ "\n",
108
+ "model = Sequential()\n",
109
+ "\n",
110
+ "# Layer 1: LSTM layer with 50 neurons. `return_sequences=True` because we will send data to the next LSTM layer.\n",
111
+ "model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))\n",
112
+ "model.add(Dropout(0.2)) # We are deactivating 20% of the neurons to prevent overfitting.\n",
113
+ "\n",
114
+ "# Layer 2: LSTM layer with 50 neurons.\n",
115
+ "model.add(LSTM(units=50, return_sequences=False))\n",
116
+ "model.add(Dropout(0.2))\n",
117
+ "\n",
118
+ "# Output Layer: Consists of 1 neuron as we will predict a single value (the price).\n",
119
+ "model.add(Dense(units=1))\n",
120
+ "\n",
121
+ "# Compiling the model. 'adam' is a popular optimizer. 'mean_squared_error' is the loss function.\n",
122
+ "model.compile(optimizer='adam', loss='mean_squared_error')\n",
123
+ "\n",
124
+ "# Let's see the model's architecture.\n",
125
+ "model.summary()\n",
126
+ "\n",
127
+ "\n",
128
+ "#@title Step 5: Training the Model\n",
129
+ "# We are training the model with the prepared data.\n",
130
+ "# epochs: The number of times the model will process the entire dataset.\n",
131
+ "# batch_size: The number of data samples the model will see in each iteration.\n",
132
+ "print(\"Starting model training...\")\n",
133
+ "history = model.fit(x_train, y_train, batch_size=32, epochs=25)\n",
134
+ "print(\"Model training completed!\")\n",
135
+ "\n",
136
+ "\n",
137
+ "#@title Step 6: Testing the Model and Evaluating Results\n",
138
+ "# Let's create the test data.\n",
139
+ "test_data = scaled_data[training_data_len - prediction_days:, :]\n",
140
+ "\n",
141
+ "# Let's prepare the x_test and y_test sets.\n",
142
+ "x_test = []\n",
143
+ "y_test = close_data[training_data_len:, :] # y_test is the original (unscaled) data.\n",
144
+ "\n",
145
+ "for i in range(prediction_days, len(test_data)):\n",
146
+ " x_test.append(test_data[i-prediction_days:i, 0])\n",
147
+ "\n",
148
+ "x_test = np.array(x_test)\n",
149
+ "x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))\n",
150
+ "\n",
151
+ "# Let's make predictions on the test data with the model.\n",
152
+ "predictions = model.predict(x_test)\n",
153
+ "\n",
154
+ "# Let's scale the predictions back to the original price (from 0-1 range to USD).\n",
155
+ "predictions = scaler.inverse_transform(predictions)\n",
156
+ "\n",
157
+ "# Let's calculate RMSE (Root Mean Squared Error) to measure the model's performance.\n",
158
+ "rmse = np.sqrt(np.mean(((predictions - y_test) ** 2)))\n",
159
+ "print(f'\\nModel Error Rate on Test Data (RMSE): {rmse:.2f} USD')\n",
160
+ "\n",
161
+ "# Let's show the actual and predicted prices on the same graph.\n",
162
+ "train = btc_data[:training_data_len]\n",
163
+ "valid = btc_data[training_data_len:].copy() # Using .copy() to avoid SettingWithCopyWarning.\n",
164
+ "valid.loc[:, 'Predictions'] = predictions\n",
165
+ "\n",
166
+ "plt.figure(figsize=(16, 8))\n",
167
+ "plt.title('Model Predictions vs Actual Prices', fontsize=16)\n",
168
+ "plt.xlabel('Date', fontsize=12)\n",
169
+ "plt.ylabel('Closing Price (USD)', fontsize=12)\n",
170
+ "plt.plot(train['Close'], color='blue', alpha=0.6)\n",
171
+ "plt.plot(valid['Close'], color='green')\n",
172
+ "plt.plot(valid['Predictions'], color='red', linestyle='--')\n",
173
+ "plt.legend(['Training Data', 'Actual Price', 'Predicted Price'], loc='upper left')\n",
174
+ "plt.show()\n",
175
+ "\n",
176
+ "# Let's take a closer look at the last 15 days of predictions.\n",
177
+ "print(\"\\nLast 15 Days of Actual and Predicted Prices:\")\n",
178
+ "print(valid[['Close', 'Predictions']].tail(15))\n",
179
+ "\n",
180
+ "\n",
181
+ "#@title Step 7: Using the Model to Predict the Future\n",
182
+ "\n",
183
+ "# Get the last 60 days of data\n",
184
+ "last_60_days = scaled_data[-prediction_days:]\n",
185
+ "X_predict = np.reshape(last_60_days, (1, prediction_days, 1))\n",
186
+ "\n",
187
+ "# Predict the next day's closing price (still in scaled 0-1 units)\n",
188
+ "predicted_price_scaled = model.predict(X_predict)\n",
189
+ "predicted_price = scaler.inverse_transform(predicted_price_scaled)\n",
190
+ "\n",
191
+ "# Date information\n",
192
+ "tomorrow = datetime.date.today() + datetime.timedelta(days=1)\n",
193
+ "\n",
194
+ "# Take the last row and convert its closing price to a plain float for the formatted output below\n",
195
+ "last_row = btc_data.tail(1)\n",
196
+ "last_index = last_row.index[0]\n",
197
+ "last_actual_price = float(last_row['Close'].iloc[0])\n",
198
+ "\n",
199
+ "# Print results\n",
200
+ "print(\"\\n\" + \"=\"*50)\n",
201
+ "print(\"FUTURE PREDICTION\")\n",
202
+ "print(\"=\"*50)\n",
203
+ "print(f\"Last closing price({last_index.strftime('%Y-%m-%d')}): {last_actual_price:.2f} USD\")\n",
204
+ "print(f\"The model {tomorrow.strftime('%Y-%m-%d')} Bitcoin price prediction for: {float(predicted_price[0][0]):.2f} USD\")\n",
205
+ "print(\"=\"*50)\n",
206
+ "print(\"\\nWARNING: This model is for educational purposes only and does not constitute financial advice.\")\n",
207
+ "\n",
208
+ "\n"
209
+ ]
210
+ }
211
+ ]
212
+ }