# %% Force CPU-only execution
import tensorflow as tf

physical_devices = tf.config.list_physical_devices('GPU')
try:
    # Disable all GPUS
    tf.config.set_visible_devices([], 'GPU')
except (ValueError, RuntimeError) as err:
    # Invalid device or cannot modify virtual devices once initialized.
    # Best effort only: report the problem instead of silently swallowing it.
    print(f'Could not hide GPUs from TensorFlow: {err}')

# Queried outside the try block so the name is always bound for the later
# cell that displays `visible_devices`, even when hiding the GPUs failed.
visible_devices = tf.config.get_visible_devices()
if any(device.device_type == 'GPU' for device in visible_devices):
    print('Warning: at least one GPU is still visible to TensorFlow')

# %% Imports
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.compose import ColumnTransformer
import joblib
import keras
import matplotlib.pyplot as plt

# %% Configuration
TEST_DAYS = 10               # number of most recent rows kept aside in `test_dataset`
PERIOD = '5y'                # history window requested from yfinance
INDICATOR_DATASET = False    # True -> load the pre-built 'nifty_data.pkl' instead of downloading
INCLUDE_COMMODITIES = True   # True -> add gold (GC=F) and crude oil (CL=F) columns
MAX_ABS_CHANGE_PCT = 3       # rows whose absolute daily Close change reaches this are dropped
PRICE_FIELDS = ['High', 'Low', 'Open', 'Close']

# %% Build the dataset `d`
if INDICATOR_DATASET:
    # NOTE(review): joblib.load unpickles arbitrary objects; only load files you trust.
    d = joblib.load('nifty_data.pkl')
else:
    # (ticker, column prefix) pairs; the index itself keeps its plain column names.
    symbols = [("^NSEI", '')]
    if INCLUDE_COMMODITIES:
        symbols += [("GC=F", 'gold_'), ("CL=F", 'crude_')]

    frames = [
        yf.download(
            tickers=ticker,
            period=PERIOD,
            interval='1d',
            progress=False,
            timeout=10
        ).add_prefix(prefix=prefix)
        for ticker, prefix in symbols
    ]
    d = pd.concat(frames, axis=1)

    # Label computed on raw prices: floor(Open / next row's Close).
    # It is 1 when the next Close is at or below this row's Open and 0 when it
    # is above (for ratios below 2); the last row has no next Close and is NaN.
    d['target'] = np.floor(d['Open'] / d['Close'].shift(-1))

    # Absolute daily Close move in percent, used only for the outlier filter below.
    d['change'] = (d['Close'].pct_change(fill_method=None) * 100).abs()

    # Replace raw prices with row-over-row percentage changes.
    pct_columns = list(PRICE_FIELDS)
    if INCLUDE_COMMODITIES:
        pct_columns += [
            f'{prefix}_{field}'
            for prefix in ('gold', 'crude')
            for field in PRICE_FIELDS
        ]
    d[pct_columns] = d[pct_columns].pct_change(fill_method=None) * 100

    # Remove outliers (absolute Close move of MAX_ABS_CHANGE_PCT percent or more)
    # and every row that still contains a NaN. Chaining returns a new frame
    # instead of calling dropna(inplace=True) on a filtered slice.
    d = d[d['change'] < MAX_ABS_CHANGE_PCT].dropna()

d.tail()

# %% Hold out the most recent rows
test_dataset = d.tail(TEST_DAYS)
# `d` is no longer rebound, so re-running this cell cannot shave off more rows.
# One extra row is left out between the two sets, as in the original slice
# `d[:-(TEST_DAYS+1)]` (presumably because each label looks one row ahead - TODO confirm).
train_dataset = d.iloc[:-(TEST_DAYS + 1)]

# %% Select features `x` and labels `y`
if INDICATOR_DATASET:
    x = train_dataset.drop(columns=['target'])
else:
    dropped_columns = ['target', 'Adj Close', 'Volume', 'change']
    if INCLUDE_COMMODITIES:
        # Only the commodity Close changes are kept as features.
        dropped_columns += [
            'gold_Open', 'gold_High', 'gold_Low', 'gold_Adj Close', 'gold_Volume',
            'crude_Open', 'crude_High', 'crude_Low', 'crude_Adj Close', 'crude_Volume',
        ]
    x = train_dataset.drop(columns=dropped_columns, errors='ignore')
y = train_dataset.target

# %%
x.tail()

# %%
y.tail()

# %% Class balance
print('No. of Bullish samples: {}'.format(y[y == 0].size))
print('No. of Bearish samples: {}'.format(y[y == 1].size))
def preprocessBeforeScaling(df, include_commodities=None):
    """Convert raw price columns to row-over-row percentage changes.

    The columns 'High', 'Low', 'Open' and 'Close' are replaced by
    ``pct_change(fill_method=None) * 100``. When commodities are included,
    the same is done for the 'gold_*' and 'crude_*' variants of those four
    columns. All other columns are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the price columns listed above.
    include_commodities : bool, optional
        Whether to transform the gold/crude columns as well. ``None``
        (the default) uses the notebook-level ``INCLUDE_COMMODITIES`` switch,
        so existing one-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        A new frame, forward-filled and with rows that still contain NaN
        removed. The input frame is not modified.

    Raises
    ------
    KeyError
        If one of the expected price columns is missing from ``df``.
    """
    if include_commodities is None:
        include_commodities = INCLUDE_COMMODITIES

    price_fields = ('High', 'Low', 'Open', 'Close')
    columns = list(price_fields)
    if include_commodities:
        columns += [
            f'{prefix}_{field}'
            for prefix in ('gold', 'crude')
            for field in price_fields
        ]

    # Work on a copy: writing into the caller's frame made a second call on the
    # same frame compute percentage changes of percentage changes.
    result = df.copy()
    result[columns] = result[columns].pct_change(fill_method=None) * 100

    # NOTE(review): forward-filling after pct_change repeats the previous row's
    # percentage change wherever a value is missing - confirm this is intended.
    return result.ffill().dropna()
import math

# Learning rate returned for each call of `scheduler`, kept for plotting.
lr_list = []


def scheduler(epoch, lr):
    """Learning-rate schedule for ``keras.callbacks.LearningRateScheduler``.

    The rate is left unchanged for epochs 0 and 1 and multiplied by
    ``exp(-0.0025)`` on every later epoch. Each returned value is also
    appended to the module-level ``lr_list``.

    Parameters
    ----------
    epoch : int
        Zero-based epoch index supplied by the callback.
    lr : float
        Current learning rate supplied by the callback.

    Returns
    -------
    float
        The learning rate to use for this epoch.
    """
    if epoch >= 2:
        # math.exp keeps the result a plain Python float; tf.math.exp turned it
        # into a tensor, which is not the float the callback documents.
        lr = lr * math.exp(-0.0025)
    lr_list.append(lr)
    return lr
# %% Scale the features
# The scaled matrix gets its own name. Overwriting `x` made this cell fail on
# a second run, because an ndarray has no `.to_numpy()`.
if not INDICATOR_DATASET:
    print("Using StandardScaler")
    scaler = StandardScaler()
    x_scaled = scaler.fit_transform(x.to_numpy())
else:
    print("Using ColumnTransformer")
    col_names = ['Open', 'High', 'Low', 'Close', 'ATR']
    scaler = ColumnTransformer(
        [('StandardScaler', StandardScaler(), col_names)],
        remainder='passthrough'
    )
    x_scaled = scaler.fit_transform(x)
x_scaled[:5]

# %%
visible_devices

# %% Build the model
# `lr_list` and `scheduler` are defined earlier in this notebook.
import tensorflow as tf
from keras import Sequential
from keras import Model
from keras.layers import Dense
from keras.optimizers import legacy, SGD
import keras

units = 64 #128 #1024
# sgd = SGD(learning_rate=0.0001, momentum=0.0, nesterov=True)
sgd = legacy.SGD(learning_rate=0.001, momentum=0.9, nesterov=True)
kernel_init = 'he_uniform'
activation = 'relu'

# Keep only the weights with the best validation accuracy seen so far.
callback_mc = keras.callbacks.ModelCheckpoint(
    'best_model.h5',
    verbose=1,
    monitor='val_accuracy',
    save_best_only=True,
    mode='auto'
)
# Stop after 100 epochs without an improvement in validation accuracy.
callback_es = keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='auto',
    verbose=0,
    patience=100
)
callback_lr = keras.callbacks.LearningRateScheduler(scheduler)

# Funnel of Dense layers: units, units//2, ..., units//32, then one sigmoid output.
hidden_sizes = [units // (2 ** depth) for depth in range(6)]
model = Sequential(
    [Dense(hidden_sizes[0], kernel_initializer=kernel_init, activation=activation, input_dim=x_scaled.shape[1])]
    + [Dense(size, kernel_initializer=kernel_init, activation=activation) for size in hidden_sizes[1:]]
    + [Dense(1, kernel_initializer=kernel_init, activation='sigmoid')]
)
model.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# %% Train
# The earlier `int(len(y)/6.6125)` assignment was immediately overwritten, so
# only the effective value is kept.
BATCH_SIZE = 256
print(f'BATCH SIZE = {BATCH_SIZE}')
history = model.fit(
    x_scaled,
    y,
    callbacks=[callback_mc, callback_es, callback_lr],
    batch_size=BATCH_SIZE,
    epochs=750,
    validation_split=0.15,
    verbose=2
)

# %% Plot training history
import matplotlib.pyplot as plt

# Set once, before any figure is created, so both figures pick the values up.
plt.rcParams['figure.figsize'] = [8,8]
plt.rcParams['font.size'] = 14
plt.rcParams['axes.grid'] = True
plt.rcParams['figure.facecolor'] = 'white'


def _plot_panel(n_cols, position, values, label, ylabel, title, legend_loc):
    """Draw one curve in panel `position` of a 1 x `n_cols` grid on the current figure."""
    plt.subplot(1, n_cols, position)
    plt.plot(values, label=label)
    plt.legend(loc=legend_loc)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.xlabel('epoch')


train_acc = history.history['accuracy']
train_loss = history.history['loss']

plt.figure(figsize=(21,6))
_plot_panel(3, 1, train_acc, 'Training Accuracy', 'Accuracy',
            f'\nTrain Accuracy: {round(train_acc[-1],8)}', 'lower right')
_plot_panel(3, 2, train_loss, 'Training Loss', 'Cross Entropy',
            f'\nTrain Loss: {round(train_loss[-1],8)}', 'upper right')
_plot_panel(3, 3, lr_list, 'Learning Rate', 'LR',
            '\nLearning Rate', 'upper right')
plt.tight_layout(pad=3.0)
plt.show()

# These curves come from the `validation_split` part of the training data,
# so they are labelled as validation rather than test.
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']

plt.figure(figsize=(14,6))
_plot_panel(2, 1, val_acc, 'Val Accuracy', 'Accuracy',
            f'\nValidation Accuracy: {round(val_acc[-1],8)}', 'lower right')
_plot_panel(2, 2, val_loss, 'Val Loss', 'Cross Entropy',
            f'\nValidation Loss: {round(val_loss[-1],8)}', 'upper right')
plt.tight_layout(pad=3.0)
plt.show()

# %% Counters used by the realtime-inference section
metrics = {
    "TP": 0, "FP": 0, "TN": 0, "FN": 0
}
# %% Realtime inference
def _download_market_data(period):
    """Download daily ^NSEI data for `period`; when INCLUDE_COMMODITIES is set,
    also append gold (GC=F) and crude (CL=F) columns with 'gold_' / 'crude_' prefixes."""
    frames = [
        yf.download(
            tickers="^NSEI",
            period=period,
            interval='1d',
            progress=False,
            timeout=10
        )
    ]
    if INCLUDE_COMMODITIES:
        for ticker, prefix in (("GC=F", 'gold_'), ("CL=F", 'crude_')):
            frames.append(
                yf.download(
                    tickers=ticker,
                    period=period,
                    interval='1d',
                    progress=False,
                    timeout=10
                ).add_prefix(prefix=prefix)
            )
    return pd.concat(frames, axis=1)


# Feature order the scaler was fitted on: the training frame keeps the
# download's Open, High, Low, Close order followed by the commodity closes.
FEATURE_COLUMNS = ['Open', 'High', 'Low', 'Close']
if INCLUDE_COMMODITIES:
    FEATURE_COLUMNS += ['gold_Close', 'crude_Close']

endpoint = keras.models.load_model('nifty_model_v3.h5')
# endpoint = keras.models.load_model('best_model.h5')
try:
    scaler
except NameError:
    # No scaler fitted in this session: fall back to the one saved with the model.
    # NOTE(review): joblib.load unpickles arbitrary objects; only load files you trust.
    # pkl = joblib.load('nifty_model.pkl')
    pkl = joblib.load('nifty_model_v3.pkl')
    scaler = pkl['scaler']

raw_today = _download_market_data(f'{TEST_DAYS}d')
# Raw prices are kept for the ground truth, because the training label is
# defined on prices and not on percentage changes.
raw_prices = raw_today[['Open', 'Close']].ffill()

# errors='ignore' matches the training cell: a column missing from the download
# (or commodities being switched off) must not raise KeyError here.
today = raw_today.drop(
    columns=['Adj Close', 'Volume', 'gold_Adj Close', 'gold_Volume', 'crude_Adj Close', 'crude_Volume'],
    errors='ignore'
)

###
today = preprocessBeforeScaling(today)
today = today[FEATURE_COLUMNS]
print(today)
###

# Reset on every run so re-executing the cell does not accumulate old counts.
metrics = {
    "TP": 0, "FP": 0, "TN": 0, "FN": 0
}
cnt_correct, cnt_wrong = 0, 0

# The last row has no following session to score against, so it is skipped.
# The old negative-index loop wrapped `iloc[i+1]` around to the first row.
for pos in range(len(today) - 1):
    session = today.index[pos]
    next_session = today.index[pos + 1]

    features = scaler.transform([today.iloc[pos]])
    pred = endpoint.predict(features, verbose=0)
    probability = pred[0][0]

    # Same definition as the training label: bullish when the next Close is
    # above this session's Open, measured on raw prices.
    if raw_prices.at[next_session, 'Close'] > raw_prices.at[session, 'Open']:
        fact = "BULLISH"
    else:
        fact = "BEARISH"

    # The model output is compared against 0.5; above it means BEARISH.
    out = "BEARISH" if probability > 0.5 else "BULLISH"

    # Confusion counts with BULLISH as the positive class.
    if out == fact:
        cnt_correct += 1
        metrics["TP" if out == "BULLISH" else "TN"] += 1
    else:
        cnt_wrong += 1
        # A wrong BULLISH call is a false positive; a wrong BEARISH call is a false negative.
        metrics["FP" if out == "BULLISH" else "FN"] += 1

    print("{} Nifty Prediction -> Market may Close {} on {}! Actual -> {}, Prediction -> {}, Pred = {}".format(
        session.strftime("%d-%m-%Y"),
        out,
        # Date of the next row actually present, not "calendar day + 1".
        next_session.strftime("%d-%m-%Y"),
        fact,
        "Correct" if fact == out else "Wrong",
        str(np.round(probability, 2))
        )
    )

total = cnt_correct + cnt_wrong
accuracy = cnt_correct / total if total else float('nan')  # nothing scored -> no accuracy
print("Correct: {}, Wrong: {}, Accuracy: {}".format(cnt_correct, cnt_wrong, accuracy))
print(metrics)

# %% [markdown]
# ## Save Model for Screeni-py integration

# %% Persist the scaler together with the feature order it expects
pkl = {
    # 'model': model,
    'scaler': scaler,
    # Must match the column order the scaler was fitted on.
    'columns': list(FEATURE_COLUMNS)
}

joblib.dump(pkl, 'nifty_model.pkl')

# %% Predict from the latest available row using the saved artifacts
# NOTE(review): joblib.load unpickles arbitrary objects; only load files you trust.
pkl = joblib.load('nifty_model_v3.pkl')
latest = preprocessBeforeScaling(_download_market_data('5d'))
z = latest.iloc[-1]
z = z[pkl['columns']]
print(z)
z = pkl['scaler'].transform([z])
endpoint.predict(z)

# %% Split an embedded model, if any, out of the pickle
if 'model' in pkl:
    pkl['model'].save('nifty_model.h5')
    del pkl['model']
pkl
"source": [ "del pkl['model']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pkl" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def getSigmoidConfidence(x):\n", " out_min, out_max = 0, 100\n", " if x > 0.5:\n", " in_min = 0.50001\n", " in_max = 1\n", " else:\n", " in_min = 0\n", " in_max = 0.5\n", " return (x - in_min) * (out_max - out_min) / (in_max - in_min) + out_min\n", "\n", "map_range(0.9633487, 0.5, 1, 0, 100)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.13 ('ds')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "272f5af4762c02c25377d17b8d5be1b9d83b050e7634f4572d665f6d13ef995d" } } }, "nbformat": 4, "nbformat_minor": 2 }