{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Found cached dataset text (C:/Users/WINSTON-ITX/.cache/huggingface/datasets/boomsss___text/boomsss--SPX_full_30min-37ae67efd8a1cc91/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from model_day import get_data, walk_forward_validation_seq\n",
    "import xgboost as xgb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "getting econ tickers: 100%|██████████| 3/3 [00:01<00:00, 2.62it/s]\n",
      "Getting release dates: 100%|██████████| 8/8 [00:02<00:00, 3.85it/s]\n",
      "Making indicators: 100%|██████████| 8/8 [00:00<00:00, 2664.95it/s]\n",
      "Merging econ data: 100%|██████████| 8/8 [00:00<00:00, 999.15it/s]\n"
     ]
    }
   ],
   "source": [
    "# Load everything through the project helper. Below, `data` supplies the daily\n",
    "# price columns (Close, High, Low, PrevClose, Close_VIX) and `df_final` is the\n",
    "# frame handed to the walk-forward validation.\n",
    "data, df_final, final_row = get_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each session's close / high / low as a return over the prior close.\n",
    "close_pct = data['Close'] / data['PrevClose'] - 1\n",
    "high_pct = data['High'] / data['PrevClose'] - 1\n",
    "low_pct = data['Low'] / data['PrevClose'] - 1\n",
    "\n",
    "# shift(-1) stores session t+1's move on row t. All three columns get the same\n",
    "# shift so they describe the same session once they are joined onto the\n",
    "# predictions; shifting only ClosePct pairs a next-session close with the\n",
    "# current session's high and low (an average close below the average low).\n",
    "# The last row has no following session and is therefore NaN.\n",
    "data['ClosePct'] = close_pct.shift(-1)\n",
    "data['HighPct'] = high_pct.shift(-1)\n",
    "data['LowPct'] = low_pct.shift(-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "LR Model: 100%|██████████| 1178/1178 [00:03<00:00, 385.55it/s]\n",
      "d:\\Projects\\gamedayspx\\model_day.py:63: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      " for_merge['RegrModelOut'] = for_merge['RegrModelOut'] > 0\n",
      "CLF Model: 100%|██████████| 1078/1078 [00:09<00:00, 
119.55it/s]\n"
     ]
    }
   ],
   "source": [
    "# Walk-forward validation over the rows of df_final that have no missing values.\n",
    "# NOTE(review): 100 and 1 are positional arguments whose meaning lives in\n",
    "# model_day; presumably the initial training window and the step size - confirm.\n",
    "res1, model1, model2 = walk_forward_validation_seq(df_final.dropna(axis=0), 'Target_clf', 'Target', 100, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       ""
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "",
      "text/plain": [
       "
"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Feature importance of the second returned model, ranked by gain.\n",
    "xgb.plot_importance(model2, importance_type='gain')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import roc_auc_score, precision_score, recall_score\n",
    "\n",
    "# Cut-offs for a high-confidence call: a probability at or below LOW_CONF_MAX\n",
    "# is a confident call for False, one above HIGH_CONF_MIN a confident call for\n",
    "# True (green). Anything in between is treated as no call.\n",
    "LOW_CONF_MAX = 0.375\n",
    "HIGH_CONF_MIN = 0.625\n",
    "METRIC_ROWS = ['N', 'ROC AUC', 'Precision', 'Recall']\n",
    "\n",
    "\n",
    "def score_summary(results):\n",
    "    \"\"\"Score a frame of predictions.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    results : DataFrame with a 'True' column (realised outcome, castable to\n",
    "        int) and a 'Predicted' column of probabilities.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dict keyed by METRIC_ROWS. Precision and recall use 0.5 as the decision\n",
    "    threshold; ROC AUC uses the raw probabilities.\n",
    "    \"\"\"\n",
    "    actual = results['True'].astype(int)\n",
    "    called_true = results['Predicted'] > 0.5\n",
    "    return {\n",
    "        'N': len(results),\n",
    "        'ROC AUC': roc_auc_score(actual, results['Predicted'].values),\n",
    "        'Precision': precision_score(actual, called_true),\n",
    "        'Recall': recall_score(actual, called_true),\n",
    "    }\n",
    "\n",
    "\n",
    "def get_acc(t, p):\n",
    "    \"\"\"Grade one prediction.\n",
    "\n",
    "    Returns '✅' when a high-confidence call matches the outcome `t`, '❌' when\n",
    "    it does not, and '🟨' when the probability `p` is not a high-confidence\n",
    "    call (including NaN).\n",
    "    \"\"\"\n",
    "    if p > HIGH_CONF_MIN:\n",
    "        called_true = True\n",
    "    elif p <= LOW_CONF_MAX:\n",
    "        called_true = False\n",
    "    else:\n",
    "        return '🟨'\n",
    "    return '✅' if bool(t) == called_true else '❌'\n",
    "\n",
    "\n",
    "# Only the rows where the model made a high-confidence call.\n",
    "res2_filtered = res1.loc[(res1['Predicted'] > HIGH_CONF_MIN) | (res1['Predicted'] <= LOW_CONF_MAX)]\n",
    "\n",
    "# One column per population, one row per metric; the explicit index pins the row order.\n",
    "df_performance = pd.DataFrame(\n",
    "    {\n",
    "        'All': score_summary(res1),\n",
    "        'High Confidence': score_summary(res2_filtered),\n",
    "    },\n",
    "    index=METRIC_ROWS,\n",
    ").round(2)\n",
    "\n",
    "perf_daily = res1.copy()\n",
    "perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the ClosePct column to the graded predictions (index-aligned inner join).\n",
    "perf_daily1 = perf_daily.merge(data['ClosePct'], left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predictions joined with the close / high / low return columns, by index.\n",
    "res2 = res1.merge(data[['ClosePct','HighPct','LowPct']], left_index=True, right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']\n",
    "proba_bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf]\n",
    "\n",
    "# Bucket every prediction by its probability, then summarise each bucket.\n",
    "predicted_bucket = pd.cut(res2['Predicted'], bins=proba_bins, labels=int_labels)\n",
    "\n",
    "# observed=False keeps empty buckets in the table (the historical default,\n",
    "# spelled out because newer pandas warns when it is left implicit).\n",
    "df_probas = res2.groupby(predicted_bucket, observed=False).agg(\n",
    "    PctGreen=('True', 'mean'),\n",
    "    NumObs=('True', 'size'),\n",
    "    NumGreen=('True', 'sum'),\n",
    "    AvgPerf=('ClosePct', 'mean'),\n",
    "    AvgHigh=('HighPct', 'mean'),\n",
    "    AvgLow=('LowPct', 'mean'),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PctGreenNumObsNumGreenAvgPerfAvgHighAvgLow
Predicted
(-∞, .20]0.21428611224-0.0129560.009253-0.007881
(.20, .40]0.32270925181-0.0040480.006433-0.005791
(.40, .60]0.504630216109-0.0001730.006079-0.006083
(.60, .80]0.6450222311490.0026800.006207-0.005687
(.80, ∞]0.7910452682120.0090380.006807-0.007949
\n", "
"
      ],
      "text/plain": [
       " PctGreen NumObs NumGreen AvgPerf AvgHigh AvgLow\n",
       "Predicted \n",
       "(-∞, .20] 0.214286 112 24 -0.012956 0.009253 -0.007881\n",
       "(.20, .40] 0.322709 251 81 -0.004048 0.006433 -0.005791\n",
       "(.40, .60] 0.504630 216 109 -0.000173 0.006079 -0.006083\n",
       "(.60, .80] 0.645022 231 149 0.002680 0.006207 -0.005687\n",
       "(.80, ∞] 0.791045 268 212 0.009038 0.006807 -0.007949"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_probas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label every prediction with its probability bucket. The column is called\n",
    "# 'Quantile' but the buckets are fixed-width cuts, not quantiles.\n",
    "res2['Quantile'] = pd.cut(res2['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "",
      "text/plain": [
       "
"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# One histogram of LowPct per probability bucket, laid out on a 2x3 grid.\n",
    "fig, axs = plt.subplots(2, 3, figsize=(15, 8))\n",
    "panels = axs.ravel()\n",
    "\n",
    "for ax, lbl in zip(panels, int_labels):\n",
    "    # hist() cannot bin NaN, so drop missing values before plotting.\n",
    "    bucket_lows = res2.loc[res2['Quantile'] == lbl, 'LowPct'].dropna()\n",
    "    ax.hist(bucket_lows)\n",
    "    ax.set_title(lbl)\n",
    "    ax.set_xlabel('LowPct')\n",
    "    ax.set_ylabel('Sessions')\n",
    "\n",
    "# The grid has more panels than buckets; hide the ones left empty.\n",
    "for ax in panels[len(int_labels):]:\n",
    "    ax.set_visible(False)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Investigate EM: a move sized from the VIX close, scaled from an annual figure\n",
    "# to HORIZON_DAYS sessions with the square-root-of-time rule.\n",
    "TRADING_DAYS_PER_YEAR = 252\n",
    "HORIZON_DAYS = 1\n",
    "\n",
    "# Column-name suffix -> multiple of the base move.\n",
    "EM_MULTIPLES = {'': 1.0, '_125': 1.25, '_15': 1.5}\n",
    "\n",
    "base_em = data['Close'] * (data['Close_VIX']/100) * (np.sqrt(HORIZON_DAYS) / np.sqrt(TRADING_DAYS_PER_YEAR))\n",
    "\n",
    "for suffix, multiple in EM_MULTIPLES.items():\n",
    "    em = base_em * multiple\n",
    "    # shift(1): the band built from one session's close is stored on the next\n",
    "    # session's row, so it can be compared with that session's prices.\n",
    "    data[f'VIX_EM{suffix}'] = em.shift(1)\n",
    "    data[f'VIX_EM{suffix}_High'] = (data['Close'] + em).shift(1)\n",
    "    data[f'VIX_EM{suffix}_Low'] = (data['Close'] - em).shift(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
VIX_EMVIX_EM_15VIX_EM_15_HighClose
index
2018-07-02NaNNaNNaN2726.709961
2018-07-0326.79558740.1933812766.9033422713.219971
2018-07-0527.58596941.3789542754.5989252736.610107
2018-07-0625.80681838.7102272775.3203352759.820068
2018-07-0923.24405534.8660832794.6861512784.169922
...............
2023-07-2841.18809961.7821484599.1923044582.229980
2023-07-3138.47749257.7162384639.9462194588.959961
2023-08-0139.40123759.1018564648.0618174576.729980
2023-08-0240.16115160.2417264636.9717064513.390137
2023-08-0345.74658268.6198734582.0100104501.890137
\n", "

1281 rows × 4 columns

\n", "
"
      ],
      "text/plain": [
       " VIX_EM VIX_EM_15 VIX_EM_15_High Close\n",
       "index \n",
       "2018-07-02 NaN NaN NaN 2726.709961\n",
       "2018-07-03 26.795587 40.193381 2766.903342 2713.219971\n",
       "2018-07-05 27.585969 41.378954 2754.598925 2736.610107\n",
       "2018-07-06 25.806818 38.710227 2775.320335 2759.820068\n",
       "2018-07-09 23.244055 34.866083 2794.686151 2784.169922\n",
       "... ... ... ... ...\n",
       "2023-07-28 41.188099 61.782148 4599.192304 4582.229980\n",
       "2023-07-31 38.477492 57.716238 4639.946219 4588.959961\n",
       "2023-08-01 39.401237 59.101856 4648.061817 4576.729980\n",
       "2023-08-02 40.161151 60.241726 4636.971706 4513.390137\n",
       "2023-08-03 45.746582 68.619873 4582.010010 4501.890137\n",
       "\n",
       "[1281 rows x 4 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[['VIX_EM','VIX_EM_15','VIX_EM_15_High','Close']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _rows_with_band(frame, prefix):\n",
    "    \"\"\"Keep only the rows where both edges of the `prefix` band are known.\n",
    "\n",
    "    The band columns are shifted by one row, so at least the first row has no\n",
    "    band; counting such rows would understate every rate below.\n",
    "    \"\"\"\n",
    "    return frame.dropna(subset=[f'{prefix}_High', f'{prefix}_Low'])\n",
    "\n",
    "\n",
    "def share_closed_within(frame, prefix):\n",
    "    \"\"\"Fraction of rows whose Close lies in [prefix_Low, prefix_High], edges included.\"\"\"\n",
    "    rows = _rows_with_band(frame, prefix)\n",
    "    inside = rows['Close'].between(rows[f'{prefix}_Low'], rows[f'{prefix}_High'])\n",
    "    return float(inside.mean())\n",
    "\n",
    "\n",
    "def share_tested(frame, prefix):\n",
    "    \"\"\"Fraction of rows whose High is above prefix_High or whose Low is below prefix_Low.\"\"\"\n",
    "    rows = _rows_with_band(frame, prefix)\n",
    "    beyond = (rows['High'] > rows[f'{prefix}_High']) | (rows['Low'] < rows[f'{prefix}_Low'])\n",
    "    return float(beyond.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8032786885245902"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How often did price close within the 1x EM band?\n",
    "share_closed_within(data, 'VIX_EM')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.33099141295862605"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How often was the 1x EM band tested (High or Low beyond it)?\n",
    "share_tested(data, 'VIX_EM')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8930523028883685"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How often did price close within the 1.25x EM band?\n",
    "share_closed_within(data, 'VIX_EM_125')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.19750195160031225"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How often was the 1.25x EM band tested (High or Low beyond it)?\n",
    "share_tested(data, 'VIX_EM_125')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9383294301327089"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How often did price close within the 1.5x EM band?\n",
    "share_closed_within(data, 'VIX_EM_15')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10772833723653395"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How often was the 1.5x EM band tested (High or Low beyond it)?\n",
    "share_tested(data, 'VIX_EM_15')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "py39",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}