wnstnb committed on
Commit
1bd00e6
·
1 Parent(s): 2949f94

qol changes:

Browse files
Files changed (3) hide show
  1. app.py +55 -49
  2. model_intra.py +44 -44
  3. troubleshoot_day_model.ipynb +95 -204
app.py CHANGED
@@ -42,11 +42,9 @@ def convert_to_datetime(time_str, pst_now):
42
  return datetime.combine(pst_now.date(), time_obj.time())
43
 
44
  # Function to determine the emoji for each time
45
- @st.cache_data
46
- def get_time_emojis(_times, _pst_now):
47
  pst_now_time = pst_now.time()
48
  us_holidays = get_us_holidays()
49
- emojis = []
50
  idxs = []
51
 
52
  for t in _times:
@@ -55,24 +53,19 @@ def get_time_emojis(_times, _pst_now):
55
  # Check if the time is less than or equal to the current time in PST
56
  if time_obj.time() <= pst_now_time:
57
  idxs.append(True)
58
- emoji = "βœ…"
59
 
60
  else:
61
  # Check if it's a business day or a holiday
62
  if is_business_day(time_obj) or time_obj.date() in us_holidays:
63
  idxs.append(False)
64
- emoji = "β›”"
65
  else:
66
  idxs.append(True)
67
- emoji = "βœ…"
68
-
69
- emojis.append(emoji)
70
-
71
- return emojis, idxs
72
 
73
  # Example usage:
74
  times_list = ['06:30', '07:00', '07:30', '08:00', '08:30', '09:00', '09:30', '10:00', '10:30', '11:00', '11:30', '12:00', '12:30']
75
- emojis_list, idxs = get_time_emojis(times_list, pst_now)
76
 
77
  def get_last_idx(arr):
78
  try:
@@ -87,23 +80,22 @@ st.markdown('**PLEASE NOTE:** Model should be run at or after market open. Docum
87
 
88
  if "mode" not in st.session_state:
89
  st.session_state.mode = "Auto"
 
90
 
91
  with st.form("choose_model"):
92
  t1, t2 = st.columns(2)
93
  with t1:
94
- mode = st.radio('Choose mode', options=['Auto','Manual'], horizontal=True)
 
95
  with t2:
96
  submitted = st.form_submit_button('πŸƒπŸ½β€β™‚οΈ Run',use_container_width=True)
97
- sub1, sub2 = st.columns(2)
98
- with sub1:
99
- change_mode = st.form_submit_button('πŸ‘ŠπŸ½ Change Mode',use_container_width=True)
100
- with sub2:
101
- cleared = st.form_submit_button('🧹 Clear',use_container_width=True)
102
 
103
  option = st.select_slider(
104
  f"""Change mode to Manual, and select time πŸ‘‰πŸ½ Run.""",
105
  times_list,
106
- format_func=lambda x: f"{emojis_list[times_list.index(x)]} {x}",
107
  disabled = mode == 'Auto'
108
  )
109
 
@@ -113,36 +105,38 @@ with st.form("choose_model"):
113
  option = option
114
 
115
  if change_mode:
116
- st.write(f"Changed to {mode}.{' Model will run for {}.'.format(option) if mode == 'Auto' else ''}")
117
 
118
  if cleared:
119
  st.cache_data.clear()
120
 
121
  if option == '':
122
- st.write('No model selected.')
123
 
124
  if submitted:
125
 
126
  fname=f'performance_for_{option}_model.csv'
 
 
127
 
128
- if option == '06:30':
129
 
130
- from model_day import *
131
 
132
- fname='performance_for_open_model.csv'
133
 
134
- with st.spinner('Loading data...'):
135
  data, df_final, final_row = get_data()
136
- # st.success("βœ… Historical data")
137
 
138
- with st.spinner("Training models..."):
139
  def train_models():
140
  res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 200, 1)
141
  return res1, xgbr, seq2
142
  res1, xgbr, seq2 = train_models()
143
- # st.success("βœ… Models trained")
144
 
145
- with st.spinner("Getting new prediction..."):
146
 
147
  # Get last row
148
  new_pred = data.loc[final_row, model_cols]
@@ -166,6 +160,8 @@ with st.form("choose_model"):
166
  new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
167
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
168
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
 
 
169
  new_pred['OpenL1'] = new_pred['OpenL1'].astype(float)
170
  new_pred['OpenL2'] = new_pred['OpenL2'].astype(float)
171
  new_pred['OpenH1'] = new_pred['OpenH1'].astype(float)
@@ -184,24 +180,24 @@ with st.form("choose_model"):
184
  new_pred['L2BreakTouchPct'] = new_pred['L2BreakTouchPct'].astype(float)
185
 
186
  seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
187
-
188
- else:
189
 
190
- from model_intra import *
191
- idx = times_list.index(option)
192
 
193
- with st.spinner('Loading data...'):
194
  data, df_final, final_row = get_data(idx)
195
- # st.success("βœ… Historical data")
196
 
197
- with st.spinner("Training models..."):
198
  def train_models():
199
- res1, xgbr = walk_forward_validation(df_final.dropna(), 'Target_clf', 100, 1)
200
  return res1, xgbr
201
  res1, xgbr = train_models()
202
- # st.success("βœ… Models trained")
203
 
204
- with st.spinner("Getting new prediction..."):
205
 
206
  # Get last row
207
  new_pred = data.loc[final_row, model_cols]
@@ -230,10 +226,10 @@ with st.form("choose_model"):
230
  new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
231
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
232
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
233
- new_pred['OpenL1'] = new_pred['OpenL1'].astype(float)
234
- new_pred['OpenL2'] = new_pred['OpenL2'].astype(float)
235
- new_pred['OpenH1'] = new_pred['OpenH1'].astype(float)
236
- new_pred['OpenH2'] = new_pred['OpenH2'].astype(float)
237
  new_pred['L1TouchPct'] = new_pred['L1TouchPct'].astype(float)
238
  new_pred['L2TouchPct'] = new_pred['L2TouchPct'].astype(float)
239
  new_pred['H1TouchPct'] = new_pred['H1TouchPct'].astype(float)
@@ -246,7 +242,13 @@ with st.form("choose_model"):
246
  new_pred['H2BreakTouchPct'] = new_pred['H2BreakTouchPct'].astype(float)
247
  new_pred['L1BreakTouchPct'] = new_pred['L1BreakTouchPct'].astype(float)
248
  new_pred['L2BreakTouchPct'] = new_pred['L2BreakTouchPct'].astype(float)
 
 
249
  new_pred['GreenProbas'] = new_pred['GreenProbas'].astype(float)
 
 
 
 
250
 
251
  seq_proba = seq_predict_proba(new_pred, xgbr)
252
 
@@ -264,12 +266,16 @@ with st.form("choose_model"):
264
  data['ClosePct'] = data['ClosePct'].shift(-1)
265
  res1 = res1.merge(data['ClosePct'], left_index=True,right_index=True)
266
  # df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
267
- df_probas = res1.groupby(pd.cut(res1['Predicted'], _q)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.median, lambda x: np.quantile(x, 0.25), lambda x: np.quantile(x, 0.75)]})
 
 
 
 
268
 
269
- df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf','25P','75P']
270
  df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
271
- df_probas['25P'] = df_probas['25P'].apply(lambda x: f'{x:.2%}')
272
- df_probas['75P'] = df_probas['75P'].apply(lambda x: f'{x:.2%}')
273
 
274
  green_proba = seq_proba[0]
275
  red_proba = 1 - green_proba
@@ -293,8 +299,8 @@ with st.form("choose_model"):
293
  red_hist_proba = 1 - historical_proba
294
  num_obs = df_probas.loc[q, 'NumObs']
295
  mid = df_probas.loc[q, 'AvgPerf']
296
- lo = df_probas.loc[q, '25P']
297
- hi = df_probas.loc[q, '75P']
298
  intv = f'({q.left:.03f}, {q.right:.03f}])'
299
 
300
  qs = [f'({q.left:.2f}, {q.right:.2f}]' for q in df_probas.index]
@@ -381,7 +387,7 @@ with st.form("choose_model"):
381
  [f"{curr_close30:.0f}"],
382
  [f"{(1+float(lo.strip('%'))/100) * prev_close:.0f}"],
383
  [f"{(1+float(mid.strip('%'))/100) * prev_close:.0f}"],
384
- [f"{(1+float(hi.strip('%'))/100) * prev_close :.0f}"]
385
  ],
386
  columns=['Targets'])
387
 
 
42
  return datetime.combine(pst_now.date(), time_obj.time())
43
 
44
  # Function to determine the emoji for each time
45
+ def get_time(_times, _pst_now):
 
46
  pst_now_time = pst_now.time()
47
  us_holidays = get_us_holidays()
 
48
  idxs = []
49
 
50
  for t in _times:
 
53
  # Check if the time is less than or equal to the current time in PST
54
  if time_obj.time() <= pst_now_time:
55
  idxs.append(True)
 
56
 
57
  else:
58
  # Check if it's a business day or a holiday
59
  if is_business_day(time_obj) or time_obj.date() in us_holidays:
60
  idxs.append(False)
 
61
  else:
62
  idxs.append(True)
63
+
64
+ return idxs
 
 
 
65
 
66
  # Example usage:
67
  times_list = ['06:30', '07:00', '07:30', '08:00', '08:30', '09:00', '09:30', '10:00', '10:30', '11:00', '11:30', '12:00', '12:30']
68
+ idxs = get_time(times_list, pst_now)
69
 
70
  def get_last_idx(arr):
71
  try:
 
80
 
81
  if "mode" not in st.session_state:
82
  st.session_state.mode = "Auto"
83
+ st.session_state.option = times_list[idx_use]
84
 
85
  with st.form("choose_model"):
86
  t1, t2 = st.columns(2)
87
  with t1:
88
+ mode = st.radio('Choose mode', options=['Auto','Manual'], horizontal=True, label_visibility='collapsed')
89
+ change_mode = st.form_submit_button('πŸ‘ŠπŸ½ Confirm Mode',use_container_width=True)
90
  with t2:
91
  submitted = st.form_submit_button('πŸƒπŸ½β€β™‚οΈ Run',use_container_width=True)
92
+ cleared = st.form_submit_button('🧹 Clear',use_container_width=True)
93
+
 
 
 
94
 
95
  option = st.select_slider(
96
  f"""Change mode to Manual, and select time πŸ‘‰πŸ½ Run.""",
97
  times_list,
98
+ # format_func=lambda x: f"{emojis_list[times_list.index(x)]} {x}",
99
  disabled = mode == 'Auto'
100
  )
101
 
 
105
  option = option
106
 
107
  if change_mode:
108
+ st.info(f"Changed to {mode}.{' Model will run for {}.'.format(option) if mode == 'Auto' else ''}")
109
 
110
  if cleared:
111
  st.cache_data.clear()
112
 
113
  if option == '':
114
+ st.info('No model selected.')
115
 
116
  if submitted:
117
 
118
  fname=f'performance_for_{option}_model.csv'
119
+
120
+ with st.status(f"Running model for {option}..."):
121
 
122
+ if option == '06:30':
123
 
124
+ from model_day import *
125
 
126
+ fname='performance_for_open_model.csv'
127
 
128
+ st.write('Loading data...')
129
  data, df_final, final_row = get_data()
130
+ # st.success("βœ… Historical data")
131
 
132
+ st.write("Training models...")
133
  def train_models():
134
  res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 200, 1)
135
  return res1, xgbr, seq2
136
  res1, xgbr, seq2 = train_models()
137
+ # st.success("βœ… Models trained")
138
 
139
+ st.write("Getting new prediction...")
140
 
141
  # Get last row
142
  new_pred = data.loc[final_row, model_cols]
 
160
  new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
161
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
162
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
163
+ # new_pred['OHLC4_Current_Trend'] = new_pred['OHLC4_Current_Trend'].astype(bool)
164
+ # new_pred['OHLC4_Trend'] = new_pred['OHLC4_Trend'].astype(bool)
165
  new_pred['OpenL1'] = new_pred['OpenL1'].astype(float)
166
  new_pred['OpenL2'] = new_pred['OpenL2'].astype(float)
167
  new_pred['OpenH1'] = new_pred['OpenH1'].astype(float)
 
180
  new_pred['L2BreakTouchPct'] = new_pred['L2BreakTouchPct'].astype(float)
181
 
182
  seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
183
+
184
+ else:
185
 
186
+ from model_intra import *
187
+ idx = times_list.index(option)
188
 
189
+ st.write('Loading data...')
190
  data, df_final, final_row = get_data(idx)
191
+ # st.success("βœ… Historical data")
192
 
193
+ st.write("Training models...")
194
  def train_models():
195
+ res1, xgbr = walk_forward_validation(df_final.dropna(), 'Target_clf', 120, 1)
196
  return res1, xgbr
197
  res1, xgbr = train_models()
198
+ # st.success("βœ… Models trained")
199
 
200
+ st.write("Getting new prediction...")
201
 
202
  # Get last row
203
  new_pred = data.loc[final_row, model_cols]
 
226
  new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
227
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
228
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
229
+ # new_pred['OpenL1'] = new_pred['OpenL1'].astype(float)
230
+ # new_pred['OpenL2'] = new_pred['OpenL2'].astype(float)
231
+ # new_pred['OpenH1'] = new_pred['OpenH1'].astype(float)
232
+ # new_pred['OpenH2'] = new_pred['OpenH2'].astype(float)
233
  new_pred['L1TouchPct'] = new_pred['L1TouchPct'].astype(float)
234
  new_pred['L2TouchPct'] = new_pred['L2TouchPct'].astype(float)
235
  new_pred['H1TouchPct'] = new_pred['H1TouchPct'].astype(float)
 
242
  new_pred['H2BreakTouchPct'] = new_pred['H2BreakTouchPct'].astype(float)
243
  new_pred['L1BreakTouchPct'] = new_pred['L1BreakTouchPct'].astype(float)
244
  new_pred['L2BreakTouchPct'] = new_pred['L2BreakTouchPct'].astype(float)
245
+ new_pred['H1BreakH2TouchPct'] = new_pred['H1BreakH2TouchPct'].astype(float)
246
+ new_pred['L1BreakL2TouchPct'] = new_pred['L1BreakL2TouchPct'].astype(float)
247
  new_pred['GreenProbas'] = new_pred['GreenProbas'].astype(float)
248
+ new_pred['OHLC4_Current_Trend'] = new_pred['OHLC4_Current_Trend'].astype(bool)
249
+ new_pred['OHLC4_Trend'] = new_pred['OHLC4_Trend'].astype(bool)
250
+ new_pred['H1TouchGreenPct'] = new_pred['H1TouchGreenPct'].astype(float)
251
+ new_pred['L1TouchRedPct'] = new_pred['L1TouchRedPct'].astype(float)
252
 
253
  seq_proba = seq_predict_proba(new_pred, xgbr)
254
 
 
266
  data['ClosePct'] = data['ClosePct'].shift(-1)
267
  res1 = res1.merge(data['ClosePct'], left_index=True,right_index=True)
268
  # df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
269
+ df_probas = res1.groupby(pd.cut(res1['Predicted'], _q)).agg({
270
+ 'True':[np.mean,len,np.sum],
271
+ 'ClosePct':[np.mean,
272
+ lambda x: np.mean([r for r in x if r < 0]),
273
+ lambda x: np.mean([r for r in x if r > 0])]})
274
 
275
+ df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf','AvgDown','AvgUp']
276
  df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
277
+ df_probas['AvgDown'] = df_probas['AvgDown'].apply(lambda x: f'{x:.2%}')
278
+ df_probas['AvgUp'] = df_probas['AvgUp'].apply(lambda x: f'{x:.2%}')
279
 
280
  green_proba = seq_proba[0]
281
  red_proba = 1 - green_proba
 
299
  red_hist_proba = 1 - historical_proba
300
  num_obs = df_probas.loc[q, 'NumObs']
301
  mid = df_probas.loc[q, 'AvgPerf']
302
+ lo = df_probas.loc[q, 'AvgDown']
303
+ hi = df_probas.loc[q, 'AvgUp']
304
  intv = f'({q.left:.03f}, {q.right:.03f}])'
305
 
306
  qs = [f'({q.left:.2f}, {q.right:.2f}]' for q in df_probas.index]
 
387
  [f"{curr_close30:.0f}"],
388
  [f"{(1+float(lo.strip('%'))/100) * prev_close:.0f}"],
389
  [f"{(1+float(mid.strip('%'))/100) * prev_close:.0f}"],
390
+ [f"{(1+float(hi.strip('%'))/100) * prev_close:.0f}"]
391
  ],
392
  columns=['Targets'])
393
 
model_intra.py CHANGED
@@ -12,6 +12,9 @@ import datetime
12
  from pandas.tseries.offsets import BDay
13
  from datasets import load_dataset
14
  import lightgbm as lgb
 
 
 
15
 
16
  model_cols = [
17
  'BigNewsDay',
@@ -32,10 +35,12 @@ model_cols = [
32
  'OHLC4_VIX',
33
  'OHLC4_VIX_n1',
34
  'OHLC4_VIX_n2',
35
- 'OpenL1',
36
- 'OpenL2',
37
- 'OpenH1',
38
- 'OpenH2',
 
 
39
  'L1TouchPct',
40
  'L2TouchPct',
41
  'H1TouchPct',
@@ -48,7 +53,11 @@ model_cols = [
48
  'H1BreakTouchPct',
49
  'H2BreakTouchPct',
50
  'L1BreakTouchPct',
51
- 'L2BreakTouchPct'
 
 
 
 
52
  # 'GapFillGreenProba'
53
  ]
54
 
@@ -59,20 +68,23 @@ def walk_forward_validation(df, target_column, num_training_rows, num_periods):
59
  df[target_column] = df[target_column].astype(bool)
60
 
61
  # Model
62
- model = lgb.LGBMClassifier(n_estimators=10, random_state=42, verbosity=-1)
 
 
63
 
64
  overall_results = []
65
  # Iterate over the rows in the DataFrame, one step at a time
66
- for i in tqdm(range(num_training_rows, df.shape[0] - num_periods + 1),desc='LGB Model'):
67
- # Split the data into training and test sets
68
- X_train = df.drop(target_column, axis=1).iloc[:i]
69
- y_train = df[target_column].iloc[:i]
70
- X_test = df.drop(target_column, axis=1).iloc[i:i+num_periods]
71
- y_test = df[target_column].iloc[i:i+num_periods]
72
-
 
73
  # Fit the model to the training data
 
74
  model.fit(X_train, y_train)
75
-
76
  # Make a prediction on the test data
77
  predictions = model.predict_proba(X_test)[:,-1]
78
 
@@ -87,7 +99,7 @@ def walk_forward_validation(df, target_column, num_training_rows, num_periods):
87
  return df.groupby(pd.cut(df[col_name], q))['True'].mean()
88
 
89
  greenprobas = []
90
- for i, pct in tqdm(enumerate(df_results['Predicted']), desc='Calibrating Probas'):
91
  try:
92
  df_q = get_quantiles(df_results.iloc[:i], 'Predicted', 7)
93
  for q in df_q.index:
@@ -123,18 +135,6 @@ def get_data(periods_30m = 1):
123
  dates.append(release_date_tag.text)
124
  return dates
125
 
126
- def parse_release_dates_obs(series_id: str) -> List[str]:
127
- obs_url = f'https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&realtime_start=2015-01-01&include_release_dates_with_no_data=true&api_key={API_KEY_FRED}'
128
- r = requests.get(obs_url)
129
- text = r.text
130
- soup = BeautifulSoup(text, 'xml')
131
- observations = []
132
- for observation_tag in soup.find_all('observation'):
133
- date = observation_tag.get('date')
134
- value = observation_tag.get('value')
135
- observations.append((date, value))
136
- return observations
137
-
138
  econ_dfs = {}
139
 
140
  econ_tickers = [
@@ -144,20 +144,10 @@ def get_data(periods_30m = 1):
144
  ]
145
 
146
  for et in tqdm(econ_tickers, desc='getting econ tickers'):
147
- # p = parse_release_dates_obs(et)
148
- # df = pd.DataFrame(columns = ['ds',et], data = p)
149
  df = pdr.get_data_fred(et)
150
  df.index = df.index.rename('ds')
151
- # df.index = pd.to_datetime(df.index.rename('ds')).dt.tz_localize(None)
152
- # df['ds'] = pd.to_datetime(df['ds']).dt.tz_localize(None)
153
  econ_dfs[et] = df
154
 
155
- # walcl = pd.DataFrame(columns = ['ds','WALCL'], data = p)
156
- # walcl['ds'] = pd.to_datetime(walcl['ds']).dt.tz_localize(None)
157
-
158
- # nfci = pd.DataFrame(columns = ['ds','NFCI'], data = p2)
159
- # nfci['ds'] = pd.to_datetime(nfci['ds']).dt.tz_localize(None)
160
-
161
  release_ids = [
162
  "10", # "Consumer Price Index"
163
  "46", # "Producer Price Index"
@@ -197,8 +187,6 @@ def get_data(periods_30m = 1):
197
  releases[rid]['name']: 1
198
  })
199
  releases[rid]['df'].index = pd.DatetimeIndex(releases[rid]['df'].index)
200
- # releases[rid]['df']['ds'] = pd.to_datetime(releases[rid]['df']['ds']).dt.tz_localize(None)
201
- # releases[rid]['df'] = releases[rid]['df'].set_index('ds')
202
 
203
  vix = yf.Ticker('^VIX')
204
  spx = yf.Ticker('^GSPC')
@@ -255,22 +243,20 @@ def get_data(periods_30m = 1):
255
 
256
  df_intra.columns = ['Open30','High30','Low30','Close30']
257
 
258
- prices_vix = vix.history(start='2018-07-01', interval='1d')
259
- prices_spx = spx.history(start='2018-07-01', interval='1d')
260
  prices_spx['index'] = [str(x).split()[0] for x in prices_spx.index]
261
  prices_spx['index'] = pd.to_datetime(prices_spx['index']).dt.date
262
  prices_spx.index = prices_spx['index']
263
  prices_spx = prices_spx.drop(columns='index')
264
  prices_spx.index = pd.DatetimeIndex(prices_spx.index)
265
 
266
-
267
  prices_vix['index'] = [str(x).split()[0] for x in prices_vix.index]
268
  prices_vix['index'] = pd.to_datetime(prices_vix['index']).dt.date
269
  prices_vix.index = prices_vix['index']
270
  prices_vix = prices_vix.drop(columns='index')
271
  prices_vix.index = pd.DatetimeIndex(prices_vix.index)
272
 
273
-
274
  data = prices_spx.merge(df_intra, left_index=True, right_index=True)
275
  data = data.merge(prices_vix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VIX'])
276
 
@@ -291,6 +277,7 @@ def get_data(periods_30m = 1):
291
  data['OHLC4_VIX'] = data[['Open_VIX','High_VIX','Low_VIX','Close_VIX']].mean(axis=1)
292
  data['OHLC4'] = data[['Open','High','Low','Close']].mean(axis=1)
293
  data['OHLC4_Trend'] = data['OHLC4'] > data['OHLC4'].shift(1)
 
294
  data['OHLC4_Trend_n1'] = data['OHLC4_Trend'].shift(1)
295
  data['OHLC4_Trend_n1'] = data['OHLC4_Trend_n1'].astype(float)
296
  data['OHLC4_Trend_n2'] = data['OHLC4_Trend'].shift(1)
@@ -310,6 +297,9 @@ def get_data(periods_30m = 1):
310
  data['CurrentHigh30'] = data['High30'].shift(-1)
311
  data['CurrentLow30'] = data['Low30'].shift(-1)
312
  data['CurrentClose30'] = data['Close30'].shift(-1)
 
 
 
313
  data['HistClose30toPrevClose'] = (data['Close30'] / data['PrevClose']) - 1
314
 
315
 
@@ -354,8 +344,12 @@ def get_data(periods_30m = 1):
354
  H1Touch = lambda x: x['High'] > x['H1'],
355
  H2Touch = lambda x: x['High'] > x['H2'],
356
  L1Break = lambda x: x['Close'] < x['L1'],
 
 
357
  L2Break = lambda x: x['Close'] < x['L2'],
358
  H1Break = lambda x: x['Close'] > x['H1'],
 
 
359
  H2Break = lambda x: x['Close'] > x['H2'],
360
  OpenL1 = lambda x: np.where(x['Open'] < x['L1'], 1, 0),
361
  OpenL2 = lambda x: np.where(x['Open'] < x['L2'], 1, 0),
@@ -395,6 +389,11 @@ def get_data(periods_30m = 1):
395
  data['H2BreakTouchPct'] = data['H2Break'].rolling(100).sum() / data['H2Touch'].rolling(100).sum()
396
  data['L1BreakTouchPct'] = data['L1Break'].rolling(100).sum() / data['L1Touch'].rolling(100).sum()
397
  data['L2BreakTouchPct'] = data['L2Break'].rolling(100).sum() / data['L2Touch'].rolling(100).sum()
 
 
 
 
 
398
 
399
  def get_quintiles(df, col_name, q):
400
  return df.groupby(pd.qcut(df[col_name], q))['GreenDay'].mean()
@@ -461,5 +460,6 @@ def get_data(periods_30m = 1):
461
  exp_row = data.index[-1]
462
 
463
  df_final = data.loc[:final_row, model_cols + ['Target', 'Target_clf']]
464
- df_final = df_final.dropna(subset=['Target','Target_clf','Perf5Day_n1'])
 
465
  return data, df_final, final_row
 
12
  from pandas.tseries.offsets import BDay
13
  from datasets import load_dataset
14
  import lightgbm as lgb
15
+ from sklearn.model_selection import TimeSeriesSplit
16
+
17
+ data_start_date = '2018-07-01'
18
 
19
  model_cols = [
20
  'BigNewsDay',
 
35
  'OHLC4_VIX',
36
  'OHLC4_VIX_n1',
37
  'OHLC4_VIX_n2',
38
+ 'OHLC4_Current_Trend',
39
+ 'OHLC4_Trend',
40
+ # 'OpenL1',
41
+ # 'OpenL2',
42
+ # 'OpenH1',
43
+ # 'OpenH2',
44
  'L1TouchPct',
45
  'L2TouchPct',
46
  'H1TouchPct',
 
53
  'H1BreakTouchPct',
54
  'H2BreakTouchPct',
55
  'L1BreakTouchPct',
56
+ 'L2BreakTouchPct',
57
+ 'H1BreakH2TouchPct',
58
+ 'L1BreakL2TouchPct',
59
+ 'H1TouchGreenPct',
60
+ 'L1TouchRedPct'
61
  # 'GapFillGreenProba'
62
  ]
63
 
 
68
  df[target_column] = df[target_column].astype(bool)
69
 
70
  # Model
71
+ # model = lgb.LGBMClassifier(n_estimators=10, random_state=42, verbosity=-1)
72
+
73
+ tscv = TimeSeriesSplit(n_splits=len(df)-1, max_train_size=None, test_size=num_periods) # num_splits is the number of splits you want
74
 
75
  overall_results = []
76
  # Iterate over the rows in the DataFrame, one step at a time
77
+ # Split the time series data using TimeSeriesSplit
78
+ for train_index, test_index in tqdm(tscv.split(df), total=tscv.n_splits):
79
+ # Extract the training and testing data for the current split
80
+ X_train = df.drop(target_column, axis=1).iloc[train_index]
81
+ y_train = df[target_column].iloc[train_index]
82
+ X_test = df.drop(target_column, axis=1).iloc[test_index]
83
+ y_test = df[target_column].iloc[test_index]
84
+
85
  # Fit the model to the training data
86
+ model = lgb.LGBMClassifier(n_estimators=10, random_state=42, verbosity=-1)
87
  model.fit(X_train, y_train)
 
88
  # Make a prediction on the test data
89
  predictions = model.predict_proba(X_test)[:,-1]
90
 
 
99
  return df.groupby(pd.cut(df[col_name], q))['True'].mean()
100
 
101
  greenprobas = []
102
+ for i, pct in tqdm(enumerate(df_results['Predicted']), desc='Calibrating Probas',total=len(df_results)):
103
  try:
104
  df_q = get_quantiles(df_results.iloc[:i], 'Predicted', 7)
105
  for q in df_q.index:
 
135
  dates.append(release_date_tag.text)
136
  return dates
137
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  econ_dfs = {}
139
 
140
  econ_tickers = [
 
144
  ]
145
 
146
  for et in tqdm(econ_tickers, desc='getting econ tickers'):
 
 
147
  df = pdr.get_data_fred(et)
148
  df.index = df.index.rename('ds')
 
 
149
  econ_dfs[et] = df
150
 
 
 
 
 
 
 
151
  release_ids = [
152
  "10", # "Consumer Price Index"
153
  "46", # "Producer Price Index"
 
187
  releases[rid]['name']: 1
188
  })
189
  releases[rid]['df'].index = pd.DatetimeIndex(releases[rid]['df'].index)
 
 
190
 
191
  vix = yf.Ticker('^VIX')
192
  spx = yf.Ticker('^GSPC')
 
243
 
244
  df_intra.columns = ['Open30','High30','Low30','Close30']
245
 
246
+ prices_vix = vix.history(start=data_start_date, interval='1d')
247
+ prices_spx = spx.history(start=data_start_date, interval='1d')
248
  prices_spx['index'] = [str(x).split()[0] for x in prices_spx.index]
249
  prices_spx['index'] = pd.to_datetime(prices_spx['index']).dt.date
250
  prices_spx.index = prices_spx['index']
251
  prices_spx = prices_spx.drop(columns='index')
252
  prices_spx.index = pd.DatetimeIndex(prices_spx.index)
253
 
 
254
  prices_vix['index'] = [str(x).split()[0] for x in prices_vix.index]
255
  prices_vix['index'] = pd.to_datetime(prices_vix['index']).dt.date
256
  prices_vix.index = prices_vix['index']
257
  prices_vix = prices_vix.drop(columns='index')
258
  prices_vix.index = pd.DatetimeIndex(prices_vix.index)
259
 
 
260
  data = prices_spx.merge(df_intra, left_index=True, right_index=True)
261
  data = data.merge(prices_vix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VIX'])
262
 
 
277
  data['OHLC4_VIX'] = data[['Open_VIX','High_VIX','Low_VIX','Close_VIX']].mean(axis=1)
278
  data['OHLC4'] = data[['Open','High','Low','Close']].mean(axis=1)
279
  data['OHLC4_Trend'] = data['OHLC4'] > data['OHLC4'].shift(1)
280
+ data['OHLC4_Trend'] = data['OHLC4_Trend'].astype(bool)
281
  data['OHLC4_Trend_n1'] = data['OHLC4_Trend'].shift(1)
282
  data['OHLC4_Trend_n1'] = data['OHLC4_Trend_n1'].astype(float)
283
  data['OHLC4_Trend_n2'] = data['OHLC4_Trend'].shift(1)
 
297
  data['CurrentHigh30'] = data['High30'].shift(-1)
298
  data['CurrentLow30'] = data['Low30'].shift(-1)
299
  data['CurrentClose30'] = data['Close30'].shift(-1)
300
+ data['CurrentOHLC430'] = data[['CurrentOpen30','CurrentHigh30','CurrentLow30','CurrentClose30']].max(axis=1)
301
+ data['OHLC4_Current_Trend'] = data['CurrentOHLC430'] > data['OHLC4']
302
+ data['OHLC4_Current_Trend'] = data['OHLC4_Current_Trend'].astype(bool)
303
  data['HistClose30toPrevClose'] = (data['Close30'] / data['PrevClose']) - 1
304
 
305
 
 
344
  H1Touch = lambda x: x['High'] > x['H1'],
345
  H2Touch = lambda x: x['High'] > x['H2'],
346
  L1Break = lambda x: x['Close'] < x['L1'],
347
+ L1TouchRed = lambda x: (x['Low'] < x['L2']) & (x['Close'] < x['PrevClose']),
348
+ L2TouchL1Break = lambda x: (x['Low'] < x['L2']) & (x['Close'] < x['L1']),
349
  L2Break = lambda x: x['Close'] < x['L2'],
350
  H1Break = lambda x: x['Close'] > x['H1'],
351
+ H1TouchGreen = lambda x: (x['High'] > x['H1']) & (x['Close'] > x['PrevClose']),
352
+ H2TouchH1Break = lambda x: (x['High'] > x['H2']) & (x['Close'] > x['H1']),
353
  H2Break = lambda x: x['Close'] > x['H2'],
354
  OpenL1 = lambda x: np.where(x['Open'] < x['L1'], 1, 0),
355
  OpenL2 = lambda x: np.where(x['Open'] < x['L2'], 1, 0),
 
389
  data['H2BreakTouchPct'] = data['H2Break'].rolling(100).sum() / data['H2Touch'].rolling(100).sum()
390
  data['L1BreakTouchPct'] = data['L1Break'].rolling(100).sum() / data['L1Touch'].rolling(100).sum()
391
  data['L2BreakTouchPct'] = data['L2Break'].rolling(100).sum() / data['L2Touch'].rolling(100).sum()
392
+ data['L1TouchRedPct'] = data['L1TouchRed'].rolling(100).sum() / data['L1Touch'].rolling(100).sum()
393
+ data['H1TouchGreenPct'] = data['H1TouchGreen'].rolling(100).sum() / data['H1Touch'].rolling(100).sum()
394
+
395
+ data['H1BreakH2TouchPct'] = data['H2TouchH1Break'].rolling(100).sum() / data['H2Touch'].rolling(100).sum()
396
+ data['L1BreakL2TouchPct'] = data['L2TouchL1Break'].rolling(100).sum() / data['L2Touch'].rolling(100).sum()
397
 
398
  def get_quintiles(df, col_name, q):
399
  return df.groupby(pd.qcut(df[col_name], q))['GreenDay'].mean()
 
460
  exp_row = data.index[-1]
461
 
462
  df_final = data.loc[:final_row, model_cols + ['Target', 'Target_clf']]
463
+ df_final = df_final.dropna(subset=['Target','Target_clf'])
464
+ # df_final = df_final.dropna(subset=['Target','Target_clf','Perf5Day_n1'])
465
  return data, df_final, final_row
troubleshoot_day_model.ipynb CHANGED
@@ -2,78 +2,92 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
9
  "import pandas as pd\n",
10
  "import numpy as np\n",
11
  "from model_intra import get_data, walk_forward_validation\n",
12
- "import lightgbm as lgb"
 
13
  ]
14
  },
15
  {
16
  "cell_type": "code",
17
- "execution_count": 2,
18
  "metadata": {},
19
  "outputs": [
20
  {
21
  "name": "stderr",
22
  "output_type": "stream",
23
  "text": [
24
- "getting econ tickers: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 3.10it/s]\n",
25
- "Getting release dates: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 [00:01<00:00, 4.87it/s]\n",
26
- "Making indicators: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 [00:00<00:00, 3997.91it/s]\n",
27
- "Found cached dataset text (C:/Users/WINSTON-ITX/.cache/huggingface/datasets/boomsss___text/boomsss--spx_intra-e0e5e7af8fd43022/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n",
28
- "Merging econ data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 [00:00<00:00, 799.22it/s]\n"
29
  ]
30
  }
31
  ],
32
  "source": [
33
- "data, df_final, final_row = get_data(5)"
34
- ]
35
- },
36
- {
37
- "cell_type": "code",
38
- "execution_count": 3,
39
- "metadata": {},
40
- "outputs": [],
41
- "source": [
42
- "data['ClosePct'] = (data['Close'] / data['PrevClose']) - 1\n",
43
- "data['HighPct'] = (data['High'] / data['PrevClose']) - 1\n",
44
- "data['LowPct'] = (data['Low'] / data['PrevClose']) - 1\n",
45
- "data['ClosePct'] = data['ClosePct'].shift(-1)"
46
- ]
47
- },
48
- {
49
- "cell_type": "code",
50
- "execution_count": 25,
51
- "metadata": {},
52
- "outputs": [],
53
- "source": [
54
- "# Calculate the rolling likelihood\n",
55
- "rolling_likelihood = (data['H1Break'] & data['H1Touch']==True).rolling(window=100).mean()"
56
  ]
57
  },
58
  {
59
  "cell_type": "code",
60
- "execution_count": 36,
61
  "metadata": {},
62
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  "source": [
64
- "data['H1BreakTouchPct'] = data['H1Break'].expanding().sum() / data['H1Touch'].expanding().sum()\n",
65
- "data['H2BreakTouchPct'] = data['H2Break'].expanding().sum() / data['H2Touch'].expanding().sum()\n",
66
- "data['H1BreakTouchPct'] = data['L1Break'].expanding().sum() / data['L1Touch'].expanding().sum()\n",
67
- "data['H2BreakTouchPct'] = data['L2Break'].expanding().sum() / data['L2Touch'].expanding().sum()"
68
  ]
69
  },
70
  {
71
  "cell_type": "code",
72
- "execution_count": 10,
73
  "metadata": {},
74
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  "source": [
76
- "l = [False, False, False, False, False,False,False,False]"
77
  ]
78
  },
79
  {
@@ -82,197 +96,80 @@
82
  "metadata": {},
83
  "outputs": [
84
  {
85
- "ename": "ValueError",
86
- "evalue": "True is not in list",
87
- "output_type": "error",
88
- "traceback": [
89
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
90
- "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
91
- "\u001b[1;32md:\\Projects\\gamedayspx\\troubleshoot_day_model.ipynb Cell 7\u001b[0m line \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> <a href='vscode-notebook-cell:/d%3A/Projects/gamedayspx/troubleshoot_day_model.ipynb#X60sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m place \u001b[39m=\u001b[39m \u001b[39m0\u001b[39m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m l[::\u001b[39m-\u001b[39;49m\u001b[39m1\u001b[39;49m]\u001b[39m.\u001b[39;49mindex(\u001b[39mTrue\u001b[39;49;00m) \u001b[39melse\u001b[39;00m \u001b[39mlen\u001b[39m(l) \u001b[39m-\u001b[39m \u001b[39m1\u001b[39m \u001b[39m-\u001b[39m l[::\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\u001b[39m.\u001b[39mindex(\u001b[39mTrue\u001b[39;00m)\n",
92
- "\u001b[1;31mValueError\u001b[0m: True is not in list"
 
93
  ]
94
  }
95
  ],
96
  "source": [
97
- "place = 0 if not l[::-1].index(True) else len(l) - 1 - l[::-1].index(True)"
98
- ]
99
- },
100
- {
101
- "cell_type": "code",
102
- "execution_count": 12,
103
- "metadata": {},
104
- "outputs": [],
105
- "source": [
106
- "z = ['a','b','c','d','e']"
 
 
 
107
  ]
108
  },
109
  {
110
  "cell_type": "code",
111
- "execution_count": 37,
112
  "metadata": {},
113
  "outputs": [
114
  {
115
  "data": {
116
- "text/html": [
117
- "<div>\n",
118
- "<style scoped>\n",
119
- " .dataframe tbody tr th:only-of-type {\n",
120
- " vertical-align: middle;\n",
121
- " }\n",
122
- "\n",
123
- " .dataframe tbody tr th {\n",
124
- " vertical-align: top;\n",
125
- " }\n",
126
- "\n",
127
- " .dataframe thead th {\n",
128
- " text-align: right;\n",
129
- " }\n",
130
- "</style>\n",
131
- "<table border=\"1\" class=\"dataframe\">\n",
132
- " <thead>\n",
133
- " <tr style=\"text-align: right;\">\n",
134
- " <th></th>\n",
135
- " <th>H2Touch</th>\n",
136
- " <th>H2Break</th>\n",
137
- " <th>H2BreakTouch</th>\n",
138
- " </tr>\n",
139
- " </thead>\n",
140
- " <tbody>\n",
141
- " <tr>\n",
142
- " <th>2018-07-02</th>\n",
143
- " <td>False</td>\n",
144
- " <td>False</td>\n",
145
- " <td>NaN</td>\n",
146
- " </tr>\n",
147
- " <tr>\n",
148
- " <th>2018-07-03</th>\n",
149
- " <td>False</td>\n",
150
- " <td>False</td>\n",
151
- " <td>NaN</td>\n",
152
- " </tr>\n",
153
- " <tr>\n",
154
- " <th>2018-07-05</th>\n",
155
- " <td>False</td>\n",
156
- " <td>False</td>\n",
157
- " <td>NaN</td>\n",
158
- " </tr>\n",
159
- " <tr>\n",
160
- " <th>2018-07-06</th>\n",
161
- " <td>False</td>\n",
162
- " <td>False</td>\n",
163
- " <td>NaN</td>\n",
164
- " </tr>\n",
165
- " <tr>\n",
166
- " <th>2018-07-09</th>\n",
167
- " <td>False</td>\n",
168
- " <td>False</td>\n",
169
- " <td>NaN</td>\n",
170
- " </tr>\n",
171
- " <tr>\n",
172
- " <th>...</th>\n",
173
- " <td>...</td>\n",
174
- " <td>...</td>\n",
175
- " <td>...</td>\n",
176
- " </tr>\n",
177
- " <tr>\n",
178
- " <th>2023-10-10</th>\n",
179
- " <td>True</td>\n",
180
- " <td>False</td>\n",
181
- " <td>0.588235</td>\n",
182
- " </tr>\n",
183
- " <tr>\n",
184
- " <th>2023-10-11</th>\n",
185
- " <td>False</td>\n",
186
- " <td>False</td>\n",
187
- " <td>0.588235</td>\n",
188
- " </tr>\n",
189
- " <tr>\n",
190
- " <th>2023-10-12</th>\n",
191
- " <td>False</td>\n",
192
- " <td>False</td>\n",
193
- " <td>0.588235</td>\n",
194
- " </tr>\n",
195
- " <tr>\n",
196
- " <th>2023-10-13</th>\n",
197
- " <td>False</td>\n",
198
- " <td>False</td>\n",
199
- " <td>0.588235</td>\n",
200
- " </tr>\n",
201
- " <tr>\n",
202
- " <th>2023-10-16</th>\n",
203
- " <td>True</td>\n",
204
- " <td>False</td>\n",
205
- " <td>0.571429</td>\n",
206
- " </tr>\n",
207
- " </tbody>\n",
208
- "</table>\n",
209
- "<p>1332 rows Γ— 3 columns</p>\n",
210
- "</div>"
211
- ],
212
  "text/plain": [
213
- " H2Touch H2Break H2BreakTouch\n",
214
- "2018-07-02 False False NaN\n",
215
- "2018-07-03 False False NaN\n",
216
- "2018-07-05 False False NaN\n",
217
- "2018-07-06 False False NaN\n",
218
- "2018-07-09 False False NaN\n",
219
- "... ... ... ...\n",
220
- "2023-10-10 True False 0.588235\n",
221
- "2023-10-11 False False 0.588235\n",
222
- "2023-10-12 False False 0.588235\n",
223
- "2023-10-13 False False 0.588235\n",
224
- "2023-10-16 True False 0.571429\n",
225
- "\n",
226
- "[1332 rows x 3 columns]"
227
  ]
228
  },
229
- "execution_count": 37,
230
  "metadata": {},
231
  "output_type": "execute_result"
232
  }
233
  ],
234
  "source": [
235
- "data[['H2Touch','H2Break','H2BreakTouch']]"
236
  ]
237
  },
238
  {
239
  "cell_type": "code",
240
- "execution_count": 22,
241
  "metadata": {},
242
  "outputs": [
243
  {
244
  "data": {
 
245
  "text/plain": [
246
- "2018-07-02 NaN\n",
247
- "2018-07-03 NaN\n",
248
- "2018-07-05 NaN\n",
249
- "2018-07-06 NaN\n",
250
- "2018-07-09 NaN\n",
251
- " ... \n",
252
- "2023-10-10 0.22\n",
253
- "2023-10-11 0.21\n",
254
- "2023-10-12 0.21\n",
255
- "2023-10-13 0.21\n",
256
- "2023-10-16 0.22\n",
257
- "Name: H1BreakPct, Length: 1332, dtype: float64"
258
  ]
259
  },
260
- "execution_count": 22,
261
- "metadata": {},
262
- "output_type": "execute_result"
 
263
  }
264
  ],
265
  "source": [
266
- "data['H1BreakPct']"
267
- ]
268
- },
269
- {
270
- "cell_type": "code",
271
- "execution_count": null,
272
- "metadata": {},
273
- "outputs": [],
274
- "source": [
275
- "res1, model1 = walk_forward_validation(df_final.dropna(axis=0), 'Target_clf', 100, 1)"
276
  ]
277
  },
278
  {
@@ -281,13 +178,7 @@
281
  "metadata": {},
282
  "outputs": [],
283
  "source": [
284
- "import matplotlib.pyplot as plt\n",
285
- "\n",
286
- "# Plot feature importances\n",
287
- "plt.figure(figsize=(10, 12))\n",
288
- "lgb.plot_importance(model1) # Adjust max_num_features as needed\n",
289
- "plt.title(\"Feature Importances\")\n",
290
- "plt.show()"
291
  ]
292
  },
293
  {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 10,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
9
  "import pandas as pd\n",
10
  "import numpy as np\n",
11
  "from model_intra import get_data, walk_forward_validation\n",
12
+ "import lightgbm as lgb\n",
13
+ "from sklearn.linear_model import LogisticRegression"
14
  ]
15
  },
16
  {
17
  "cell_type": "code",
18
+ "execution_count": 11,
19
  "metadata": {},
20
  "outputs": [
21
  {
22
  "name": "stderr",
23
  "output_type": "stream",
24
  "text": [
25
+ "getting econ tickers: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3/3 [00:00<00:00, 6.05it/s]\n",
26
+ "Getting release dates: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 [00:01<00:00, 5.73it/s]\n",
27
+ "Making indicators: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 [00:00<00:00, 3997.43it/s]\n",
28
+ "Found cached dataset text (C:/Users/WINSTON-ITX/.cache/huggingface/datasets/boomsss___text/boomsss--spx_intra-b08c599e86f95628/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n",
29
+ "Merging econ data: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 [00:00<00:00, 865.85it/s]\n"
30
  ]
31
  }
32
  ],
33
  "source": [
34
+ "data, df_final, final_row = get_data(2)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  ]
36
  },
37
  {
38
  "cell_type": "code",
39
+ "execution_count": 12,
40
  "metadata": {},
41
+ "outputs": [
42
+ {
43
+ "data": {
44
+ "text/plain": [
45
+ "L1BreakL2TouchPct 0.875\n",
46
+ "H1TouchGreenPct 0.785714\n",
47
+ "L1TouchRedPct 0.575\n",
48
+ "Target -0.009281\n",
49
+ "Target_clf False\n",
50
+ "Name: 2023-10-24 00:00:00, dtype: object"
51
+ ]
52
+ },
53
+ "execution_count": 12,
54
+ "metadata": {},
55
+ "output_type": "execute_result"
56
+ }
57
+ ],
58
  "source": [
59
+ "data.loc[final_row, df_final.columns[-5:]]"
 
 
 
60
  ]
61
  },
62
  {
63
  "cell_type": "code",
64
+ "execution_count": 13,
65
  "metadata": {},
66
+ "outputs": [
67
+ {
68
+ "name": "stderr",
69
+ "output_type": "stream",
70
+ "text": [
71
+ "d:\\Projects\\gamedayspx\\model_intra.py:68: SettingWithCopyWarning: \n",
72
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
73
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
74
+ "\n",
75
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
76
+ " df[target_column] = df[target_column].astype(bool)\n",
77
+ " 0%| | 0/1238 [00:00<?, ?it/s]"
78
+ ]
79
+ },
80
+ {
81
+ "name": "stderr",
82
+ "output_type": "stream",
83
+ "text": [
84
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1238/1238 [00:10<00:00, 116.79it/s]\n",
85
+ "Calibrating Probas: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1238/1238 [00:02<00:00, 418.46it/s]\n"
86
+ ]
87
+ }
88
+ ],
89
  "source": [
90
+ "res1, model1 = walk_forward_validation(df_final.dropna(axis=0), 'Target_clf', 250, 1)"
91
  ]
92
  },
93
  {
 
96
  "metadata": {},
97
  "outputs": [
98
  {
99
+ "name": "stdout",
100
+ "output_type": "stream",
101
+ "text": [
102
+ "0.7675170574106744\n",
103
+ "0.6997319034852547\n",
104
+ "0.7933130699088146\n",
105
+ "0.8022859842930231\n",
106
+ "0.7675276752767528\n",
107
+ "0.8721174004192872\n"
108
  ]
109
  }
110
  ],
111
  "source": [
112
+ "from sklearn.metrics import roc_auc_score, precision_score, recall_score\n",
113
+ "\n",
114
+ "cal = res1.dropna()\n",
115
+ "\n",
116
+ "print(roc_auc_score(cal['True'], cal['CalibPredicted']))\n",
117
+ "print(precision_score(cal['True'], cal['CalibPredicted'] > .5))\n",
118
+ "print(recall_score(cal['True'], cal['CalibPredicted'] > .5))\n",
119
+ "\n",
120
+ "res1_hi_confidence = cal.query('(CalibPredicted >= .6) | (CalibPredicted < 0.4)')\n",
121
+ "\n",
122
+ "print(roc_auc_score(res1_hi_confidence['True'], res1_hi_confidence['CalibPredicted']))\n",
123
+ "print(precision_score(res1_hi_confidence['True'], res1_hi_confidence['CalibPredicted'] > .5))\n",
124
+ "print(recall_score(res1_hi_confidence['True'], res1_hi_confidence['CalibPredicted'] > .5))"
125
  ]
126
  },
127
  {
128
  "cell_type": "code",
129
+ "execution_count": 9,
130
  "metadata": {},
131
  "outputs": [
132
  {
133
  "data": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  "text/plain": [
135
+ "0.7691056910569106"
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  ]
137
  },
138
+ "execution_count": 9,
139
  "metadata": {},
140
  "output_type": "execute_result"
141
  }
142
  ],
143
  "source": [
144
+ "len(res1_hi_confidence) / len(cal)"
145
  ]
146
  },
147
  {
148
  "cell_type": "code",
149
+ "execution_count": 8,
150
  "metadata": {},
151
  "outputs": [
152
  {
153
  "data": {
154
+ "image/png": "",
155
  "text/plain": [
156
+ "<Figure size 1152x864 with 1 Axes>"
 
 
 
 
 
 
 
 
 
 
 
157
  ]
158
  },
159
+ "metadata": {
160
+ "needs_background": "light"
161
+ },
162
+ "output_type": "display_data"
163
  }
164
  ],
165
  "source": [
166
+ "import matplotlib.pyplot as plt\n",
167
+ "\n",
168
+ "# Plot feature importances\n",
169
+ "# plt.figure(figsize=(16, 12))\n",
170
+ "lgb.plot_importance(model1, figsize=(16, 12)) # Adjust max_num_features as needed\n",
171
+ "plt.title(\"Feature Importances\")\n",
172
+ "plt.show()"
 
 
 
173
  ]
174
  },
175
  {
 
178
  "metadata": {},
179
  "outputs": [],
180
  "source": [
181
+ "XXX"
 
 
 
 
 
 
182
  ]
183
  },
184
  {