wnstnb committed on
Commit
bed31fe
·
1 Parent(s): 6b16014

big change to form and QOL

Browse files
Files changed (1) hide show
  1. app.py +681 -664
app.py CHANGED
@@ -10,669 +10,686 @@ st.set_page_config(
10
  )
11
 
12
  st.title('🎮 Gameday Model for $SPX')
13
- col1, col2 = st.columns(2)
14
- with col1:
15
- st.markdown('**PLEASE NOTE:** Model should be run at or after market open. Documentation on the model and its features [can be found here.](https://huggingface.co/spaces/boomsss/gamedayspx/blob/main/README.md)')
16
- with col2:
17
- st.caption('Clear before next run.')
18
- if st.button("🧹 Clear All"):
 
 
 
 
 
 
 
 
19
  st.cache_data.clear()
20
 
21
- option = st.selectbox(
22
- 'Select a model, then run.',
23
- ('', '🌞 At Open', '⌚ 30 Mins', '⏳ 60 Mins'))
24
-
25
- if option == '':
26
- st.write('Gotta pick one.')
27
-
28
- elif option == '🌞 At Open':
29
- if st.button('🏃🏽‍♂️ Run'):
30
- from model_day import *
31
- with st.spinner('Loading data...'):
32
- data, df_final, final_row = get_data()
33
- # st.success("✅ Historical data")
34
-
35
- with st.spinner("Training models..."):
36
- def train_models():
37
- res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
38
- return res1, xgbr, seq2
39
- res1, xgbr, seq2 = train_models()
40
- # st.success("✅ Models trained")
41
-
42
- with st.spinner("Getting new prediction..."):
43
-
44
- # Get last row
45
- new_pred = data.loc[final_row, ['BigNewsDay',
46
- 'Quarter',
47
- 'Perf5Day',
48
- 'Perf5Day_n1',
49
- 'DaysGreen',
50
- 'DaysRed',
51
- 'CurrentGap',
52
- 'RangePct',
53
- 'RangePct_n1',
54
- 'RangePct_n2',
55
- 'OHLC4_VIX',
56
- 'OHLC4_VIX_n1',
57
- 'OHLC4_VIX_n2']]
58
-
59
- new_pred = pd.DataFrame(new_pred).T
60
- # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
61
- # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
62
- curr_date = final_row + BDay(1)
63
- curr_date = curr_date.strftime('%Y-%m-%d')
64
-
65
- new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
66
- new_pred['Quarter'] = new_pred['Quarter'].astype(int)
67
- new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
68
- new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
69
- new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
70
- new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
71
- new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
72
- new_pred['RangePct'] = new_pred['RangePct'].astype(float)
73
- new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
74
- new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
75
- new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
76
- new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
77
- new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
78
-
79
- st.success("✅ All done!")
80
- tab1, tab2, tab3, tab4 = st.tabs(["🔮 Prediction", "✨ New Data", "🗄 Historical", "📊 Performance"])
81
-
82
- seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
83
-
84
- green_proba = seq_proba[0]
85
- red_proba = 1 - green_proba
86
- do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
87
- stdev = 0.01
88
- score = None
89
- num_obs = None
90
- cond = None
91
- historical_proba = None
92
- text_cond = None
93
- operator = None
94
-
95
- if do_not_play:
96
- text_cond = '🟨'
97
- operator = ''
98
- score = seq_proba[0]
99
- cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
100
- num_obs = len(res1.loc[cond])
101
- historical_proba = res1.loc[cond, 'True'].mean()
102
-
103
-
104
- elif green_proba > red_proba:
105
- # If the day is predicted to be green, say so
106
- text_cond = '🟩'
107
- operator = '>='
108
- score = green_proba
109
- # How many with this score?
110
- cond = (res1['Predicted'] >= green_proba)
111
- num_obs = len(res1.loc[cond])
112
- # How often green?
113
- historical_proba = res1.loc[cond, 'True'].mean()
114
- # print(cond)
115
-
116
- elif green_proba <= red_proba:
117
- # If the day is predicted to be green, say so
118
- text_cond = '🟥'
119
- operator = '<='
120
- score = red_proba
121
- # How many with this score?
122
- cond = (res1['Predicted'] <= red_proba)
123
- num_obs = len(res1.loc[cond])
124
- # How often green?
125
- historical_proba = 1 - res1.loc[cond, 'True'].mean()
126
- # print(cond)
127
-
128
- score_fmt = f'{score:.1%}'
129
-
130
- results = pd.DataFrame(index=[
131
- 'PrevClose',
132
- 'Confidence Score',
133
- 'Success Rate',
134
- f'NumObs {operator} {"" if do_not_play else score_fmt}',
135
- ], data = [
136
- f"{data.loc[final_row,'Close']:.2f}",
137
- f'{text_cond} {score:.1%}',
138
- f'{historical_proba:.1%}',
139
- num_obs,
140
- ])
141
-
142
- results.columns = ['Outputs']
143
-
144
- # st.subheader('New Prediction')
145
-
146
- int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']
147
- # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
148
- df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum]})
149
- df_probas.columns = ['PctGreen','NumObs','NumGreen']
150
-
151
- roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
152
- precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
153
- recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
154
- len_all = len(res1)
155
-
156
- res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
157
-
158
- roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
159
- precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
160
- recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
161
- len_hi = len(res2_filtered)
162
-
163
- df_performance = pd.DataFrame(
164
- index=[
165
- 'N',
166
- 'ROC AUC',
167
- 'Precision',
168
- 'Recall'
169
- ],
170
- columns = [
171
- 'All',
172
- 'High Confidence'
173
- ],
174
- data = [
175
- [len_all, len_hi],
176
- [roc_auc_score_all, roc_auc_score_hi],
177
- [precision_score_all, precision_score_hi],
178
- [recall_score_all, recall_score_hi]
179
- ]
180
- ).round(2)
181
-
182
- def get_acc(t, p):
183
- if t == False and p <= 0.4:
184
- return '✅'
185
- elif t == True and p > 0.6:
186
- return '✅'
187
- elif t == False and p > 0.6:
188
- return '❌'
189
- elif t == True and p <= 0.4:
190
- return '❌'
191
- else:
192
- return '🟨'
193
-
194
- def get_acc_text(t, p):
195
- if t == False and p <= 0.4:
196
- return 'Correct'
197
- elif t == True and p > 0.6:
198
- return 'Correct'
199
- elif t == False and p > 0.6:
200
- return 'Incorrect'
201
- elif t == True and p <= 0.4:
202
- return 'Incorrect'
203
- else:
204
- return 'No Action'
205
-
206
- perf_daily = res1.copy()
207
- perf_daily['TargetDate'] = perf_daily.index + BDay(1)
208
- perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
209
- perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
210
- perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
211
- perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
212
-
213
- def convert_df(df):
214
- # IMPORTANT: Cache the conversion to prevent computation on every rerun
215
- return df.to_csv()
216
-
217
- csv = convert_df(perf_daily)
218
-
219
- with tab1:
220
- st.subheader(f'Pred for {curr_date} as of 6:30AM PST')
221
- st.write(results)
222
- st.write(df_probas)
223
- with tab2:
224
- st.subheader('Latest Data for Pred')
225
- st.write(new_pred)
226
- with tab3:
227
- st.subheader('Historical Data')
228
- st.write(df_final)
229
- with tab4:
230
- st.subheader('Performance')
231
- st.write(df_performance)
232
- st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
233
- st.download_button(
234
- label="Download Historical Performance",
235
- data=csv,
236
- file_name='performance_for_at_open_model.csv',
237
- )
238
-
239
- elif option == '⌚ 30 Mins':
240
- if st.button('🏃🏽‍♂️ Run'):
241
- from model_30m import *
242
- with st.spinner('Loading data...'):
243
- data, df_final, final_row = get_data()
244
- # st.success("✅ Historical data")
245
-
246
- with st.spinner("Training models..."):
247
- def train_models():
248
- res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
249
- return res1, xgbr, seq2
250
- res1, xgbr, seq2 = train_models()
251
- # st.success("✅ Models trained")
252
-
253
- with st.spinner("Getting new prediction..."):
254
-
255
- # Get last row
256
- new_pred = data.loc[final_row, ['BigNewsDay',
257
- 'Quarter',
258
- 'Perf5Day',
259
- 'Perf5Day_n1',
260
- 'DaysGreen',
261
- 'DaysRed',
262
- 'CurrentHigh30toClose',
263
- 'CurrentLow30toClose',
264
- 'CurrentClose30toClose',
265
- 'CurrentRange30',
266
- 'GapFill30',
267
- 'CurrentGap',
268
- 'RangePct',
269
- 'RangePct_n1',
270
- 'RangePct_n2',
271
- 'OHLC4_VIX',
272
- 'OHLC4_VIX_n1',
273
- 'OHLC4_VIX_n2']]
274
-
275
- new_pred = pd.DataFrame(new_pred).T
276
- # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
277
- # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
278
- curr_date = final_row + BDay(1)
279
- curr_date = curr_date.strftime('%Y-%m-%d')
280
-
281
- new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
282
- new_pred['Quarter'] = new_pred['Quarter'].astype(int)
283
- new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
284
- new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
285
- new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
286
- new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
287
- new_pred['CurrentHigh30toClose'] = new_pred['CurrentHigh30toClose'].astype(float)
288
- new_pred['CurrentLow30toClose'] = new_pred['CurrentLow30toClose'].astype(float)
289
- new_pred['CurrentClose30toClose'] = new_pred['CurrentClose30toClose'].astype(float)
290
- new_pred['CurrentRange30'] = new_pred['CurrentRange30'].astype(float)
291
- new_pred['GapFill30'] = new_pred['GapFill30'].astype(float)
292
- new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
293
- new_pred['RangePct'] = new_pred['RangePct'].astype(float)
294
- new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
295
- new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
296
- new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
297
- new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
298
- new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
299
-
300
- st.success("✅ All done!")
301
- tab1, tab2, tab3, tab4 = st.tabs(["🔮 Prediction", "✨ New Data", "🗄 Historical", "📊 Performance"])
302
-
303
- seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
304
-
305
- green_proba = seq_proba[0]
306
- red_proba = 1 - green_proba
307
- do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
308
- stdev = 0.01
309
- score = None
310
- num_obs = None
311
- cond = None
312
- historical_proba = None
313
- text_cond = None
314
- operator = None
315
-
316
- if do_not_play:
317
- text_cond = '🟨'
318
- operator = ''
319
- score = seq_proba[0]
320
- cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
321
- num_obs = len(res1.loc[cond])
322
- historical_proba = res1.loc[cond, 'True'].mean()
323
-
324
-
325
- elif green_proba > red_proba:
326
- # If the day is predicted to be green, say so
327
- text_cond = '🟩'
328
- operator = '>='
329
- score = green_proba
330
- # How many with this score?
331
- cond = (res1['Predicted'] >= green_proba)
332
- num_obs = len(res1.loc[cond])
333
- # How often green?
334
- historical_proba = res1.loc[cond, 'True'].mean()
335
- # print(cond)
336
-
337
- elif green_proba <= red_proba:
338
- # If the day is predicted to be green, say so
339
- text_cond = '🟥'
340
- operator = '<='
341
- score = red_proba
342
- # How many with this score?
343
- cond = (res1['Predicted'] <= red_proba)
344
- num_obs = len(res1.loc[cond])
345
- # How often green?
346
- historical_proba = 1 - res1.loc[cond, 'True'].mean()
347
- # print(cond)
348
-
349
- score_fmt = f'{score:.1%}'
350
-
351
- results = pd.DataFrame(index=[
352
- 'PrevClose',
353
- 'Confidence Score',
354
- 'Success Rate',
355
- f'NumObs {operator} {"" if do_not_play else score_fmt}',
356
- ], data = [
357
- f"{data.loc[final_row,'Close']:.2f}",
358
- f'{text_cond} {score:.1%}',
359
- f'{historical_proba:.1%}',
360
- num_obs,
361
- ])
362
-
363
- results.columns = ['Outputs']
364
-
365
- # st.subheader('New Prediction')
366
-
367
- int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']
368
- # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
369
- df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum]})
370
- df_probas.columns = ['PctGreen','NumObs','NumGreen']
371
-
372
- roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
373
- precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
374
- recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
375
- len_all = len(res1)
376
-
377
- res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
378
-
379
- roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
380
- precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
381
- recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
382
- len_hi = len(res2_filtered)
383
-
384
- df_performance = pd.DataFrame(
385
- index=[
386
- 'N',
387
- 'ROC AUC',
388
- 'Precision',
389
- 'Recall'
390
- ],
391
- columns = [
392
- 'All',
393
- 'High Confidence'
394
- ],
395
- data = [
396
- [len_all, len_hi],
397
- [roc_auc_score_all, roc_auc_score_hi],
398
- [precision_score_all, precision_score_hi],
399
- [recall_score_all, recall_score_hi]
400
- ]
401
- ).round(2)
402
-
403
- def get_acc(t, p):
404
- if t == False and p <= 0.4:
405
- return '✅'
406
- elif t == True and p > 0.6:
407
- return '✅'
408
- elif t == False and p > 0.6:
409
- return '❌'
410
- elif t == True and p <= 0.4:
411
- return '❌'
412
- else:
413
- return '🟨'
414
-
415
- def get_acc_text(t, p):
416
- if t == False and p <= 0.4:
417
- return 'Correct'
418
- elif t == True and p > 0.6:
419
- return 'Correct'
420
- elif t == False and p > 0.6:
421
- return 'Incorrect'
422
- elif t == True and p <= 0.4:
423
- return 'Incorrect'
424
- else:
425
- return 'No Action'
426
-
427
- perf_daily = res1.copy()
428
- perf_daily['TargetDate'] = perf_daily.index + BDay(1)
429
- perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
430
- perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
431
- perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
432
- perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
433
-
434
- def convert_df(df):
435
- # IMPORTANT: Cache the conversion to prevent computation on every rerun
436
- return df.to_csv()
437
-
438
- csv = convert_df(perf_daily)
439
-
440
- with tab1:
441
- st.subheader(f'Pred for {curr_date} as of 7AM PST')
442
- st.write(results)
443
- st.write(df_probas)
444
- with tab2:
445
- st.subheader('Latest Data for Pred')
446
- st.write(new_pred)
447
- with tab3:
448
- st.subheader('Historical Data')
449
- st.write(df_final)
450
- with tab4:
451
- st.subheader('Performance')
452
- st.write(df_performance)
453
- st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
454
- st.download_button(
455
- label="Download Historical Performance",
456
- data=csv,
457
- file_name='performance_for_30m_model.csv',
458
- )
459
-
460
- elif option == '⏳ 60 Mins':
461
- if st.button('🏃🏽‍♂️ Run'):
462
- from model_1h import *
463
- with st.spinner('Loading data...'):
464
- data, df_final, final_row = get_data()
465
- # st.success("✅ Historical data")
466
-
467
- with st.spinner("Training models..."):
468
- def train_models():
469
- res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
470
- return res1, xgbr, seq2
471
- res1, xgbr, seq2 = train_models()
472
- # st.success("✅ Models trained")
473
-
474
- with st.spinner("Getting new prediction..."):
475
-
476
- # Get last row
477
- new_pred = data.loc[final_row, ['BigNewsDay',
478
- 'Quarter',
479
- 'Perf5Day',
480
- 'Perf5Day_n1',
481
- 'DaysGreen',
482
- 'DaysRed',
483
- 'CurrentHigh30toClose',
484
- 'CurrentLow30toClose',
485
- 'CurrentClose30toClose',
486
- 'CurrentRange30',
487
- 'GapFill30',
488
- 'CurrentGap',
489
- 'RangePct',
490
- 'RangePct_n1',
491
- 'RangePct_n2',
492
- 'OHLC4_VIX',
493
- 'OHLC4_VIX_n1',
494
- 'OHLC4_VIX_n2']]
495
-
496
- new_pred = pd.DataFrame(new_pred).T
497
- # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
498
- # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
499
- curr_date = final_row + BDay(1)
500
- curr_date = curr_date.strftime('%Y-%m-%d')
501
-
502
- new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
503
- new_pred['Quarter'] = new_pred['Quarter'].astype(int)
504
- new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
505
- new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
506
- new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
507
- new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
508
- new_pred['CurrentHigh30toClose'] = new_pred['CurrentHigh30toClose'].astype(float)
509
- new_pred['CurrentLow30toClose'] = new_pred['CurrentLow30toClose'].astype(float)
510
- new_pred['CurrentClose30toClose'] = new_pred['CurrentClose30toClose'].astype(float)
511
- new_pred['CurrentRange30'] = new_pred['CurrentRange30'].astype(float)
512
- new_pred['GapFill30'] = new_pred['GapFill30'].astype(float)
513
- new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
514
- new_pred['RangePct'] = new_pred['RangePct'].astype(float)
515
- new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
516
- new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
517
- new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
518
- new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
519
- new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
520
-
521
- st.success("✅ All done!")
522
- tab1, tab2, tab3, tab4 = st.tabs(["🔮 Prediction", "✨ New Data", "🗄 Historical", "📊 Performance"])
523
-
524
- seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
525
-
526
- green_proba = seq_proba[0]
527
- red_proba = 1 - green_proba
528
- do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
529
- stdev = 0.01
530
- score = None
531
- num_obs = None
532
- cond = None
533
- historical_proba = None
534
- text_cond = None
535
- operator = None
536
-
537
- if do_not_play:
538
- text_cond = '🟨'
539
- operator = ''
540
- score = seq_proba[0]
541
- cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
542
- num_obs = len(res1.loc[cond])
543
- historical_proba = res1.loc[cond, 'True'].mean()
544
-
545
-
546
- elif green_proba > red_proba:
547
- # If the day is predicted to be green, say so
548
- text_cond = '🟩'
549
- operator = '>='
550
- score = green_proba
551
- # How many with this score?
552
- cond = (res1['Predicted'] >= green_proba)
553
- num_obs = len(res1.loc[cond])
554
- # How often green?
555
- historical_proba = res1.loc[cond, 'True'].mean()
556
- # print(cond)
557
-
558
- elif green_proba <= red_proba:
559
- # If the day is predicted to be green, say so
560
- text_cond = '🟥'
561
- operator = '<='
562
- score = red_proba
563
- # How many with this score?
564
- cond = (res1['Predicted'] <= red_proba)
565
- num_obs = len(res1.loc[cond])
566
- # How often green?
567
- historical_proba = 1 - res1.loc[cond, 'True'].mean()
568
- # print(cond)
569
-
570
- score_fmt = f'{score:.1%}'
571
-
572
- results = pd.DataFrame(index=[
573
- 'PrevClose',
574
- 'Confidence Score',
575
- 'Success Rate',
576
- f'NumObs {operator} {"" if do_not_play else score_fmt}',
577
- ], data = [
578
- f"{data.loc[final_row,'Close']:.2f}",
579
- f'{text_cond} {score:.1%}',
580
- f'{historical_proba:.1%}',
581
- num_obs,
582
- ])
583
-
584
- results.columns = ['Outputs']
585
-
586
- # st.subheader('New Prediction')
587
- int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']
588
- # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
589
- df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum]})
590
- df_probas.columns = ['PctGreen','NumObs','NumGreen']
591
-
592
- roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
593
- precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
594
- recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
595
- len_all = len(res1)
596
-
597
- res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
598
-
599
- roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
600
- precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
601
- recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
602
- len_hi = len(res2_filtered)
603
-
604
- df_performance = pd.DataFrame(
605
- index=[
606
- 'N',
607
- 'ROC AUC',
608
- 'Precision',
609
- 'Recall'
610
- ],
611
- columns = [
612
- 'All',
613
- 'High Confidence'
614
- ],
615
- data = [
616
- [len_all, len_hi],
617
- [roc_auc_score_all, roc_auc_score_hi],
618
- [precision_score_all, precision_score_hi],
619
- [recall_score_all, recall_score_hi]
620
- ]
621
- ).round(2)
622
-
623
- def get_acc(t, p):
624
- if t == False and p <= 0.4:
625
- return '✅'
626
- elif t == True and p > 0.6:
627
- return '✅'
628
- elif t == False and p > 0.6:
629
- return '❌'
630
- elif t == True and p <= 0.4:
631
- return '❌'
632
- else:
633
- return '🟨'
634
-
635
- def get_acc_text(t, p):
636
- if t == False and p <= 0.4:
637
- return 'Correct'
638
- elif t == True and p > 0.6:
639
- return 'Correct'
640
- elif t == False and p > 0.6:
641
- return 'Incorrect'
642
- elif t == True and p <= 0.4:
643
- return 'Incorrect'
644
- else:
645
- return 'No Action'
646
-
647
- perf_daily = res1.copy()
648
- perf_daily['TargetDate'] = perf_daily.index + BDay(1)
649
- perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
650
- perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
651
- perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
652
- perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
653
-
654
- def convert_df(df):
655
- # IMPORTANT: Cache the conversion to prevent computation on every rerun
656
- return df.to_csv()
657
-
658
- csv = convert_df(perf_daily)
659
-
660
- with tab1:
661
- st.subheader(f'Pred for {curr_date} as of 7:30AM PST')
662
- st.write(results)
663
- st.write(df_probas)
664
- with tab2:
665
- st.subheader('Latest Data for Pred')
666
- st.write(new_pred)
667
- with tab3:
668
- st.subheader('Historical Data')
669
- st.write(df_final)
670
- with tab4:
671
- st.subheader('Performance')
672
- st.write(df_performance)
673
- st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
674
- st.download_button(
675
- label="Download Historical Performance",
676
- data=csv,
677
- file_name='performance_for_60m_model.csv',
678
- )
 
 
 
 
 
 
 
 
 
 
10
  )
11
 
12
  st.title('🎮 Gameday Model for $SPX')
13
+ st.markdown('**PLEASE NOTE:** Model should be run at or after market open. Documentation on the model and its features [can be found here.](https://huggingface.co/spaces/boomsss/gamedayspx/blob/main/README.md)')
14
+ with st.form("choose_model"):
15
+
16
+ option = st.selectbox(
17
+ 'Select a model, then run.',
18
+ ('', '🌞 At Open', '⌚ 30 Mins', '⏳ 60 Mins'))
19
+ col1, col2 = st.columns(2)
20
+ with col1:
21
+ submitted = st.form_submit_button('🏃🏽‍♂️ Run',use_container_width=True)
22
+
23
+ with col2:
24
+ cleared = st.form_submit_button('🧹 Clear All',use_container_width=True)
25
+
26
+ if cleared:
27
  st.cache_data.clear()
28
 
29
+ if option == '':
30
+ st.write('No model selected.')
31
+
32
+ if submitted:
33
+
34
+ if option == '🌞 At Open':
35
+ # runday = st.button('🏃🏽‍♂️ Run')
36
+ # if runday:
37
+ from model_day import *
38
+ with st.spinner('Loading data...'):
39
+ data, df_final, final_row = get_data()
40
+ # st.success("✅ Historical data")
41
+
42
+ with st.spinner("Training models..."):
43
+ def train_models():
44
+ res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
45
+ return res1, xgbr, seq2
46
+ res1, xgbr, seq2 = train_models()
47
+ # st.success("✅ Models trained")
48
+
49
+ with st.spinner("Getting new prediction..."):
50
+
51
+ # Get last row
52
+ new_pred = data.loc[final_row, ['BigNewsDay',
53
+ 'Quarter',
54
+ 'Perf5Day',
55
+ 'Perf5Day_n1',
56
+ 'DaysGreen',
57
+ 'DaysRed',
58
+ 'CurrentGap',
59
+ 'RangePct',
60
+ 'RangePct_n1',
61
+ 'RangePct_n2',
62
+ 'OHLC4_VIX',
63
+ 'OHLC4_VIX_n1',
64
+ 'OHLC4_VIX_n2']]
65
+
66
+ new_pred = pd.DataFrame(new_pred).T
67
+ # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
68
+ # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
69
+ curr_date = final_row + BDay(1)
70
+ curr_date = curr_date.strftime('%Y-%m-%d')
71
+
72
+ new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
73
+ new_pred['Quarter'] = new_pred['Quarter'].astype(int)
74
+ new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
75
+ new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
76
+ new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
77
+ new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
78
+ new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
79
+ new_pred['RangePct'] = new_pred['RangePct'].astype(float)
80
+ new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
81
+ new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
82
+ new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
83
+ new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
84
+ new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
85
+
86
+ st.success("✅ All done!")
87
+ tab1, tab2, tab3, tab4 = st.tabs(["🔮 Prediction", "✨ New Data", "🗄 Historical", "📊 Performance"])
88
+
89
+ seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
90
+
91
+ green_proba = seq_proba[0]
92
+ red_proba = 1 - green_proba
93
+ do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
94
+ stdev = 0.01
95
+ score = None
96
+ num_obs = None
97
+ cond = None
98
+ historical_proba = None
99
+ text_cond = None
100
+ operator = None
101
+
102
+ if do_not_play:
103
+ text_cond = '🟨'
104
+ operator = ''
105
+ score = seq_proba[0]
106
+ cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
107
+ num_obs = len(res1.loc[cond])
108
+ historical_proba = res1.loc[cond, 'True'].mean()
109
+
110
+
111
+ elif green_proba > red_proba:
112
+ # If the day is predicted to be green, say so
113
+ text_cond = '🟩'
114
+ operator = '>='
115
+ score = green_proba
116
+ # How many with this score?
117
+ cond = (res1['Predicted'] >= green_proba)
118
+ num_obs = len(res1.loc[cond])
119
+ # How often green?
120
+ historical_proba = res1.loc[cond, 'True'].mean()
121
+ # print(cond)
122
+
123
+ elif green_proba <= red_proba:
124
+ # If the day is predicted to be green, say so
125
+ text_cond = '🟥'
126
+ operator = '<='
127
+ score = red_proba
128
+ # How many with this score?
129
+ cond = (res1['Predicted'] <= red_proba)
130
+ num_obs = len(res1.loc[cond])
131
+ # How often green?
132
+ historical_proba = 1 - res1.loc[cond, 'True'].mean()
133
+ # print(cond)
134
+
135
+ score_fmt = f'{score:.1%}'
136
+
137
+ results = pd.DataFrame(index=[
138
+ 'PrevClose',
139
+ 'Confidence Score',
140
+ 'Success Rate',
141
+ f'NumObs {operator} {"" if do_not_play else score_fmt}',
142
+ ], data = [
143
+ f"{data.loc[final_row,'Close']:.2f}",
144
+ f'{text_cond} {score:.1%}',
145
+ f'{historical_proba:.1%}',
146
+ num_obs,
147
+ ])
148
+
149
+ results.columns = ['Outputs']
150
+
151
+ # st.subheader('New Prediction')
152
+
153
+ int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']
154
+ # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
155
+ df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum]})
156
+ df_probas.columns = ['PctGreen','NumObs','NumGreen']
157
+
158
+ roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
159
+ precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
160
+ recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
161
+ len_all = len(res1)
162
+
163
+ res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
164
+
165
+ roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
166
+ precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
167
+ recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
168
+ len_hi = len(res2_filtered)
169
+
170
+ df_performance = pd.DataFrame(
171
+ index=[
172
+ 'N',
173
+ 'ROC AUC',
174
+ 'Precision',
175
+ 'Recall'
176
+ ],
177
+ columns = [
178
+ 'All',
179
+ 'High Confidence'
180
+ ],
181
+ data = [
182
+ [len_all, len_hi],
183
+ [roc_auc_score_all, roc_auc_score_hi],
184
+ [precision_score_all, precision_score_hi],
185
+ [recall_score_all, recall_score_hi]
186
+ ]
187
+ ).round(2)
188
+
189
+ def get_acc(t, p):
190
+ if t == False and p <= 0.4:
191
+ return '✅'
192
+ elif t == True and p > 0.6:
193
+ return '✅'
194
+ elif t == False and p > 0.6:
195
+ return '❌'
196
+ elif t == True and p <= 0.4:
197
+ return '❌'
198
+ else:
199
+ return '🟨'
200
+
201
+ def get_acc_text(t, p):
202
+ if t == False and p <= 0.4:
203
+ return 'Correct'
204
+ elif t == True and p > 0.6:
205
+ return 'Correct'
206
+ elif t == False and p > 0.6:
207
+ return 'Incorrect'
208
+ elif t == True and p <= 0.4:
209
+ return 'Incorrect'
210
+ else:
211
+ return 'No Action'
212
+
213
+ perf_daily = res1.copy()
214
+ perf_daily['TargetDate'] = perf_daily.index + BDay(1)
215
+ perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
216
+ perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
217
+ perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
218
+ perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
219
+
220
def convert_df(df):
    """Serialise ``df`` to CSV text (index included) for the download button."""
    # NOTE: nothing is cached here; the conversion runs each time this is called.
    csv_text = df.to_csv()
    return csv_text
223
+
224
+ csv = convert_df(perf_daily)
225
+
226
+ with tab1:
227
+ st.subheader(f'Pred for {curr_date} as of 6:30AM PST')
228
+ st.write(results)
229
+ st.write(df_probas)
230
+ with tab2:
231
+ st.subheader('Latest Data for Pred')
232
+ st.write(new_pred)
233
+ with tab3:
234
+ st.subheader('Historical Data')
235
+ st.write(df_final)
236
+ with tab4:
237
+ st.subheader('Performance')
238
+ st.write(df_performance)
239
+ st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
240
+ # st.download_button(
241
+ # label="Download Historical Performance",
242
+ # data=csv,
243
+ fname='performance_for_at_open_model.csv'
244
+ # )
245
+
246
+ elif option == 'โŒš 30 Mins':
247
+ # run30 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
248
+ # if run30:
249
+ from model_30m import *
250
+ with st.spinner('Loading data...'):
251
+ data, df_final, final_row = get_data()
252
+ # st.success("โœ… Historical data")
253
+
254
+ with st.spinner("Training models..."):
255
def train_models():
    """Run walk-forward validation on the complete rows of ``df_final``.

    Returns the three values produced by ``walk_forward_validation_seq``
    unchanged, in the same order.
    """
    complete_rows = df_final.dropna()
    first, second, third = walk_forward_validation_seq(
        complete_rows, 'Target_clf', 'Target', 100, 1
    )
    return first, second, third
258
+ res1, xgbr, seq2 = train_models()
259
+ # st.success("โœ… Models trained")
260
+
261
+ with st.spinner("Getting new prediction..."):
262
+
263
+ # Get last row
264
+ new_pred = data.loc[final_row, ['BigNewsDay',
265
+ 'Quarter',
266
+ 'Perf5Day',
267
+ 'Perf5Day_n1',
268
+ 'DaysGreen',
269
+ 'DaysRed',
270
+ 'CurrentHigh30toClose',
271
+ 'CurrentLow30toClose',
272
+ 'CurrentClose30toClose',
273
+ 'CurrentRange30',
274
+ 'GapFill30',
275
+ 'CurrentGap',
276
+ 'RangePct',
277
+ 'RangePct_n1',
278
+ 'RangePct_n2',
279
+ 'OHLC4_VIX',
280
+ 'OHLC4_VIX_n1',
281
+ 'OHLC4_VIX_n2']]
282
+
283
+ new_pred = pd.DataFrame(new_pred).T
284
+ # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
285
+ # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
286
+ curr_date = final_row + BDay(1)
287
+ curr_date = curr_date.strftime('%Y-%m-%d')
288
+
289
+ new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
290
+ new_pred['Quarter'] = new_pred['Quarter'].astype(int)
291
+ new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
292
+ new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
293
+ new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
294
+ new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
295
+ new_pred['CurrentHigh30toClose'] = new_pred['CurrentHigh30toClose'].astype(float)
296
+ new_pred['CurrentLow30toClose'] = new_pred['CurrentLow30toClose'].astype(float)
297
+ new_pred['CurrentClose30toClose'] = new_pred['CurrentClose30toClose'].astype(float)
298
+ new_pred['CurrentRange30'] = new_pred['CurrentRange30'].astype(float)
299
+ new_pred['GapFill30'] = new_pred['GapFill30'].astype(float)
300
+ new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
301
+ new_pred['RangePct'] = new_pred['RangePct'].astype(float)
302
+ new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
303
+ new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
304
+ new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
305
+ new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
306
+ new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
307
+
308
+ st.success("โœ… All done!")
309
+ tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
310
+
311
+ seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
312
+
313
+ green_proba = seq_proba[0]
314
+ red_proba = 1 - green_proba
315
+ do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
316
+ stdev = 0.01
317
+ score = None
318
+ num_obs = None
319
+ cond = None
320
+ historical_proba = None
321
+ text_cond = None
322
+ operator = None
323
+
324
# Turn the model's green probability into a signal and look up how the
# comparable historical predictions in res1 actually turned out.
if do_not_play:
    # Probability in (0.4, 0.6]: no call is made.
    text_cond = '🟨'
    operator = ''
    score = seq_proba[0]
    cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
    num_obs = len(res1.loc[cond])
    historical_proba = res1.loc[cond, 'True'].mean()

elif green_proba > red_proba:
    # The day is predicted to be green.
    text_cond = '🟩'
    operator = '>='
    score = green_proba
    # Past predictions that were at least this confident in green.
    cond = (res1['Predicted'] >= green_proba)
    num_obs = len(res1.loc[cond])
    # How often were those days green?
    historical_proba = res1.loc[cond, 'True'].mean()

elif green_proba <= red_proba:
    # The day is predicted to be red.
    text_cond = '🟥'
    operator = '<='
    score = red_proba
    # res1['Predicted'] is a green probability, so the past predictions at
    # least this confident in red are those at or below green_proba
    # (i.e. 1 - red_proba).  Comparing against red_proba itself would pull
    # in nearly every observation, including confident green calls.
    cond = (res1['Predicted'] <= green_proba)
    num_obs = len(res1.loc[cond])
    # How often were those days red?
    historical_proba = 1 - res1.loc[cond, 'True'].mean()
356
+
357
+ score_fmt = f'{score:.1%}'
358
+
359
+ results = pd.DataFrame(index=[
360
+ 'PrevClose',
361
+ 'Confidence Score',
362
+ 'Success Rate',
363
+ f'NumObs {operator} {"" if do_not_play else score_fmt}',
364
+ ], data = [
365
+ f"{data.loc[final_row,'Close']:.2f}",
366
+ f'{text_cond} {score:.1%}',
367
+ f'{historical_proba:.1%}',
368
+ num_obs,
369
+ ])
370
+
371
+ results.columns = ['Outputs']
372
+
373
+ # st.subheader('New Prediction')
374
+
375
+ int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
376
+ # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
377
+ df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum]})
378
+ df_probas.columns = ['PctGreen','NumObs','NumGreen']
379
+
380
+ roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
381
+ precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
382
+ recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
383
+ len_all = len(res1)
384
+
385
+ res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
386
+
387
+ roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
388
+ precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
389
+ recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
390
+ len_hi = len(res2_filtered)
391
+
392
+ df_performance = pd.DataFrame(
393
+ index=[
394
+ 'N',
395
+ 'ROC AUC',
396
+ 'Precision',
397
+ 'Recall'
398
+ ],
399
+ columns = [
400
+ 'All',
401
+ 'High Confidence'
402
+ ],
403
+ data = [
404
+ [len_all, len_hi],
405
+ [roc_auc_score_all, roc_auc_score_hi],
406
+ [precision_score_all, precision_score_hi],
407
+ [recall_score_all, recall_score_hi]
408
+ ]
409
+ ).round(2)
410
+
411
def get_acc(t, p):
    """Grade one historical prediction with an emoji.

    Parameters
    ----------
    t : the realised outcome flag.
    p : the predicted probability that ``t`` is True.

    Returns
    -------
    '✅' when a confident call (p > 0.6 or p <= 0.4) matched the outcome,
    '❌' when a confident call missed, and '🟨' when p lies in the
    (0.4, 0.6] band or ``t`` is neither True nor False.
    """
    # The emoji literals were mis-decoded UTF-8 in this copy of the file;
    # they are restored to the intended characters here.
    if p > 0.6:
        called_true = True
    elif p <= 0.4:
        called_true = False
    else:
        # Not confident enough to count as a call.
        return '🟨'

    if t == called_true:
        return '✅'
    if t == (not called_true):
        return '❌'
    return '🟨'
422
+
423
def get_acc_text(t, p):
    """Describe one historical prediction in words.

    ``t`` is the realised outcome flag and ``p`` the predicted probability
    that ``t`` is True.  Returns 'Correct' or 'Incorrect' for confident
    calls (p > 0.6 or p <= 0.4) and 'No Action' otherwise.
    """
    if p <= 0.4:
        called_true = False
    elif p > 0.6:
        called_true = True
    else:
        # Inside the (0.4, 0.6] band no call is made.
        return 'No Action'

    if t == called_true:
        return 'Correct'
    if t == (not called_true):
        return 'Incorrect'
    return 'No Action'
434
+
435
+ perf_daily = res1.copy()
436
+ perf_daily['TargetDate'] = perf_daily.index + BDay(1)
437
+ perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
438
+ perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
439
+ perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
440
+ perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
441
+
442
def convert_df(df):
    """Serialise ``df`` to CSV text (index included) for the download button."""
    # NOTE: nothing is cached here; the conversion runs each time this is called.
    csv_text = df.to_csv()
    return csv_text
445
+
446
+ csv = convert_df(perf_daily)
447
+
448
+ with tab1:
449
+ st.subheader(f'Pred for {curr_date} as of 7AM PST')
450
+ st.write(results)
451
+ st.write(df_probas)
452
+ with tab2:
453
+ st.subheader('Latest Data for Pred')
454
+ st.write(new_pred)
455
+ with tab3:
456
+ st.subheader('Historical Data')
457
+ st.write(df_final)
458
+ with tab4:
459
+ st.subheader('Performance')
460
+ st.write(df_performance)
461
+ st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
462
+ # st.download_button(
463
+ # label="Download Historical Performance",
464
+ # data=csv,
465
+ fname='performance_for_30m_model.csv'
466
+ # )
467
+
468
+ elif option == 'โณ 60 Mins':
469
+ # run60 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
470
+ # if run60:
471
+ from model_1h import *
472
+ with st.spinner('Loading data...'):
473
+ data, df_final, final_row = get_data()
474
+ # st.success("โœ… Historical data")
475
+
476
+ with st.spinner("Training models..."):
477
def train_models():
    """Run walk-forward validation on the complete rows of ``df_final``.

    Returns the three values produced by ``walk_forward_validation_seq``
    unchanged, in the same order.
    """
    complete_rows = df_final.dropna()
    first, second, third = walk_forward_validation_seq(
        complete_rows, 'Target_clf', 'Target', 100, 1
    )
    return first, second, third
480
+ res1, xgbr, seq2 = train_models()
481
+ # st.success("โœ… Models trained")
482
+
483
+ with st.spinner("Getting new prediction..."):
484
+
485
+ # Get last row
486
+ new_pred = data.loc[final_row, ['BigNewsDay',
487
+ 'Quarter',
488
+ 'Perf5Day',
489
+ 'Perf5Day_n1',
490
+ 'DaysGreen',
491
+ 'DaysRed',
492
+ 'CurrentHigh30toClose',
493
+ 'CurrentLow30toClose',
494
+ 'CurrentClose30toClose',
495
+ 'CurrentRange30',
496
+ 'GapFill30',
497
+ 'CurrentGap',
498
+ 'RangePct',
499
+ 'RangePct_n1',
500
+ 'RangePct_n2',
501
+ 'OHLC4_VIX',
502
+ 'OHLC4_VIX_n1',
503
+ 'OHLC4_VIX_n2']]
504
+
505
+ new_pred = pd.DataFrame(new_pred).T
506
+ # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
507
+ # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
508
+ curr_date = final_row + BDay(1)
509
+ curr_date = curr_date.strftime('%Y-%m-%d')
510
+
511
+ new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
512
+ new_pred['Quarter'] = new_pred['Quarter'].astype(int)
513
+ new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
514
+ new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
515
+ new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
516
+ new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
517
+ new_pred['CurrentHigh30toClose'] = new_pred['CurrentHigh30toClose'].astype(float)
518
+ new_pred['CurrentLow30toClose'] = new_pred['CurrentLow30toClose'].astype(float)
519
+ new_pred['CurrentClose30toClose'] = new_pred['CurrentClose30toClose'].astype(float)
520
+ new_pred['CurrentRange30'] = new_pred['CurrentRange30'].astype(float)
521
+ new_pred['GapFill30'] = new_pred['GapFill30'].astype(float)
522
+ new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
523
+ new_pred['RangePct'] = new_pred['RangePct'].astype(float)
524
+ new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
525
+ new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
526
+ new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
527
+ new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
528
+ new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
529
+
530
+ st.success("โœ… All done!")
531
+ tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
532
+
533
+ seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
534
+
535
+ green_proba = seq_proba[0]
536
+ red_proba = 1 - green_proba
537
+ do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
538
+ stdev = 0.01
539
+ score = None
540
+ num_obs = None
541
+ cond = None
542
+ historical_proba = None
543
+ text_cond = None
544
+ operator = None
545
+
546
# Turn the model's green probability into a signal and look up how the
# comparable historical predictions in res1 actually turned out.
if do_not_play:
    # Probability in (0.4, 0.6]: no call is made.
    text_cond = '🟨'
    operator = ''
    score = seq_proba[0]
    cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
    num_obs = len(res1.loc[cond])
    historical_proba = res1.loc[cond, 'True'].mean()

elif green_proba > red_proba:
    # The day is predicted to be green.
    text_cond = '🟩'
    operator = '>='
    score = green_proba
    # Past predictions that were at least this confident in green.
    cond = (res1['Predicted'] >= green_proba)
    num_obs = len(res1.loc[cond])
    # How often were those days green?
    historical_proba = res1.loc[cond, 'True'].mean()

elif green_proba <= red_proba:
    # The day is predicted to be red.
    text_cond = '🟥'
    operator = '<='
    score = red_proba
    # res1['Predicted'] is a green probability, so the past predictions at
    # least this confident in red are those at or below green_proba
    # (i.e. 1 - red_proba).  Comparing against red_proba itself would pull
    # in nearly every observation, including confident green calls.
    cond = (res1['Predicted'] <= green_proba)
    num_obs = len(res1.loc[cond])
    # How often were those days red?
    historical_proba = 1 - res1.loc[cond, 'True'].mean()
578
+
579
+ score_fmt = f'{score:.1%}'
580
+
581
+ results = pd.DataFrame(index=[
582
+ 'PrevClose',
583
+ 'Confidence Score',
584
+ 'Success Rate',
585
+ f'NumObs {operator} {"" if do_not_play else score_fmt}',
586
+ ], data = [
587
+ f"{data.loc[final_row,'Close']:.2f}",
588
+ f'{text_cond} {score:.1%}',
589
+ f'{historical_proba:.1%}',
590
+ num_obs,
591
+ ])
592
+
593
+ results.columns = ['Outputs']
594
+
595
+ # st.subheader('New Prediction')
596
+ int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
597
+ # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
598
+ df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum]})
599
+ df_probas.columns = ['PctGreen','NumObs','NumGreen']
600
+
601
+ roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
602
+ precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
603
+ recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
604
+ len_all = len(res1)
605
+
606
+ res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
607
+
608
+ roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
609
+ precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
610
+ recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
611
+ len_hi = len(res2_filtered)
612
+
613
+ df_performance = pd.DataFrame(
614
+ index=[
615
+ 'N',
616
+ 'ROC AUC',
617
+ 'Precision',
618
+ 'Recall'
619
+ ],
620
+ columns = [
621
+ 'All',
622
+ 'High Confidence'
623
+ ],
624
+ data = [
625
+ [len_all, len_hi],
626
+ [roc_auc_score_all, roc_auc_score_hi],
627
+ [precision_score_all, precision_score_hi],
628
+ [recall_score_all, recall_score_hi]
629
+ ]
630
+ ).round(2)
631
+
632
def get_acc(t, p):
    """Grade one historical prediction with an emoji.

    Parameters
    ----------
    t : the realised outcome flag.
    p : the predicted probability that ``t`` is True.

    Returns
    -------
    '✅' when a confident call (p > 0.6 or p <= 0.4) matched the outcome,
    '❌' when a confident call missed, and '🟨' when p lies in the
    (0.4, 0.6] band or ``t`` is neither True nor False.
    """
    # The emoji literals were mis-decoded UTF-8 in this copy of the file;
    # they are restored to the intended characters here.
    if p > 0.6:
        called_true = True
    elif p <= 0.4:
        called_true = False
    else:
        # Not confident enough to count as a call.
        return '🟨'

    if t == called_true:
        return '✅'
    if t == (not called_true):
        return '❌'
    return '🟨'
643
+
644
def get_acc_text(t, p):
    """Describe one historical prediction in words.

    ``t`` is the realised outcome flag and ``p`` the predicted probability
    that ``t`` is True.  Returns 'Correct' or 'Incorrect' for confident
    calls (p > 0.6 or p <= 0.4) and 'No Action' otherwise.
    """
    if p <= 0.4:
        called_true = False
    elif p > 0.6:
        called_true = True
    else:
        # Inside the (0.4, 0.6] band no call is made.
        return 'No Action'

    if t == called_true:
        return 'Correct'
    if t == (not called_true):
        return 'Incorrect'
    return 'No Action'
655
+
656
+ perf_daily = res1.copy()
657
+ perf_daily['TargetDate'] = perf_daily.index + BDay(1)
658
+ perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
659
+ perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
660
+ perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
661
+ perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
662
+
663
def convert_df(df):
    """Serialise ``df`` to CSV text (index included) for the download button."""
    # NOTE: nothing is cached here; the conversion runs each time this is called.
    csv_text = df.to_csv()
    return csv_text
666
+
667
+ csv = convert_df(perf_daily)
668
+
669
+ with tab1:
670
+ st.subheader(f'Pred for {curr_date} as of 7:30AM PST')
671
+ st.write(results)
672
+ st.write(df_probas)
673
+ with tab2:
674
+ st.subheader('Latest Data for Pred')
675
+ st.write(new_pred)
676
+ with tab3:
677
+ st.subheader('Historical Data')
678
+ st.write(df_final)
679
+ with tab4:
680
+ st.subheader('Performance')
681
+ st.write(df_performance)
682
+ st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
683
+ # st.download_button(
684
+ # label="Download Historical Performance",
685
+ # data=csv,
686
+ fname='performance_for_60m_model.csv'
687
+ # )
688
+
689
# Once the form has been submitted, offer the per-day performance table as a
# CSV download.  `csv` and `fname` are set by whichever model branch ran.
if submitted:
    st.download_button(
        label="Download Historical Performance",
        data=csv,
        file_name=fname,
    )
    # NOTE(review): the flattened source does not show whether this caption
    # sits inside the `if`; it is kept next to the button it describes.
    # The warning emoji was mis-decoded UTF-8 and is restored here.
    st.caption('⚠️ Downloading the CSV will reload the page. ⚠️')