Winston B committed on
Commit
643c6c8
·
1 Parent(s): 0921e55

add option for 90m model

Browse files
Files changed (1) hide show
  1. app.py +915 -694
app.py CHANGED
@@ -1,695 +1,916 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- from sklearn.metrics import roc_auc_score, precision_score, recall_score
5
- from pandas.tseries.offsets import BDay
6
-
7
- st.set_page_config(
8
- page_title="Gameday Model for $SPX",
9
- page_icon="๐ŸŽฎ"
10
- )
11
-
12
- st.title('๐ŸŽฎ Gameday Model for $SPX')
13
- st.markdown('**PLEASE NOTE:** Model should be run at or after market open. Documentation on the model and its features [can be found here.](https://huggingface.co/spaces/boomsss/gamedayspx/blob/main/README.md)')
14
- with st.form("choose_model"):
15
-
16
- option = st.selectbox(
17
- 'Select a model, then run.',
18
- ('', '๐ŸŒž At Open', 'โŒš 30 Mins', 'โณ 60 Mins'))
19
- col1, col2 = st.columns(2)
20
- with col1:
21
- submitted = st.form_submit_button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run',use_container_width=True)
22
-
23
- with col2:
24
- cleared = st.form_submit_button('๐Ÿงน Clear All',use_container_width=True)
25
-
26
- if cleared:
27
- st.cache_data.clear()
28
-
29
- if option == '':
30
- st.write('No model selected.')
31
-
32
- if submitted:
33
-
34
- if option == '๐ŸŒž At Open':
35
- # runday = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
36
- # if runday:
37
- from model_day import *
38
- with st.spinner('Loading data...'):
39
- data, df_final, final_row = get_data()
40
- # st.success("โœ… Historical data")
41
-
42
- with st.spinner("Training models..."):
43
- def train_models():
44
- res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
45
- return res1, xgbr, seq2
46
- res1, xgbr, seq2 = train_models()
47
- # st.success("โœ… Models trained")
48
-
49
- with st.spinner("Getting new prediction..."):
50
-
51
- # Get last row
52
- new_pred = data.loc[final_row, ['BigNewsDay',
53
- 'Quarter',
54
- 'Perf5Day',
55
- 'Perf5Day_n1',
56
- 'DaysGreen',
57
- 'DaysRed',
58
- 'CurrentGap',
59
- 'RangePct',
60
- 'RangePct_n1',
61
- 'RangePct_n2',
62
- 'OHLC4_VIX',
63
- 'OHLC4_VIX_n1',
64
- 'OHLC4_VIX_n2']]
65
-
66
- new_pred = pd.DataFrame(new_pred).T
67
- # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
68
- # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
69
- curr_date = final_row + BDay(1)
70
- curr_date = curr_date.strftime('%Y-%m-%d')
71
-
72
- new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
73
- new_pred['Quarter'] = new_pred['Quarter'].astype(int)
74
- new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
75
- new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
76
- new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
77
- new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
78
- new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
79
- new_pred['RangePct'] = new_pred['RangePct'].astype(float)
80
- new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
81
- new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
82
- new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
83
- new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
84
- new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
85
-
86
- st.success("โœ… All done!")
87
- tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
88
-
89
- seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
90
-
91
- green_proba = seq_proba[0]
92
- red_proba = 1 - green_proba
93
- do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
94
- stdev = 0.01
95
- score = None
96
- num_obs = None
97
- cond = None
98
- historical_proba = None
99
- text_cond = None
100
- operator = None
101
-
102
- if do_not_play:
103
- text_cond = '๐ŸŸจ'
104
- operator = ''
105
- score = seq_proba[0]
106
- cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
107
- num_obs = len(res1.loc[cond])
108
- historical_proba = res1.loc[cond, 'True'].mean()
109
-
110
-
111
- elif green_proba > red_proba:
112
- # If the day is predicted to be green, say so
113
- text_cond = '๐ŸŸฉ'
114
- operator = '>='
115
- score = green_proba
116
- # How many with this score?
117
- cond = (res1['Predicted'] >= green_proba)
118
- num_obs = len(res1.loc[cond])
119
- # How often green?
120
- historical_proba = res1.loc[cond, 'True'].mean()
121
- # print(cond)
122
-
123
- elif green_proba <= red_proba:
124
- # If the day is predicted to be green, say so
125
- text_cond = '๐ŸŸฅ'
126
- operator = '<='
127
- score = red_proba
128
- # How many with this score?
129
- cond = (res1['Predicted'] <= red_proba)
130
- num_obs = len(res1.loc[cond])
131
- # How often green?
132
- historical_proba = 1 - res1.loc[cond, 'True'].mean()
133
- # print(cond)
134
-
135
- score_fmt = f'{score:.1%}'
136
-
137
- results = pd.DataFrame(index=[
138
- 'PrevClose',
139
- 'Confidence Score',
140
- 'Success Rate',
141
- f'NumObs {operator} {"" if do_not_play else score_fmt}',
142
- ], data = [
143
- f"{data.loc[final_row,'Close']:.2f}",
144
- f'{text_cond} {score:.1%}',
145
- f'{historical_proba:.1%}',
146
- num_obs,
147
- ])
148
-
149
- results.columns = ['Outputs']
150
-
151
- # st.subheader('New Prediction')
152
-
153
- int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
154
- # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
155
- df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum]})
156
- df_probas.columns = ['PctGreen','NumObs','NumGreen']
157
-
158
- roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
159
- precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
160
- recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
161
- len_all = len(res1)
162
-
163
- res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
164
-
165
- roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
166
- precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
167
- recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
168
- len_hi = len(res2_filtered)
169
-
170
- df_performance = pd.DataFrame(
171
- index=[
172
- 'N',
173
- 'ROC AUC',
174
- 'Precision',
175
- 'Recall'
176
- ],
177
- columns = [
178
- 'All',
179
- 'High Confidence'
180
- ],
181
- data = [
182
- [len_all, len_hi],
183
- [roc_auc_score_all, roc_auc_score_hi],
184
- [precision_score_all, precision_score_hi],
185
- [recall_score_all, recall_score_hi]
186
- ]
187
- ).round(2)
188
-
189
- def get_acc(t, p):
190
- if t == False and p <= 0.4:
191
- return 'โœ…'
192
- elif t == True and p > 0.6:
193
- return 'โœ…'
194
- elif t == False and p > 0.6:
195
- return 'โŒ'
196
- elif t == True and p <= 0.4:
197
- return 'โŒ'
198
- else:
199
- return '๐ŸŸจ'
200
-
201
- def get_acc_text(t, p):
202
- if t == False and p <= 0.4:
203
- return 'Correct'
204
- elif t == True and p > 0.6:
205
- return 'Correct'
206
- elif t == False and p > 0.6:
207
- return 'Incorrect'
208
- elif t == True and p <= 0.4:
209
- return 'Incorrect'
210
- else:
211
- return 'No Action'
212
-
213
- perf_daily = res1.copy()
214
- perf_daily['TargetDate'] = perf_daily.index + BDay(1)
215
- perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
216
- perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
217
- perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
218
- perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
219
-
220
- def convert_df(df):
221
- # IMPORTANT: Cache the conversion to prevent computation on every rerun
222
- return df.to_csv()
223
-
224
- csv = convert_df(perf_daily)
225
-
226
- with tab1:
227
- st.subheader(f'Pred for {curr_date} as of 6:30AM PST')
228
- st.write(results)
229
- st.write(df_probas)
230
- with tab2:
231
- st.subheader('Latest Data for Pred')
232
- st.write(new_pred)
233
- with tab3:
234
- st.subheader('Historical Data')
235
- st.write(df_final)
236
- with tab4:
237
- st.subheader('Performance')
238
- st.write(df_performance)
239
- st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
240
- # st.download_button(
241
- # label="Download Historical Performance",
242
- # data=csv,
243
- fname='performance_for_at_open_model.csv'
244
- # )
245
-
246
- elif option == 'โŒš 30 Mins':
247
- # run30 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
248
- # if run30:
249
- from model_30m import *
250
- with st.spinner('Loading data...'):
251
- data, df_final, final_row = get_data()
252
- # st.success("โœ… Historical data")
253
-
254
- with st.spinner("Training models..."):
255
- def train_models():
256
- res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
257
- return res1, xgbr, seq2
258
- res1, xgbr, seq2 = train_models()
259
- # st.success("โœ… Models trained")
260
-
261
- with st.spinner("Getting new prediction..."):
262
-
263
- # Get last row
264
- new_pred = data.loc[final_row, ['BigNewsDay',
265
- 'Quarter',
266
- 'Perf5Day',
267
- 'Perf5Day_n1',
268
- 'DaysGreen',
269
- 'DaysRed',
270
- 'CurrentHigh30toClose',
271
- 'CurrentLow30toClose',
272
- 'CurrentClose30toClose',
273
- 'CurrentRange30',
274
- 'GapFill30',
275
- 'CurrentGap',
276
- 'RangePct',
277
- 'RangePct_n1',
278
- 'RangePct_n2',
279
- 'OHLC4_VIX',
280
- 'OHLC4_VIX_n1',
281
- 'OHLC4_VIX_n2']]
282
-
283
- new_pred = pd.DataFrame(new_pred).T
284
- # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
285
- # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
286
- curr_date = final_row + BDay(1)
287
- curr_date = curr_date.strftime('%Y-%m-%d')
288
-
289
- new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
290
- new_pred['Quarter'] = new_pred['Quarter'].astype(int)
291
- new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
292
- new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
293
- new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
294
- new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
295
- new_pred['CurrentHigh30toClose'] = new_pred['CurrentHigh30toClose'].astype(float)
296
- new_pred['CurrentLow30toClose'] = new_pred['CurrentLow30toClose'].astype(float)
297
- new_pred['CurrentClose30toClose'] = new_pred['CurrentClose30toClose'].astype(float)
298
- new_pred['CurrentRange30'] = new_pred['CurrentRange30'].astype(float)
299
- new_pred['GapFill30'] = new_pred['GapFill30'].astype(float)
300
- new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
301
- new_pred['RangePct'] = new_pred['RangePct'].astype(float)
302
- new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
303
- new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
304
- new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
305
- new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
306
- new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
307
-
308
- st.success("โœ… All done!")
309
- tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
310
-
311
- seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
312
-
313
- green_proba = seq_proba[0]
314
- red_proba = 1 - green_proba
315
- do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
316
- stdev = 0.01
317
- score = None
318
- num_obs = None
319
- cond = None
320
- historical_proba = None
321
- text_cond = None
322
- operator = None
323
-
324
- if do_not_play:
325
- text_cond = '๐ŸŸจ'
326
- operator = ''
327
- score = seq_proba[0]
328
- cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
329
- num_obs = len(res1.loc[cond])
330
- historical_proba = res1.loc[cond, 'True'].mean()
331
-
332
-
333
- elif green_proba > red_proba:
334
- # If the day is predicted to be green, say so
335
- text_cond = '๐ŸŸฉ'
336
- operator = '>='
337
- score = green_proba
338
- # How many with this score?
339
- cond = (res1['Predicted'] >= green_proba)
340
- num_obs = len(res1.loc[cond])
341
- # How often green?
342
- historical_proba = res1.loc[cond, 'True'].mean()
343
- # print(cond)
344
-
345
- elif green_proba <= red_proba:
346
- # If the day is predicted to be green, say so
347
- text_cond = '๐ŸŸฅ'
348
- operator = '<='
349
- score = red_proba
350
- # How many with this score?
351
- cond = (res1['Predicted'] <= red_proba)
352
- num_obs = len(res1.loc[cond])
353
- # How often green?
354
- historical_proba = 1 - res1.loc[cond, 'True'].mean()
355
- # print(cond)
356
-
357
- score_fmt = f'{score:.1%}'
358
-
359
- results = pd.DataFrame(index=[
360
- 'PrevClose',
361
- 'Confidence Score',
362
- 'Success Rate',
363
- f'NumObs {operator} {"" if do_not_play else score_fmt}',
364
- ], data = [
365
- f"{data.loc[final_row,'Close']:.2f}",
366
- f'{text_cond} {score:.1%}',
367
- f'{historical_proba:.1%}',
368
- num_obs,
369
- ])
370
-
371
- results.columns = ['Outputs']
372
-
373
- # st.subheader('New Prediction')
374
-
375
- int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
376
- # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
377
- df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum]})
378
- df_probas.columns = ['PctGreen','NumObs','NumGreen']
379
-
380
- roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
381
- precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
382
- recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
383
- len_all = len(res1)
384
-
385
- res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
386
-
387
- roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
388
- precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
389
- recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
390
- len_hi = len(res2_filtered)
391
-
392
- df_performance = pd.DataFrame(
393
- index=[
394
- 'N',
395
- 'ROC AUC',
396
- 'Precision',
397
- 'Recall'
398
- ],
399
- columns = [
400
- 'All',
401
- 'High Confidence'
402
- ],
403
- data = [
404
- [len_all, len_hi],
405
- [roc_auc_score_all, roc_auc_score_hi],
406
- [precision_score_all, precision_score_hi],
407
- [recall_score_all, recall_score_hi]
408
- ]
409
- ).round(2)
410
-
411
- def get_acc(t, p):
412
- if t == False and p <= 0.4:
413
- return 'โœ…'
414
- elif t == True and p > 0.6:
415
- return 'โœ…'
416
- elif t == False and p > 0.6:
417
- return 'โŒ'
418
- elif t == True and p <= 0.4:
419
- return 'โŒ'
420
- else:
421
- return '๐ŸŸจ'
422
-
423
- def get_acc_text(t, p):
424
- if t == False and p <= 0.4:
425
- return 'Correct'
426
- elif t == True and p > 0.6:
427
- return 'Correct'
428
- elif t == False and p > 0.6:
429
- return 'Incorrect'
430
- elif t == True and p <= 0.4:
431
- return 'Incorrect'
432
- else:
433
- return 'No Action'
434
-
435
- perf_daily = res1.copy()
436
- perf_daily['TargetDate'] = perf_daily.index + BDay(1)
437
- perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
438
- perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
439
- perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
440
- perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
441
-
442
- def convert_df(df):
443
- # IMPORTANT: Cache the conversion to prevent computation on every rerun
444
- return df.to_csv()
445
-
446
- csv = convert_df(perf_daily)
447
-
448
- with tab1:
449
- st.subheader(f'Pred for {curr_date} as of 7AM PST')
450
- st.write(results)
451
- st.write(df_probas)
452
- with tab2:
453
- st.subheader('Latest Data for Pred')
454
- st.write(new_pred)
455
- with tab3:
456
- st.subheader('Historical Data')
457
- st.write(df_final)
458
- with tab4:
459
- st.subheader('Performance')
460
- st.write(df_performance)
461
- st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
462
- # st.download_button(
463
- # label="Download Historical Performance",
464
- # data=csv,
465
- fname='performance_for_30m_model.csv'
466
- # )
467
-
468
- elif option == 'โณ 60 Mins':
469
- # run60 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
470
- # if run60:
471
- from model_1h import *
472
- with st.spinner('Loading data...'):
473
- data, df_final, final_row = get_data()
474
- # st.success("โœ… Historical data")
475
-
476
- with st.spinner("Training models..."):
477
- def train_models():
478
- res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
479
- return res1, xgbr, seq2
480
- res1, xgbr, seq2 = train_models()
481
- # st.success("โœ… Models trained")
482
-
483
- with st.spinner("Getting new prediction..."):
484
-
485
- # Get last row
486
- new_pred = data.loc[final_row, ['BigNewsDay',
487
- 'Quarter',
488
- 'Perf5Day',
489
- 'Perf5Day_n1',
490
- 'DaysGreen',
491
- 'DaysRed',
492
- 'CurrentHigh30toClose',
493
- 'CurrentLow30toClose',
494
- 'CurrentClose30toClose',
495
- 'CurrentRange30',
496
- 'GapFill30',
497
- 'CurrentGap',
498
- 'RangePct',
499
- 'RangePct_n1',
500
- 'RangePct_n2',
501
- 'OHLC4_VIX',
502
- 'OHLC4_VIX_n1',
503
- 'OHLC4_VIX_n2']]
504
-
505
- new_pred = pd.DataFrame(new_pred).T
506
- # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
507
- # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
508
- curr_date = final_row + BDay(1)
509
- curr_date = curr_date.strftime('%Y-%m-%d')
510
-
511
- new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
512
- new_pred['Quarter'] = new_pred['Quarter'].astype(int)
513
- new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
514
- new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
515
- new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
516
- new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
517
- new_pred['CurrentHigh30toClose'] = new_pred['CurrentHigh30toClose'].astype(float)
518
- new_pred['CurrentLow30toClose'] = new_pred['CurrentLow30toClose'].astype(float)
519
- new_pred['CurrentClose30toClose'] = new_pred['CurrentClose30toClose'].astype(float)
520
- new_pred['CurrentRange30'] = new_pred['CurrentRange30'].astype(float)
521
- new_pred['GapFill30'] = new_pred['GapFill30'].astype(float)
522
- new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
523
- new_pred['RangePct'] = new_pred['RangePct'].astype(float)
524
- new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
525
- new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
526
- new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
527
- new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
528
- new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
529
-
530
- st.success("โœ… All done!")
531
- tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
532
-
533
- seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
534
-
535
- green_proba = seq_proba[0]
536
- red_proba = 1 - green_proba
537
- do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
538
- stdev = 0.01
539
- score = None
540
- num_obs = None
541
- cond = None
542
- historical_proba = None
543
- text_cond = None
544
- operator = None
545
-
546
- if do_not_play:
547
- text_cond = '๐ŸŸจ'
548
- operator = ''
549
- score = seq_proba[0]
550
- cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
551
- num_obs = len(res1.loc[cond])
552
- historical_proba = res1.loc[cond, 'True'].mean()
553
-
554
-
555
- elif green_proba > red_proba:
556
- # If the day is predicted to be green, say so
557
- text_cond = '๐ŸŸฉ'
558
- operator = '>='
559
- score = green_proba
560
- # How many with this score?
561
- cond = (res1['Predicted'] >= green_proba)
562
- num_obs = len(res1.loc[cond])
563
- # How often green?
564
- historical_proba = res1.loc[cond, 'True'].mean()
565
- # print(cond)
566
-
567
- elif green_proba <= red_proba:
568
- # If the day is predicted to be green, say so
569
- text_cond = '๐ŸŸฅ'
570
- operator = '<='
571
- score = red_proba
572
- # How many with this score?
573
- cond = (res1['Predicted'] <= red_proba)
574
- num_obs = len(res1.loc[cond])
575
- # How often green?
576
- historical_proba = 1 - res1.loc[cond, 'True'].mean()
577
- # print(cond)
578
-
579
- score_fmt = f'{score:.1%}'
580
-
581
- results = pd.DataFrame(index=[
582
- 'PrevClose',
583
- 'Confidence Score',
584
- 'Success Rate',
585
- f'NumObs {operator} {"" if do_not_play else score_fmt}',
586
- ], data = [
587
- f"{data.loc[final_row,'Close']:.2f}",
588
- f'{text_cond} {score:.1%}',
589
- f'{historical_proba:.1%}',
590
- num_obs,
591
- ])
592
-
593
- results.columns = ['Outputs']
594
-
595
- # st.subheader('New Prediction')
596
- int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
597
- # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
598
- df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum]})
599
- df_probas.columns = ['PctGreen','NumObs','NumGreen']
600
-
601
- roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
602
- precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
603
- recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
604
- len_all = len(res1)
605
-
606
- res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
607
-
608
- roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
609
- precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
610
- recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
611
- len_hi = len(res2_filtered)
612
-
613
- df_performance = pd.DataFrame(
614
- index=[
615
- 'N',
616
- 'ROC AUC',
617
- 'Precision',
618
- 'Recall'
619
- ],
620
- columns = [
621
- 'All',
622
- 'High Confidence'
623
- ],
624
- data = [
625
- [len_all, len_hi],
626
- [roc_auc_score_all, roc_auc_score_hi],
627
- [precision_score_all, precision_score_hi],
628
- [recall_score_all, recall_score_hi]
629
- ]
630
- ).round(2)
631
-
632
- def get_acc(t, p):
633
- if t == False and p <= 0.4:
634
- return 'โœ…'
635
- elif t == True and p > 0.6:
636
- return 'โœ…'
637
- elif t == False and p > 0.6:
638
- return 'โŒ'
639
- elif t == True and p <= 0.4:
640
- return 'โŒ'
641
- else:
642
- return '๐ŸŸจ'
643
-
644
- def get_acc_text(t, p):
645
- if t == False and p <= 0.4:
646
- return 'Correct'
647
- elif t == True and p > 0.6:
648
- return 'Correct'
649
- elif t == False and p > 0.6:
650
- return 'Incorrect'
651
- elif t == True and p <= 0.4:
652
- return 'Incorrect'
653
- else:
654
- return 'No Action'
655
-
656
- perf_daily = res1.copy()
657
- perf_daily['TargetDate'] = perf_daily.index + BDay(1)
658
- perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
659
- perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
660
- perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
661
- perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
662
-
663
- def convert_df(df):
664
- # IMPORTANT: Cache the conversion to prevent computation on every rerun
665
- return df.to_csv()
666
-
667
- csv = convert_df(perf_daily)
668
-
669
- with tab1:
670
- st.subheader(f'Pred for {curr_date} as of 7:30AM PST')
671
- st.write(results)
672
- st.write(df_probas)
673
- with tab2:
674
- st.subheader('Latest Data for Pred')
675
- st.write(new_pred)
676
- with tab3:
677
- st.subheader('Historical Data')
678
- st.write(df_final)
679
- with tab4:
680
- st.subheader('Performance')
681
- st.write(df_performance)
682
- st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
683
- # st.download_button(
684
- # label="Download Historical Performance",
685
- # data=csv,
686
- fname='performance_for_60m_model.csv'
687
- # )
688
-
689
- if submitted:
690
- st.download_button(
691
- label="Download Historical Performance",
692
- data=csv,
693
- file_name=fname,
694
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
695
  st.caption('โš ๏ธ Downloading the CSV will reload the page. โš ๏ธ')
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.metrics import roc_auc_score, precision_score, recall_score
5
+ from pandas.tseries.offsets import BDay
6
+
7
+ st.set_page_config(
8
+ page_title="Gameday Model for $SPX",
9
+ page_icon="๐ŸŽฎ"
10
+ )
11
+
12
+ st.title('๐ŸŽฎ Gameday Model for $SPX')
13
+ st.markdown('**PLEASE NOTE:** Model should be run at or after market open. Documentation on the model and its features [can be found here.](https://huggingface.co/spaces/boomsss/gamedayspx/blob/main/README.md)')
14
+ with st.form("choose_model"):
15
+
16
+ option = st.selectbox(
17
+ 'Select a model, then run.',
18
+ ('', '๐ŸŒž At Open', 'โŒš 30 Mins', 'โณ 60 Mins', '๐Ÿ•ฐ 90 Mins'))
19
+ col1, col2 = st.columns(2)
20
+ with col1:
21
+ submitted = st.form_submit_button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run',use_container_width=True)
22
+
23
+ with col2:
24
+ cleared = st.form_submit_button('๐Ÿงน Clear All',use_container_width=True)
25
+
26
+ if cleared:
27
+ st.cache_data.clear()
28
+
29
+ if option == '':
30
+ st.write('No model selected.')
31
+
32
+ if submitted:
33
+
34
+ if option == '๐ŸŒž At Open':
35
+ # runday = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
36
+ # if runday:
37
+ from model_day import *
38
+ with st.spinner('Loading data...'):
39
+ data, df_final, final_row = get_data()
40
+ # st.success("โœ… Historical data")
41
+
42
+ with st.spinner("Training models..."):
43
+ def train_models():
44
+ res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
45
+ return res1, xgbr, seq2
46
+ res1, xgbr, seq2 = train_models()
47
+ # st.success("โœ… Models trained")
48
+
49
+ with st.spinner("Getting new prediction..."):
50
+
51
+ # Get last row
52
+ new_pred = data.loc[final_row, ['BigNewsDay',
53
+ 'Quarter',
54
+ 'Perf5Day',
55
+ 'Perf5Day_n1',
56
+ 'DaysGreen',
57
+ 'DaysRed',
58
+ 'CurrentGap',
59
+ 'RangePct',
60
+ 'RangePct_n1',
61
+ 'RangePct_n2',
62
+ 'OHLC4_VIX',
63
+ 'OHLC4_VIX_n1',
64
+ 'OHLC4_VIX_n2']]
65
+
66
+ new_pred = pd.DataFrame(new_pred).T
67
+ # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
68
+ # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
69
+ curr_date = final_row + BDay(1)
70
+ curr_date = curr_date.strftime('%Y-%m-%d')
71
+
72
+ new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
73
+ new_pred['Quarter'] = new_pred['Quarter'].astype(int)
74
+ new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
75
+ new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
76
+ new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
77
+ new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
78
+ new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
79
+ new_pred['RangePct'] = new_pred['RangePct'].astype(float)
80
+ new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
81
+ new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
82
+ new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
83
+ new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
84
+ new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
85
+
86
+ st.success("โœ… All done!")
87
+ tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
88
+
89
+ seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
90
+
91
+ green_proba = seq_proba[0]
92
+ red_proba = 1 - green_proba
93
+ do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
94
+ stdev = 0.01
95
+ score = None
96
+ num_obs = None
97
+ cond = None
98
+ historical_proba = None
99
+ text_cond = None
100
+ operator = None
101
+
102
+ if do_not_play:
103
+ text_cond = '๐ŸŸจ'
104
+ operator = ''
105
+ score = seq_proba[0]
106
+ cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
107
+ num_obs = len(res1.loc[cond])
108
+ historical_proba = res1.loc[cond, 'True'].mean()
109
+
110
+
111
+ elif green_proba > red_proba:
112
+ # If the day is predicted to be green, say so
113
+ text_cond = '๐ŸŸฉ'
114
+ operator = '>='
115
+ score = green_proba
116
+ # How many with this score?
117
+ cond = (res1['Predicted'] >= green_proba)
118
+ num_obs = len(res1.loc[cond])
119
+ # How often green?
120
+ historical_proba = res1.loc[cond, 'True'].mean()
121
+ # print(cond)
122
+
123
+ elif green_proba <= red_proba:
124
+ # If the day is predicted to be green, say so
125
+ text_cond = '๐ŸŸฅ'
126
+ operator = '<='
127
+ score = red_proba
128
+ # How many with this score?
129
+ cond = (res1['Predicted'] <= red_proba)
130
+ num_obs = len(res1.loc[cond])
131
+ # How often green?
132
+ historical_proba = 1 - res1.loc[cond, 'True'].mean()
133
+ # print(cond)
134
+
135
+ score_fmt = f'{score:.1%}'
136
+
137
+ results = pd.DataFrame(index=[
138
+ 'PrevClose',
139
+ 'Confidence Score',
140
+ 'Success Rate',
141
+ f'NumObs {operator} {"" if do_not_play else score_fmt}',
142
+ ], data = [
143
+ f"{data.loc[final_row,'Close']:.2f}",
144
+ f'{text_cond} {score:.1%}',
145
+ f'{historical_proba:.1%}',
146
+ num_obs,
147
+ ])
148
+
149
+ results.columns = ['Outputs']
150
+
151
+ # st.subheader('New Prediction')
152
+
153
# Bucket the historical predictions into fixed probability bands and
# summarise how often each band actually finished green.
int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']
proba_bands = pd.cut(
    res1['Predicted'],
    bins=[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf],
    labels=int_labels,
)
# String aggregator names avoid the pandas deprecation warning raised for
# np.mean / np.sum; observed=False spells out the existing default so empty
# bands stay in the table.
df_probas = res1.groupby(proba_bands, observed=False).agg({'True': ['mean', len, 'sum']})
df_probas.columns = ['PctGreen', 'NumObs', 'NumGreen']


def _classification_metrics(frame):
    """Return [N, ROC AUC, precision, recall] for a frame of predictions.

    `frame` needs a 'True' column (realised outcome) and a 'Predicted'
    column (probability); precision and recall use a 0.5 cut-off.
    """
    actual = frame['True'].astype(int)
    called_green = frame['Predicted'] > 0.5
    return [
        len(frame),
        roc_auc_score(actual, frame['Predicted'].values),
        precision_score(actual, called_green),
        recall_score(actual, called_green),
    ]


len_all, roc_auc_score_all, precision_score_all, recall_score_all = _classification_metrics(res1)

# "High confidence" = predictions outside the (0.4, 0.6] no-action band.
res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
len_hi, roc_auc_score_hi, precision_score_hi, recall_score_hi = _classification_metrics(res2_filtered)

df_performance = pd.DataFrame(
    index=['N', 'ROC AUC', 'Precision', 'Recall'],
    columns=['All', 'High Confidence'],
    data=[
        [len_all, len_hi],
        [roc_auc_score_all, roc_auc_score_hi],
        [precision_score_all, precision_score_hi],
        [recall_score_all, recall_score_hi],
    ],
).round(2)
188
+
189
def get_acc(t, p):
    """Grade one historical prediction with an emoji.

    t is the realised outcome and p the predicted probability.
    p <= 0.4 is read as a call for False, p > 0.6 as a call for True;
    anything in between is not graded.

    Returns '✅' when the call matched t, '❌' when it matched the
    opposite outcome, and '🟨' otherwise.
    """
    if p <= 0.4:
        called = False
    elif p > 0.6:
        called = True
    else:
        return '🟨'
    # Equality (not truthiness) so a t that is neither outcome stays ungraded.
    if t == called:
        return '✅'
    if t == (not called):
        return '❌'
    return '🟨'
200
+
201
def get_acc_text(t, p):
    """Describe one historical prediction as 'Correct', 'Incorrect' or 'No Action'.

    t is the realised outcome, p the predicted probability. Probabilities
    in (0.4, 0.6] (or an outcome that is neither True nor False) give
    'No Action'.
    """
    # (outcome, verdict when p <= 0.4, verdict when p > 0.6)
    verdict_table = (
        (False, 'Correct', 'Incorrect'),
        (True, 'Incorrect', 'Correct'),
    )
    for outcome, verdict_low, verdict_high in verdict_table:
        if t == outcome:
            if p <= 0.4:
                return verdict_low
            if p > 0.6:
                return verdict_high
    return 'No Action'
212
+
213
# Per-day performance table: one graded row per historical prediction.
perf_daily = res1.copy()
perf_daily['TargetDate'] = perf_daily.index + BDay(1)
outcome_pairs = list(zip(perf_daily['True'], perf_daily['Predicted']))
perf_daily['Accuracy'] = [get_acc(actual, prob) for actual, prob in outcome_pairs]
perf_daily['AccuracyText'] = [get_acc_text(actual, prob) for actual, prob in outcome_pairs]
# Probabilities at or below 0.4 are flipped to 1 - p; everything else is kept.
perf_daily['ConfidenceScore'] = [
    1 - prob if prob <= 0.4 else prob
    for prob in perf_daily['Predicted']
]
perf_daily = perf_daily[
    ['TargetDate', 'Predicted', 'True', 'Accuracy', 'AccuracyText', 'ConfidenceScore']
]
219
+
220
def convert_df(df):
    """Serialise `df` to CSV text.

    NOTE: no caching is applied here; the conversion runs on every call.
    """
    csv_text = df.to_csv()
    return csv_text
223
+
224
+ csv = convert_df(perf_daily)
225
+
226
# File name used by the download button at the end of the script.
fname = 'performance_for_at_open_model.csv'

# Prediction tab: headline result plus the probability-band table.
with tab1:
    st.subheader(f'Pred for {curr_date} as of 6:30AM PST')
    st.write(results)
    st.write(df_probas)

# Feature row that was fed to the model.
with tab2:
    st.subheader('Latest Data for Pred')
    st.write(new_pred)

# Full historical frame.
with tab3:
    st.subheader('Historical Data')
    st.write(df_final)

# Aggregate metrics followed by the per-day grading.
with tab4:
    st.subheader('Performance')
    st.write(df_performance)
    st.write(perf_daily[['TargetDate', 'Predicted', 'True', 'Accuracy']])
245
+
246
+ elif option == 'โŒš 30 Mins':
247
+ # run30 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
248
+ # if run30:
249
+ from model_30m import *
250
+ with st.spinner('Loading data...'):
251
+ data, df_final, final_row = get_data()
252
+ # st.success("โœ… Historical data")
253
+
254
+ with st.spinner("Training models..."):
255
def train_models():
    """Run walk-forward validation on the NaN-free rows of `df_final`.

    Returns the three values produced by `walk_forward_validation_seq`,
    in the same order.
    """
    validation_res, first_model, second_model = walk_forward_validation_seq(
        df_final.dropna(), 'Target_clf', 'Target', 100, 1
    )
    return validation_res, first_model, second_model
258
+ res1, xgbr, seq2 = train_models()
259
+ # st.success("โœ… Models trained")
260
+
261
+ with st.spinner("Getting new prediction..."):
262
+
263
# Columns read from the latest row, mapped to the dtype each one is cast to.
feature_dtypes = {
    'BigNewsDay': float,
    'Quarter': int,
    'Perf5Day': bool,
    'Perf5Day_n1': bool,
    'DaysGreen': float,
    'DaysRed': float,
    'CurrentHigh30toClose': float,
    'CurrentLow30toClose': float,
    'CurrentClose30toClose': float,
    'CurrentRange30': float,
    'GapFill30': float,
    'CurrentGap': float,
    'RangePct': float,
    'RangePct_n1': float,
    'RangePct_n2': float,
    'OHLC4_VIX': float,
    'OHLC4_VIX_n1': float,
    'OHLC4_VIX_n2': float,
}

# Get last row as a one-row frame (features as columns).
new_pred = pd.DataFrame(data.loc[final_row, list(feature_dtypes)]).T

# Date shown in the prediction header: one business day after final_row.
curr_date = (final_row + BDay(1)).strftime('%Y-%m-%d')

# The transposed row is object-typed; cast every feature to its model dtype.
new_pred = new_pred.astype(feature_dtypes)
307
+
308
+ st.success("โœ… All done!")
309
+ tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
310
+
311
# Score the newest row; element 0 is used as the probability of a green close.
seq_proba = seq_predict_proba(new_pred, xgbr, seq2)

green_proba = seq_proba[0]
red_proba = 1 - green_proba
# Probabilities inside (0.4, 0.6] are treated as "no action".
do_not_play = (green_proba > 0.4) and (green_proba <= 0.6)
stdev = 0.01
score = num_obs = cond = historical_proba = text_cond = operator = None

if do_not_play:
    text_cond = '🟨'
    operator = ''
    score = green_proba
    # Historical days that also fell in the no-action band.
    cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
    num_obs = len(res1.loc[cond])
    historical_proba = res1.loc[cond, 'True'].mean()

elif green_proba > red_proba:
    # The day is predicted to be green.
    text_cond = '🟩'
    operator = '>='
    score = green_proba
    # Historical days predicted at least this green, and how often they were.
    cond = (res1['Predicted'] >= green_proba)
    num_obs = len(res1.loc[cond])
    historical_proba = res1.loc[cond, 'True'].mean()

elif green_proba <= red_proba:
    # The day is predicted to be red.
    text_cond = '🟥'
    operator = '<='
    score = red_proba
    # res1['Predicted'] is on the green-probability scale, so "at least this
    # red" means Predicted <= green_proba (comparing against red_proba
    # selected almost the entire history).
    cond = (res1['Predicted'] <= green_proba)
    num_obs = len(res1.loc[cond])
    # How often those days were red.
    historical_proba = 1 - res1.loc[cond, 'True'].mean()

score_fmt = f'{score:.1%}'
# Threshold actually applied to res1['Predicted'] (green-probability scale).
threshold_fmt = f'{green_proba:.1%}'

results = pd.DataFrame(
    index=[
        'PrevClose',
        'Confidence Score',
        'Success Rate',
        f'NumObs {operator} {"" if do_not_play else threshold_fmt}',
    ],
    data=[
        f"{data.loc[final_row,'Close']:.2f}",
        f'{text_cond} {score:.1%}',
        f'{historical_proba:.1%}',
        num_obs,
    ],
)

results.columns = ['Outputs']
372
+
373
+ # st.subheader('New Prediction')
374
+
375
# Bucket the historical predictions into fixed probability bands and
# summarise how often each band actually finished green.
int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']
proba_bands = pd.cut(
    res1['Predicted'],
    bins=[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf],
    labels=int_labels,
)
# String aggregator names avoid the pandas deprecation warning raised for
# np.mean / np.sum; observed=False spells out the existing default so empty
# bands stay in the table.
df_probas = res1.groupby(proba_bands, observed=False).agg({'True': ['mean', len, 'sum']})
df_probas.columns = ['PctGreen', 'NumObs', 'NumGreen']


def _classification_metrics(frame):
    """Return [N, ROC AUC, precision, recall] for a frame of predictions.

    `frame` needs a 'True' column (realised outcome) and a 'Predicted'
    column (probability); precision and recall use a 0.5 cut-off.
    """
    actual = frame['True'].astype(int)
    called_green = frame['Predicted'] > 0.5
    return [
        len(frame),
        roc_auc_score(actual, frame['Predicted'].values),
        precision_score(actual, called_green),
        recall_score(actual, called_green),
    ]


len_all, roc_auc_score_all, precision_score_all, recall_score_all = _classification_metrics(res1)

# "High confidence" = predictions outside the (0.4, 0.6] no-action band.
res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
len_hi, roc_auc_score_hi, precision_score_hi, recall_score_hi = _classification_metrics(res2_filtered)

df_performance = pd.DataFrame(
    index=['N', 'ROC AUC', 'Precision', 'Recall'],
    columns=['All', 'High Confidence'],
    data=[
        [len_all, len_hi],
        [roc_auc_score_all, roc_auc_score_hi],
        [precision_score_all, precision_score_hi],
        [recall_score_all, recall_score_hi],
    ],
).round(2)
410
+
411
def get_acc(t, p):
    """Grade one historical prediction with an emoji.

    t is the realised outcome and p the predicted probability.
    p <= 0.4 is read as a call for False, p > 0.6 as a call for True;
    anything in between is not graded.

    Returns '✅' when the call matched t, '❌' when it matched the
    opposite outcome, and '🟨' otherwise.
    """
    if p <= 0.4:
        called = False
    elif p > 0.6:
        called = True
    else:
        return '🟨'
    # Equality (not truthiness) so a t that is neither outcome stays ungraded.
    if t == called:
        return '✅'
    if t == (not called):
        return '❌'
    return '🟨'
422
+
423
def get_acc_text(t, p):
    """Describe one historical prediction as 'Correct', 'Incorrect' or 'No Action'.

    t is the realised outcome, p the predicted probability. Probabilities
    in (0.4, 0.6] (or an outcome that is neither True nor False) give
    'No Action'.
    """
    # (outcome, verdict when p <= 0.4, verdict when p > 0.6)
    verdict_table = (
        (False, 'Correct', 'Incorrect'),
        (True, 'Incorrect', 'Correct'),
    )
    for outcome, verdict_low, verdict_high in verdict_table:
        if t == outcome:
            if p <= 0.4:
                return verdict_low
            if p > 0.6:
                return verdict_high
    return 'No Action'
434
+
435
# Per-day performance table: one graded row per historical prediction.
perf_daily = res1.copy()
perf_daily['TargetDate'] = perf_daily.index + BDay(1)
outcome_pairs = list(zip(perf_daily['True'], perf_daily['Predicted']))
perf_daily['Accuracy'] = [get_acc(actual, prob) for actual, prob in outcome_pairs]
perf_daily['AccuracyText'] = [get_acc_text(actual, prob) for actual, prob in outcome_pairs]
# Probabilities at or below 0.4 are flipped to 1 - p; everything else is kept.
perf_daily['ConfidenceScore'] = [
    1 - prob if prob <= 0.4 else prob
    for prob in perf_daily['Predicted']
]
perf_daily = perf_daily[
    ['TargetDate', 'Predicted', 'True', 'Accuracy', 'AccuracyText', 'ConfidenceScore']
]
441
+
442
def convert_df(df):
    """Serialise `df` to CSV text.

    NOTE: no caching is applied here; the conversion runs on every call.
    """
    csv_text = df.to_csv()
    return csv_text
445
+
446
+ csv = convert_df(perf_daily)
447
+
448
# File name used by the download button at the end of the script.
fname = 'performance_for_30m_model.csv'

# Prediction tab: headline result plus the probability-band table.
with tab1:
    st.subheader(f'Pred for {curr_date} as of 7AM PST')
    st.write(results)
    st.write(df_probas)

# Feature row that was fed to the model.
with tab2:
    st.subheader('Latest Data for Pred')
    st.write(new_pred)

# Full historical frame.
with tab3:
    st.subheader('Historical Data')
    st.write(df_final)

# Aggregate metrics followed by the per-day grading.
with tab4:
    st.subheader('Performance')
    st.write(df_performance)
    st.write(perf_daily[['TargetDate', 'Predicted', 'True', 'Accuracy']])
467
+
468
+ elif option == 'โณ 60 Mins':
469
+ # run60 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
470
+ # if run60:
471
+ from model_1h import *
472
+ with st.spinner('Loading data...'):
473
+ data, df_final, final_row = get_data()
474
+ # st.success("โœ… Historical data")
475
+
476
+ with st.spinner("Training models..."):
477
def train_models():
    """Run walk-forward validation on the NaN-free rows of `df_final`.

    Returns the three values produced by `walk_forward_validation_seq`,
    in the same order.
    """
    validation_res, first_model, second_model = walk_forward_validation_seq(
        df_final.dropna(), 'Target_clf', 'Target', 100, 1
    )
    return validation_res, first_model, second_model
480
+ res1, xgbr, seq2 = train_models()
481
+ # st.success("โœ… Models trained")
482
+
483
+ with st.spinner("Getting new prediction..."):
484
+
485
# Columns read from the latest row, mapped to the dtype each one is cast to.
feature_dtypes = {
    'BigNewsDay': float,
    'Quarter': int,
    'Perf5Day': bool,
    'Perf5Day_n1': bool,
    'DaysGreen': float,
    'DaysRed': float,
    'CurrentHigh30toClose': float,
    'CurrentLow30toClose': float,
    'CurrentClose30toClose': float,
    'CurrentRange30': float,
    'GapFill30': float,
    'CurrentGap': float,
    'RangePct': float,
    'RangePct_n1': float,
    'RangePct_n2': float,
    'OHLC4_VIX': float,
    'OHLC4_VIX_n1': float,
    'OHLC4_VIX_n2': float,
}

# Get last row as a one-row frame (features as columns).
new_pred = pd.DataFrame(data.loc[final_row, list(feature_dtypes)]).T

# Date shown in the prediction header: one business day after final_row.
curr_date = (final_row + BDay(1)).strftime('%Y-%m-%d')

# The transposed row is object-typed; cast every feature to its model dtype.
new_pred = new_pred.astype(feature_dtypes)
529
+
530
+ st.success("โœ… All done!")
531
+ tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
532
+
533
# Score the newest row; element 0 is used as the probability of a green close.
seq_proba = seq_predict_proba(new_pred, xgbr, seq2)

green_proba = seq_proba[0]
red_proba = 1 - green_proba
# Probabilities inside (0.4, 0.6] are treated as "no action".
do_not_play = (green_proba > 0.4) and (green_proba <= 0.6)
stdev = 0.01
score = num_obs = cond = historical_proba = text_cond = operator = None

if do_not_play:
    text_cond = '🟨'
    operator = ''
    score = green_proba
    # Historical days that also fell in the no-action band.
    cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
    num_obs = len(res1.loc[cond])
    historical_proba = res1.loc[cond, 'True'].mean()

elif green_proba > red_proba:
    # The day is predicted to be green.
    text_cond = '🟩'
    operator = '>='
    score = green_proba
    # Historical days predicted at least this green, and how often they were.
    cond = (res1['Predicted'] >= green_proba)
    num_obs = len(res1.loc[cond])
    historical_proba = res1.loc[cond, 'True'].mean()

elif green_proba <= red_proba:
    # The day is predicted to be red.
    text_cond = '🟥'
    operator = '<='
    score = red_proba
    # res1['Predicted'] is on the green-probability scale, so "at least this
    # red" means Predicted <= green_proba (comparing against red_proba
    # selected almost the entire history).
    cond = (res1['Predicted'] <= green_proba)
    num_obs = len(res1.loc[cond])
    # How often those days were red.
    historical_proba = 1 - res1.loc[cond, 'True'].mean()

score_fmt = f'{score:.1%}'
# Threshold actually applied to res1['Predicted'] (green-probability scale).
threshold_fmt = f'{green_proba:.1%}'

results = pd.DataFrame(
    index=[
        'PrevClose',
        'Confidence Score',
        'Success Rate',
        f'NumObs {operator} {"" if do_not_play else threshold_fmt}',
    ],
    data=[
        f"{data.loc[final_row,'Close']:.2f}",
        f'{text_cond} {score:.1%}',
        f'{historical_proba:.1%}',
        num_obs,
    ],
)

results.columns = ['Outputs']
594
+
595
+ # st.subheader('New Prediction')
596
# Bucket the historical predictions into fixed probability bands and
# summarise how often each band actually finished green.
int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']
proba_bands = pd.cut(
    res1['Predicted'],
    bins=[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf],
    labels=int_labels,
)
# String aggregator names avoid the pandas deprecation warning raised for
# np.mean / np.sum; observed=False spells out the existing default so empty
# bands stay in the table.
df_probas = res1.groupby(proba_bands, observed=False).agg({'True': ['mean', len, 'sum']})
df_probas.columns = ['PctGreen', 'NumObs', 'NumGreen']


def _classification_metrics(frame):
    """Return [N, ROC AUC, precision, recall] for a frame of predictions.

    `frame` needs a 'True' column (realised outcome) and a 'Predicted'
    column (probability); precision and recall use a 0.5 cut-off.
    """
    actual = frame['True'].astype(int)
    called_green = frame['Predicted'] > 0.5
    return [
        len(frame),
        roc_auc_score(actual, frame['Predicted'].values),
        precision_score(actual, called_green),
        recall_score(actual, called_green),
    ]


len_all, roc_auc_score_all, precision_score_all, recall_score_all = _classification_metrics(res1)

# "High confidence" = predictions outside the (0.4, 0.6] no-action band.
res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
len_hi, roc_auc_score_hi, precision_score_hi, recall_score_hi = _classification_metrics(res2_filtered)

df_performance = pd.DataFrame(
    index=['N', 'ROC AUC', 'Precision', 'Recall'],
    columns=['All', 'High Confidence'],
    data=[
        [len_all, len_hi],
        [roc_auc_score_all, roc_auc_score_hi],
        [precision_score_all, precision_score_hi],
        [recall_score_all, recall_score_hi],
    ],
).round(2)
631
+
632
def get_acc(t, p):
    """Grade one historical prediction with an emoji.

    t is the realised outcome and p the predicted probability.
    p <= 0.4 is read as a call for False, p > 0.6 as a call for True;
    anything in between is not graded.

    Returns '✅' when the call matched t, '❌' when it matched the
    opposite outcome, and '🟨' otherwise.
    """
    if p <= 0.4:
        called = False
    elif p > 0.6:
        called = True
    else:
        return '🟨'
    # Equality (not truthiness) so a t that is neither outcome stays ungraded.
    if t == called:
        return '✅'
    if t == (not called):
        return '❌'
    return '🟨'
643
+
644
def get_acc_text(t, p):
    """Describe one historical prediction as 'Correct', 'Incorrect' or 'No Action'.

    t is the realised outcome, p the predicted probability. Probabilities
    in (0.4, 0.6] (or an outcome that is neither True nor False) give
    'No Action'.
    """
    # (outcome, verdict when p <= 0.4, verdict when p > 0.6)
    verdict_table = (
        (False, 'Correct', 'Incorrect'),
        (True, 'Incorrect', 'Correct'),
    )
    for outcome, verdict_low, verdict_high in verdict_table:
        if t == outcome:
            if p <= 0.4:
                return verdict_low
            if p > 0.6:
                return verdict_high
    return 'No Action'
655
+
656
# Per-day performance table: one graded row per historical prediction.
perf_daily = res1.copy()
perf_daily['TargetDate'] = perf_daily.index + BDay(1)
outcome_pairs = list(zip(perf_daily['True'], perf_daily['Predicted']))
perf_daily['Accuracy'] = [get_acc(actual, prob) for actual, prob in outcome_pairs]
perf_daily['AccuracyText'] = [get_acc_text(actual, prob) for actual, prob in outcome_pairs]
# Probabilities at or below 0.4 are flipped to 1 - p; everything else is kept.
perf_daily['ConfidenceScore'] = [
    1 - prob if prob <= 0.4 else prob
    for prob in perf_daily['Predicted']
]
perf_daily = perf_daily[
    ['TargetDate', 'Predicted', 'True', 'Accuracy', 'AccuracyText', 'ConfidenceScore']
]
662
+
663
def convert_df(df):
    """Serialise `df` to CSV text.

    NOTE: no caching is applied here; the conversion runs on every call.
    """
    csv_text = df.to_csv()
    return csv_text
666
+
667
+ csv = convert_df(perf_daily)
668
+
669
# File name used by the download button at the end of the script.
fname = 'performance_for_60m_model.csv'

# Prediction tab: headline result plus the probability-band table.
with tab1:
    st.subheader(f'Pred for {curr_date} as of 7:30AM PST')
    st.write(results)
    st.write(df_probas)

# Feature row that was fed to the model.
with tab2:
    st.subheader('Latest Data for Pred')
    st.write(new_pred)

# Full historical frame.
with tab3:
    st.subheader('Historical Data')
    st.write(df_final)

# Aggregate metrics followed by the per-day grading.
with tab4:
    st.subheader('Performance')
    st.write(df_performance)
    st.write(perf_daily[['TargetDate', 'Predicted', 'True', 'Accuracy']])
688
+
689
+ elif option == '๐Ÿ•ฐ 90 Mins':
690
+ # run60 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
691
+ # if run60:
692
+ from model_90m import *
693
+ with st.spinner('Loading data...'):
694
+ data, df_final, final_row = get_data()
695
+ # st.success("โœ… Historical data")
696
+
697
+ with st.spinner("Training models..."):
698
def train_models():
    """Run walk-forward validation on the NaN-free rows of `df_final`.

    Returns the three values produced by `walk_forward_validation_seq`,
    in the same order.
    """
    validation_res, first_model, second_model = walk_forward_validation_seq(
        df_final.dropna(), 'Target_clf', 'Target', 100, 1
    )
    return validation_res, first_model, second_model
701
+ res1, xgbr, seq2 = train_models()
702
+ # st.success("โœ… Models trained")
703
+
704
+ with st.spinner("Getting new prediction..."):
705
+
706
# Columns read from the latest row, mapped to the dtype each one is cast to.
feature_dtypes = {
    'BigNewsDay': float,
    'Quarter': int,
    'Perf5Day': bool,
    'Perf5Day_n1': bool,
    'DaysGreen': float,
    'DaysRed': float,
    'CurrentHigh30toClose': float,
    'CurrentLow30toClose': float,
    'CurrentClose30toClose': float,
    'CurrentRange30': float,
    'GapFill30': float,
    'CurrentGap': float,
    'RangePct': float,
    'RangePct_n1': float,
    'RangePct_n2': float,
    'OHLC4_VIX': float,
    'OHLC4_VIX_n1': float,
    'OHLC4_VIX_n2': float,
}

# Get last row as a one-row frame (features as columns).
new_pred = pd.DataFrame(data.loc[final_row, list(feature_dtypes)]).T

# Date shown in the prediction header: one business day after final_row.
curr_date = (final_row + BDay(1)).strftime('%Y-%m-%d')

# The transposed row is object-typed; cast every feature to its model dtype.
new_pred = new_pred.astype(feature_dtypes)
750
+
751
+ st.success("โœ… All done!")
752
+ tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
753
+
754
# Score the newest row; element 0 is used as the probability of a green close.
seq_proba = seq_predict_proba(new_pred, xgbr, seq2)

green_proba = seq_proba[0]
red_proba = 1 - green_proba
# Probabilities inside (0.4, 0.6] are treated as "no action".
do_not_play = (green_proba > 0.4) and (green_proba <= 0.6)
stdev = 0.01
score = num_obs = cond = historical_proba = text_cond = operator = None

if do_not_play:
    text_cond = '🟨'
    operator = ''
    score = green_proba
    # Historical days that also fell in the no-action band.
    cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
    num_obs = len(res1.loc[cond])
    historical_proba = res1.loc[cond, 'True'].mean()

elif green_proba > red_proba:
    # The day is predicted to be green.
    text_cond = '🟩'
    operator = '>='
    score = green_proba
    # Historical days predicted at least this green, and how often they were.
    cond = (res1['Predicted'] >= green_proba)
    num_obs = len(res1.loc[cond])
    historical_proba = res1.loc[cond, 'True'].mean()

elif green_proba <= red_proba:
    # The day is predicted to be red.
    text_cond = '🟥'
    operator = '<='
    score = red_proba
    # res1['Predicted'] is on the green-probability scale, so "at least this
    # red" means Predicted <= green_proba (comparing against red_proba
    # selected almost the entire history).
    cond = (res1['Predicted'] <= green_proba)
    num_obs = len(res1.loc[cond])
    # How often those days were red.
    historical_proba = 1 - res1.loc[cond, 'True'].mean()

score_fmt = f'{score:.1%}'
# Threshold actually applied to res1['Predicted'] (green-probability scale).
threshold_fmt = f'{green_proba:.1%}'

results = pd.DataFrame(
    index=[
        'PrevClose',
        'Confidence Score',
        'Success Rate',
        f'NumObs {operator} {"" if do_not_play else threshold_fmt}',
    ],
    data=[
        f"{data.loc[final_row,'Close']:.2f}",
        f'{text_cond} {score:.1%}',
        f'{historical_proba:.1%}',
        num_obs,
    ],
)

results.columns = ['Outputs']
815
+
816
+ # st.subheader('New Prediction')
817
# Bucket the historical predictions into fixed probability bands and
# summarise how often each band actually finished green.
int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']
proba_bands = pd.cut(
    res1['Predicted'],
    bins=[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf],
    labels=int_labels,
)
# String aggregator names avoid the pandas deprecation warning raised for
# np.mean / np.sum; observed=False spells out the existing default so empty
# bands stay in the table.
df_probas = res1.groupby(proba_bands, observed=False).agg({'True': ['mean', len, 'sum']})
df_probas.columns = ['PctGreen', 'NumObs', 'NumGreen']


def _classification_metrics(frame):
    """Return [N, ROC AUC, precision, recall] for a frame of predictions.

    `frame` needs a 'True' column (realised outcome) and a 'Predicted'
    column (probability); precision and recall use a 0.5 cut-off.
    """
    actual = frame['True'].astype(int)
    called_green = frame['Predicted'] > 0.5
    return [
        len(frame),
        roc_auc_score(actual, frame['Predicted'].values),
        precision_score(actual, called_green),
        recall_score(actual, called_green),
    ]


len_all, roc_auc_score_all, precision_score_all, recall_score_all = _classification_metrics(res1)

# "High confidence" = predictions outside the (0.4, 0.6] no-action band.
res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
len_hi, roc_auc_score_hi, precision_score_hi, recall_score_hi = _classification_metrics(res2_filtered)

df_performance = pd.DataFrame(
    index=['N', 'ROC AUC', 'Precision', 'Recall'],
    columns=['All', 'High Confidence'],
    data=[
        [len_all, len_hi],
        [roc_auc_score_all, roc_auc_score_hi],
        [precision_score_all, precision_score_hi],
        [recall_score_all, recall_score_hi],
    ],
).round(2)
852
+
853
def get_acc(t, p):
    """Grade one historical prediction with an emoji.

    t is the realised outcome and p the predicted probability.
    p <= 0.4 is read as a call for False, p > 0.6 as a call for True;
    anything in between is not graded.

    Returns '✅' when the call matched t, '❌' when it matched the
    opposite outcome, and '🟨' otherwise.
    """
    if p <= 0.4:
        called = False
    elif p > 0.6:
        called = True
    else:
        return '🟨'
    # Equality (not truthiness) so a t that is neither outcome stays ungraded.
    if t == called:
        return '✅'
    if t == (not called):
        return '❌'
    return '🟨'
864
+
865
def get_acc_text(t, p):
    """Describe one historical prediction as 'Correct', 'Incorrect' or 'No Action'.

    t is the realised outcome, p the predicted probability. Probabilities
    in (0.4, 0.6] (or an outcome that is neither True nor False) give
    'No Action'.
    """
    # (outcome, verdict when p <= 0.4, verdict when p > 0.6)
    verdict_table = (
        (False, 'Correct', 'Incorrect'),
        (True, 'Incorrect', 'Correct'),
    )
    for outcome, verdict_low, verdict_high in verdict_table:
        if t == outcome:
            if p <= 0.4:
                return verdict_low
            if p > 0.6:
                return verdict_high
    return 'No Action'
876
+
877
# Per-day performance table: one graded row per historical prediction.
perf_daily = res1.copy()
perf_daily['TargetDate'] = perf_daily.index + BDay(1)
outcome_pairs = list(zip(perf_daily['True'], perf_daily['Predicted']))
perf_daily['Accuracy'] = [get_acc(actual, prob) for actual, prob in outcome_pairs]
perf_daily['AccuracyText'] = [get_acc_text(actual, prob) for actual, prob in outcome_pairs]
# Probabilities at or below 0.4 are flipped to 1 - p; everything else is kept.
perf_daily['ConfidenceScore'] = [
    1 - prob if prob <= 0.4 else prob
    for prob in perf_daily['Predicted']
]
perf_daily = perf_daily[
    ['TargetDate', 'Predicted', 'True', 'Accuracy', 'AccuracyText', 'ConfidenceScore']
]
883
+
884
def convert_df(df):
    """Serialise `df` to CSV text.

    NOTE: no caching is applied here; the conversion runs on every call.
    """
    csv_text = df.to_csv()
    return csv_text
887
+
888
+ csv = convert_df(perf_daily)
889
+
890
# File name used by the download button at the end of the script.
fname = 'performance_for_90m_model.csv'

# Prediction tab: headline result plus the probability-band table.
with tab1:
    # 90 minutes after the 6:30AM open is 8AM; the previous "7:30AM" label
    # was carried over from the 60-minute model.
    st.subheader(f'Pred for {curr_date} as of 8AM PST')
    st.write(results)
    st.write(df_probas)

# Feature row that was fed to the model.
with tab2:
    st.subheader('Latest Data for Pred')
    st.write(new_pred)

# Full historical frame.
with tab3:
    st.subheader('Historical Data')
    st.write(df_final)

# Aggregate metrics followed by the per-day grading.
with tab4:
    st.subheader('Performance')
    st.write(df_performance)
    st.write(perf_daily[['TargetDate', 'Predicted', 'True', 'Accuracy']])
909
+
910
# Offer the CSV only when a model actually ran: `csv` and `fname` are set
# inside the per-model branches, so submitting with no model selected would
# otherwise raise NameError here.
if submitted and option != '':
    st.download_button(
        label="Download Historical Performance",
        data=csv,
        file_name=fname,
    )
916
  st.caption('โš ๏ธ Downloading the CSV will reload the page. โš ๏ธ')