wnstnb committed on
Commit
a994b61
·
1 Parent(s): 389a9f2

some UI changes

Browse files
Files changed (1) hide show
  1. app.py +573 -562
app.py CHANGED
@@ -14,568 +14,579 @@ st.markdown('**PLEASE NOTE:** Model should be run at or after market open. Docum
14
 
15
  if st.button("๐Ÿงน Clear All"):
16
  st.cache_data.clear()
17
-
18
- if st.button('๐ŸŒž At Open'):
19
- from model_day import *
20
- with st.spinner('Loading data...'):
21
- data, df_final, final_row = get_data()
22
- # st.success("โœ… Historical data")
23
-
24
- with st.spinner("Training models..."):
25
- def train_models():
26
- res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
27
- return res1, xgbr, seq2
28
- res1, xgbr, seq2 = train_models()
29
- # st.success("โœ… Models trained")
30
-
31
- with st.spinner("Getting new prediction..."):
32
-
33
- # Get last row
34
- new_pred = data.loc[final_row, ['BigNewsDay',
35
- 'Quarter',
36
- 'Perf5Day',
37
- 'Perf5Day_n1',
38
- 'DaysGreen',
39
- 'DaysRed',
40
- 'CurrentGap',
41
- 'RangePct',
42
- 'RangePct_n1',
43
- 'RangePct_n2',
44
- 'OHLC4_VIX',
45
- 'OHLC4_VIX_n1',
46
- 'OHLC4_VIX_n2']]
47
-
48
- new_pred = pd.DataFrame(new_pred).T
49
- # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
50
- # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
51
- curr_date = final_row + BDay(1)
52
- curr_date = curr_date.strftime('%Y-%m-%d')
53
-
54
- new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
55
- new_pred['Quarter'] = new_pred['Quarter'].astype(int)
56
- new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
57
- new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
58
- new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
59
- new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
60
- new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
61
- new_pred['RangePct'] = new_pred['RangePct'].astype(float)
62
- new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
63
- new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
64
- new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
65
- new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
66
- new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
67
-
68
- st.success("โœ… All done!")
69
- tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
70
-
71
- seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
72
-
73
- green_proba = seq_proba[0]
74
- red_proba = 1 - green_proba
75
- do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
76
- stdev = 0.01
77
- score = None
78
- num_obs = None
79
- cond = None
80
- historical_proba = None
81
- text_cond = None
82
- operator = None
83
-
84
- if do_not_play:
85
- text_cond = '๐ŸŸจ'
86
- operator = ''
87
- score = seq_proba[0]
88
- cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
89
- num_obs = len(res1.loc[cond])
90
- historical_proba = res1.loc[cond, 'True'].mean()
91
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
- elif green_proba > red_proba:
94
- # If the day is predicted to be green, say so
95
- text_cond = '๐ŸŸฉ'
96
- operator = '>='
97
- score = green_proba
98
- # How many with this score?
99
- cond = (res1['Predicted'] >= green_proba)
100
- num_obs = len(res1.loc[cond])
101
- # How often green?
102
- historical_proba = res1.loc[cond, 'True'].mean()
103
- # print(cond)
104
-
105
- elif green_proba <= red_proba:
106
- # If the day is predicted to be green, say so
107
- text_cond = '๐ŸŸฅ'
108
- operator = '<='
109
- score = red_proba
110
- # How many with this score?
111
- cond = (res1['Predicted'] <= red_proba)
112
- num_obs = len(res1.loc[cond])
113
- # How often green?
114
- historical_proba = 1 - res1.loc[cond, 'True'].mean()
115
- # print(cond)
116
-
117
- score_fmt = f'{score:.1%}'
118
-
119
- results = pd.DataFrame(index=[
120
- 'PrevClose',
121
- 'Confidence Score',
122
- 'Success Rate',
123
- f'NumObs {operator} {"" if do_not_play else score_fmt}',
124
- ], data = [
125
- f"{data.loc[final_row,'Close']:.2f}",
126
- f'{text_cond} {score:.1%}',
127
- f'{historical_proba:.1%}',
128
- num_obs,
129
- ])
130
-
131
- results.columns = ['Outputs']
132
-
133
- # st.subheader('New Prediction')
134
-
135
- int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
136
- # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
137
- df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum]})
138
- df_probas.columns = ['PctGreen','NumObs','NumGreen']
139
-
140
- roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
141
- precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
142
- recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
143
- len_all = len(res1)
144
-
145
- res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
146
-
147
- roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
148
- precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
149
- recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
150
- len_hi = len(res2_filtered)
151
-
152
- df_performance = pd.DataFrame(
153
- index=[
154
- 'N',
155
- 'ROC AUC',
156
- 'Precision',
157
- 'Recall'
158
- ],
159
- columns = [
160
- 'All',
161
- 'High Confidence'
162
- ],
163
- data = [
164
- [len_all, len_hi],
165
- [roc_auc_score_all, roc_auc_score_hi],
166
- [precision_score_all, precision_score_hi],
167
- [recall_score_all, recall_score_hi]
168
- ]
169
- ).round(2)
170
-
171
- def get_acc(t, p):
172
- if t == False and p <= 0.4:
173
- return 'โœ…'
174
- elif t == True and p > 0.6:
175
- return 'โœ…'
176
- elif t == False and p > 0.6:
177
- return 'โŒ'
178
- elif t == True and p <= 0.4:
179
- return 'โŒ'
180
- else:
181
- return '๐ŸŸจ'
182
-
183
- perf_daily = res1.copy()
184
- perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
185
-
186
- tab1.subheader(f'Pred for {curr_date} as of 6:30AM PST')
187
- tab1.write(results)
188
- tab1.write(df_probas)
189
-
190
- tab2.subheader('Latest Data for Pred')
191
- tab2.write(new_pred)
192
-
193
- tab3.subheader('Historical Data')
194
- tab3.write(df_final)
195
-
196
- tab4.subheader('Performance')
197
- tab4.write(df_performance)
198
- tab4.write(perf_daily)
199
-
200
- if st.button('โŒš After 30 Mins'):
201
- from model_30m import *
202
- with st.spinner('Loading data...'):
203
- data, df_final, final_row = get_data()
204
- # st.success("โœ… Historical data")
205
-
206
- with st.spinner("Training models..."):
207
- def train_models():
208
- res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
209
- return res1, xgbr, seq2
210
- res1, xgbr, seq2 = train_models()
211
- # st.success("โœ… Models trained")
212
-
213
- with st.spinner("Getting new prediction..."):
214
-
215
- # Get last row
216
- new_pred = data.loc[final_row, ['BigNewsDay',
217
- 'Quarter',
218
- 'Perf5Day',
219
- 'Perf5Day_n1',
220
- 'DaysGreen',
221
- 'DaysRed',
222
- 'CurrentHigh30toClose',
223
- 'CurrentLow30toClose',
224
- 'CurrentClose30toClose',
225
- 'CurrentRange30',
226
- 'GapFill30',
227
- 'CurrentGap',
228
- 'RangePct',
229
- 'RangePct_n1',
230
- 'RangePct_n2',
231
- 'OHLC4_VIX',
232
- 'OHLC4_VIX_n1',
233
- 'OHLC4_VIX_n2']]
234
-
235
- new_pred = pd.DataFrame(new_pred).T
236
- # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
237
- # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
238
- curr_date = final_row + BDay(1)
239
- curr_date = curr_date.strftime('%Y-%m-%d')
240
-
241
- new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
242
- new_pred['Quarter'] = new_pred['Quarter'].astype(int)
243
- new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
244
- new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
245
- new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
246
- new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
247
- new_pred['CurrentHigh30toClose'] = new_pred['CurrentHigh30toClose'].astype(float)
248
- new_pred['CurrentLow30toClose'] = new_pred['CurrentLow30toClose'].astype(float)
249
- new_pred['CurrentClose30toClose'] = new_pred['CurrentClose30toClose'].astype(float)
250
- new_pred['CurrentRange30'] = new_pred['CurrentRange30'].astype(float)
251
- new_pred['GapFill30'] = new_pred['GapFill30'].astype(float)
252
- new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
253
- new_pred['RangePct'] = new_pred['RangePct'].astype(float)
254
- new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
255
- new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
256
- new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
257
- new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
258
- new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
259
-
260
- st.success("โœ… All done!")
261
- tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
262
-
263
- seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
264
-
265
- green_proba = seq_proba[0]
266
- red_proba = 1 - green_proba
267
- do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
268
- stdev = 0.01
269
- score = None
270
- num_obs = None
271
- cond = None
272
- historical_proba = None
273
- text_cond = None
274
- operator = None
275
-
276
- if do_not_play:
277
- text_cond = '๐ŸŸจ'
278
- operator = ''
279
- score = seq_proba[0]
280
- cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
281
- num_obs = len(res1.loc[cond])
282
- historical_proba = res1.loc[cond, 'True'].mean()
283
-
 
284
 
285
- elif green_proba > red_proba:
286
- # If the day is predicted to be green, say so
287
- text_cond = '๐ŸŸฉ'
288
- operator = '>='
289
- score = green_proba
290
- # How many with this score?
291
- cond = (res1['Predicted'] >= green_proba)
292
- num_obs = len(res1.loc[cond])
293
- # How often green?
294
- historical_proba = res1.loc[cond, 'True'].mean()
295
- # print(cond)
296
-
297
- elif green_proba <= red_proba:
298
- # If the day is predicted to be green, say so
299
- text_cond = '๐ŸŸฅ'
300
- operator = '<='
301
- score = red_proba
302
- # How many with this score?
303
- cond = (res1['Predicted'] <= red_proba)
304
- num_obs = len(res1.loc[cond])
305
- # How often green?
306
- historical_proba = 1 - res1.loc[cond, 'True'].mean()
307
- # print(cond)
308
-
309
- score_fmt = f'{score:.1%}'
310
-
311
- results = pd.DataFrame(index=[
312
- 'PrevClose',
313
- 'Confidence Score',
314
- 'Success Rate',
315
- f'NumObs {operator} {"" if do_not_play else score_fmt}',
316
- ], data = [
317
- f"{data.loc[final_row,'Close']:.2f}",
318
- f'{text_cond} {score:.1%}',
319
- f'{historical_proba:.1%}',
320
- num_obs,
321
- ])
322
-
323
- results.columns = ['Outputs']
324
-
325
- # st.subheader('New Prediction')
326
-
327
- int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
328
- # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
329
- df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum]})
330
- df_probas.columns = ['PctGreen','NumObs','NumGreen']
331
-
332
- roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
333
- precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
334
- recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
335
- len_all = len(res1)
336
-
337
- res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
338
-
339
- roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
340
- precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
341
- recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
342
- len_hi = len(res2_filtered)
343
-
344
- df_performance = pd.DataFrame(
345
- index=[
346
- 'N',
347
- 'ROC AUC',
348
- 'Precision',
349
- 'Recall'
350
- ],
351
- columns = [
352
- 'All',
353
- 'High Confidence'
354
- ],
355
- data = [
356
- [len_all, len_hi],
357
- [roc_auc_score_all, roc_auc_score_hi],
358
- [precision_score_all, precision_score_hi],
359
- [recall_score_all, recall_score_hi]
360
- ]
361
- ).round(2)
362
-
363
- def get_acc(t, p):
364
- if t == False and p <= 0.4:
365
- return 'โœ…'
366
- elif t == True and p > 0.6:
367
- return 'โœ…'
368
- elif t == False and p > 0.6:
369
- return 'โŒ'
370
- elif t == True and p <= 0.4:
371
- return 'โŒ'
372
- else:
373
- return '๐ŸŸจ'
374
-
375
- perf_daily = res1.copy()
376
- perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
377
-
378
- tab1.subheader(f'Pred for {curr_date} as of 7AM PST')
379
- tab1.write(results)
380
- tab1.write(df_probas)
381
-
382
- tab2.subheader('Latest Data for Pred')
383
- tab2.write(new_pred)
384
-
385
- tab3.subheader('Historical Data')
386
- tab3.write(df_final)
387
-
388
- tab4.subheader('Performance')
389
- tab4.write(df_performance)
390
- tab4.write(perf_daily.sort_index(ascending=False))
391
-
392
- if st.button('โณ After 60 Mins'):
393
- from model_1h import *
394
- with st.spinner('Loading data...'):
395
- data, df_final, final_row = get_data()
396
- # st.success("โœ… Historical data")
397
-
398
- with st.spinner("Training models..."):
399
- def train_models():
400
- res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
401
- return res1, xgbr, seq2
402
- res1, xgbr, seq2 = train_models()
403
- # st.success("โœ… Models trained")
404
-
405
- with st.spinner("Getting new prediction..."):
406
-
407
- # Get last row
408
- new_pred = data.loc[final_row, ['BigNewsDay',
409
- 'Quarter',
410
- 'Perf5Day',
411
- 'Perf5Day_n1',
412
- 'DaysGreen',
413
- 'DaysRed',
414
- 'CurrentHigh30toClose',
415
- 'CurrentLow30toClose',
416
- 'CurrentClose30toClose',
417
- 'CurrentRange30',
418
- 'GapFill30',
419
- 'CurrentGap',
420
- 'RangePct',
421
- 'RangePct_n1',
422
- 'RangePct_n2',
423
- 'OHLC4_VIX',
424
- 'OHLC4_VIX_n1',
425
- 'OHLC4_VIX_n2']]
426
-
427
- new_pred = pd.DataFrame(new_pred).T
428
- # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
429
- # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
430
- curr_date = final_row + BDay(1)
431
- curr_date = curr_date.strftime('%Y-%m-%d')
432
-
433
- new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
434
- new_pred['Quarter'] = new_pred['Quarter'].astype(int)
435
- new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
436
- new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
437
- new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
438
- new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
439
- new_pred['CurrentHigh30toClose'] = new_pred['CurrentHigh30toClose'].astype(float)
440
- new_pred['CurrentLow30toClose'] = new_pred['CurrentLow30toClose'].astype(float)
441
- new_pred['CurrentClose30toClose'] = new_pred['CurrentClose30toClose'].astype(float)
442
- new_pred['CurrentRange30'] = new_pred['CurrentRange30'].astype(float)
443
- new_pred['GapFill30'] = new_pred['GapFill30'].astype(float)
444
- new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
445
- new_pred['RangePct'] = new_pred['RangePct'].astype(float)
446
- new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
447
- new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
448
- new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
449
- new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
450
- new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
451
-
452
- st.success("โœ… All done!")
453
- tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
454
-
455
- seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
456
-
457
- green_proba = seq_proba[0]
458
- red_proba = 1 - green_proba
459
- do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
460
- stdev = 0.01
461
- score = None
462
- num_obs = None
463
- cond = None
464
- historical_proba = None
465
- text_cond = None
466
- operator = None
467
-
468
- if do_not_play:
469
- text_cond = '๐ŸŸจ'
470
- operator = ''
471
- score = seq_proba[0]
472
- cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
473
- num_obs = len(res1.loc[cond])
474
- historical_proba = res1.loc[cond, 'True'].mean()
475
-
476
 
477
- elif green_proba > red_proba:
478
- # If the day is predicted to be green, say so
479
- text_cond = '๐ŸŸฉ'
480
- operator = '>='
481
- score = green_proba
482
- # How many with this score?
483
- cond = (res1['Predicted'] >= green_proba)
484
- num_obs = len(res1.loc[cond])
485
- # How often green?
486
- historical_proba = res1.loc[cond, 'True'].mean()
487
- # print(cond)
488
-
489
- elif green_proba <= red_proba:
490
- # If the day is predicted to be green, say so
491
- text_cond = '๐ŸŸฅ'
492
- operator = '<='
493
- score = red_proba
494
- # How many with this score?
495
- cond = (res1['Predicted'] <= red_proba)
496
- num_obs = len(res1.loc[cond])
497
- # How often green?
498
- historical_proba = 1 - res1.loc[cond, 'True'].mean()
499
- # print(cond)
500
-
501
- score_fmt = f'{score:.1%}'
502
-
503
- results = pd.DataFrame(index=[
504
- 'PrevClose',
505
- 'Confidence Score',
506
- 'Success Rate',
507
- f'NumObs {operator} {"" if do_not_play else score_fmt}',
508
- ], data = [
509
- f"{data.loc[final_row,'Close']:.2f}",
510
- f'{text_cond} {score:.1%}',
511
- f'{historical_proba:.1%}',
512
- num_obs,
513
- ])
514
-
515
- results.columns = ['Outputs']
516
-
517
- # st.subheader('New Prediction')
518
- int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
519
- # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
520
- df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum]})
521
- df_probas.columns = ['PctGreen','NumObs','NumGreen']
522
-
523
- roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
524
- precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
525
- recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
526
- len_all = len(res1)
527
-
528
- res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
529
-
530
- roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
531
- precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
532
- recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
533
- len_hi = len(res2_filtered)
534
-
535
- df_performance = pd.DataFrame(
536
- index=[
537
- 'N',
538
- 'ROC AUC',
539
- 'Precision',
540
- 'Recall'
541
- ],
542
- columns = [
543
- 'All',
544
- 'High Confidence'
545
- ],
546
- data = [
547
- [len_all, len_hi],
548
- [roc_auc_score_all, roc_auc_score_hi],
549
- [precision_score_all, precision_score_hi],
550
- [recall_score_all, recall_score_hi]
551
- ]
552
- ).round(2)
553
-
554
- def get_acc(t, p):
555
- if t == False and p <= 0.4:
556
- return 'โœ…'
557
- elif t == True and p > 0.6:
558
- return 'โœ…'
559
- elif t == False and p > 0.6:
560
- return 'โŒ'
561
- elif t == True and p <= 0.4:
562
- return 'โŒ'
563
- else:
564
- return '๐ŸŸจ'
565
-
566
- perf_daily = res1.copy()
567
- perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
568
-
569
- tab1.subheader(f'Pred for {curr_date} as of 7:30AM PST')
570
- tab1.write(results)
571
- tab1.write(df_probas)
572
-
573
- tab2.subheader('Latest Data for Pred')
574
- tab2.write(new_pred)
575
-
576
- tab3.subheader('Historical Data')
577
- tab3.write(df_final)
578
-
579
- tab4.subheader('Performance')
580
- tab4.write(df_performance)
581
- tab4.write(perf_daily)
 
14
 
15
  if st.button("๐Ÿงน Clear All"):
16
  st.cache_data.clear()
17
+ col1, col2 = st.columns(2)
18
+
19
+ option = st.selectbox(
20
+ 'Select a model, then run.',
21
+ ('', '๐ŸŒž At Open', 'โŒš 30 Mins', 'โณ 60 Mins'))
22
+
23
+ if option == '':
24
+ st.write('Gotta pick one.')
25
+
26
+ elif option == '๐ŸŒž At Open':
27
+ if st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run'):
28
+ from model_day import *
29
+ with st.spinner('Loading data...'):
30
+ data, df_final, final_row = get_data()
31
+ # st.success("โœ… Historical data")
32
+
33
+ with st.spinner("Training models..."):
34
+ def train_models():
35
+ res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
36
+ return res1, xgbr, seq2
37
+ res1, xgbr, seq2 = train_models()
38
+ # st.success("โœ… Models trained")
39
+
40
+ with st.spinner("Getting new prediction..."):
41
+
42
+ # Get last row
43
+ new_pred = data.loc[final_row, ['BigNewsDay',
44
+ 'Quarter',
45
+ 'Perf5Day',
46
+ 'Perf5Day_n1',
47
+ 'DaysGreen',
48
+ 'DaysRed',
49
+ 'CurrentGap',
50
+ 'RangePct',
51
+ 'RangePct_n1',
52
+ 'RangePct_n2',
53
+ 'OHLC4_VIX',
54
+ 'OHLC4_VIX_n1',
55
+ 'OHLC4_VIX_n2']]
56
+
57
+ new_pred = pd.DataFrame(new_pred).T
58
+ # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
59
+ # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
60
+ curr_date = final_row + BDay(1)
61
+ curr_date = curr_date.strftime('%Y-%m-%d')
62
+
63
+ new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
64
+ new_pred['Quarter'] = new_pred['Quarter'].astype(int)
65
+ new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
66
+ new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
67
+ new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
68
+ new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
69
+ new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
70
+ new_pred['RangePct'] = new_pred['RangePct'].astype(float)
71
+ new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
72
+ new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
73
+ new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
74
+ new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
75
+ new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
76
+
77
+ st.success("โœ… All done!")
78
+ tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
79
+
80
+ seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
81
+
82
+ green_proba = seq_proba[0]
83
+ red_proba = 1 - green_proba
84
+ do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
85
+ stdev = 0.01
86
+ score = None
87
+ num_obs = None
88
+ cond = None
89
+ historical_proba = None
90
+ text_cond = None
91
+ operator = None
92
+
93
+ if do_not_play:
94
+ text_cond = '๐ŸŸจ'
95
+ operator = ''
96
+ score = seq_proba[0]
97
+ cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
98
+ num_obs = len(res1.loc[cond])
99
+ historical_proba = res1.loc[cond, 'True'].mean()
100
+
101
+
102
+ elif green_proba > red_proba:
103
+ # If the day is predicted to be green, say so
104
+ text_cond = '๐ŸŸฉ'
105
+ operator = '>='
106
+ score = green_proba
107
+ # How many with this score?
108
+ cond = (res1['Predicted'] >= green_proba)
109
+ num_obs = len(res1.loc[cond])
110
+ # How often green?
111
+ historical_proba = res1.loc[cond, 'True'].mean()
112
+ # print(cond)
113
+
114
+ elif green_proba <= red_proba:
115
+ # If the day is predicted to be green, say so
116
+ text_cond = '๐ŸŸฅ'
117
+ operator = '<='
118
+ score = red_proba
119
+ # How many with this score?
120
+ cond = (res1['Predicted'] <= red_proba)
121
+ num_obs = len(res1.loc[cond])
122
+ # How often green?
123
+ historical_proba = 1 - res1.loc[cond, 'True'].mean()
124
+ # print(cond)
125
+
126
+ score_fmt = f'{score:.1%}'
127
+
128
+ results = pd.DataFrame(index=[
129
+ 'PrevClose',
130
+ 'Confidence Score',
131
+ 'Success Rate',
132
+ f'NumObs {operator} {"" if do_not_play else score_fmt}',
133
+ ], data = [
134
+ f"{data.loc[final_row,'Close']:.2f}",
135
+ f'{text_cond} {score:.1%}',
136
+ f'{historical_proba:.1%}',
137
+ num_obs,
138
+ ])
139
+
140
+ results.columns = ['Outputs']
141
+
142
+ # st.subheader('New Prediction')
143
+
144
+ int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
145
+ # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
146
+ df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum]})
147
+ df_probas.columns = ['PctGreen','NumObs','NumGreen']
148
+
149
+ roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
150
+ precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
151
+ recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
152
+ len_all = len(res1)
153
+
154
+ res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
155
+
156
+ roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
157
+ precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
158
+ recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
159
+ len_hi = len(res2_filtered)
160
+
161
+ df_performance = pd.DataFrame(
162
+ index=[
163
+ 'N',
164
+ 'ROC AUC',
165
+ 'Precision',
166
+ 'Recall'
167
+ ],
168
+ columns = [
169
+ 'All',
170
+ 'High Confidence'
171
+ ],
172
+ data = [
173
+ [len_all, len_hi],
174
+ [roc_auc_score_all, roc_auc_score_hi],
175
+ [precision_score_all, precision_score_hi],
176
+ [recall_score_all, recall_score_hi]
177
+ ]
178
+ ).round(2)
179
+
180
+ def get_acc(t, p):
181
+ if t == False and p <= 0.4:
182
+ return 'โœ…'
183
+ elif t == True and p > 0.6:
184
+ return 'โœ…'
185
+ elif t == False and p > 0.6:
186
+ return 'โŒ'
187
+ elif t == True and p <= 0.4:
188
+ return 'โŒ'
189
+ else:
190
+ return '๐ŸŸจ'
191
+
192
+ perf_daily = res1.copy()
193
+ perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
194
 
195
+ tab1.subheader(f'Pred for {curr_date} as of 6:30AM PST')
196
+ tab1.write(results)
197
+ tab1.write(df_probas)
198
+
199
+ tab2.subheader('Latest Data for Pred')
200
+ tab2.write(new_pred)
201
+
202
+ tab3.subheader('Historical Data')
203
+ tab3.write(df_final)
204
+
205
+ tab4.subheader('Performance')
206
+ tab4.write(df_performance)
207
+ tab4.write(perf_daily)
208
+
209
+ elif option == 'โŒš 30 Mins':
210
+ if st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run'):
211
+ from model_30m import *
212
+ with st.spinner('Loading data...'):
213
+ data, df_final, final_row = get_data()
214
+ # st.success("โœ… Historical data")
215
+
216
+ with st.spinner("Training models..."):
217
+ def train_models():
218
+ res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
219
+ return res1, xgbr, seq2
220
+ res1, xgbr, seq2 = train_models()
221
+ # st.success("โœ… Models trained")
222
+
223
+ with st.spinner("Getting new prediction..."):
224
+
225
+ # Get last row
226
+ new_pred = data.loc[final_row, ['BigNewsDay',
227
+ 'Quarter',
228
+ 'Perf5Day',
229
+ 'Perf5Day_n1',
230
+ 'DaysGreen',
231
+ 'DaysRed',
232
+ 'CurrentHigh30toClose',
233
+ 'CurrentLow30toClose',
234
+ 'CurrentClose30toClose',
235
+ 'CurrentRange30',
236
+ 'GapFill30',
237
+ 'CurrentGap',
238
+ 'RangePct',
239
+ 'RangePct_n1',
240
+ 'RangePct_n2',
241
+ 'OHLC4_VIX',
242
+ 'OHLC4_VIX_n1',
243
+ 'OHLC4_VIX_n2']]
244
+
245
+ new_pred = pd.DataFrame(new_pred).T
246
+ # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
247
+ # last_date = datetime.datetime.strptime(data.loc[final_row], '%Y-%m-%d')
248
+ curr_date = final_row + BDay(1)
249
+ curr_date = curr_date.strftime('%Y-%m-%d')
250
+
251
+ new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
252
+ new_pred['Quarter'] = new_pred['Quarter'].astype(int)
253
+ new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
254
+ new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
255
+ new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
256
+ new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
257
+ new_pred['CurrentHigh30toClose'] = new_pred['CurrentHigh30toClose'].astype(float)
258
+ new_pred['CurrentLow30toClose'] = new_pred['CurrentLow30toClose'].astype(float)
259
+ new_pred['CurrentClose30toClose'] = new_pred['CurrentClose30toClose'].astype(float)
260
+ new_pred['CurrentRange30'] = new_pred['CurrentRange30'].astype(float)
261
+ new_pred['GapFill30'] = new_pred['GapFill30'].astype(float)
262
+ new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
263
+ new_pred['RangePct'] = new_pred['RangePct'].astype(float)
264
+ new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
265
+ new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
266
+ new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
267
+ new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
268
+ new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
269
+
270
# Model work is finished: announce it and lay out one tab per output view.
st.success("✅ All done!")
tab1, tab2, tab3, tab4 = st.tabs(["🔮 Prediction", "✨ New Data", "🗄 Historical", "📊 Performance"])

seq_proba = seq_predict_proba(new_pred, xgbr, seq2)

# The first element is treated as the probability of a green day (it is
# compared against res1['Predicted'] below); red is its complement.
green_proba = seq_proba[0]
red_proba = 1 - green_proba
# Predictions in (0.4, 0.6] are considered too uncertain to act on.
do_not_play = 0.4 < green_proba <= 0.6

if do_not_play:
    text_cond = '🟨'
    operator = ''
    score = green_proba
    # Back-tested rows that fell inside the same neutral band.
    cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
    # How often were those days green?
    historical_proba = res1.loc[cond, 'True'].mean()
elif green_proba > red_proba:
    # Green call: look at history predicted at least this confidently green.
    text_cond = '🟩'
    operator = '>='
    score = green_proba
    cond = res1['Predicted'] >= green_proba
    # How often were those days green?
    historical_proba = res1.loc[cond, 'True'].mean()
else:
    # Red call: res1['Predicted'] holds green probabilities, so the matching
    # history is rows at or below green_proba. Comparing against red_proba
    # (the complement) would sweep in nearly every row.
    text_cond = '🟥'
    operator = '<='
    score = red_proba
    cond = res1['Predicted'] <= green_proba
    # How often were those days red?
    historical_proba = 1 - res1.loc[cond, 'True'].mean()

num_obs = int(cond.sum())

# The NumObs label shows the threshold that was applied to res1['Predicted'];
# the neutral band has no single threshold, so it is left blank.
threshold_fmt = '' if do_not_play else f'{green_proba:.1%}'

results = pd.DataFrame(
    index=[
        'PrevClose',
        'Confidence Score',
        'Success Rate',
        f'NumObs {operator} {threshold_fmt}',
    ],
    data=[
        f"{data.loc[final_row,'Close']:.2f}",
        f'{text_cond} {score:.1%}',
        f'{historical_proba:.1%}',
        num_obs,
    ],
    columns=['Outputs'],
)
334
+
335
# Calibration table: bucket the back-tested predictions into fixed probability
# bands and report how often each band actually turned out green.
int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']
proba_band = pd.cut(
    res1['Predicted'],
    bins=[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf],
    labels=int_labels,
)
# String aggregations replace the deprecated np.mean/len/np.sum callables;
# observed=False keeps empty bands in the table.
df_probas = res1.groupby(proba_band, observed=False).agg({'True': ['mean', 'size', 'sum']})
df_probas.columns = ['PctGreen', 'NumObs', 'NumGreen']


def _classification_scores(frame):
    """Return [N, ROC AUC, precision, recall] for the rows of *frame*.

    *frame* needs a 'True' column (actual outcome) and a 'Predicted' column
    (probability); predictions above 0.5 count as positive calls. Metrics
    that cannot be computed (no rows, or only one outcome class for AUC) are
    reported as NaN instead of raising.
    """
    if frame.empty:
        return [0, np.nan, np.nan, np.nan]
    y_true = frame['True'].astype(int)
    y_call = frame['Predicted'] > 0.5
    # roc_auc_score raises ValueError when only one class is present.
    auc = roc_auc_score(y_true, frame['Predicted'].values) if y_true.nunique() > 1 else np.nan
    return [len(frame), auc, precision_score(y_true, y_call), recall_score(y_true, y_call)]


# "High confidence" drops the neutral (0.4, 0.6] band.
res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]

df_performance = pd.DataFrame(
    {
        'All': _classification_scores(res1),
        'High Confidence': _classification_scores(res2_filtered),
    },
    index=['N', 'ROC AUC', 'Precision', 'Recall'],
).round(2)
372
+
373
def get_acc(t, p, lo=0.4, hi=0.6):
    """Grade one back-tested prediction as hit, miss or neutral.

    Args:
        t: Actual outcome, compared against True/False.
        p: Predicted probability of the True outcome.
        lo: At or below this probability the prediction counts as a
            False call.
        hi: Above this probability the prediction counts as a True call.

    Returns:
        '✅' when the call matches the outcome, '❌' when it contradicts it,
        and '🟨' when the probability lies in the neutral band (lo, hi] or
        the outcome is neither True nor False.
    """
    if p <= lo:
        called_true = False
    elif p > hi:
        called_true = True
    else:
        # Neutral band (also reached for NaN probabilities): no call made.
        return '🟨'

    if t == called_true:
        return '✅'
    if t == (not called_true):
        return '❌'
    return '🟨'
384
+
385
# Tag every back-tested row with the hit/miss/neutral marker from get_acc.
perf_daily = res1.assign(
    Accuracy=list(map(get_acc, res1['True'], res1['Predicted']))
)

# Prediction tab: headline summary followed by the calibration table.
tab1.subheader(f'Pred for {curr_date} as of 7AM PST')
for table in (results, df_probas):
    tab1.write(table)

# New-data tab: the single feature row that was scored.
tab2.subheader('Latest Data for Pred')
tab2.write(new_pred)

# Historical tab: the full modelling frame.
tab3.subheader('Historical Data')
tab3.write(df_final)

# Performance tab: aggregate metrics, then per-row results in descending
# index order.
tab4.subheader('Performance')
tab4.write(df_performance)
tab4.write(perf_daily.sort_index(ascending=False))
401
+
402
+ elif option == 'โณ 60 Mins':
403
+ if st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run'):
404
+ from model_1h import *
405
# get_data is presumably supplied by the model_1h star-import just above.
with st.spinner('Loading data...'):
    data, df_final, final_row = get_data()

with st.spinner("Training models..."):
    def train_models():
        """Run walk-forward validation on the complete rows of df_final.

        Returns the result of walk_forward_validation_seq, which the caller
        unpacks as (res1, xgbr, seq2).
        """
        return walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)

    res1, xgbr, seq2 = train_models()

with st.spinner("Getting new prediction..."):
    # Columns pulled from the latest row to build the row that gets scored.
    feature_cols = [
        'BigNewsDay',
        'Quarter',
        'Perf5Day',
        'Perf5Day_n1',
        'DaysGreen',
        'DaysRed',
        'CurrentHigh30toClose',
        'CurrentLow30toClose',
        'CurrentClose30toClose',
        'CurrentRange30',
        'GapFill30',
        'CurrentGap',
        'RangePct',
        'RangePct_n1',
        'RangePct_n2',
        'OHLC4_VIX',
        'OHLC4_VIX_n1',
        'OHLC4_VIX_n2',
    ]

    # Selecting one row yields a Series; transpose it back into a one-row frame.
    new_pred = pd.DataFrame(data.loc[final_row, feature_cols]).T

    # The prediction is labelled with the business day after the last row.
    next_session = final_row + BDay(1)
    curr_date = next_session.strftime('%Y-%m-%d')

    # Cast each column explicitly: everything is float except the quarter
    # (int) and the two 5-day performance flags (bool).
    column_types = dict.fromkeys(feature_cols, float)
    column_types.update({'Quarter': int, 'Perf5Day': bool, 'Perf5Day_n1': bool})
    new_pred = new_pred.astype(column_types)
462
+
463
# Model work is finished: announce it and lay out one tab per output view.
st.success("✅ All done!")
tab1, tab2, tab3, tab4 = st.tabs(["🔮 Prediction", "✨ New Data", "🗄 Historical", "📊 Performance"])

seq_proba = seq_predict_proba(new_pred, xgbr, seq2)

# The first element is treated as the probability of a green day (it is
# compared against res1['Predicted'] below); red is its complement.
green_proba = seq_proba[0]
red_proba = 1 - green_proba
# Predictions in (0.4, 0.6] are considered too uncertain to act on.
do_not_play = 0.4 < green_proba <= 0.6

if do_not_play:
    text_cond = '🟨'
    operator = ''
    score = green_proba
    # Back-tested rows that fell inside the same neutral band.
    cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
    # How often were those days green?
    historical_proba = res1.loc[cond, 'True'].mean()
elif green_proba > red_proba:
    # Green call: look at history predicted at least this confidently green.
    text_cond = '🟩'
    operator = '>='
    score = green_proba
    cond = res1['Predicted'] >= green_proba
    # How often were those days green?
    historical_proba = res1.loc[cond, 'True'].mean()
else:
    # Red call: res1['Predicted'] holds green probabilities, so the matching
    # history is rows at or below green_proba. Comparing against red_proba
    # (the complement) would sweep in nearly every row.
    text_cond = '🟥'
    operator = '<='
    score = red_proba
    cond = res1['Predicted'] <= green_proba
    # How often were those days red?
    historical_proba = 1 - res1.loc[cond, 'True'].mean()

num_obs = int(cond.sum())

# The NumObs label shows the threshold that was applied to res1['Predicted'];
# the neutral band has no single threshold, so it is left blank.
threshold_fmt = '' if do_not_play else f'{green_proba:.1%}'

results = pd.DataFrame(
    index=[
        'PrevClose',
        'Confidence Score',
        'Success Rate',
        f'NumObs {operator} {threshold_fmt}',
    ],
    data=[
        f"{data.loc[final_row,'Close']:.2f}",
        f'{text_cond} {score:.1%}',
        f'{historical_proba:.1%}',
        num_obs,
    ],
    columns=['Outputs'],
)
527
+
528
# Calibration table: bucket the back-tested predictions into fixed probability
# bands and report how often each band actually turned out green.
int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']
proba_band = pd.cut(
    res1['Predicted'],
    bins=[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf],
    labels=int_labels,
)
# String aggregations replace the deprecated np.mean/len/np.sum callables;
# observed=False keeps empty bands in the table.
df_probas = res1.groupby(proba_band, observed=False).agg({'True': ['mean', 'size', 'sum']})
df_probas.columns = ['PctGreen', 'NumObs', 'NumGreen']


def _classification_scores(frame):
    """Return [N, ROC AUC, precision, recall] for the rows of *frame*.

    *frame* needs a 'True' column (actual outcome) and a 'Predicted' column
    (probability); predictions above 0.5 count as positive calls. Metrics
    that cannot be computed (no rows, or only one outcome class for AUC) are
    reported as NaN instead of raising.
    """
    if frame.empty:
        return [0, np.nan, np.nan, np.nan]
    y_true = frame['True'].astype(int)
    y_call = frame['Predicted'] > 0.5
    # roc_auc_score raises ValueError when only one class is present.
    auc = roc_auc_score(y_true, frame['Predicted'].values) if y_true.nunique() > 1 else np.nan
    return [len(frame), auc, precision_score(y_true, y_call), recall_score(y_true, y_call)]


# "High confidence" drops the neutral (0.4, 0.6] band.
res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]

df_performance = pd.DataFrame(
    {
        'All': _classification_scores(res1),
        'High Confidence': _classification_scores(res2_filtered),
    },
    index=['N', 'ROC AUC', 'Precision', 'Recall'],
).round(2)
564
+
565
def get_acc(t, p, lo=0.4, hi=0.6):
    """Grade one back-tested prediction as hit, miss or neutral.

    Args:
        t: Actual outcome, compared against True/False.
        p: Predicted probability of the True outcome.
        lo: At or below this probability the prediction counts as a
            False call.
        hi: Above this probability the prediction counts as a True call.

    Returns:
        '✅' when the call matches the outcome, '❌' when it contradicts it,
        and '🟨' when the probability lies in the neutral band (lo, hi] or
        the outcome is neither True nor False.
    """
    if p <= lo:
        called_true = False
    elif p > hi:
        called_true = True
    else:
        # Neutral band (also reached for NaN probabilities): no call made.
        return '🟨'

    if t == called_true:
        return '✅'
    if t == (not called_true):
        return '❌'
    return '🟨'
576
+
577
# Tag every back-tested row with the hit/miss/neutral marker from get_acc.
perf_daily = res1.copy()
perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]

# Prediction tab: headline summary followed by the calibration table.
tab1.subheader(f'Pred for {curr_date} as of 7:30AM PST')
tab1.write(results)
tab1.write(df_probas)

# New-data tab: the single feature row that was scored.
tab2.subheader('Latest Data for Pred')
tab2.write(new_pred)

# Historical tab: the full modelling frame.
tab3.subheader('Historical Data')
tab3.write(df_final)

# Performance tab: aggregate metrics, then per-row results. Sorted in
# descending index order so this view matches the at-open model's tab.
tab4.subheader('Performance')
tab4.write(df_performance)
tab4.write(perf_daily.sort_index(ascending=False))