wnstnb committed on
Commit
cf3cfd9
·
1 Parent(s): 0e5b201

add some tables

Browse files
Files changed (1) hide show
  1. app.py +41 -5
app.py CHANGED
@@ -109,7 +109,7 @@ with st.form("choose_model"):
109
 
110
  with st.spinner("Training models..."):
111
  def train_models():
112
- res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
113
  return res1, xgbr, seq2
114
  res1, xgbr, seq2 = train_models()
115
  # st.success("✅ Models trained")
@@ -236,26 +236,37 @@ with st.form("choose_model"):
236
  data['ClosePct'] = data['ClosePct'].shift(-1)
237
  res1 = res1.merge(data['ClosePct'], left_index=True,right_index=True)
238
  # df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
239
- df_probas = res1.groupby(pd.cut(res1['Predicted'], _q)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
240
 
241
- df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf']
242
  df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
 
 
243
 
244
  green_proba = seq_proba[0]
245
  red_proba = 1 - green_proba
246
  do_not_play = (seq_proba[0] > lo_thres) and (seq_proba[0] <= hi_thres)
247
  stdev = 0.01
248
  score = None
 
249
  num_obs = None
250
  cond = None
251
  historical_proba = None
 
 
 
 
252
  text_cond = None
253
  operator = None
254
  intv = None
255
  for q in df_probas.index:
256
  if q.left <= green_proba <= q.right:
257
  historical_proba = df_probas.loc[q, 'PctGreen']
 
258
  num_obs = df_probas.loc[q, 'NumObs']
 
 
 
259
  intv = f'({q.left:.03f}, {q.right:.03f}])'
260
 
261
  qs = [f'({q.left:.2f}, {q.right:.2f}]' for q in df_probas.index]
@@ -265,6 +276,7 @@ with st.form("choose_model"):
265
  text_cond = '🟨'
266
  operator = ''
267
  score = seq_proba[0]
 
268
  cond = (res1['Predicted'] > lo_thres) & (res1['Predicted'] <= hi_thres)
269
  # num_obs = len(res1.loc[cond])
270
  # historical_proba = res1.loc[cond, 'True'].mean()
@@ -275,6 +287,7 @@ with st.form("choose_model"):
275
  text_cond = '🟩'
276
  operator = '>='
277
  score = green_proba
 
278
  # How many with this score?
279
  cond = (res1['Predicted'] >= green_proba)
280
  # num_obs = len(res1.loc[cond])
@@ -287,6 +300,7 @@ with st.form("choose_model"):
287
  text_cond = '🟥'
288
  operator = '<='
289
  score = red_proba
 
290
  # How many with this score?
291
  cond = (res1['Predicted'] <= seq_proba[0])
292
  # num_obs = len(res1.loc[cond])
@@ -295,6 +309,7 @@ with st.form("choose_model"):
295
  # print(cond)
296
 
297
  score_fmt = f'{score:.1%}'
 
298
 
299
  prev_close = data.loc[final_row,'Close']
300
  curr_close = data['Close'].iloc[-1]
@@ -309,7 +324,8 @@ with st.form("choose_model"):
309
  index=['Results'],
310
  data = {
311
  'Confidence':[f'{text_cond} {score:.1%}'],
312
- 'Calib. Proba':[f'{historical_proba:.1%}'],
 
313
  f'{intv}':[f'{num_obs}'],
314
  'Prev / Curr':[f'{prev_close:.2f} / {curr_close:.2f}']
315
  })
@@ -324,6 +340,21 @@ with st.form("choose_model"):
324
 
325
  prices.columns = ['']
326
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
328
  roc_auc_score_calib = roc_auc_score(res1.dropna(subset='CalibPredicted')['True'].astype(int), res1.dropna(subset='CalibPredicted')['CalibPredicted'].values)
329
  precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
@@ -424,7 +455,12 @@ with st.form("choose_model"):
424
  # Cache all DFs
425
  all_dfs = []
426
 
427
- st.dataframe(top_of_fold.set_index('Confidence',drop=True), use_container_width=True)
 
 
 
 
 
428
 
429
  tab1, tab2, tab3, tab4 = st.tabs(["🤖 Stats", "✨ New Data", "📚 Historical", "📊 Performance"])
430
 
 
109
 
110
  with st.spinner("Training models..."):
111
  def train_models():
112
+ res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 200, 1)
113
  return res1, xgbr, seq2
114
  res1, xgbr, seq2 = train_models()
115
  # st.success("✅ Models trained")
 
236
  data['ClosePct'] = data['ClosePct'].shift(-1)
237
  res1 = res1.merge(data['ClosePct'], left_index=True,right_index=True)
238
  # df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
239
+ df_probas = res1.groupby(pd.cut(res1['Predicted'], _q)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.median, lambda x: np.quantile(x, 0.25), lambda x: np.quantile(x, 0.75)]})
240
 
241
+ df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf','25P','75P']
242
  df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
243
+ df_probas['25P'] = df_probas['25P'].apply(lambda x: f'{x:.2%}')
244
+ df_probas['75P'] = df_probas['75P'].apply(lambda x: f'{x:.2%}')
245
 
246
  green_proba = seq_proba[0]
247
  red_proba = 1 - green_proba
248
  do_not_play = (seq_proba[0] > lo_thres) and (seq_proba[0] <= hi_thres)
249
  stdev = 0.01
250
  score = None
251
+ calib_score = None
252
  num_obs = None
253
  cond = None
254
  historical_proba = None
255
+ red_hist_proba = None
256
+ mid = None
257
+ lo = None
258
+ hi = None
259
  text_cond = None
260
  operator = None
261
  intv = None
262
  for q in df_probas.index:
263
  if q.left <= green_proba <= q.right:
264
  historical_proba = df_probas.loc[q, 'PctGreen']
265
+ red_hist_proba = 1 - historical_proba
266
  num_obs = df_probas.loc[q, 'NumObs']
267
+ mid = df_probas.loc[q, 'AvgPerf']
268
+ lo = df_probas.loc[q, '25P']
269
+ hi = df_probas.loc[q, '75P']
270
  intv = f'({q.left:.03f}, {q.right:.03f}])'
271
 
272
  qs = [f'({q.left:.2f}, {q.right:.2f}]' for q in df_probas.index]
 
276
  text_cond = '🟨'
277
  operator = ''
278
  score = seq_proba[0]
279
+ calib_score = historical_proba
280
  cond = (res1['Predicted'] > lo_thres) & (res1['Predicted'] <= hi_thres)
281
  # num_obs = len(res1.loc[cond])
282
  # historical_proba = res1.loc[cond, 'True'].mean()
 
287
  text_cond = '🟩'
288
  operator = '>='
289
  score = green_proba
290
+ calib_score = historical_proba
291
  # How many with this score?
292
  cond = (res1['Predicted'] >= green_proba)
293
  # num_obs = len(res1.loc[cond])
 
300
  text_cond = '🟥'
301
  operator = '<='
302
  score = red_proba
303
+ calib_score = red_hist_proba
304
  # How many with this score?
305
  cond = (res1['Predicted'] <= seq_proba[0])
306
  # num_obs = len(res1.loc[cond])
 
309
  # print(cond)
310
 
311
  score_fmt = f'{score:.1%}'
312
+ calib_score_fmt = f'{calib_score:.1%}'
313
 
314
  prev_close = data.loc[final_row,'Close']
315
  curr_close = data['Close'].iloc[-1]
 
324
  index=['Results'],
325
  data = {
326
  'Confidence':[f'{text_cond} {score:.1%}'],
327
+ # 'Calib. Proba':[f'{historical_proba:.1%}'],
328
+ 'Calib. Proba':[f'{text_cond} {calib_score_fmt}'],
329
  f'{intv}':[f'{num_obs}'],
330
  'Prev / Curr':[f'{prev_close:.2f} / {curr_close:.2f}']
331
  })
 
340
 
341
  prices.columns = ['']
342
 
343
+ targets = pd.DataFrame(
344
+ index=[
345
+ f'Curr ({(curr_close / prev_close) - 1:.2%})',
346
+ f'Low ({lo})',
347
+ f'Mid ({mid})',
348
+ f'High ({hi})'
349
+ ],
350
+ data=[
351
+ [f"{curr_close:.0f}"],
352
+ [f"{(1+float(lo.strip('%'))/100) * prev_close:.0f}"],
353
+ [f"{(1+float(mid.strip('%'))/100) * prev_close:.0f}"],
354
+ [f"{(1+float(hi.strip('%'))/100) * prev_close :.0f}"]
355
+ ],
356
+ columns=['Targets'])
357
+
358
  roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
359
  roc_auc_score_calib = roc_auc_score(res1.dropna(subset='CalibPredicted')['True'].astype(int), res1.dropna(subset='CalibPredicted')['CalibPredicted'].values)
360
  precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
 
455
  # Cache all DFs
456
  all_dfs = []
457
 
458
+ top1, top2 = st.columns(2)
459
+ # st.dataframe(top_of_fold.set_index('Confidence',drop=True), use_container_width=True)
460
+ with top1:
461
+ st.dataframe(top_of_fold.T, use_container_width=True)
462
+ with top2:
463
+ st.dataframe(targets, use_container_width=True)
464
 
465
  tab1, tab2, tab3, tab4 = st.tabs(["🤖 Stats", "✨ New Data", "📚 Historical", "📊 Performance"])
466