wnstnb committed
Commit 68219a2 · 1 Parent(s): f5b2435

Add gamedayspx port

Files changed (1)
  1. app.py +458 -2
app.py CHANGED
@@ -1,4 +1,460 @@
  import streamlit as st
- 
- x = st.slider('Select a value')
- st.write(x, 'squared is', x * x)
+ import pandas as pd
+ import pandas_datareader as pdr
+ import numpy as np
+ import yfinance as yf
+ import json
+ import requests
+ from bs4 import BeautifulSoup
+ from typing import List
+ import xgboost as xgb
+ from tqdm import tqdm
+ from sklearn import linear_model
+ import joblib
+ import os
+
+ def walk_forward_validation(df, target_column, num_training_rows, num_periods):
+
+     # Create a linear regression model (XGBRegressor kept as an alternative)
+     # model = xgb.XGBRegressor(n_estimators=100, objective='reg:squarederror', random_state = 42)
+     model = linear_model.LinearRegression()
+
+     overall_results = []
+     # Iterate over the rows in the DataFrame, one step at a time
+     for i in tqdm(range(num_training_rows, df.shape[0] - num_periods + 1), desc='LR Model'):
+         # Split the data into training and test sets
+         X_train = df.drop(target_column, axis=1).iloc[:i]
+         y_train = df[target_column].iloc[:i]
+         X_test = df.drop(target_column, axis=1).iloc[i:i+num_periods]
+         y_test = df[target_column].iloc[i:i+num_periods]
+
+         # Fit the model to the training data
+         model.fit(X_train, y_train)
+
+         # Make a prediction on the test data
+         predictions = model.predict(X_test)
+
+         # Create a DataFrame to store the true and predicted values
+         result_df = pd.DataFrame({'True': y_test, 'Predicted': predictions}, index=y_test.index)
+
+         overall_results.append(result_df)
+
+     df_results = pd.concat(overall_results)
+     # model.save_model('model_lr.bin')
+     # Return the true and predicted values, and fitted model
+     return df_results, model
+
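+ # Minimal usage sketch (illustrative only; assumes df already holds the
+ # feature columns plus a 'Target' column):
+ #   res, fitted = walk_forward_validation(df, 'Target', 100, 1)
+ # Each step trains on rows [0:i) and predicts rows [i:i+num_periods), so
+ # every row of res is strictly out-of-sample.
+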
+ def walk_forward_validation_seq(df, target_column_clf, target_column_regr, num_training_rows, num_periods):
+
+     # Run the regression model first to get its out-of-sample predictions
+     res, model1 = walk_forward_validation(df.drop(columns=[target_column_clf]).dropna(), target_column_regr, num_training_rows, num_periods)
+     # joblib.dump(model1, 'model1.bin')
+
+     # Merge the result df back on the df for feeding into the classifier
+     for_merge = res[['Predicted']].copy()  # .copy() avoids a SettingWithCopyWarning
+     for_merge.columns = ['RegrModelOut']
+     for_merge['RegrModelOut'] = for_merge['RegrModelOut'] > 0
+     df = df.merge(for_merge, left_index=True, right_index=True)
+     df = df.drop(columns=[target_column_regr])
+     df = df[[
+         'CurrentGap', 'RegrModelOut', target_column_clf
+     ]]
+
+     df[target_column_clf] = df[target_column_clf].astype(bool)
+     df['RegrModelOut'] = df['RegrModelOut'].astype(bool)
+
+     # Create an XGBClassifier model
+     model2 = xgb.XGBClassifier(n_estimators=10, random_state = 42)
+     # model = linear_model.LogisticRegression(max_iter=1500)
+
+     overall_results = []
+     # Iterate over the rows in the DataFrame, one step at a time
+     for i in tqdm(range(num_training_rows, df.shape[0] - num_periods + 1), desc='CLF Model'):
+         # Split the data into training and test sets
+         X_train = df.drop(target_column_clf, axis=1).iloc[:i]
+         y_train = df[target_column_clf].iloc[:i]
+         X_test = df.drop(target_column_clf, axis=1).iloc[i:i+num_periods]
+         y_test = df[target_column_clf].iloc[i:i+num_periods]
+
+         # Fit the model to the training data
+         model2.fit(X_train, y_train)
+
+         # Predict the probability of the positive (green) class
+         predictions = model2.predict_proba(X_test)[:,-1]
+
+         # Create a DataFrame to store the true and predicted values
+         result_df = pd.DataFrame({'True': y_test, 'Predicted': predictions}, index=y_test.index)
+
+         overall_results.append(result_df)
+
+     df_results = pd.concat(overall_results)
+     # model1.save_model('model_ensemble.bin')
+     # joblib.dump(model2, 'model2.bin')
+     # Return the true and predicted values, and both fitted models
+     return df_results, model1, model2
+
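+ # Usage sketch (mirrors the call made in the app below):
+ #   res1, model1, model2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
+ # res1 pairs the classifier's out-of-sample P(green) with the true labels.
+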
+ def seq_predict_proba(df, trained_reg_model, trained_clf_model):
+     # Stage 1: regression output, reduced to its sign (positive return or not)
+     regr_pred = trained_reg_model.predict(df)
+     regr_pred = regr_pred > 0
+     new_df = df.copy()
+     new_df['RegrModelOut'] = regr_pred
+     # Stage 2: classifier consumes the gap plus the regressor's signal
+     clf_pred_proba = trained_clf_model.predict_proba(new_df[['CurrentGap','RegrModelOut']])[:,-1]
+     return clf_pred_proba
+
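+ # Two-stage flow in brief: the regressor's predicted next-day return is
+ # thresholded at zero and handed to the classifier as a boolean feature,
+ # which then scores the probability of a green day.
+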
+ def get_data():
+     # f = open('settings.json')
+     # j = json.load(f)
+     # API_KEY_FRED = j["API_KEY_FRED"]
+
+     API_KEY_FRED = os.getenv('API_KEY_FRED')
+
+     def parse_release_dates(release_id: str) -> List[str]:
+         release_dates_url = f'https://api.stlouisfed.org/fred/release/dates?release_id={release_id}&realtime_start=2015-01-01&include_release_dates_with_no_data=true&api_key={API_KEY_FRED}'
+         r = requests.get(release_dates_url)
+         text = r.text
+         soup = BeautifulSoup(text, 'xml')
+         dates = []
+         for release_date_tag in soup.find_all('release_date', {'release_id': release_id}):
+             dates.append(release_date_tag.text)
+         return dates
+
+     def parse_release_dates_obs(series_id: str) -> List[str]:
+         obs_url = f'https://api.stlouisfed.org/fred/series/observations?series_id={series_id}&realtime_start=2015-01-01&include_release_dates_with_no_data=true&api_key={API_KEY_FRED}'
+         r = requests.get(obs_url)
+         text = r.text
+         soup = BeautifulSoup(text, 'xml')
+         observations = []
+         for observation_tag in soup.find_all('observation'):
+             date = observation_tag.get('date')
+             value = observation_tag.get('value')
+             observations.append((date, value))
+         return observations
+
+     econ_dfs = {}
+
+     econ_tickers = [
+         'WALCL',
+         'NFCI',
+         'WRESBAL'
+     ]
+
+     for et in tqdm(econ_tickers, desc='getting econ tickers'):
+         # p = parse_release_dates_obs(et)
+         # df = pd.DataFrame(columns = ['ds',et], data = p)
+         df = pdr.get_data_fred(et)
+         df.index = df.index.rename('ds')
+         # df.index = pd.to_datetime(df.index.rename('ds')).dt.tz_localize(None)
+         # df['ds'] = pd.to_datetime(df['ds']).dt.tz_localize(None)
+         econ_dfs[et] = df
+
+     # walcl = pd.DataFrame(columns = ['ds','WALCL'], data = p)
+     # walcl['ds'] = pd.to_datetime(walcl['ds']).dt.tz_localize(None)
+
+     # nfci = pd.DataFrame(columns = ['ds','NFCI'], data = p2)
+     # nfci['ds'] = pd.to_datetime(nfci['ds']).dt.tz_localize(None)
+
+     release_ids = [
+         "10",   # Consumer Price Index
+         "46",   # Producer Price Index
+         "50",   # Employment Situation
+         "53",   # Gross Domestic Product
+         "103",  # Discount Rate Meeting Minutes
+         "180",  # Unemployment Insurance Weekly Claims Report
+         "194",  # ADP National Employment Report
+         "323"   # Trimmed Mean PCE Inflation Rate
+     ]
+
+     release_names = [
+         "CPI",
+         "PPI",
+         "NFP",
+         "GDP",
+         "FOMC",
+         "UNEMP",
+         "ADP",
+         "PCE"
+     ]
+
+     releases = {}
+
+     for rid, n in tqdm(zip(release_ids, release_names), total = len(release_ids), desc='Getting release dates'):
+         releases[rid] = {}
+         releases[rid]['dates'] = parse_release_dates(rid)
+         releases[rid]['name'] = n
+
+     # Build a one-column DF per release, indexed by its release dates, with the
+     # release name as the column and 1 as every value. Merged onto the main
+     # dataframe, days with that release become 1 and all other days NaN;
+     # filling NA with 0 turns the column into a true/false indicator of whether
+     # that economic data was released that day.
+     for rid in tqdm(release_ids, desc='Making indicators'):
+         releases[rid]['df'] = pd.DataFrame(
+             index=releases[rid]['dates'],
+             data={
+                 releases[rid]['name']: 1
+             })
+         releases[rid]['df'].index = pd.DatetimeIndex(releases[rid]['df'].index)
+         # releases[rid]['df']['ds'] = pd.to_datetime(releases[rid]['df']['ds']).dt.tz_localize(None)
+         # releases[rid]['df'] = releases[rid]['df'].set_index('ds')
+
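+     # Illustrative shape of one indicator frame (dates hypothetical):
+     #               CPI
+     # 2023-01-12      1
+     # 2023-02-14      1
+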
+     vix = yf.Ticker('^VIX')
+     spx = yf.Ticker('^GSPC')
+
+     prices_vix = vix.history(start='2018-07-01', interval='1d')
+     prices_spx = spx.history(start='2018-07-01', interval='1d')
+     # Normalize the tz-aware timestamps from yfinance to plain dates
+     prices_spx['index'] = [str(x).split()[0] for x in prices_spx.index]
+     prices_spx['index'] = pd.to_datetime(prices_spx['index']).dt.date
+     prices_spx.index = prices_spx['index']
+     prices_spx = prices_spx.drop(columns='index')
+
+     prices_vix['index'] = [str(x).split()[0] for x in prices_vix.index]
+     prices_vix['index'] = pd.to_datetime(prices_vix['index']).dt.date
+     prices_vix.index = prices_vix['index']
+     prices_vix = prices_vix.drop(columns='index')
+
+     data = prices_spx.merge(prices_vix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VIX'])
+     data.index = pd.DatetimeIndex(data.index)
+
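+     # e.g. (illustrative) str(Timestamp('2018-07-02 00:00:00-04:00')).split()[0]
+     # -> '2018-07-02', which pd.to_datetime(...).dt.date turns into a date object.
+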
+     # Features
+     data['PrevClose'] = data['Close'].shift(1)
+     data['Perf5Day'] = data['Close'] > data['Close'].shift(5)
+     data['Perf5Day_n1'] = data['Perf5Day'].shift(1)
+     data['Perf5Day_n1'] = data['Perf5Day_n1'].astype(bool)
+     data['GreenDay'] = (data['Close'] > data['PrevClose']) * 1
+     data['RedDay'] = (data['Close'] <= data['PrevClose']) * 1
+
+     data['VIX5Day'] = data['Close_VIX'] > data['Close_VIX'].shift(5)
+     data['VIX5Day_n1'] = data['VIX5Day'].shift(1).astype(bool)  # previous day's VIX trend flag
+
+     data['Range'] = data[['Open','High']].max(axis=1) - data[['Low','Open']].min(axis=1) # Current day range in points
+     data['RangePct'] = data['Range'] / data['Close']
+     data['VIXLevel'] = pd.qcut(data['Close_VIX'], 4)
+     data['OHLC4_VIX'] = data[['Open_VIX','High_VIX','Low_VIX','Close_VIX']].mean(axis=1)
+     data['OHLC4'] = data[['Open','High','Low','Close']].mean(axis=1)
+     data['OHLC4_Trend'] = data['OHLC4'] > data['OHLC4'].shift(1)
+     data['OHLC4_Trend_n1'] = data['OHLC4_Trend'].shift(1)
+     data['OHLC4_Trend_n1'] = data['OHLC4_Trend_n1'].astype(float)
+     data['OHLC4_Trend_n2'] = data['OHLC4_Trend'].shift(2)
+     data['OHLC4_Trend_n2'] = data['OHLC4_Trend_n2'].astype(float)
+     data['RangePct_n1'] = data['RangePct'].shift(1)
+     data['RangePct_n2'] = data['RangePct'].shift(2)
+     data['OHLC4_VIX_n1'] = data['OHLC4_VIX'].shift(1)
+     data['OHLC4_VIX_n2'] = data['OHLC4_VIX'].shift(2)
+     data['CurrentGap'] = (data['Open'] - data['PrevClose']) / data['PrevClose']
+     data['CurrentGap'] = data['CurrentGap'].shift(-1)
+     data['DayOfWeek'] = pd.to_datetime(data.index)
+     data['DayOfWeek'] = data['DayOfWeek'].dt.weekday
+
+     # Regression target -- the next day's OHLC4 return vs. today's close
+     data['Target'] = (data['OHLC4'] / data['PrevClose']) - 1
+     data['Target'] = data['Target'].shift(-1)
+     # data['Target'] = data['RangePct'].shift(-1)
+
+     # Classification target -- whether tomorrow closes above today's close
+     data['Target_clf'] = data['Close'] > data['PrevClose']
+     data['Target_clf'] = data['Target_clf'].shift(-1)
+     data['DayOfWeek'] = pd.to_datetime(data.index)
+     data['Quarter'] = data['DayOfWeek'].dt.quarter
+     data['DayOfWeek'] = data['DayOfWeek'].dt.weekday
+
+     for rid in tqdm(release_ids, desc='Merging econ data'):
+         # Get the name of the release
+         n = releases[rid]['name']
+         # Merge the corresponding DF of the release
+         data = data.merge(releases[rid]['df'], how = 'left', left_index=True, right_index=True)
+         # Shift the indicator up one row so it flags the session before the release
+         data[f'{n}_shift'] = data[n].shift(-1)
+         # Fill the rest with zeroes
+         data[n] = data[n].fillna(0)
+         data[f'{n}_shift'] = data[f'{n}_shift'].fillna(0)
+
+     data['BigNewsDay'] = data[[x for x in data.columns if '_shift' in x]].max(axis=1)
+
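+     # e.g. a CPI print on 2023-06-13 (hypothetical date) sets CPI_shift = 1 on
+     # 2023-06-12, so BigNewsDay == 1 marks sessions with major data due the
+     # next morning.
+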
+     def cumul_sum(col):
+         nums = []
+         s = 0
+         for x in col:
+             if x == 1:
+                 s += 1
+             elif x == 0:
+                 s = 0
+             nums.append(s)
+         return nums
+
+     consec_green = cumul_sum(data['GreenDay'].values)
+     consec_red = cumul_sum(data['RedDay'].values)
+
+     data['DaysGreen'] = consec_green
+     data['DaysRed'] = consec_red
+
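+     # Worked example: cumul_sum([1, 1, 0, 1, 1, 1]) -> [1, 2, 0, 1, 2, 3],
+     # i.e. the running streak length, reset to zero whenever a 0 appears.
+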
+     # Second-to-last row: the last fully formed session, used for the new prediction
+     final_row = data.index[-2]
+
+     exp_row = data.index[-1]  # the in-progress session (not used below)
+
+     df_final = data.loc[:final_row,
+         [
+         'BigNewsDay',
+         'Quarter',
+         'Perf5Day',
+         'Perf5Day_n1',
+         'DaysGreen',
+         'DaysRed',
+         # 'OHLC4_Trend',
+         # 'OHLC4_Trend_n1',
+         # 'OHLC4_Trend_n2',
+         # 'VIX5Day',
+         # 'VIX5Day_n1',
+         'CurrentGap',
+         'RangePct',
+         'RangePct_n1',
+         'RangePct_n2',
+         'OHLC4_VIX',
+         'OHLC4_VIX_n1',
+         'OHLC4_VIX_n2',
+         'Target',
+         'Target_clf'
+         ]]
+     df_final = df_final.dropna(subset=['Target','Target_clf','Perf5Day_n1'])
+     return data, df_final, final_row
+
+ st.set_page_config(
+     page_title="Gameday Model for $SPX",
+     page_icon="🎮"
+ )
+
+ st.title('🎮 Gameday Model for $SPX')
+ st.markdown('**PLEASE NOTE:** Model should be run at or after market open.')
+
+ if st.button("🧹 Clear All"):
+     st.cache_data.clear()
+
+ if st.button('🤖 Run it'):
+     with st.spinner('Loading data...'):
+         data, df_final, final_row = get_data()
+     # st.success("✅ Historical data")
+
+     with st.spinner("Training models..."):
+         def train_models():
+             res1, xgbr, seq2 = walk_forward_validation_seq(df_final.dropna(), 'Target_clf', 'Target', 100, 1)
+             return res1, xgbr, seq2
+         res1, xgbr, seq2 = train_models()
+     # st.success("✅ Models trained")
+
+     with st.spinner("Getting new prediction..."):
+
+         # Get last row
+         new_pred = data.loc[final_row, ['BigNewsDay',
+             'Quarter',
+             'Perf5Day',
+             'Perf5Day_n1',
+             'DaysGreen',
+             'DaysRed',
+             # 'OHLC4_Trend',
+             # 'OHLC4_Trend_n1',
+             # 'OHLC4_Trend_n2',
+             # 'VIX5Day',
+             # 'VIX5Day_n1',
+             'CurrentGap',
+             'RangePct',
+             'RangePct_n1',
+             'RangePct_n2',
+             'OHLC4_VIX',
+             'OHLC4_VIX_n1',
+             'OHLC4_VIX_n2']]
+
+         new_pred = pd.DataFrame(new_pred).T
+         # new_pred_show = pd.DataFrame(index=[new_pred.columns], columns=[new_pred.index], data=[[v] for v in new_pred.values])
+
+         # The transposed single row is object-dtype, so cast each feature back
+         # to the dtype the models were trained on
+         new_pred['BigNewsDay'] = new_pred['BigNewsDay'].astype(float)
+         new_pred['Quarter'] = new_pred['Quarter'].astype(int)
+         new_pred['Perf5Day'] = new_pred['Perf5Day'].astype(bool)
+         new_pred['Perf5Day_n1'] = new_pred['Perf5Day_n1'].astype(bool)
+         new_pred['DaysGreen'] = new_pred['DaysGreen'].astype(float)
+         new_pred['DaysRed'] = new_pred['DaysRed'].astype(float)
+         # new_pred['OHLC4_Trend'] = new_pred['OHLC4_Trend'].astype(float)
+         # new_pred['OHLC4_Trend_n1'] = new_pred['OHLC4_Trend_n1'].astype(float)
+         # new_pred['OHLC4_Trend_n2'] = new_pred['OHLC4_Trend_n2'].astype(float)
+         # new_pred['VIX5Day'] = new_pred['VIX5Day'].astype(bool)
+         # new_pred['VIX5Day_n1'] = new_pred['VIX5Day_n1'].astype(bool)
+         new_pred['CurrentGap'] = new_pred['CurrentGap'].astype(float)
+         new_pred['RangePct'] = new_pred['RangePct'].astype(float)
+         new_pred['RangePct_n1'] = new_pred['RangePct_n1'].astype(float)
+         new_pred['RangePct_n2'] = new_pred['RangePct_n2'].astype(float)
+         new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
+         new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
+         new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
+
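+         # Equivalent compact form (sketch; dtype map abbreviated):
+         # new_pred = new_pred.astype({'BigNewsDay': float, 'Quarter': int,
+         #                             'Perf5Day': bool, 'Perf5Day_n1': bool})
+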
+     st.success("✅ All done!")
+     tab1, tab2, tab3 = st.tabs(["🔮 Prediction", "✨ New Data", "🗄 Historical"])
+
+     seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
+     # above_pct_green = res1.loc[res1['Predicted'] >= seq_proba, 'True'].mean()
+     # len_above_pct_green = len(res1.loc[res1['Predicted'] >= seq_proba])
+     # below_pct_red = 1 - res1.loc[res1['Predicted'] <= seq_proba, 'True'].mean()
+     # len_below_pct_red = len(res1.loc[res1['Predicted'] <= seq_proba])
+
+     # Calc green and red probas
+     green_proba = seq_proba[0]
+     red_proba = 1 - green_proba
+     stdev = 0.01
+     score = None
+     num_obs = None
+     cond = None
+     historical_proba = None
+
+     if green_proba > red_proba:
+         # If the day is predicted to be green, say so
+         score = green_proba
+         # How many past predictions fall within the band around this score?
+         cond = (res1['Predicted'] <= (green_proba + stdev)) & (res1['Predicted'] >= (green_proba - stdev))
+         num_obs = len(res1.loc[cond])
+         # How often were those days green?
+         historical_proba = res1.loc[cond, 'True'].mean()
+         # print(cond)
+
+     elif green_proba <= red_proba:
+         # If the day is predicted to be red, say so
+         score = red_proba
+         # How many past predictions fall within the band around this score?
+         cond = (res1['Predicted'] <= (red_proba + stdev)) & (res1['Predicted'] >= (red_proba - stdev))
+         num_obs = len(res1.loc[cond])
+         # How often were those days red?
+         historical_proba = 1 - res1.loc[cond, 'True'].mean()
+         # print(cond)
+
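+     # Worked example: if score = 0.62 and stdev = 0.01, the band is
+     # [0.61, 0.63]; num_obs counts past out-of-sample predictions inside it,
+     # and historical_proba is how often those days matched the call.
+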
+     text_cond = '🟩' if green_proba > red_proba else '🟥'
+
+     results = pd.DataFrame(index=[
+         'ModelScore',
+         f'NumInRange ({score - stdev:.1%} - {score + stdev:.1%})',
+         'HistoricalRate'
+     ], data = [f'{text_cond} {score:.1%}', num_obs, f'{text_cond} {historical_proba:.1%}'])
+
+     results.columns = ['Outputs']
+
+     # st.subheader('New Prediction')
+
+     # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
+     df_probas = res1.groupby(pd.cut(res1['Predicted'],[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf])).agg({'True':[np.mean,len,np.sum]})
+     df_probas.columns = ['PctGreen','NumObs','NumGreen']
+     tab1.subheader('Preds and Probabilities')
+     tab1.write(results)
+     tab1.write(df_probas)
+
+     tab2.subheader('Latest Data for Pred')
+     tab2.write(new_pred)
+
+     tab3.subheader('Historical Data')
+     tab3.write(df_final)
+
+     # The only variable you can play with, since the others are historical
+     # new_pred.loc[:,'CurrentGap'] = -0.01 / 100
+     # new_pred.loc[:,'BigNewsDay'] = 0
+
+     # st.subheader('Subset')
+     # st.write(data.iloc[-1])
+
+     # st.subheader('Number of pickups by hour')
+     # hist_values = np.histogram(
+     #     data[DATE_COLUMN].dt.hour, bins=24, range=(0,24))[0]
+     # st.bar_chart(hist_values)