wnstnb committed on
Commit
08b8ea5
·
1 Parent(s): 26a8e4a

added more feats

Browse files
Files changed (4) hide show
  1. app.py +7 -1
  2. model_day.py +35 -33
  3. model_intra.py +107 -46
  4. troubleshoot_day_model.ipynb +0 -0
app.py CHANGED
@@ -162,6 +162,8 @@ with st.form("choose_model"):
162
  new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
163
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
164
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
 
 
165
  # new_pred['OHLC4_Current_Trend'] = new_pred['OHLC4_Current_Trend'].astype(bool)
166
  # new_pred['OHLC4_Trend'] = new_pred['OHLC4_Trend'].astype(bool)
167
  new_pred['OpenL1'] = new_pred['OpenL1'].astype(float)
@@ -194,7 +196,7 @@ with st.form("choose_model"):
194
 
195
  my_bar.progress(0.66, "Training models...")
196
  def train_models():
197
- res1, xgbr = walk_forward_validation(df_final.dropna(), 'Target_clf', 120, 1)
198
  return res1, xgbr
199
  res1, xgbr = train_models()
200
  # st.success("✅ Models trained")
@@ -230,6 +232,10 @@ with st.form("choose_model"):
230
  new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
231
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
232
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
 
 
 
 
233
  # new_pred['OpenL1'] = new_pred['OpenL1'].astype(float)
234
  # new_pred['OpenL2'] = new_pred['OpenL2'].astype(float)
235
  # new_pred['OpenH1'] = new_pred['OpenH1'].astype(float)
 
162
  new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
163
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
164
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
165
+ new_pred['VIXOpen'] = new_pred['VIXOpen'].astype(bool)
166
+ new_pred['VVIXOpen'] = new_pred['VVIXOpen'].astype(bool)
167
  # new_pred['OHLC4_Current_Trend'] = new_pred['OHLC4_Current_Trend'].astype(bool)
168
  # new_pred['OHLC4_Trend'] = new_pred['OHLC4_Trend'].astype(bool)
169
  new_pred['OpenL1'] = new_pred['OpenL1'].astype(float)
 
196
 
197
  my_bar.progress(0.66, "Training models...")
198
  def train_models():
199
+ res1, xgbr = walk_forward_validation(df_final.dropna(), 'Target_clf', 1)
200
  return res1, xgbr
201
  res1, xgbr = train_models()
202
  # st.success("✅ Models trained")
 
232
  new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
233
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
234
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
235
+ new_pred['CurrentVIXTrend'] = new_pred['CurrentVIXTrend'].astype(bool)
236
+ new_pred['SPX30IntraPerf'] = new_pred['SPX30IntraPerf'].astype(float)
237
+ new_pred['VIX30IntraPerf'] = new_pred['VIX30IntraPerf'].astype(float)
238
+ new_pred['VVIX30IntraPerf'] = new_pred['VVIX30IntraPerf'].astype(float)
239
  # new_pred['OpenL1'] = new_pred['OpenL1'].astype(float)
240
  # new_pred['OpenL2'] = new_pred['OpenL2'].astype(float)
241
  # new_pred['OpenH1'] = new_pred['OpenH1'].astype(float)
model_day.py CHANGED
@@ -62,6 +62,8 @@ model_cols = [
62
  'OHLC4_VIX',
63
  'OHLC4_VIX_n1',
64
  'OHLC4_VIX_n2',
 
 
65
  'OpenL1',
66
  'OpenL2',
67
  'OpenH1',
@@ -251,10 +253,13 @@ def get_data():
251
  # releases[rid]['df'] = releases[rid]['df'].set_index('ds')
252
 
253
  vix = yf.Ticker('^VIX')
 
254
  spx = yf.Ticker('^GSPC')
255
 
256
  prices_vix = vix.history(start='2018-07-01', interval='1d')
257
  prices_spx = spx.history(start='2018-07-01', interval='1d')
 
 
258
  prices_spx['index'] = [str(x).split()[0] for x in prices_spx.index]
259
  prices_spx['index'] = pd.to_datetime(prices_spx['index']).dt.date
260
  prices_spx.index = prices_spx['index']
@@ -265,46 +270,44 @@ def get_data():
265
  prices_vix.index = prices_vix['index']
266
  prices_vix = prices_vix.drop(columns='index')
267
 
 
 
 
 
 
268
  data = prices_spx.merge(prices_vix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VIX'])
 
269
  data.index = pd.DatetimeIndex(data.index)
270
 
271
  # Features
272
  data['PrevClose'] = data['Close'].shift(1)
273
  data['Perf5Day'] = data['Close'] > data['Close'].shift(5)
274
- data['Perf5Day_n1'] = data['Perf5Day'].shift(1)
275
- data['Perf5Day_n1'] = data['Perf5Day_n1'].astype(bool)
276
  data['GreenDay'] = (data['Close'] > data['PrevClose']) * 1
277
  data['RedDay'] = (data['Close'] <= data['PrevClose']) * 1
278
-
279
  data['VIX5Day'] = data['Close_VIX'] > data['Close_VIX'].shift(5)
280
- data['VIX5Day_n1'] = data['VIX5Day'].astype(bool)
281
-
282
- data['Range'] = data[['Open','High']].max(axis=1) - data[['Low','Open']].min(axis=1) # Current day range in points
 
 
 
283
  data['RangePct'] = data['Range'] / data['Close']
284
  data['VIXLevel'] = pd.qcut(data['Close_VIX'], 4)
285
  data['OHLC4_VIX'] = data[['Open_VIX','High_VIX','Low_VIX','Close_VIX']].mean(axis=1)
286
  data['OHLC4'] = data[['Open','High','Low','Close']].mean(axis=1)
287
  data['OHLC4_Trend'] = data['OHLC4'] > data['OHLC4'].shift(1)
288
- data['OHLC4_Trend_n1'] = data['OHLC4_Trend'].shift(1)
289
- data['OHLC4_Trend_n1'] = data['OHLC4_Trend_n1'].astype(float)
290
- data['OHLC4_Trend_n2'] = data['OHLC4_Trend'].shift(1)
291
- data['OHLC4_Trend_n2'] = data['OHLC4_Trend_n2'].astype(float)
292
  data['RangePct_n1'] = data['RangePct'].shift(1)
293
  data['RangePct_n2'] = data['RangePct'].shift(2)
294
  data['OHLC4_VIX_n1'] = data['OHLC4_VIX'].shift(1)
295
  data['OHLC4_VIX_n2'] = data['OHLC4_VIX'].shift(2)
296
- data['CurrentGap'] = (data['Open'] - data['PrevClose']) / data['PrevClose']
297
- data['CurrentGap'] = data['CurrentGap'].shift(-1)
298
  data['DayOfWeek'] = pd.to_datetime(data.index)
299
  data['DayOfWeek'] = data['DayOfWeek'].dt.day
300
-
301
- # Calculate up
302
  data['up'] = 100 * (data['High'].shift(1) - data['Open'].shift(1)) / data['Close'].shift(1)
303
-
304
- # Calculate upSD
305
  data['upSD'] = data['up'].rolling(30).std(ddof=0)
306
-
307
- # Calculate aveUp
308
  data['aveUp'] = data['up'].rolling(30).mean()
309
  data['H1'] = data['Open'] + (data['aveUp'] / 100) * data['Open']
310
  data['H2'] = data['Open'] + ((data['aveUp'] + data['upSD']) / 100) * data['Open']
@@ -313,21 +316,18 @@ def get_data():
313
  data['aveDown'] = data['down'].rolling(30).mean()
314
  data['L1'] = data['Open'] - (data['aveDown'] / 100) * data['Open']
315
  data['L2'] = data['Open'] - ((data['aveDown'] + data['upSD']) / 100) * data['Open']
316
-
317
- data = data.assign(
318
- L1Touch = lambda x: x['Low'] < x['L1'],
319
- L2Touch = lambda x: x['Low'] < x['L2'],
320
- H1Touch = lambda x: x['High'] > x['H1'],
321
- H2Touch = lambda x: x['High'] > x['H2'],
322
- L1Break = lambda x: x['Close'] < x['L1'],
323
- L2Break = lambda x: x['Close'] < x['L2'],
324
- H1Break = lambda x: x['Close'] > x['H1'],
325
- H2Break = lambda x: x['Close'] > x['H2'],
326
- OpenL1 = lambda x: x['Open'] / x['L1'],
327
- OpenL2 = lambda x: x['Open'] / x['L2'],
328
- OpenH1 = lambda x: x['Open'] / x['H1'],
329
- OpenH2 = lambda x: x['Open'] / x['H2']
330
- )
331
 
332
  level_cols = [
333
  'L1Touch',
@@ -409,6 +409,8 @@ def get_data():
409
  'OHLC4_VIX',
410
  'OHLC4_VIX_n1',
411
  'OHLC4_VIX_n2',
 
 
412
  'OpenL1',
413
  'OpenL2',
414
  'OpenH1',
 
62
  'OHLC4_VIX',
63
  'OHLC4_VIX_n1',
64
  'OHLC4_VIX_n2',
65
+ 'VIXOpen',
66
+ 'VVIXOpen',
67
  'OpenL1',
68
  'OpenL2',
69
  'OpenH1',
 
253
  # releases[rid]['df'] = releases[rid]['df'].set_index('ds')
254
 
255
  vix = yf.Ticker('^VIX')
256
+ vvix = yf.Ticker('^VVIX')
257
  spx = yf.Ticker('^GSPC')
258
 
259
  prices_vix = vix.history(start='2018-07-01', interval='1d')
260
  prices_spx = spx.history(start='2018-07-01', interval='1d')
261
+ prices_vvix = vvix.history(start='2018-07-01', interval='1d')
262
+
263
  prices_spx['index'] = [str(x).split()[0] for x in prices_spx.index]
264
  prices_spx['index'] = pd.to_datetime(prices_spx['index']).dt.date
265
  prices_spx.index = prices_spx['index']
 
270
  prices_vix.index = prices_vix['index']
271
  prices_vix = prices_vix.drop(columns='index')
272
 
273
+ prices_vvix['index'] = [str(x).split()[0] for x in prices_vvix.index]
274
+ prices_vvix['index'] = pd.to_datetime(prices_vvix['index']).dt.date
275
+ prices_vvix.index = prices_vvix['index']
276
+ prices_vvix = prices_vvix.drop(columns='index')
277
+
278
  data = prices_spx.merge(prices_vix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VIX'])
279
+ data = data.merge(prices_vvix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VVIX'])
280
  data.index = pd.DatetimeIndex(data.index)
281
 
282
  # Features
283
  data['PrevClose'] = data['Close'].shift(1)
284
  data['Perf5Day'] = data['Close'] > data['Close'].shift(5)
285
+ data['Perf5Day_n1'] = data['Perf5Day'].shift(1).astype(bool)
 
286
  data['GreenDay'] = (data['Close'] > data['PrevClose']) * 1
287
  data['RedDay'] = (data['Close'] <= data['PrevClose']) * 1
 
288
  data['VIX5Day'] = data['Close_VIX'] > data['Close_VIX'].shift(5)
289
+ data['VIX5Day_n1'] = data['VIX5Day'].shift(1).astype(bool)
290
+ data['VIXOpen'] = data['Open_VIX'] > data['Close_VIX'].shift(1)
291
+ data['VVIXOpen'] = data['Open_VVIX'] > data['Close_VVIX'].shift(1)
292
+ data['VIXOpen'] = data['VIXOpen'].astype(bool)
293
+ data['VVIXOpen'] = data['VVIXOpen'].astype(bool)
294
+ data['Range'] = data[['Open','High']].max(axis=1) - data[['Low','Open']].min(axis=1)
295
  data['RangePct'] = data['Range'] / data['Close']
296
  data['VIXLevel'] = pd.qcut(data['Close_VIX'], 4)
297
  data['OHLC4_VIX'] = data[['Open_VIX','High_VIX','Low_VIX','Close_VIX']].mean(axis=1)
298
  data['OHLC4'] = data[['Open','High','Low','Close']].mean(axis=1)
299
  data['OHLC4_Trend'] = data['OHLC4'] > data['OHLC4'].shift(1)
300
+ data['OHLC4_Trend_n1'] = data['OHLC4_Trend'].shift(1).astype(float)
301
+ data['OHLC4_Trend_n2'] = data['OHLC4_Trend'].shift(2).astype(float)
 
 
302
  data['RangePct_n1'] = data['RangePct'].shift(1)
303
  data['RangePct_n2'] = data['RangePct'].shift(2)
304
  data['OHLC4_VIX_n1'] = data['OHLC4_VIX'].shift(1)
305
  data['OHLC4_VIX_n2'] = data['OHLC4_VIX'].shift(2)
306
+ data['CurrentGap'] = ((data['Open'] - data['PrevClose']) / data['PrevClose']).shift(-1)
 
307
  data['DayOfWeek'] = pd.to_datetime(data.index)
308
  data['DayOfWeek'] = data['DayOfWeek'].dt.day
 
 
309
  data['up'] = 100 * (data['High'].shift(1) - data['Open'].shift(1)) / data['Close'].shift(1)
 
 
310
  data['upSD'] = data['up'].rolling(30).std(ddof=0)
 
 
311
  data['aveUp'] = data['up'].rolling(30).mean()
312
  data['H1'] = data['Open'] + (data['aveUp'] / 100) * data['Open']
313
  data['H2'] = data['Open'] + ((data['aveUp'] + data['upSD']) / 100) * data['Open']
 
316
  data['aveDown'] = data['down'].rolling(30).mean()
317
  data['L1'] = data['Open'] - (data['aveDown'] / 100) * data['Open']
318
  data['L2'] = data['Open'] - ((data['aveDown'] + data['upSD']) / 100) * data['Open']
319
+ data['L1Touch'] = data['Low'] < data['L1']
320
+ data['L2Touch'] = data['Low'] < data['L2']
321
+ data['H1Touch'] = data['High'] > data['H1']
322
+ data['H2Touch'] = data['High'] > data['H2']
323
+ data['L1Break'] = data['Close'] < data['L1']
324
+ data['L2Break'] = data['Close'] < data['L2']
325
+ data['H1Break'] = data['Close'] > data['H1']
326
+ data['H2Break'] = data['Close'] > data['H2']
327
+ data['OpenL1'] = data['Open'] / data['L1']
328
+ data['OpenL2'] = data['Open'] / data['L2']
329
+ data['OpenH1'] = data['Open'] / data['H1']
330
+ data['OpenH2'] = data['Open'] / data['H2']
 
 
 
331
 
332
  level_cols = [
333
  'L1Touch',
 
409
  'OHLC4_VIX',
410
  'OHLC4_VIX_n1',
411
  'OHLC4_VIX_n2',
412
+ 'VIXOpen',
413
+ 'VVIXOpen',
414
  'OpenL1',
415
  'OpenL2',
416
  'OpenH1',
model_intra.py CHANGED
@@ -13,6 +13,7 @@ from pandas.tseries.offsets import BDay
13
  from datasets import load_dataset
14
  import lightgbm as lgb
15
  from sklearn.model_selection import TimeSeriesSplit
 
16
 
17
  data_start_date = '2018-07-01'
18
 
@@ -37,6 +38,10 @@ model_cols = [
37
  'OHLC4_VIX_n2',
38
  'OHLC4_Current_Trend',
39
  'OHLC4_Trend',
 
 
 
 
40
  # 'OpenL1',
41
  # 'OpenL2',
42
  # 'OpenH1',
@@ -62,7 +67,7 @@ model_cols = [
62
  ]
63
 
64
  # If the dataset is gated/private, make sure you have run huggingface-cli login
65
- def walk_forward_validation(df, target_column, num_training_rows, num_periods):
66
 
67
  df = df[model_cols + [target_column]]
68
  df[target_column] = df[target_column].astype(bool)
@@ -81,13 +86,13 @@ def walk_forward_validation(df, target_column, num_training_rows, num_periods):
81
  y_train = df[target_column].iloc[train_index]
82
  X_test = df.drop(target_column, axis=1).iloc[test_index]
83
  y_test = df[target_column].iloc[test_index]
84
-
85
- # Fit the model to the training data
86
  model = lgb.LGBMClassifier(n_estimators=10, random_state=42, verbosity=-1)
87
  model.fit(X_train, y_train)
88
  # Make a prediction on the test data
89
  predictions = model.predict_proba(X_test)[:,-1]
90
-
91
  # Create a DataFrame to store the true and predicted values
92
  result_df = pd.DataFrame({'True': y_test, 'Predicted': predictions}, index=y_test.index)
93
  overall_results.append(result_df)
@@ -189,62 +194,102 @@ def get_data(periods_30m = 1):
189
  releases[rid]['df'].index = pd.DatetimeIndex(releases[rid]['df'].index)
190
 
191
  vix = yf.Ticker('^VIX')
 
192
  spx = yf.Ticker('^GSPC')
193
 
194
  # Pull in data
195
- data = load_dataset("boomsss/spx_intra", split='train')
196
-
197
- rows = [d['text'] for d in data]
198
- rows = [x.split(',') for x in rows]
199
-
200
- fr = pd.DataFrame(columns=[
201
- 'Datetime','Open','High','Low','Close'
202
- ], data = rows)
203
-
204
- fr['Datetime'] = pd.to_datetime(fr['Datetime'])
205
- fr['Datetime'] = fr['Datetime'].dt.tz_localize('America/New_York')
206
- fr = fr.set_index('Datetime')
207
- fr['Open'] = pd.to_numeric(fr['Open'])
208
- fr['High'] = pd.to_numeric(fr['High'])
209
- fr['Low'] = pd.to_numeric(fr['Low'])
210
- fr['Close'] = pd.to_numeric(fr['Close'])
211
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  # Get incremental date
213
- last_date = fr.index.date[-1]
214
  last_date = last_date + datetime.timedelta(days=1)
215
- # Get incremental data
 
216
  spx1 = yf.Ticker('^GSPC')
 
 
217
  yfp = spx1.history(start=last_date, interval='30m')
 
 
218
 
219
  if len(yfp) > 0:
220
- # Concat current and incremental
221
- df_30m = pd.concat([fr, yfp])
 
 
 
 
 
 
 
 
 
 
222
  else:
223
- df_30m = fr.copy()
224
-
225
- # Get the first 30 minute bar
226
- df_30m = df_30m.reset_index()
227
- df_30m['Datetime'] = df_30m['Datetime'].dt.date
228
- df_30m = df_30m.groupby('Datetime').head(periods_30m)
229
- df_30m = df_30m.set_index('Datetime',drop=True)
230
- # Rename the columns
231
- df_30m = df_30m[['Open','High','Low','Close']]
232
-
233
- opens_intra = df_30m.groupby('Datetime')['Open'].head(1)
234
- highs_intra = df_30m.groupby('Datetime')['High'].max()
235
- lows_intra = df_30m.groupby('Datetime')['Low'].min()
236
- closes_intra = df_30m.groupby('Datetime')['Close'].tail(1)
237
-
238
- df_intra = pd.DataFrame(index=df_30m.index.unique())
239
- df_intra['Open'] = opens_intra
240
- df_intra['High'] = highs_intra
241
- df_intra['Low'] = lows_intra
242
- df_intra['Close'] = closes_intra
 
 
 
243
 
244
- df_intra.columns = ['Open30','High30','Low30','Close30']
245
 
246
  prices_vix = vix.history(start=data_start_date, interval='1d')
 
247
  prices_spx = spx.history(start=data_start_date, interval='1d')
 
248
  prices_spx['index'] = [str(x).split()[0] for x in prices_spx.index]
249
  prices_spx['index'] = pd.to_datetime(prices_spx['index']).dt.date
250
  prices_spx.index = prices_spx['index']
@@ -257,8 +302,15 @@ def get_data(periods_30m = 1):
257
  prices_vix = prices_vix.drop(columns='index')
258
  prices_vix.index = pd.DatetimeIndex(prices_vix.index)
259
 
 
 
 
 
 
 
260
  data = prices_spx.merge(df_intra, left_index=True, right_index=True)
261
  data = data.merge(prices_vix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VIX'])
 
262
 
263
  # Features
264
  data['PrevClose'] = data['Close'].shift(1)
@@ -271,6 +323,9 @@ def get_data(periods_30m = 1):
271
  data['VIX5Day'] = data['Close_VIX'] > data['Close_VIX'].shift(5)
272
  data['VIX5Day_n1'] = data['VIX5Day'].astype(bool)
273
 
 
 
 
274
  data['Range'] = data[['Open','High']].max(axis=1) - data[['Low','Open']].min(axis=1) # Current day range in points
275
  data['RangePct'] = data['Range'] / data['Close']
276
  data['VIXLevel'] = pd.qcut(data['Close_VIX'], 4)
@@ -302,6 +357,10 @@ def get_data(periods_30m = 1):
302
  data['OHLC4_Current_Trend'] = data['OHLC4_Current_Trend'].astype(bool)
303
  data['HistClose30toPrevClose'] = (data['Close30'] / data['PrevClose']) - 1
304
 
 
 
 
 
305
 
306
  # Open to High
307
  data['CurrentHigh30toClose'] = (data['CurrentHigh30'] / data['Close']) - 1
@@ -399,8 +458,10 @@ def get_data(periods_30m = 1):
399
  return df.groupby(pd.qcut(df[col_name], q))['GreenDay'].mean()
400
 
401
  probas = []
 
402
  for i, pct in enumerate(data['CurrentClose30toClose']):
403
  try:
 
404
  df_q = get_quintiles(data.iloc[:i], 'HistClose30toPrevClose', 10)
405
  for q in df_q.index:
406
  if q.left <= pct <= q.right:
 
13
  from datasets import load_dataset
14
  import lightgbm as lgb
15
  from sklearn.model_selection import TimeSeriesSplit
16
+ import json
17
 
18
  data_start_date = '2018-07-01'
19
 
 
38
  'OHLC4_VIX_n2',
39
  'OHLC4_Current_Trend',
40
  'OHLC4_Trend',
41
+ 'CurrentVIXTrend',
42
+ 'SPX30IntraPerf',
43
+ 'VIX30IntraPerf',
44
+ 'VVIX30IntraPerf',
45
  # 'OpenL1',
46
  # 'OpenL2',
47
  # 'OpenH1',
 
67
  ]
68
 
69
  # If the dataset is gated/private, make sure you have run huggingface-cli login
70
+ def walk_forward_validation(df, target_column, num_periods):
71
 
72
  df = df[model_cols + [target_column]]
73
  df[target_column] = df[target_column].astype(bool)
 
86
  y_train = df[target_column].iloc[train_index]
87
  X_test = df.drop(target_column, axis=1).iloc[test_index]
88
  y_test = df[target_column].iloc[test_index]
89
+
90
+ y_train = y_train.astype(bool)
91
  model = lgb.LGBMClassifier(n_estimators=10, random_state=42, verbosity=-1)
92
  model.fit(X_train, y_train)
93
  # Make a prediction on the test data
94
  predictions = model.predict_proba(X_test)[:,-1]
95
+
96
  # Create a DataFrame to store the true and predicted values
97
  result_df = pd.DataFrame({'True': y_test, 'Predicted': predictions}, index=y_test.index)
98
  overall_results.append(result_df)
 
194
  releases[rid]['df'].index = pd.DatetimeIndex(releases[rid]['df'].index)
195
 
196
  vix = yf.Ticker('^VIX')
197
+ vvix = yf.Ticker('^VVIX')
198
  spx = yf.Ticker('^GSPC')
199
 
200
  # Pull in data
201
+ data_files = {"spx": "SPX_full_30min.txt", "vix": "VIX_full_30min.txt", "vvix":'VVIX_full_30min.txt'}
202
+ data = load_dataset("boomsss/spx_intra", data_files=data_files)
203
+ dfs = []
204
+ for ticker in data.keys():
205
+ rows = [d['text'] for d in data[ticker]]
206
+ rows = [x.split(',') for x in rows]
207
+
208
+ fr = pd.DataFrame(columns=[
209
+ 'Datetime','Open','High','Low','Close'
210
+ ], data = rows)
211
+
212
+ fr['Datetime'] = pd.to_datetime(fr['Datetime'])
213
+ fr['Datetime'] = fr['Datetime'].dt.tz_localize('America/New_York')
214
+ fr = fr.set_index('Datetime')
215
+ fr['Open'] = pd.to_numeric(fr['Open'])
216
+ fr['High'] = pd.to_numeric(fr['High'])
217
+ fr['Low'] = pd.to_numeric(fr['Low'])
218
+ fr['Close'] = pd.to_numeric(fr['Close'])
219
+ dfs.append(fr)
220
+
221
+ df_30m = pd.concat(dfs, axis=1)
222
+
223
+ df_30m.columns = [
224
+ 'Open30',
225
+ 'High30',
226
+ 'Low30',
227
+ 'Close30',
228
+ 'Open_VIX30',
229
+ 'High_VIX30',
230
+ 'Low_VIX30',
231
+ 'Close_VIX30',
232
+ 'Open_VVIX30',
233
+ 'High_VVIX30',
234
+ 'Low_VVIX30',
235
+ 'Close_VVIX30'
236
+ ]
237
+
238
  # Get incremental date
239
+ last_date = df_30m.index.date[-1]
240
  last_date = last_date + datetime.timedelta(days=1)
241
+
242
+ # Get incremental data for each index
243
  spx1 = yf.Ticker('^GSPC')
244
+ vix1 = yf.Ticker('^VIX')
245
+ vvix1 = yf.Ticker('^VVIX')
246
  yfp = spx1.history(start=last_date, interval='30m')
247
+ yf_vix = vix1.history(start=last_date, interval='30m')
248
+ yf_vvix = vvix1.history(start=last_date, interval='30m')
249
 
250
  if len(yfp) > 0:
251
+ # Convert indexes to EST if not already
252
+ for _df in [yfp, yf_vix, yf_vvix]:
253
+ if _df.index.tz.zone != 'America/New_York':
254
+ _df['Datetime'] = pd.to_datetime(_df.index)
255
+ _df['Datetime'] = _df['Datetime'].dt.tz_convert('America/New_York')
256
+ _df.set_index('Datetime', inplace=True)
257
+ # Concat them
258
+ df_inc = pd.concat([yfp, yf_vix, yf_vvix], axis=1)
259
+ df_inc = df_inc.loc[
260
+ (df_inc.index.time >= datetime.time(9,30)) & (df_inc.index.time < datetime.time(16,00))
261
+ ]
262
+ df_30m = pd.concat([df_30m, df_inc])
263
  else:
264
+ df_30m = df_30m.copy()
265
+
266
+ df_30m = df_30m.loc[
267
+ (df_30m.index.time >= datetime.time(9,30)) & (df_30m.index.time < datetime.time(16,00))
268
+ ]
269
+ df_30m['dt'] = df_30m.index.date
270
+ df_30m = df_30m.groupby('dt').head(periods_30m)
271
+ df_30m = df_30m.set_index('dt',drop=True)
272
+ df_30m.index.name = 'Datetime'
273
+
274
+ df_30m['SPX30IntraPerf'] = (df_30m['Close30'] / df_30m['Close30'].shift(1)) - 1
275
+ df_30m['VIX30IntraPerf'] = (df_30m['Close_VIX30'] / df_30m['Close_VIX30'].shift(1)) - 1
276
+ df_30m['VVIX30IntraPerf'] = (df_30m['Close_VVIX30'] / df_30m['Close_VVIX30'].shift(1)) - 1
277
+
278
+ opens_intra = df_30m.groupby('Datetime')[[c for c in df_30m.columns if 'Open' in c]].head(1)
279
+ highs_intra = df_30m.groupby('Datetime')[[c for c in df_30m.columns if 'High' in c]].max()
280
+ lows_intra = df_30m.groupby('Datetime')[[c for c in df_30m.columns if 'Low' in c]].min()
281
+ closes_intra = df_30m.groupby('Datetime')[[c for c in df_30m.columns if 'Close' in c]].tail(1)
282
+ spx_intra = df_30m.groupby('Datetime')['SPX30IntraPerf'].tail(1)
283
+ vix_intra = df_30m.groupby('Datetime')['VIX30IntraPerf'].tail(1)
284
+ vvix_intra = df_30m.groupby('Datetime')['VVIX30IntraPerf'].tail(1)
285
+
286
+ df_intra = pd.concat([opens_intra, highs_intra, lows_intra, closes_intra, spx_intra, vix_intra, vvix_intra], axis=1)
287
 
 
288
 
289
  prices_vix = vix.history(start=data_start_date, interval='1d')
290
+ prices_vvix = vvix.history(start=data_start_date, interval='1d')
291
  prices_spx = spx.history(start=data_start_date, interval='1d')
292
+
293
  prices_spx['index'] = [str(x).split()[0] for x in prices_spx.index]
294
  prices_spx['index'] = pd.to_datetime(prices_spx['index']).dt.date
295
  prices_spx.index = prices_spx['index']
 
302
  prices_vix = prices_vix.drop(columns='index')
303
  prices_vix.index = pd.DatetimeIndex(prices_vix.index)
304
 
305
+ prices_vvix['index'] = [str(x).split()[0] for x in prices_vvix.index]
306
+ prices_vvix['index'] = pd.to_datetime(prices_vvix['index']).dt.date
307
+ prices_vvix.index = prices_vvix['index']
308
+ prices_vvix = prices_vvix.drop(columns='index')
309
+ prices_vvix.index = pd.DatetimeIndex(prices_vvix.index)
310
+
311
  data = prices_spx.merge(df_intra, left_index=True, right_index=True)
312
  data = data.merge(prices_vix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VIX'])
313
+ data = data.merge(prices_vvix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VVIX'])
314
 
315
  # Features
316
  data['PrevClose'] = data['Close'].shift(1)
 
323
  data['VIX5Day'] = data['Close_VIX'] > data['Close_VIX'].shift(5)
324
  data['VIX5Day_n1'] = data['VIX5Day'].astype(bool)
325
 
326
+ data['VVIX5Day'] = data['Close_VVIX'] > data['Close_VVIX'].shift(5)
327
+ data['VVIX5Day_n1'] = data['VVIX5Day'].astype(bool)
328
+
329
  data['Range'] = data[['Open','High']].max(axis=1) - data[['Low','Open']].min(axis=1) # Current day range in points
330
  data['RangePct'] = data['Range'] / data['Close']
331
  data['VIXLevel'] = pd.qcut(data['Close_VIX'], 4)
 
357
  data['OHLC4_Current_Trend'] = data['OHLC4_Current_Trend'].astype(bool)
358
  data['HistClose30toPrevClose'] = (data['Close30'] / data['PrevClose']) - 1
359
 
360
+ data['CurrentCloseVIX30'] = data['Close_VIX30'].shift(-1)
361
+ data['CurrentOpenVIX30'] = data['Open_VIX30'].shift(-1)
362
+
363
+ data['CurrentVIXTrend'] = data['CurrentCloseVIX30'] > data['Close_VIX']
364
 
365
  # Open to High
366
  data['CurrentHigh30toClose'] = (data['CurrentHigh30'] / data['Close']) - 1
 
458
  return df.groupby(pd.qcut(df[col_name], q))['GreenDay'].mean()
459
 
460
  probas = []
461
+ # Given the current price level
462
  for i, pct in enumerate(data['CurrentClose30toClose']):
463
  try:
464
+ # Split
465
  df_q = get_quintiles(data.iloc[:i], 'HistClose30toPrevClose', 10)
466
  for q in df_q.index:
467
  if q.left <= pct <= q.right:
troubleshoot_day_model.ipynb CHANGED
The diff for this file is too large to render. See raw diff