Spaces:
Sleeping
Sleeping
added more feats
Browse files
- app.py +7 -1
- model_day.py +35 -33
- model_intra.py +107 -46
- troubleshoot_day_model.ipynb +0 -0
app.py
CHANGED
@@ -162,6 +162,8 @@ with st.form("choose_model"):
|
|
162 |
new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
|
163 |
new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
|
164 |
new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
|
|
|
|
|
165 |
# new_pred['OHLC4_Current_Trend'] = new_pred['OHLC4_Current_Trend'].astype(bool)
|
166 |
# new_pred['OHLC4_Trend'] = new_pred['OHLC4_Trend'].astype(bool)
|
167 |
new_pred['OpenL1'] = new_pred['OpenL1'].astype(float)
|
@@ -194,7 +196,7 @@ with st.form("choose_model"):
|
|
194 |
|
195 |
my_bar.progress(0.66, "Training models...")
|
196 |
def train_models():
|
197 |
-
res1, xgbr = walk_forward_validation(df_final.dropna(), 'Target_clf',
|
198 |
return res1, xgbr
|
199 |
res1, xgbr = train_models()
|
200 |
# st.success("✅ Models trained")
|
@@ -230,6 +232,10 @@ with st.form("choose_model"):
|
|
230 |
new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
|
231 |
new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
|
232 |
new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
|
|
|
|
|
|
|
|
|
233 |
# new_pred['OpenL1'] = new_pred['OpenL1'].astype(float)
|
234 |
# new_pred['OpenL2'] = new_pred['OpenL2'].astype(float)
|
235 |
# new_pred['OpenH1'] = new_pred['OpenH1'].astype(float)
|
|
|
162 |
new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
|
163 |
new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
|
164 |
new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
|
165 |
+
new_pred['VIXOpen'] = new_pred['VIXOpen'].astype(bool)
|
166 |
+
new_pred['VVIXOpen'] = new_pred['VVIXOpen'].astype(bool)
|
167 |
# new_pred['OHLC4_Current_Trend'] = new_pred['OHLC4_Current_Trend'].astype(bool)
|
168 |
# new_pred['OHLC4_Trend'] = new_pred['OHLC4_Trend'].astype(bool)
|
169 |
new_pred['OpenL1'] = new_pred['OpenL1'].astype(float)
|
|
|
196 |
|
197 |
my_bar.progress(0.66, "Training models...")
|
198 |
def train_models():
|
199 |
+
res1, xgbr = walk_forward_validation(df_final.dropna(), 'Target_clf', 1)
|
200 |
return res1, xgbr
|
201 |
res1, xgbr = train_models()
|
202 |
# st.success("✅ Models trained")
|
|
|
232 |
new_pred['OHLC4_VIX'] = new_pred['OHLC4_VIX'].astype(float)
|
233 |
new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
|
234 |
new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
|
235 |
+
new_pred['CurrentVIXTrend'] = new_pred['CurrentVIXTrend'].astype(bool)
|
236 |
+
new_pred['SPX30IntraPerf'] = new_pred['SPX30IntraPerf'].astype(float)
|
237 |
+
new_pred['VIX30IntraPerf'] = new_pred['VIX30IntraPerf'].astype(float)
|
238 |
+
new_pred['VVIX30IntraPerf'] = new_pred['VVIX30IntraPerf'].astype(float)
|
239 |
# new_pred['OpenL1'] = new_pred['OpenL1'].astype(float)
|
240 |
# new_pred['OpenL2'] = new_pred['OpenL2'].astype(float)
|
241 |
# new_pred['OpenH1'] = new_pred['OpenH1'].astype(float)
|
model_day.py
CHANGED
@@ -62,6 +62,8 @@ model_cols = [
|
|
62 |
'OHLC4_VIX',
|
63 |
'OHLC4_VIX_n1',
|
64 |
'OHLC4_VIX_n2',
|
|
|
|
|
65 |
'OpenL1',
|
66 |
'OpenL2',
|
67 |
'OpenH1',
|
@@ -251,10 +253,13 @@ def get_data():
|
|
251 |
# releases[rid]['df'] = releases[rid]['df'].set_index('ds')
|
252 |
|
253 |
vix = yf.Ticker('^VIX')
|
|
|
254 |
spx = yf.Ticker('^GSPC')
|
255 |
|
256 |
prices_vix = vix.history(start='2018-07-01', interval='1d')
|
257 |
prices_spx = spx.history(start='2018-07-01', interval='1d')
|
|
|
|
|
258 |
prices_spx['index'] = [str(x).split()[0] for x in prices_spx.index]
|
259 |
prices_spx['index'] = pd.to_datetime(prices_spx['index']).dt.date
|
260 |
prices_spx.index = prices_spx['index']
|
@@ -265,46 +270,44 @@ def get_data():
|
|
265 |
prices_vix.index = prices_vix['index']
|
266 |
prices_vix = prices_vix.drop(columns='index')
|
267 |
|
|
|
|
|
|
|
|
|
|
|
268 |
data = prices_spx.merge(prices_vix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VIX'])
|
|
|
269 |
data.index = pd.DatetimeIndex(data.index)
|
270 |
|
271 |
# Features
|
272 |
data['PrevClose'] = data['Close'].shift(1)
|
273 |
data['Perf5Day'] = data['Close'] > data['Close'].shift(5)
|
274 |
-
data['Perf5Day_n1'] = data['Perf5Day'].shift(1)
|
275 |
-
data['Perf5Day_n1'] = data['Perf5Day_n1'].astype(bool)
|
276 |
data['GreenDay'] = (data['Close'] > data['PrevClose']) * 1
|
277 |
data['RedDay'] = (data['Close'] <= data['PrevClose']) * 1
|
278 |
-
|
279 |
data['VIX5Day'] = data['Close_VIX'] > data['Close_VIX'].shift(5)
|
280 |
-
data['VIX5Day_n1'] = data['VIX5Day'].astype(bool)
|
281 |
-
|
282 |
-
data['
|
|
|
|
|
|
|
283 |
data['RangePct'] = data['Range'] / data['Close']
|
284 |
data['VIXLevel'] = pd.qcut(data['Close_VIX'], 4)
|
285 |
data['OHLC4_VIX'] = data[['Open_VIX','High_VIX','Low_VIX','Close_VIX']].mean(axis=1)
|
286 |
data['OHLC4'] = data[['Open','High','Low','Close']].mean(axis=1)
|
287 |
data['OHLC4_Trend'] = data['OHLC4'] > data['OHLC4'].shift(1)
|
288 |
-
data['OHLC4_Trend_n1'] = data['OHLC4_Trend'].shift(1)
|
289 |
-
data['
|
290 |
-
data['OHLC4_Trend_n2'] = data['OHLC4_Trend'].shift(1)
|
291 |
-
data['OHLC4_Trend_n2'] = data['OHLC4_Trend_n2'].astype(float)
|
292 |
data['RangePct_n1'] = data['RangePct'].shift(1)
|
293 |
data['RangePct_n2'] = data['RangePct'].shift(2)
|
294 |
data['OHLC4_VIX_n1'] = data['OHLC4_VIX'].shift(1)
|
295 |
data['OHLC4_VIX_n2'] = data['OHLC4_VIX'].shift(2)
|
296 |
-
data['CurrentGap'] = (data['Open'] - data['PrevClose']) / data['PrevClose']
|
297 |
-
data['CurrentGap'] = data['CurrentGap'].shift(-1)
|
298 |
data['DayOfWeek'] = pd.to_datetime(data.index)
|
299 |
data['DayOfWeek'] = data['DayOfWeek'].dt.day
|
300 |
-
|
301 |
-
# Calculate up
|
302 |
data['up'] = 100 * (data['High'].shift(1) - data['Open'].shift(1)) / data['Close'].shift(1)
|
303 |
-
|
304 |
-
# Calculate upSD
|
305 |
data['upSD'] = data['up'].rolling(30).std(ddof=0)
|
306 |
-
|
307 |
-
# Calculate aveUp
|
308 |
data['aveUp'] = data['up'].rolling(30).mean()
|
309 |
data['H1'] = data['Open'] + (data['aveUp'] / 100) * data['Open']
|
310 |
data['H2'] = data['Open'] + ((data['aveUp'] + data['upSD']) / 100) * data['Open']
|
@@ -313,21 +316,18 @@ def get_data():
|
|
313 |
data['aveDown'] = data['down'].rolling(30).mean()
|
314 |
data['L1'] = data['Open'] - (data['aveDown'] / 100) * data['Open']
|
315 |
data['L2'] = data['Open'] - ((data['aveDown'] + data['upSD']) / 100) * data['Open']
|
316 |
-
|
317 |
-
data = data
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
OpenH1 = lambda x: x['Open'] / x['H1'],
|
329 |
-
OpenH2 = lambda x: x['Open'] / x['H2']
|
330 |
-
)
|
331 |
|
332 |
level_cols = [
|
333 |
'L1Touch',
|
@@ -409,6 +409,8 @@ def get_data():
|
|
409 |
'OHLC4_VIX',
|
410 |
'OHLC4_VIX_n1',
|
411 |
'OHLC4_VIX_n2',
|
|
|
|
|
412 |
'OpenL1',
|
413 |
'OpenL2',
|
414 |
'OpenH1',
|
|
|
62 |
'OHLC4_VIX',
|
63 |
'OHLC4_VIX_n1',
|
64 |
'OHLC4_VIX_n2',
|
65 |
+
'VIXOpen',
|
66 |
+
'VVIXOpen',
|
67 |
'OpenL1',
|
68 |
'OpenL2',
|
69 |
'OpenH1',
|
|
|
253 |
# releases[rid]['df'] = releases[rid]['df'].set_index('ds')
|
254 |
|
255 |
vix = yf.Ticker('^VIX')
|
256 |
+
vvix = yf.Ticker('^VVIX')
|
257 |
spx = yf.Ticker('^GSPC')
|
258 |
|
259 |
prices_vix = vix.history(start='2018-07-01', interval='1d')
|
260 |
prices_spx = spx.history(start='2018-07-01', interval='1d')
|
261 |
+
prices_vvix = vvix.history(start='2018-07-01', interval='1d')
|
262 |
+
|
263 |
prices_spx['index'] = [str(x).split()[0] for x in prices_spx.index]
|
264 |
prices_spx['index'] = pd.to_datetime(prices_spx['index']).dt.date
|
265 |
prices_spx.index = prices_spx['index']
|
|
|
270 |
prices_vix.index = prices_vix['index']
|
271 |
prices_vix = prices_vix.drop(columns='index')
|
272 |
|
273 |
+
prices_vvix['index'] = [str(x).split()[0] for x in prices_vvix.index]
|
274 |
+
prices_vvix['index'] = pd.to_datetime(prices_vvix['index']).dt.date
|
275 |
+
prices_vvix.index = prices_vvix['index']
|
276 |
+
prices_vvix = prices_vvix.drop(columns='index')
|
277 |
+
|
278 |
data = prices_spx.merge(prices_vix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VIX'])
|
279 |
+
data = data.merge(prices_vvix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VVIX'])
|
280 |
data.index = pd.DatetimeIndex(data.index)
|
281 |
|
282 |
# Features
|
283 |
data['PrevClose'] = data['Close'].shift(1)
|
284 |
data['Perf5Day'] = data['Close'] > data['Close'].shift(5)
|
285 |
+
data['Perf5Day_n1'] = data['Perf5Day'].shift(1).astype(bool)
|
|
|
286 |
data['GreenDay'] = (data['Close'] > data['PrevClose']) * 1
|
287 |
data['RedDay'] = (data['Close'] <= data['PrevClose']) * 1
|
|
|
288 |
data['VIX5Day'] = data['Close_VIX'] > data['Close_VIX'].shift(5)
|
289 |
+
data['VIX5Day_n1'] = data['VIX5Day'].shift(1).astype(bool)
|
290 |
+
data['VIXOpen'] = data['Open_VIX'] > data['Close_VIX'].shift(1)
|
291 |
+
data['VVIXOpen'] = data['Open_VVIX'] > data['Close_VVIX'].shift(1)
|
292 |
+
data['VIXOpen'] = data['VIXOpen'].astype(bool)
|
293 |
+
data['VVIXOpen'] = data['VVIXOpen'].astype(bool)
|
294 |
+
data['Range'] = data[['Open','High']].max(axis=1) - data[['Low','Open']].min(axis=1)
|
295 |
data['RangePct'] = data['Range'] / data['Close']
|
296 |
data['VIXLevel'] = pd.qcut(data['Close_VIX'], 4)
|
297 |
data['OHLC4_VIX'] = data[['Open_VIX','High_VIX','Low_VIX','Close_VIX']].mean(axis=1)
|
298 |
data['OHLC4'] = data[['Open','High','Low','Close']].mean(axis=1)
|
299 |
data['OHLC4_Trend'] = data['OHLC4'] > data['OHLC4'].shift(1)
|
300 |
+
data['OHLC4_Trend_n1'] = data['OHLC4_Trend'].shift(1).astype(float)
|
301 |
+
data['OHLC4_Trend_n2'] = data['OHLC4_Trend'].shift(2).astype(float)
|
|
|
|
|
302 |
data['RangePct_n1'] = data['RangePct'].shift(1)
|
303 |
data['RangePct_n2'] = data['RangePct'].shift(2)
|
304 |
data['OHLC4_VIX_n1'] = data['OHLC4_VIX'].shift(1)
|
305 |
data['OHLC4_VIX_n2'] = data['OHLC4_VIX'].shift(2)
|
306 |
+
data['CurrentGap'] = ((data['Open'] - data['PrevClose']) / data['PrevClose']).shift(-1)
|
|
|
307 |
data['DayOfWeek'] = pd.to_datetime(data.index)
|
308 |
data['DayOfWeek'] = data['DayOfWeek'].dt.day
|
|
|
|
|
309 |
data['up'] = 100 * (data['High'].shift(1) - data['Open'].shift(1)) / data['Close'].shift(1)
|
|
|
|
|
310 |
data['upSD'] = data['up'].rolling(30).std(ddof=0)
|
|
|
|
|
311 |
data['aveUp'] = data['up'].rolling(30).mean()
|
312 |
data['H1'] = data['Open'] + (data['aveUp'] / 100) * data['Open']
|
313 |
data['H2'] = data['Open'] + ((data['aveUp'] + data['upSD']) / 100) * data['Open']
|
|
|
316 |
data['aveDown'] = data['down'].rolling(30).mean()
|
317 |
data['L1'] = data['Open'] - (data['aveDown'] / 100) * data['Open']
|
318 |
data['L2'] = data['Open'] - ((data['aveDown'] + data['upSD']) / 100) * data['Open']
|
319 |
+
data['L1Touch'] = data['Low'] < data['L1']
|
320 |
+
data['L2Touch'] = data['Low'] < data['L2']
|
321 |
+
data['H1Touch'] = data['High'] > data['H1']
|
322 |
+
data['H2Touch'] = data['High'] > data['H2']
|
323 |
+
data['L1Break'] = data['Close'] < data['L1']
|
324 |
+
data['L2Break'] = data['Close'] < data['L2']
|
325 |
+
data['H1Break'] = data['Close'] > data['H1']
|
326 |
+
data['H2Break'] = data['Close'] > data['H2']
|
327 |
+
data['OpenL1'] = data['Open'] / data['L1']
|
328 |
+
data['OpenL2'] = data['Open'] / data['L2']
|
329 |
+
data['OpenH1'] = data['Open'] / data['H1']
|
330 |
+
data['OpenH2'] = data['Open'] / data['H2']
|
|
|
|
|
|
|
331 |
|
332 |
level_cols = [
|
333 |
'L1Touch',
|
|
|
409 |
'OHLC4_VIX',
|
410 |
'OHLC4_VIX_n1',
|
411 |
'OHLC4_VIX_n2',
|
412 |
+
'VIXOpen',
|
413 |
+
'VVIXOpen',
|
414 |
'OpenL1',
|
415 |
'OpenL2',
|
416 |
'OpenH1',
|
model_intra.py
CHANGED
@@ -13,6 +13,7 @@ from pandas.tseries.offsets import BDay
|
|
13 |
from datasets import load_dataset
|
14 |
import lightgbm as lgb
|
15 |
from sklearn.model_selection import TimeSeriesSplit
|
|
|
16 |
|
17 |
data_start_date = '2018-07-01'
|
18 |
|
@@ -37,6 +38,10 @@ model_cols = [
|
|
37 |
'OHLC4_VIX_n2',
|
38 |
'OHLC4_Current_Trend',
|
39 |
'OHLC4_Trend',
|
|
|
|
|
|
|
|
|
40 |
# 'OpenL1',
|
41 |
# 'OpenL2',
|
42 |
# 'OpenH1',
|
@@ -62,7 +67,7 @@ model_cols = [
|
|
62 |
]
|
63 |
|
64 |
# If the dataset is gated/private, make sure you have run huggingface-cli login
|
65 |
-
def walk_forward_validation(df, target_column,
|
66 |
|
67 |
df = df[model_cols + [target_column]]
|
68 |
df[target_column] = df[target_column].astype(bool)
|
@@ -81,13 +86,13 @@ def walk_forward_validation(df, target_column, num_training_rows, num_periods):
|
|
81 |
y_train = df[target_column].iloc[train_index]
|
82 |
X_test = df.drop(target_column, axis=1).iloc[test_index]
|
83 |
y_test = df[target_column].iloc[test_index]
|
84 |
-
|
85 |
-
|
86 |
model = lgb.LGBMClassifier(n_estimators=10, random_state=42, verbosity=-1)
|
87 |
model.fit(X_train, y_train)
|
88 |
# Make a prediction on the test data
|
89 |
predictions = model.predict_proba(X_test)[:,-1]
|
90 |
-
|
91 |
# Create a DataFrame to store the true and predicted values
|
92 |
result_df = pd.DataFrame({'True': y_test, 'Predicted': predictions}, index=y_test.index)
|
93 |
overall_results.append(result_df)
|
@@ -189,62 +194,102 @@ def get_data(periods_30m = 1):
|
|
189 |
releases[rid]['df'].index = pd.DatetimeIndex(releases[rid]['df'].index)
|
190 |
|
191 |
vix = yf.Ticker('^VIX')
|
|
|
192 |
spx = yf.Ticker('^GSPC')
|
193 |
|
194 |
# Pull in data
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
# Get incremental date
|
213 |
-
last_date =
|
214 |
last_date = last_date + datetime.timedelta(days=1)
|
215 |
-
|
|
|
216 |
spx1 = yf.Ticker('^GSPC')
|
|
|
|
|
217 |
yfp = spx1.history(start=last_date, interval='30m')
|
|
|
|
|
218 |
|
219 |
if len(yfp) > 0:
|
220 |
-
#
|
221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
else:
|
223 |
-
df_30m =
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
df_30m = df_30m.
|
229 |
-
df_30m = df_30m.
|
230 |
-
|
231 |
-
df_30m =
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
|
|
|
|
|
|
243 |
|
244 |
-
df_intra.columns = ['Open30','High30','Low30','Close30']
|
245 |
|
246 |
prices_vix = vix.history(start=data_start_date, interval='1d')
|
|
|
247 |
prices_spx = spx.history(start=data_start_date, interval='1d')
|
|
|
248 |
prices_spx['index'] = [str(x).split()[0] for x in prices_spx.index]
|
249 |
prices_spx['index'] = pd.to_datetime(prices_spx['index']).dt.date
|
250 |
prices_spx.index = prices_spx['index']
|
@@ -257,8 +302,15 @@ def get_data(periods_30m = 1):
|
|
257 |
prices_vix = prices_vix.drop(columns='index')
|
258 |
prices_vix.index = pd.DatetimeIndex(prices_vix.index)
|
259 |
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
data = prices_spx.merge(df_intra, left_index=True, right_index=True)
|
261 |
data = data.merge(prices_vix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VIX'])
|
|
|
262 |
|
263 |
# Features
|
264 |
data['PrevClose'] = data['Close'].shift(1)
|
@@ -271,6 +323,9 @@ def get_data(periods_30m = 1):
|
|
271 |
data['VIX5Day'] = data['Close_VIX'] > data['Close_VIX'].shift(5)
|
272 |
data['VIX5Day_n1'] = data['VIX5Day'].astype(bool)
|
273 |
|
|
|
|
|
|
|
274 |
data['Range'] = data[['Open','High']].max(axis=1) - data[['Low','Open']].min(axis=1) # Current day range in points
|
275 |
data['RangePct'] = data['Range'] / data['Close']
|
276 |
data['VIXLevel'] = pd.qcut(data['Close_VIX'], 4)
|
@@ -302,6 +357,10 @@ def get_data(periods_30m = 1):
|
|
302 |
data['OHLC4_Current_Trend'] = data['OHLC4_Current_Trend'].astype(bool)
|
303 |
data['HistClose30toPrevClose'] = (data['Close30'] / data['PrevClose']) - 1
|
304 |
|
|
|
|
|
|
|
|
|
305 |
|
306 |
# Open to High
|
307 |
data['CurrentHigh30toClose'] = (data['CurrentHigh30'] / data['Close']) - 1
|
@@ -399,8 +458,10 @@ def get_data(periods_30m = 1):
|
|
399 |
return df.groupby(pd.qcut(df[col_name], q))['GreenDay'].mean()
|
400 |
|
401 |
probas = []
|
|
|
402 |
for i, pct in enumerate(data['CurrentClose30toClose']):
|
403 |
try:
|
|
|
404 |
df_q = get_quintiles(data.iloc[:i], 'HistClose30toPrevClose', 10)
|
405 |
for q in df_q.index:
|
406 |
if q.left <= pct <= q.right:
|
|
|
13 |
from datasets import load_dataset
|
14 |
import lightgbm as lgb
|
15 |
from sklearn.model_selection import TimeSeriesSplit
|
16 |
+
import json
|
17 |
|
18 |
data_start_date = '2018-07-01'
|
19 |
|
|
|
38 |
'OHLC4_VIX_n2',
|
39 |
'OHLC4_Current_Trend',
|
40 |
'OHLC4_Trend',
|
41 |
+
'CurrentVIXTrend',
|
42 |
+
'SPX30IntraPerf',
|
43 |
+
'VIX30IntraPerf',
|
44 |
+
'VVIX30IntraPerf',
|
45 |
# 'OpenL1',
|
46 |
# 'OpenL2',
|
47 |
# 'OpenH1',
|
|
|
67 |
]
|
68 |
|
69 |
# If the dataset is gated/private, make sure you have run huggingface-cli login
|
70 |
+
def walk_forward_validation(df, target_column, num_periods):
|
71 |
|
72 |
df = df[model_cols + [target_column]]
|
73 |
df[target_column] = df[target_column].astype(bool)
|
|
|
86 |
y_train = df[target_column].iloc[train_index]
|
87 |
X_test = df.drop(target_column, axis=1).iloc[test_index]
|
88 |
y_test = df[target_column].iloc[test_index]
|
89 |
+
|
90 |
+
y_train = y_train.astype(bool)
|
91 |
model = lgb.LGBMClassifier(n_estimators=10, random_state=42, verbosity=-1)
|
92 |
model.fit(X_train, y_train)
|
93 |
# Make a prediction on the test data
|
94 |
predictions = model.predict_proba(X_test)[:,-1]
|
95 |
+
|
96 |
# Create a DataFrame to store the true and predicted values
|
97 |
result_df = pd.DataFrame({'True': y_test, 'Predicted': predictions}, index=y_test.index)
|
98 |
overall_results.append(result_df)
|
|
|
194 |
releases[rid]['df'].index = pd.DatetimeIndex(releases[rid]['df'].index)
|
195 |
|
196 |
vix = yf.Ticker('^VIX')
|
197 |
+
vvix = yf.Ticker('^VVIX')
|
198 |
spx = yf.Ticker('^GSPC')
|
199 |
|
200 |
# Pull in data
|
201 |
+
data_files = {"spx": "SPX_full_30min.txt", "vix": "VIX_full_30min.txt", "vvix":'VVIX_full_30min.txt'}
|
202 |
+
data = load_dataset("boomsss/spx_intra", data_files=data_files)
|
203 |
+
dfs = []
|
204 |
+
for ticker in data.keys():
|
205 |
+
rows = [d['text'] for d in data[ticker]]
|
206 |
+
rows = [x.split(',') for x in rows]
|
207 |
+
|
208 |
+
fr = pd.DataFrame(columns=[
|
209 |
+
'Datetime','Open','High','Low','Close'
|
210 |
+
], data = rows)
|
211 |
+
|
212 |
+
fr['Datetime'] = pd.to_datetime(fr['Datetime'])
|
213 |
+
fr['Datetime'] = fr['Datetime'].dt.tz_localize('America/New_York')
|
214 |
+
fr = fr.set_index('Datetime')
|
215 |
+
fr['Open'] = pd.to_numeric(fr['Open'])
|
216 |
+
fr['High'] = pd.to_numeric(fr['High'])
|
217 |
+
fr['Low'] = pd.to_numeric(fr['Low'])
|
218 |
+
fr['Close'] = pd.to_numeric(fr['Close'])
|
219 |
+
dfs.append(fr)
|
220 |
+
|
221 |
+
df_30m = pd.concat(dfs, axis=1)
|
222 |
+
|
223 |
+
df_30m.columns = [
|
224 |
+
'Open30',
|
225 |
+
'High30',
|
226 |
+
'Low30',
|
227 |
+
'Close30',
|
228 |
+
'Open_VIX30',
|
229 |
+
'High_VIX30',
|
230 |
+
'Low_VIX30',
|
231 |
+
'Close_VIX30',
|
232 |
+
'Open_VVIX30',
|
233 |
+
'High_VVIX30',
|
234 |
+
'Low_VVIX30',
|
235 |
+
'Close_VVIX30'
|
236 |
+
]
|
237 |
+
|
238 |
# Get incremental date
|
239 |
+
last_date = df_30m.index.date[-1]
|
240 |
last_date = last_date + datetime.timedelta(days=1)
|
241 |
+
|
242 |
+
# Get incremental data for each index
|
243 |
spx1 = yf.Ticker('^GSPC')
|
244 |
+
vix1 = yf.Ticker('^VIX')
|
245 |
+
vvix1 = yf.Ticker('^VVIX')
|
246 |
yfp = spx1.history(start=last_date, interval='30m')
|
247 |
+
yf_vix = vix1.history(start=last_date, interval='30m')
|
248 |
+
yf_vvix = vvix1.history(start=last_date, interval='30m')
|
249 |
|
250 |
if len(yfp) > 0:
|
251 |
+
# Convert indexes to US Eastern time (America/New_York) if not already
|
252 |
+
for _df in [yfp, yf_vix, yf_vvix]:
|
253 |
+
if _df.index.tz.zone != 'America/New_York':
|
254 |
+
_df['Datetime'] = pd.to_datetime(_df.index)
|
255 |
+
_df['Datetime'] = _df['Datetime'].dt.tz_convert('America/New_York')
|
256 |
+
_df.set_index('Datetime', inplace=True)
|
257 |
+
# Concat them
|
258 |
+
df_inc = pd.concat([yfp, yf_vix, yf_vvix], axis=1)
|
259 |
+
df_inc = df_inc.loc[
|
260 |
+
(df_inc.index.time >= datetime.time(9,30)) & (df_inc.index.time < datetime.time(16,00))
|
261 |
+
]
|
262 |
+
df_30m = pd.concat([df_30m, df_inc])
|
263 |
else:
|
264 |
+
df_30m = df_30m.copy()
|
265 |
+
|
266 |
+
df_30m = df_30m.loc[
|
267 |
+
(df_30m.index.time >= datetime.time(9,30)) & (df_30m.index.time < datetime.time(16,00))
|
268 |
+
]
|
269 |
+
df_30m['dt'] = df_30m.index.date
|
270 |
+
df_30m = df_30m.groupby('dt').head(periods_30m)
|
271 |
+
df_30m = df_30m.set_index('dt',drop=True)
|
272 |
+
df_30m.index.name = 'Datetime'
|
273 |
+
|
274 |
+
df_30m['SPX30IntraPerf'] = (df_30m['Close30'] / df_30m['Close30'].shift(1)) - 1
|
275 |
+
df_30m['VIX30IntraPerf'] = (df_30m['Close_VIX30'] / df_30m['Close_VIX30'].shift(1)) - 1
|
276 |
+
df_30m['VVIX30IntraPerf'] = (df_30m['Close_VVIX30'] / df_30m['Close_VVIX30'].shift(1)) - 1
|
277 |
+
|
278 |
+
opens_intra = df_30m.groupby('Datetime')[[c for c in df_30m.columns if 'Open' in c]].head(1)
|
279 |
+
highs_intra = df_30m.groupby('Datetime')[[c for c in df_30m.columns if 'High' in c]].max()
|
280 |
+
lows_intra = df_30m.groupby('Datetime')[[c for c in df_30m.columns if 'Low' in c]].min()
|
281 |
+
closes_intra = df_30m.groupby('Datetime')[[c for c in df_30m.columns if 'Close' in c]].tail(1)
|
282 |
+
spx_intra = df_30m.groupby('Datetime')['SPX30IntraPerf'].tail(1)
|
283 |
+
vix_intra = df_30m.groupby('Datetime')['VIX30IntraPerf'].tail(1)
|
284 |
+
vvix_intra = df_30m.groupby('Datetime')['VVIX30IntraPerf'].tail(1)
|
285 |
+
|
286 |
+
df_intra = pd.concat([opens_intra, highs_intra, lows_intra, closes_intra, spx_intra, vix_intra, vvix_intra], axis=1)
|
287 |
|
|
|
288 |
|
289 |
prices_vix = vix.history(start=data_start_date, interval='1d')
|
290 |
+
prices_vvix = vvix.history(start=data_start_date, interval='1d')
|
291 |
prices_spx = spx.history(start=data_start_date, interval='1d')
|
292 |
+
|
293 |
prices_spx['index'] = [str(x).split()[0] for x in prices_spx.index]
|
294 |
prices_spx['index'] = pd.to_datetime(prices_spx['index']).dt.date
|
295 |
prices_spx.index = prices_spx['index']
|
|
|
302 |
prices_vix = prices_vix.drop(columns='index')
|
303 |
prices_vix.index = pd.DatetimeIndex(prices_vix.index)
|
304 |
|
305 |
+
prices_vvix['index'] = [str(x).split()[0] for x in prices_vvix.index]
|
306 |
+
prices_vvix['index'] = pd.to_datetime(prices_vvix['index']).dt.date
|
307 |
+
prices_vvix.index = prices_vvix['index']
|
308 |
+
prices_vvix = prices_vvix.drop(columns='index')
|
309 |
+
prices_vvix.index = pd.DatetimeIndex(prices_vvix.index)
|
310 |
+
|
311 |
data = prices_spx.merge(df_intra, left_index=True, right_index=True)
|
312 |
data = data.merge(prices_vix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VIX'])
|
313 |
+
data = data.merge(prices_vvix[['Open','High','Low','Close']], left_index=True, right_index=True, suffixes=['','_VVIX'])
|
314 |
|
315 |
# Features
|
316 |
data['PrevClose'] = data['Close'].shift(1)
|
|
|
323 |
data['VIX5Day'] = data['Close_VIX'] > data['Close_VIX'].shift(5)
|
324 |
data['VIX5Day_n1'] = data['VIX5Day'].astype(bool)
|
325 |
|
326 |
+
data['VVIX5Day'] = data['Close_VVIX'] > data['Close_VVIX'].shift(5)
|
327 |
+
data['VVIX5Day_n1'] = data['VVIX5Day'].astype(bool)
|
328 |
+
|
329 |
data['Range'] = data[['Open','High']].max(axis=1) - data[['Low','Open']].min(axis=1) # Current day range in points
|
330 |
data['RangePct'] = data['Range'] / data['Close']
|
331 |
data['VIXLevel'] = pd.qcut(data['Close_VIX'], 4)
|
|
|
357 |
data['OHLC4_Current_Trend'] = data['OHLC4_Current_Trend'].astype(bool)
|
358 |
data['HistClose30toPrevClose'] = (data['Close30'] / data['PrevClose']) - 1
|
359 |
|
360 |
+
data['CurrentCloseVIX30'] = data['Close_VIX30'].shift(-1)
|
361 |
+
data['CurrentOpenVIX30'] = data['Open_VIX30'].shift(-1)
|
362 |
+
|
363 |
+
data['CurrentVIXTrend'] = data['CurrentCloseVIX30'] > data['Close_VIX']
|
364 |
|
365 |
# Open to High
|
366 |
data['CurrentHigh30toClose'] = (data['CurrentHigh30'] / data['Close']) - 1
|
|
|
458 |
return df.groupby(pd.qcut(df[col_name], q))['GreenDay'].mean()
|
459 |
|
460 |
probas = []
|
461 |
+
# Given the current price level
|
462 |
for i, pct in enumerate(data['CurrentClose30toClose']):
|
463 |
try:
|
464 |
+
# Split the rows before this one into 10 quantile bins of HistClose30toPrevClose
|
465 |
df_q = get_quintiles(data.iloc[:i], 'HistClose30toPrevClose', 10)
|
466 |
for q in df_q.index:
|
467 |
if q.left <= pct <= q.right:
|
troubleshoot_day_model.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|