yifanxie committed on
Commit
ed581c9
·
1 Parent(s): f3c73fd

code separation from numerai model repo

Browse files
dash/numerdash_app.py ADDED
@@ -0,0 +1,1023 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import os
5
+ import sys
6
+ sys.path.append(os.path.dirname(os.getcwd()))
7
+ from project_tools import project_utils, project_config, numerapi_utils
8
+ import warnings
9
+ import plotly.express as px
10
+ import json
11
+ warnings.filterwarnings("ignore")
12
+ from PIL import Image
13
+ import plotly.express as px
14
+ import plotly.graph_objects as go
15
+ from plotly.subplots import make_subplots
16
+ from streamlit import caching
17
+ import time
18
+ import traceback
19
+ import datetime
20
+
21
+ st.set_page_config(layout='wide')
22
+
23
def sidebar_data_picker():
    """Draw the 'Model Data Picker' sidebar section with four pre-ticked checkboxes.

    Returns:
        A 4-tuple of booleans, in order: top LB by corr, most profitable
        3 month, most profitable 1 year, model from specific users.
    """
    sidebar = st.sidebar
    sidebar.subheader('Model Data Picker')
    labels = (
        'top LB by corr',
        'most profitable 3 month',
        'most profitable 1 year',
        'model from specific users',
    )
    # Checkboxes are created in label order, so the tuple order matches it.
    return tuple(sidebar.checkbox(label, value=True) for label in labels)
30
+
31
def model_data_picker(values = None):
    """Draw the model-group checkboxes in the sidebar and collect the ticked groups.

    Args:
        values: six booleans giving the initial checkbox states. Only
            indices 2..5 are read; indices 0 and 1 belong to the disabled
            'top LB by corr' / 'most profitable 3 month' options.
            Defaults to all True.

    Returns:
        dict mapping a group key ('top_1y', 'benchmark', 'iaai', 'yx',
        'mcv') to the model list taken from ``project_config``.
    """
    if values is None:
        values = [True] * 6

    sidebar = st.sidebar
    sidebar.subheader('Model Data Picker')
    show_top_1y = sidebar.checkbox('most profitable 1 year', value=values[2])
    show_special = sidebar.checkbox('model from specific users', value=values[3])
    show_benchmark = sidebar.checkbox('benchmark models', value=values[4])
    show_default = sidebar.checkbox('default models', value=values[5])

    picked = {}
    if show_top_1y:
        picked['top_1y'] = project_config.TP1Y
    if show_benchmark:
        picked['benchmark'] = project_config.BENCHMARK_MODELS
    if show_special:
        # Other user groups (arbitrage / mm / restrade) are currently disabled.
        picked['iaai'] = project_config.IAAI_MODELS
    if show_default:
        picked['yx'] = project_config.MODEL_NAMES + project_config.NEW_MODEL_NAMES
        picked['mcv'] = project_config.MCV_MODELS + project_config.MCV_NEW_MODELS
    return picked
60
+
61
def model_fast_picker(models):
    """Let the user type a comma-separated list of model names in the sidebar.

    Args:
        models: collection of known model names; unknown names are dropped.

    Returns:
        List of recognised names in typed order, without duplicates.
    """
    text_content = '''
fast model picker by CSV string.
example: "model1, model2, model3"
'''
    raw = st.sidebar.text_area(text_content)
    if len(raw) == 0:
        return []
    stripped = (piece.strip() for piece in raw.split(','))
    # dict.fromkeys keeps first-seen order while removing repeats.
    return list(dict.fromkeys(name for name in stripped if name in models))
75
+
76
+
77
+
78
def generate_round_table(data, row_cts, c, r, sortcol='corrmmc'):
    """Render one round-result table in grid cell ``row_cts[c]``.

    A slider picks the round; its default is ``latest_round - (2*r + c)`` so
    the cells of the 2x2 grid start on four consecutive rounds.

    Args:
        data: DataFrame with at least 'roundNumber', 'date' and ``sortcol``.
        row_cts: sequence of Streamlit column containers for the current row.
        c: column index inside ``row_cts``.
        r: row index of the grid.
        sortcol: column used to sort the table, descending.
    """
    latest_round = int(data['roundNumber'].max())
    earliest_round = int(data['roundNumber'].min())
    suggest_round = int(latest_round - (2*r+c))
    select_round = row_cts[c].slider('select a round', earliest_round, latest_round, suggest_round, 1)
    round_data = (
        data[data['roundNumber'] == select_round]
        .sort_values(by=sortcol, ascending=False)
        .reset_index(drop=True)
    )
    # The chosen round may have no rows (gap in the data, or a suggested
    # round older than the earliest one); indexing values[0] would raise.
    if len(round_data) == 0:
        row_cts[c].info('no data was found for the selected round')
        return
    latest_date = round_data['date'].values[0]
    row_cts[c].write(f'round: {select_round}, date: {latest_date}')
    row_cts[c].dataframe(round_data.drop(['roundNumber', 'date'], axis=1), height=max_table_height-100)
91
+
92
+
93
+
94
+
95
+
96
+
97
def generate_dailyscore_metrics(data, row_cts, c, r):
    """Render a daily-score sharpe table for one metric in grid cell ``row_cts[c]``.

    The metric select box defaults to option ``2*r + c`` so each grid cell
    starts on a different metric. The table is sorted by 'sos', descending.
    """
    cell = row_cts[c]
    select_metric = cell.selectbox(
        "",
        list(id_metric_opt.keys()),
        index=2*r+c,
        format_func=lambda x: id_metric_opt[x],
    )
    round_numbers = data['roundNumber']
    latest_round = int(round_numbers.max())
    earliest_round = int(round_numbers.min())
    score = id_metric_score_dic[select_metric]
    sharpe_table = project_utils.calculate_rounddailysharpe_dashboard(
        data, latest_round, earliest_round, score
    )
    sharpe_table = sharpe_table.sort_values(by='sos', ascending=False)
    cell.dataframe(sharpe_table, height=max_table_height-100)
106
+
107
def get_roundmetric_data(data):
    """Aggregate per-round scores into per-model summary statistics.

    Score columns get sum / mean / count / sharpe; percentile columns get
    the mean only. The aggregation itself is delegated to
    ``project_utils.groupby_agg_execution``.

    Returns:
        The 'model' result of the aggregation, with the first six characters
        (the 'model_' prefix) stripped from every column except 'model'.
    """
    score_cols = ['corr', 'mmc', 'corrmmc', 'corr2mmc']
    score_stats = ['sum', 'mean', 'count', {'sharpe': project_utils.get_array_sharpe}]
    pct_cols = ['corr_pct', 'mmc_pct', 'cmavg_pct', 'c2mavg_pct']
    pct_stats = ['mean']

    recipe = [
        [['model'], score_cols, score_stats],
        [['model'], pct_cols, pct_stats],
    ]
    res = project_utils.groupby_agg_execution(recipe, data)['model']

    prefix_len = 6  # length of the 'model_' prefix on aggregated column names
    rename_dict = {
        name: name[prefix_len:]
        for name in res.columns.tolist()
        if name != 'model'
    }
    res.rename(columns=rename_dict, inplace=True)
    return res
126
+
127
+
128
def generate_round_metrics(data, row_cts, c, r):
    """Render the aggregated round metrics for one metric family in ``row_cts[c]``.

    The select box defaults to option ``2*r + c``. Only the 'model' column
    and the columns belonging to the chosen metric are shown.
    """
    select_metric = row_cts[c].selectbox(
        "",
        list(roundmetric_opt.keys()),
        index=2*r+c,
        format_func=lambda x: roundmetric_opt[x],
    )

    def belongs_to_metric(col):
        # The combined metrics also pull in their averaged-percentile columns.
        if select_metric == 'corrmmc':
            return (f'{select_metric}_' in col) or ('cmavg_' in col)
        if select_metric == 'corr2mmc':
            return (f'{select_metric}_' in col) or ('c2mavg_' in col)
        # Plain metrics must not pick up the combined 'corrmmc'/'corr2mmc' columns.
        return (f'{select_metric}_' in col) and ('corrmmc' not in col) and ('corr2mmc' not in col)

    cols = ['model'] + [col for col in data.columns.tolist() if belongs_to_metric(col)]

    sort_col = 'cmavg_pct_mean' if select_metric == 'pct' else select_metric + '_sharpe'
    view_data = data[cols].sort_values(by=sort_col, ascending=False)
    row_cts[c].dataframe(view_data)
151
+
152
+
153
def dailyscore_chart(data, row_cts, c, r, select_metric):
    """Plot the daily scores of one round as a line chart in ``row_cts[c]``.

    A slider picks the round (default ``latest_round - (2*r + c)``); when the
    chosen round has no rows an info message is shown instead of a chart.
    """
    cell = row_cts[c]
    round_numbers = data['roundNumber']
    latest_round = int(round_numbers.max())
    earliest_round = int(round_numbers.min())
    suggest_round = int(latest_round - (2*r+c))
    select_round = cell.slider('select a round', earliest_round, latest_round, suggest_round, 1)

    round_rows = data[data['roundNumber'] == select_round]
    if len(round_rows) == 0:
        cell.info('no data was found for the selected round')
        return

    fig = chart_pxline(round_rows, 'date', y=select_metric, color='model', hover_data=list(histtrend_opt.keys()))
    cell.plotly_chart(fig, use_container_width=True)
165
+
166
+
167
def generate_live_round_stake(data, row_cts, c, r):
    """Show stake and payout totals plus the per-model table for one live round.

    The round shown is ``latest_round - (2*r + c)``. Nothing is rendered when
    that round has no rows.
    """
    latest_round = int(data['roundNumber'].max())
    select_round = int(latest_round - (2*r+c))
    chosen = data[data['roundNumber'] == select_round].reset_index(drop=True)
    if len(chosen) == 0:
        return

    payout_sum = chosen['payout'].sum().round(3)
    stake_sum = chosen['stake'].sum().round(3)
    payout_color = 'green' if payout_sum >= 0 else 'red'

    space = ' '*5
    content_str = f'#### Round: {select_round}{space}Stake: {stake_sum}{space}Payout: <span style="color:{payout_color}">{payout_sum}</span> NMR'
    row_cts[c].markdown(content_str, unsafe_allow_html=True)

    table = chosen.drop(['roundNumber'], axis=1).sort_values(by='payout', ascending=False)
    row_cts[c].dataframe(table, height=max_table_height-100)
184
+
185
+
186
+
187
def round_view(data, select_perview, select_metric=None):
    """Lay out a 2x2 grid and fill every cell with the renderer for ``select_perview``.

    Args:
        data: DataFrame handed unchanged to the cell renderer.
        select_perview: one of 'round_result', 'dailyscore_metric',
            'round_metric', 'dailyscore_chart', 'live_round_stake'; any
            other value leaves the cells empty.
        select_metric: only used by the 'dailyscore_chart' view.
    """
    num_rows, num_cols = 2, 2
    four_arg_renderers = {
        'round_result': generate_round_table,
        'dailyscore_metric': generate_dailyscore_metrics,
        'round_metric': generate_round_metrics,
        'live_round_stake': generate_live_round_stake,
    }
    for r in range(num_rows):
        row_cts = st.columns(num_cols)
        for c in range(num_cols):
            if select_perview == 'dailyscore_chart':
                # The chart renderer is the only one that needs the metric.
                dailyscore_chart(data, row_cts, c, r, select_metric)
            elif select_perview in four_arg_renderers:
                four_arg_renderers[select_perview](data, row_cts, c, r)
203
+
204
+
205
def performance_overview():
    """Page: tabular performance overview of the selected model groups.

    Sidebar widgets choose the table view and the model groups. The stored
    dashboard result file is then loaded, filtered, and passed to
    ``round_view``. An info message is shown when the file is missing or no
    model group is ticked.
    """
    st.sidebar.subheader('Choose a Table View')
    select_perview = st.sidebar.selectbox("", list(tbl_opt.keys()), index=0, format_func=lambda x: tbl_opt[x])
    model_dict = model_data_picker(values=[False, False, False, False, True, True])
    models = []
    for group_models in model_dict.values():
        models += group_models

    result_file = project_config.DASHBOARD_MODEL_RESULT_FILE
    if not (os.path.isfile(result_file) and len(models) > 0):
        st.info('model result data file missing, or no model is selected')
        return

    data = project_utils.load_data(result_file)

    def pick_round_range(frame):
        # Sidebar range slider over the rounds present in `frame`.
        st.sidebar.subheader('Select Round Data')
        latest_round = int(frame['roundNumber'].max())
        earliest_round = int(frame['roundNumber'].min())
        # 263 is the hard-coded default start when more than 10 rounds exist.
        suggest_round = 263 if (latest_round - earliest_round) > 10 else earliest_round
        return st.sidebar.slider('select a round', earliest_round, latest_round, (suggest_round, latest_round - 1), 1)

    if select_perview == 'round_result':
        data = data.drop(['fnc', 'fnc_pct'], axis=1)
        data = data.drop_duplicates(['model', 'roundNumber'], keep='first')
        data = data[data['model'].isin(models)].reset_index(drop=True)
        round_view(data, select_perview)

    elif select_perview == 'dailyscore_metric':
        first_round, last_round = pick_round_range(data)
        data = data[(data['model'].isin(models))]
        data = data[(data['roundNumber'] >= first_round) & (data['roundNumber'] <= last_round)]
        st.write(f'Key columns: sos - Sharpe raito of daily score sharpe, avg_sharpe - Average of daily score sharpe')
        round_view(data, select_perview)

    elif select_perview == 'round_metric':
        first_round, last_round = pick_round_range(data)

        data = data.drop(['fnc', 'fnc_pct'], axis=1)
        data = data.drop_duplicates(['model', 'roundNumber'], keep='first')
        data = data[(data['roundNumber'] >= first_round) & (data['roundNumber'] <= last_round)]
        data = data[data['model'].isin(models)].reset_index(drop=True)

        roundmetrics_data = get_roundmetric_data(data)
        min_count = int(roundmetrics_data['count'].min())
        max_count = int(roundmetrics_data['count'].max())
        # The slider is only offered when the counts actually differ.
        if min_count < max_count:
            select_minround = st.sidebar.slider('miminum number of rounds', min_count, max_count, min_count, 1)
        else:
            select_minround = min_count
        enough_rounds = roundmetrics_data['count'] >= select_minround
        roundmetrics_data = roundmetrics_data[enough_rounds].reset_index(drop=True)
        round_view(roundmetrics_data, select_perview)
265
+
266
+
267
+
268
+
269
def data_operation():
    """Page: download daily submission performance for the tracked models and persist it.

    Widgets choose the model groups, the round range and what to refresh.
    When the download button is pressed the per-model results are fetched
    through ``numerapi_utils``, combined, enriched with derived columns and
    written to ``project_config.DASHBOARD_MODEL_RESULT_FILE``.

    Returns:
        None.
    """
    latest_round = project_utils.latest_round
    models = []
    model_dict = model_data_picker()
    for group_models in model_dict.values():
        models += group_models
    # NOTE(review): the default lower bound (182) is below the slider
    # minimum (200) - confirm this is intended.
    suggest_min_round = 182  # latest_round-50
    min_round, max_round = st.slider('select tournament rounds', 200, latest_round, (suggest_min_round, latest_round), 1)
    roundlist = list(range(max_round, min_round - 1, -1))
    download = st.button('download data of tracked models')
    st.sidebar.subheader('configuration')
    show_info = st.sidebar.checkbox('show background data', value=False)
    update_numeraiti_data = st.sidebar.checkbox('update numerati data', value=True)
    update_model_data = st.sidebar.checkbox('update model data', value=True)

    model_df = []
    if download and len(models) > 0:
        if update_numeraiti_data:
            if show_info:
                st.info('updating numerati data')
            project_utils.update_numerati_data()

        if update_model_data:
            model_dfs = []
            my_bar = st.progress(0.0)
            my_bar.progress(0.0)
            percent_complete = 0.0
            for i, model_name in enumerate(models):
                message = ''
                try:
                    model_res = numerapi_utils.daily_submissions_performances(model_name)
                    if len(model_res) > 0:
                        cols = ['model'] + list(model_res[0].keys())
                        # A loop-local name keeps a half-built frame from a
                        # failing model out of `model_df`.
                        single_df = pd.DataFrame(model_res)
                        single_df['model'] = model_name
                        model_dfs.append(single_df[cols])
                    else:
                        message = f'no result found for model {model_name}'
                except Exception:
                    # Best effort: one failing model must not abort the run.
                    except_msg = traceback.format_exc()
                    message = f'error while getting result for {model_name}: {except_msg}'
                if show_info and len(message) > 0:
                    st.info(message)
                percent_complete += 1/len(models)
                if i == len(models)-1:
                    # Pin the last step to 1.0 regardless of float drift.
                    percent_complete = 1.0
                time.sleep(0.1)
                my_bar.progress(percent_complete)

            if model_dfs:
                model_df = pd.concat(model_dfs, axis=0).sort_values(by=['roundNumber', 'date'], ascending=False).reset_index(drop=True)
                model_df = model_df[model_df['roundNumber'].isin(roundlist)].reset_index(drop=True)
                model_df['date'] = model_df['date'].dt.date
                model_df['group'] = model_df['model'].apply(lambda x: project_utils.get_model_group(x))
            else:
                # pd.concat raises ValueError on an empty list; keep the
                # empty placeholder so nothing is persisted below.
                st.warning('no model data could be downloaded')

    prjreload = st.sidebar.button('reload config')
    if prjreload:
        project_utils.reload_project()
    if len(model_df) > 0:
        rename_dict = {'corrPercentile': 'corr_pct', 'correlation':'corr', 'correlationWithMetamodel':'corr_meta', 'mmcPercentile':'mmc_pct', 'fncPercentile':'fnc_pct'}
        model_df.rename(columns=rename_dict, inplace=True)
        model_df['corrmmc'] = model_df['corr'] + model_df['mmc']
        model_df['corr2mmc'] = model_df['corr'] + 2*model_df['mmc']
        model_df['cmavg_pct'] = (model_df['corr_pct'] + model_df['mmc_pct'])/2
        model_df['c2mavg_pct'] = (model_df['corr_pct'] + 2*model_df['mmc_pct'])/3
        ord_cols = ['model','corr', 'corr_pct', 'mmc', 'mmc_pct', 'corrmmc', 'cmavg_pct', 'corr_meta','group', 'corr2mmc','c2mavg_pct', 'date', 'roundNumber', 'fnc', 'fnc_pct']
        model_df = model_df[ord_cols]
        project_utils.pickle_data(project_config.DASHBOARD_MODEL_RESULT_FILE, model_df)
    if show_info:
        st.text('list of models being tracked')
        st.write(model_dict)
        try:
            st.write(model_df.head(5))
        except Exception:
            # model_df is still the empty list placeholder (no .head()).
            st.write('model data was not retrieved')
    st.sidebar.subheader('data info')
    dbd_tstr, nmtd_str = project_utils.get_dashboard_data_status()
    st.sidebar.text(f'dashboard timestamp: {dbd_tstr}')
    st.sidebar.text(f'numerati timestamp: {nmtd_str}')
    return None
354
+
355
+
356
def chart_pxline(data, x, y, color, hover_data=None, x_range=None):
    """Build a dark-themed plotly express line chart.

    Args:
        data: DataFrame to plot.
        x, y, color: column names forwarded to ``px.line``.
        hover_data: optional extra columns shown on hover.
        x_range: optional ``[min, max]`` range for the x axis.

    Returns:
        The configured plotly figure.
    """
    figure = px.line(data, x=x, y=y, color=color, hover_data=hover_data)
    chart_margin = dict(l=0, r=10, t=20, b=20)
    figure.update_layout(
        plot_bgcolor='black',
        paper_bgcolor='black',
        font_color='white',
        height=max_height,
        margin=chart_margin,
    )
    figure.update_xaxes(showgrid=False, range=x_range)
    figure.update_yaxes(gridcolor='grey')
    return figure
362
+
363
+
364
def roundresult_chart(data, model_selection):
    """Plot a per-round metric trend for the selected models, then the daily-score grid.

    Args:
        data: dashboard result DataFrame ('model', 'roundNumber', 'date' and
            the metric columns).
        model_selection: model names to plot.
    """
    round_data = data[data['model'].isin(model_selection)].drop_duplicates(['model', 'roundNumber'], keep='first').reset_index(drop=True)
    # Without rows min()/max() are NaN and int() would raise. This is also
    # the only situation in which there is genuinely nothing to plot.
    if len(round_data) == 0:
        st.text(f'No data available for models: {model_selection}')
        return

    min_round = int(round_data['roundNumber'].min())
    max_round = int(round_data['roundNumber'].max())
    suggest_min_round = max_round - 20
    if min_round == max_round:
        # A single round: widen the slider range instead of collapsing it.
        min_round = max_round - 20

    min_selectround, max_selectround = st.slider('select plotting round range', min_round, max_round,
                                                 (suggest_min_round, max_round), 1)

    select_metric = st.selectbox('Choose a metric', list(histtrend_opt.keys()), index=0,
                                 format_func=lambda x: histtrend_opt[x])
    round_range = [min_selectround, max_selectround]
    round_list = list(range(min_selectround, max_selectround + 1))
    round_data = round_data[round_data['roundNumber'].isin(round_list)]

    # Per-model mean of the chosen metric; it doubles as the legend label.
    mean_df = round_data.groupby(['model'])[select_metric].agg('mean').reset_index()
    mean_df['model avg.'] = mean_df['model'] + ': ' + mean_df[select_metric].round(5).astype(str)
    mean_df['mean'] = mean_df[select_metric]
    merge_cols = ['model', 'model avg.', 'mean']
    round_data = round_data.merge(right=mean_df[merge_cols], on='model', how='left').sort_values(by=['mean', 'model', 'roundNumber'], ascending=False)
    fig = chart_pxline(round_data, 'roundNumber', y=select_metric, color='model avg.', hover_data=list(histtrend_opt.keys())+['date'], x_range=round_range)
    st.plotly_chart(fig, use_container_width=True)

    # The daily view keeps every daily row (no de-duplication per round).
    dailyscore_data = data[(data['model'].isin(model_selection)) & data['roundNumber'].isin(round_list)].reset_index(drop=True)
    dailyscore_data = dailyscore_data.merge(right=mean_df[merge_cols], on='model', how='left').sort_values(
        by=['mean', 'model', 'roundNumber'], ascending=False)
    round_view(dailyscore_data, 'dailyscore_chart', select_metric)
400
+
401
+
402
+
403
+
404
def histtrend():
    """Page: historic trend charts for a user-chosen set of models.

    Sidebar widgets choose the model groups and the individual models
    (optionally pre-filled through the CSV fast picker). The stored dashboard
    result file is then charted via ``roundresult_chart``.
    """
    models = []
    model_selection = []
    model_dict = model_data_picker(values=[False, False, False, False, True, True])
    for group_models in model_dict.values():
        models += group_models
    # The former `model_dict[k] not in models` test compared a whole list
    # against name strings and never filtered anything; de-duplicate the
    # names themselves, keeping first-seen order.
    models = list(dict.fromkeys(models))

    default_models = model_fast_picker(models)
    if len(models) > 0:
        if len(default_models) == 0:
            default_models = [models[0]]
        model_selection = st.sidebar.multiselect('select models for chart', models, default=default_models)

    if os.path.isfile(project_config.DASHBOARD_MODEL_RESULT_FILE) and len(model_selection) > 0:
        data = project_utils.load_data(project_config.DASHBOARD_MODEL_RESULT_FILE)
        roundresult_chart(data, model_selection)
    else:
        if len(model_selection) == 0:
            st.info('please select some models from the dropdown list')
        else:
            st.info('model result data file missing, or no model is selected')
432
+
433
+ # st.write(models)
434
+
435
+
436
+
437
def model_evaluation():
    """Page: radar chart and tables comparing aggregated round metrics of selected models.

    Metrics are computed by ``get_roundmetric_data`` over the chosen round
    range, rescaled to the radar range through ``project_utils.rescale`` and
    drawn as one polar trace per model. The raw and rescaled tables follow.
    """
    models = []
    model_selection = []
    model_dict = model_data_picker(values=[True, True, True, True, True, True])
    mean_scale = [-0.05, 0.1]
    count_scale = [1, 50]
    sharpe_scale = [-0.2, 3]
    pct_scale = [0, 1]
    radar_scale = [0, 5]

    for group_models in model_dict.values():
        models += group_models
    # The former `model_dict[k] not in models` test compared a list against
    # name strings and never filtered anything; de-duplicate the names.
    models = list(dict.fromkeys(models))

    default_models = model_fast_picker(models)
    if len(models) > 0:
        if len(default_models) == 0:
            if 'integration_test' in models:
                # Always a list, consistent with the other branch.
                default_models = ['integration_test']
            else:
                default_models = [models[0]]

        model_selection = st.sidebar.multiselect('select models for chart', models, default=default_models)

    if os.path.isfile(project_config.DASHBOARD_MODEL_RESULT_FILE) and len(model_selection) > 0:
        data = project_utils.load_data(project_config.DASHBOARD_MODEL_RESULT_FILE)
        round_data = data[data['model'].isin(model_selection)].drop_duplicates(['model', 'roundNumber'], keep='first').reset_index(drop=True)
        # Without rows min()/max() are NaN and int() would raise.
        if len(round_data) == 0:
            st.info('no data was found for the selected models')
            return

        min_round = int(round_data['roundNumber'].min())
        max_round = int(round_data['roundNumber'].max())
        suggest_min_round = max_round - 20
        if min_round == max_round:
            min_round = max_round - 20

        min_selectround, max_selectround = st.slider('select plotting round range', min_round, max_round,
                                                     (suggest_min_round, max_round), 1)
        round_list = list(range(min_selectround, max_selectround + 1))
        defaultlist = ['corr_sharpe', 'mmc_sharpe', 'corr2mmc_sharpe', 'corr_mean', 'mmc_mean', 'corr2mmc_mean', 'count']

        select_metrics = st.multiselect('Metric Selection', list(model_eval_opt.keys()),
                                        format_func=lambda x: model_eval_opt[x], default=defaultlist)

        use_dailymetrics = ('id_corr_sharpe' in select_metrics) or ('id_mmc_sharpe' in select_metrics) or ('id_corrmmc_sharpe' in select_metrics)
        if use_dailymetrics:
            st.write('use daily metrics')

        round_data = round_data[round_data['roundNumber'].isin(round_list)].reset_index(drop=True)
        roundmetric_df = get_roundmetric_data(round_data).sort_values(by='corrmmc_sharpe', ascending=False).reset_index(drop=True)

        radarmetric_df = roundmetric_df.copy(deep=True)
        # Checked in this order with the last match winning, so e.g. a
        # '*_pct_mean' column uses the pct scale.
        scale_by_keyword = (
            ('mean', mean_scale),
            ('sharpe', sharpe_scale),
            ('pct', pct_scale),
            ('count', count_scale),
        )
        for col in select_metrics:
            use_scale = None
            for keyword, scale in scale_by_keyword:
                if keyword in col:
                    use_scale = scale
            if use_scale is None:
                # No known scale: leave the column untouched rather than
                # reusing the previous metric's scale.
                continue
            radarmetric_df[col] = radarmetric_df[col].apply(
                lambda x, scale=use_scale: project_utils.rescale(x, scale, radar_scale))
        select_metrics_name = [model_eval_opt[i] for i in select_metrics]
        radarmetric_df.rename(columns=model_eval_opt, inplace=True)
        roundmetric_df.rename(columns=model_eval_opt, inplace=True)

        fig = go.Figure()
        for i in range(len(radarmetric_df)):
            fig.add_trace(go.Scatterpolar(
                r=radarmetric_df.loc[i, select_metrics_name].values,
                theta=select_metrics_name,
                fill='toself',
                name=radarmetric_df['model'].values[i]
            ))

        fig.update_polars(
            radialaxis=dict(visible=True, autorange=False,
                            range=[0, 5])
        )

        fig.update_layout(plot_bgcolor='black', paper_bgcolor='black', font_color='aliceblue',
                          height=max_height+100,
                          margin=dict(l=0, r=10, t=20, b=20), showlegend=True)

        st.plotly_chart(fig, use_container_width=True)
        st.text('Calculated Metrics')
        st.dataframe(roundmetric_df[['model'] + select_metrics_name], height=max_table_height)
        st.text('Rescaled Metrics on Chart')
        st.dataframe(radarmetric_df[['model'] + select_metrics_name], height=max_table_height)
524
+
525
+ # st.write(select_metrics)
526
+
527
+
528
def get_portfolio_overview(models, onlylatest=True):
    """Fetch stake / P&L history for ``models`` and return it as one DataFrame.

    A Streamlit progress bar is advanced once per model. Models whose
    history cannot be fetched are skipped.

    Args:
        models: sequence of model names.
        onlylatest: when True keep only the first history row of each model
            (``.loc[0:0]``) and sort the result by 'floating_pl', descending.

    Returns:
        DataFrame with the columns 'model', 'date', 'current_stake',
        'floating_stake', 'floating_pl', 'realised_pl', 'profitability', or
        an empty list when nothing could be combined (kept for existing
        callers).
    """
    res_df = []
    my_bar = st.progress(0.0)
    my_bar.progress(0.0)
    percent_complete = 0.0
    for i, m in enumerate(models):
        try:
            mdf = numerapi_utils.get_model_history_v3(m)
            if onlylatest:
                mdf = mdf.loc[0:0]
            res_df.append(mdf)
        except Exception:
            # Best effort per model. `except Exception` (not a bare except)
            # lets BaseException subclasses such as KeyboardInterrupt
            # propagate. NOTE(review): Streamlit's stop/rerun control
            # exceptions are presumably BaseException-based too - confirm.
            pass
        percent_complete += 1 / len(models)
        if i == len(models) - 1:
            # Pin the last step to 1.0 regardless of float drift.
            percent_complete = 1.0
        time.sleep(0.1)
        my_bar.progress(percent_complete)
    try:
        res_df = pd.concat(res_df, axis=0)
        res_df['profitability'] = res_df['realised_pl']/(res_df['current_stake']-res_df['realised_pl'])
        cols = ['model', 'date', 'current_stake', 'floating_stake', 'floating_pl', 'realised_pl', 'profitability']

        if onlylatest:
            res_df = res_df.sort_values(by='floating_pl', ascending=False).reset_index(drop=True)
        return res_df[cols]
    except Exception:
        # e.g. pd.concat on an empty list, or missing columns.
        return []
564
+
565
+
566
def get_stake_type(corr, mmc):
    """Return a human-readable label for a stake's payout multipliers.

    Args:
        corr: CORR multiplier (number).
        mmc: MMC multiplier (number).

    Returns:
        '<corr>xCORR <mmc>xMMC' when ``mmc`` is positive, otherwise the
        fixed label '1xCORR'. Whole-number multipliers print without a
        decimal part ('2xMMC'); fractional ones are kept ('0.5xMMC')
        instead of being truncated towards zero by ``int()``.
    """
    if mmc > 0:
        # ':g' drops a trailing '.0' but keeps real fractions.
        return f'{float(corr):g}xCORR {float(mmc):g}xMMC'
    return '1xCORR'
572
+
573
+
574
@st.cache(suppress_st_warning=True)
def get_stake_by_liverounds(models):
    """Collect stake and pending payout of ``models`` for the five most recent rounds.

    Every (round, model) pair is queried through
    ``numerapi_utils.get_round_model_performance``; pairs that fail are
    skipped. A Streamlit progress bar is advanced once per pair.

    Returns:
        DataFrame with the columns 'model', 'roundNumber', 'stake',
        'payout', 'stake_type', 'corr', 'mmc'. It is empty (same columns)
        when no pair returned data.
    """
    col_ord = ['model', 'roundNumber', 'stake', 'payout', 'stake_type', 'corr', 'mmc']
    latest_round_id = int(project_utils.get_latest_round_id())
    roundlist = list(range(latest_round_id, latest_round_id - 5, -1))
    res = []
    my_bar = st.progress(0.0)
    my_bar.progress(0.0)
    total_steps = len(models) * len(roundlist)
    step = 0
    for r in roundlist:
        for m in models:
            step += 1
            try:
                res.append(numerapi_utils.get_round_model_performance(r, m))
            except Exception:
                # Best effort: a model without a result in this round is
                # skipped. `except Exception` (not a bare except) lets
                # BaseException subclasses such as KeyboardInterrupt through.
                pass
            time.sleep(0.1)
            # min() guards against float rounding pushing the value past 1.0.
            my_bar.progress(min(step / total_steps, 1.0))

    if not res:
        # Nothing fetched: the column lookups below would raise KeyError.
        return pd.DataFrame(columns=col_ord)

    res_df = pd.DataFrame.from_dict(res).fillna(0)
    res_df['payoutPending'] = res_df['payoutPending'].astype(np.float64)
    res_df['selectedStakeValue'] = res_df['selectedStakeValue'].astype(np.float64)
    res_df['stake_type'] = res_df.apply(lambda x: get_stake_type(x['corrMultiplier'], x['mmcMultiplier']), axis=1)
    rename_dict = {'selectedStakeValue': 'stake', 'payoutPending': 'payout', 'correlation': 'corr'}
    res_df = res_df.rename(columns=rename_dict)
    return res_df[col_ord]
606
+
607
+
608
+
609
def get_stake_graph(data):
    """Plot total stake, floating stake and realised P/L summed per date.

    A select box chooses the time span ('1month' = 5 weeks, '3month' = 13
    weeks, '1year' = 52 weeks; any other option shows everything). Stakes
    use the primary y axis, realised P/L the secondary one.
    """
    value_cols = ['current_stake', 'floating_stake', 'floating_pl', 'realised_pl']
    recipe = [[['date'], value_cols, ['sum']]]

    select_opt = st.selectbox('Select Time Span', list(stakeoverview_plot_opt.keys()), index=1, format_func=lambda x: stakeoverview_plot_opt[x])

    res = project_utils.groupby_agg_execution(recipe, data)['date']

    # Cut-off dates are all computed up front, relative to the newest date.
    newest = res['date'].max()
    cutoffs = {
        '1month': newest - datetime.timedelta(weeks=5),
        '3month': newest - datetime.timedelta(weeks=13),
        '1year': newest - datetime.timedelta(weeks=52),
    }

    res = res.rename(columns={
        'date_current_stake_sum': 'total_stake',
        'date_floating_stake_sum': 'floating_stake',
        'date_floating_pl_sum': 'floating_pl',
        'date_realised_pl_sum': 'realised_pl',
    })
    if select_opt in cutoffs:
        res = res[res['date'] > cutoffs[select_opt]]

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    # (column, drawn on the secondary axis?) in trace order
    traces = (
        ('floating_stake', False),
        ('total_stake', False),
        ('realised_pl', True),
    )
    for column, on_secondary in traces:
        fig.add_trace(go.Scatter(x=res['date'], y=res[column], name=column), secondary_y=on_secondary)

    fig.update_layout(plot_bgcolor='black', paper_bgcolor='black', font_color='white')
    fig.update_xaxes(showgrid=False, range=None, nticks=30)
    fig.update_yaxes(gridcolor='grey', title_text="total stake/floating stake/realised PL", secondary_y=False)
    fig.update_yaxes(showgrid=False, title_text="realised PL", zeroline=False, secondary_y=True)
    st.plotly_chart(fig, use_container_width=True)
647
+
648
+ #
649
+ # def live_round_stakeview(data):
650
+ # models = data
651
+ # latest_round_id = int(project_utils.get_latest_round_id())
652
+ # roundlist = [i for i in range(latest_round_id, latest_round_id-4, -1]
653
+
654
+
655
def check_session_state(key, data, init=False):
    """Keep the two portfolio selections in ``st.session_state`` and return the stored one.

    Only the keys 'portfolio_left' and 'portfolio_right' are handled;
    'last_opt' remembers which of the two was changed last.

    Args:
        key: session-state key of the selection.
        data: newly selected items, or None to only read the stored value.
        init: True while the page is initialising. In that case neither
            'last_opt' nor an already stored selection is modified.

    Returns:
        The stored selection for ``key``, or None for any other key.
    """
    portsel_list = ['portfolio_left', 'portfolio_right']
    if key not in portsel_list:
        return None

    # `not init` replaces the former `~init`: bitwise inversion of a bool
    # yields -1/-2 (both truthy), and in the second test it was absorbed by
    # operator precedence (`len(data) > (0 & ~init)`), so the flag was
    # silently ignored there.
    if ('last_opt' not in st.session_state) and (not init):
        st.session_state['last_opt'] = key

    if key not in st.session_state:
        st.session_state[key] = data
    else:
        if st.session_state[key] is None:
            st.session_state[key] = []
        if data is None:
            return st.session_state[key]
        elif (set(data) != set(st.session_state[key])) and (len(data) > 0) and (not init):
            if st.session_state['last_opt'] == key:
                st.session_state[key] = data
            else:
                # Another selector was used last: only fill an empty slot,
                # then record that this selector is now the active one.
                if len(st.session_state[key]) == 0:
                    st.session_state[key] = data
                st.session_state['last_opt'] = key

    return st.session_state[key]
683
+
684
+
685
def stake_overview():
    """Render the "Stake Overview" page.

    Lets the user pick models in the sidebar, loads (or reuses from
    ``st.session_state``) their stake history, then shows the stake graph,
    the per-model table, the stake / profit-and-loss totals and, on request,
    the breakdown by live rounds.
    """
    model_dict = model_data_picker(values=[True, True, True, True, True, True])
    # De-duplicate while keeping picker order. The previous test
    # ``model_dict[k] not in models`` compared a whole list against the string
    # elements, so it never filtered anything.
    models = list(dict.fromkeys(
        name for group_models in model_dict.values() for name in group_models))

    default_models = model_fast_picker(models)
    model_selection = []
    if len(models) > 0:
        model_selection = st.sidebar.multiselect('select models for chart', models, default=default_models)
    if len(model_selection) == 0:
        return

    # The cached frame is reused only when it was built for exactly this selection.
    use_cache = ('stake_df' in st.session_state
                 and set(model_selection) == st.session_state['stake_overview_models'])
    if use_cache:
        ovdf = st.session_state['stake_df']
    else:
        ovdf = get_portfolio_overview(model_selection, onlylatest=False)
        if ovdf is None:
            # get_portfolio_overview returns None when no model could be loaded.
            st.warning('no stake information is available for the selected models')
            return
        print(ovdf.shape)
        st.session_state['stake_df'] = ovdf
        st.session_state['stake_overview_models'] = set(ovdf['model'].unique().tolist())

    chartdf = ovdf.copy(deep=True)
    ovdf = ovdf.drop_duplicates('model', keep='first')
    ovdf = ovdf.sort_values(by='floating_pl', ascending=False).reset_index(drop=True)
    if len(ovdf) == 0:
        return

    numerai_date = str(ovdf['date'].values[0])[0:10]
    ovdf.drop(['date'], axis=1, inplace=True)

    # Placeholders are created first so the totals appear above the graph.
    stake_cts = st.columns(2)
    pl_cts = st.columns(2)
    date_label = st.empty()
    get_stake_graph(chartdf)
    ovdf_exp = st.expander('', expanded=True)
    with ovdf_exp:
        st.dataframe(ovdf, height=max_table_height)

    total_current_stake = round(ovdf['current_stake'].sum(), 3)
    total_floating_stake = round(ovdf['floating_stake'].sum(), 3)
    rpl = round(ovdf['realised_pl'].sum(), 3)
    fpl = round(ovdf['floating_pl'].sum(), 3)
    current_stake_str = f'### Stake Balance: {total_current_stake:0.3f} NMR'
    float_stake_str = f'### Floating Balance: {total_floating_stake:0.3f} NMR'
    real_pl_color = 'green' if rpl >= 0 else 'red'
    float_pl_color = 'green' if fpl >= 0 else 'red'
    real_pl_str = f'### Realised P/L: <span style="color:{real_pl_color}">{rpl}</span> NMR'
    float_pl_str = f'### Floating P/L: <span style="color:{float_pl_color}">{fpl}</span> NMR'
    stake_cts[0].markdown(current_stake_str, unsafe_allow_html=True)
    stake_cts[1].markdown(float_stake_str, unsafe_allow_html=True)
    pl_cts[0].markdown(real_pl_str, unsafe_allow_html=True)
    pl_cts[1].markdown(float_pl_str, unsafe_allow_html=True)
    date_label.subheader(f'Date: {numerai_date}')

    if st.button('show breakdown by live rounds'):
        liveround_exp = st.expander('', expanded=True)
        with liveround_exp:
            stake_models = ovdf['model'].tolist()
            liveround_stake_df = get_stake_by_liverounds(stake_models)
            round_view(liveround_stake_df, 'live_round_stake')
752
+
753
+
754
def set_portolio_control(ct, models, data):
    """Write the portfolio models that are present in ``data`` to ``ct``.

    Args:
        ct: object exposing ``write`` (a Streamlit column in this file).
        models: candidate model names, in display order.
        data: DataFrame with a ``model`` column.
    """
    available = data['model'].unique().tolist()
    shortlisted = []
    for name in models:
        if name in available:
            shortlisted.append(name)
    ct.write(shortlisted)
758
+
759
+
760
+ # def portfolio_model_selector(models):
761
+ # st.sidebar.subheader('Portfolio Model Shortlist')
762
+ # # placeholder = st.sidebar.empty()
763
+ # text_content = '''
764
+ # fast model picker by CSV string.
765
+ # example: "model1, model2, model3"
766
+ # '''
767
+ # # port_model_exp = st.sidebar.expander('portfolio model selector', expanded=True)
768
+ # # with port_model_exp:
769
+ # # text = placeholder.text_input(label=text_content, key='1')
770
+ # text = st.sidebar.text_area(label=text_content)
771
+ # result_models = []
772
+ # if len(text)>0:
773
+ # csv_parts = text.split(',')
774
+ # for s in csv_parts:
775
+ # m = s.strip()
776
+ # if m in models:
777
+ # result_models.append(m)
778
+ # default_models = list(dict.fromkeys(result_models))
779
+ # port_model_selection = st.sidebar.multiselect('select models for portfolio shortlist', models, default=default_models)
780
+ # # selection_opt = st.sidebar.radio('select models for', list(port_model_selection_opt.keys()), index=0, format_func=lambda x: port_model_selection_opt[x])
781
+ # return port_model_selection
782
+
783
+
784
def portfolio_model_selector(models):
    """Sidebar controls for choosing the models of one portfolio.

    Args:
        models: model names that may be selected.

    Returns:
        tuple: ``(selected model names, chosen portfolio key)`` where the key
        is one of the keys of ``port_model_selection_opt``.
    """
    selection_opt = st.sidebar.radio(
        'select models for',
        list(port_model_selection_opt.keys()),
        index=0,
        format_func=lambda x: port_model_selection_opt[x],
        key='pmsel_mulsel',
    )

    text_content = '''
    fast model picker by CSV string.
    example: "model1, model2, model3"
    '''
    text = st.sidebar.text_area(label=text_content, key='pmsel_txt')

    # Keep only known model names from the CSV string, first occurrence wins.
    result_models = []
    if len(text) > 0:
        candidates = (part.strip() for part in text.split(','))
        result_models = [name for name in candidates if name in models]
    default_models = list(dict.fromkeys(result_models))

    port_model_selection = st.sidebar.multiselect(
        'select models for portfolio shortlist', models, default=default_models)
    return port_model_selection, selection_opt
807
+
808
+
809
+
810
def portfolio_mgmt():
    """Render the "Portfolio Management" page.

    Seeds the left/right portfolio lists in the session state and, when the
    dashboard result file exists and models are available, shows the round
    and metric selectors, the two portfolio columns and the round-metric
    table of the models available for selection.
    """
    model_dict = model_data_picker(values=[True, True, True, True, True, True])
    # De-duplicate while keeping picker order. The previous test
    # ``model_dict[k] not in models`` compared a whole list against the string
    # elements, so it never filtered anything.
    models = list(dict.fromkeys(
        name for group_models in model_dict.values() for name in group_models))

    port_models_left = check_session_state('portfolio_left', [], init=True)
    port_models_right = check_session_state('portfolio_right', [], init=True)

    if not (os.path.isfile(project_config.DASHBOARD_MODEL_RESULT_FILE) and len(models) > 0):
        return

    port_cts = st.columns(2)
    data = project_utils.load_data(project_config.DASHBOARD_MODEL_RESULT_FILE)
    round_data = data[data['model'].isin(models)].drop_duplicates(
        ['model', 'roundNumber'], keep='first').reset_index(drop=True)
    min_round = int(round_data['roundNumber'].min())
    max_round = int(round_data['roundNumber'].max())
    if min_round == max_round:
        min_round = max_round - 20
    # Clamp the default so it never falls below the slider minimum when fewer
    # than 20 rounds of data exist.
    suggest_min_round = max(min_round, max_round - 20)

    round_exp = st.expander('Round Selection', expanded=True)
    metric_exp = st.expander('Metric Selection', expanded=True)
    models_overview_exp = st.expander('Portfolio Model Shortlist', expanded=True)
    with round_exp:
        min_selectround, max_selectround = st.slider('', min_round, max_round,
                                                     (suggest_min_round, max_round), 1)
    round_list = list(range(min_selectround, max_selectround + 1))
    with metric_exp:
        defaultlist = ['corr_sharpe', 'mmc_sharpe', 'corr2mmc_sharpe', 'corr_mean', 'mmc_mean', 'corr2mmc_mean', 'count']
        select_metrics = st.multiselect('', list(model_eval_opt.keys()),
                                        format_func=lambda x: model_eval_opt[x], default=defaultlist)

    round_data = round_data[round_data['roundNumber'].isin(round_list)].reset_index(drop=True)
    roundmetric_df = get_roundmetric_data(round_data).sort_values(
        by='corrmmc_sharpe', ascending=False).reset_index(drop=True)
    roundmodels = roundmetric_df['model'].unique().tolist()

    # The sidebar selection only updates the portfolio chosen by the radio.
    port_models_selection, port_opt = portfolio_model_selector(roundmodels)
    if port_opt == 'left':
        port_models_left = check_session_state('portfolio_left', port_models_selection)
    elif port_opt == 'right':
        port_models_right = check_session_state('portfolio_right', port_models_selection)

    set_portolio_control(port_cts[0], port_models_left, roundmetric_df)
    set_portolio_control(port_cts[1], port_models_right, roundmetric_df)

    with models_overview_exp:
        cols = ['model'] + select_metrics
        st.subheader(f'{len(roundmetric_df)} models are available for portfolio selection')
        st.dataframe(roundmetric_df[cols], height=max_table_height)
888
+
889
+
890
+
891
def show_content():
    """Show the dashboard picker in the sidebar and render the chosen page."""
    st.sidebar.header('Dashboard Selection')
    select_app = st.sidebar.selectbox("", list(app_opt.keys()), index=3, format_func=lambda x: app_opt[x])

    # Page key -> function that renders the page.
    pages = {
        'performance_overview': performance_overview,
        'historic_trend': histtrend,
        'data_op': data_operation,
        'model_evaluation': model_evaluation,
        'stake_overview': stake_overview,
        'portfolio_mgmt': portfolio_mgmt,
    }
    render = pages.get(select_app)
    if render is not None:
        render()
906
+
907
+
908
+
909
+
910
+ # main body
911
+ # various configuration setting
912
# Dashboard pages: page key -> label shown in the sidebar selector.
app_opt = {
    'performance_overview': 'Performance Overview',
    'historic_trend': 'Historic Trend',
    'model_evaluation': 'Model Evaluation',
    'stake_overview': 'Stake Overview',
    'portfolio_mgmt': 'Portfolio_Management',
    'data_op': 'Data Operation',
}

# Table views: key -> label.
tbl_opt = {
    'round_result': 'Round Results',
    'dailyscore_metric': 'Daily Score Metrics',
    'round_metric': 'Round Metrics',
}

# Daily-score sharpe metrics: key -> label.
id_metric_opt = {
    'id_corr_sharpe': 'Daily Score corr sharpe',
    'id_mmc_sharpe': 'Daily Score mmc sharpe',
    'id_corrmmc_sharpe': 'Daily Score corrmmc sharpe',
    'id_corr2mmc_sharpe': 'Daily Score corr2mmc sharpe',
    'id_corrmmcpct_sharpe': 'Daily Score corrmmc avg pct sharpe',
    'id_corr2mmcpct_sharpe': 'Daily Score corr2mmc avg pct sharpe',
    'id_corrpct_sharpe': 'Daily Score corr pct sharpe',
    'id_mmcpct_sharpe': 'Daily Score mmc pct sharpe',
}

# Daily-score sharpe metric key -> name of the score it is computed from.
id_metric_score_dic = {
    'id_corr_sharpe': 'corr',
    'id_mmc_sharpe': 'mmc',
    'id_corrmmc_sharpe': 'corrmmc',
    'id_corr2mmc_sharpe': 'corr2mmc',
    'id_corrmmcpct_sharpe': 'cmavg_pct',
    'id_corr2mmcpct_sharpe': 'c2mavg_pct',
    'id_corrpct_sharpe': 'corr_pct',
    'id_mmcpct_sharpe': 'mmc_pct',
}

# Round-metric groups: key -> label.
roundmetric_opt = {
    'corr': 'Corr metrics',
    'mmc': 'MMC metrics',
    'corrmmc': 'CorrMMC metrics',
    'corr2mmc': 'Corr2MMC metrics',
    'pct': 'Pecentage metrics',
}

# Historic-trend scores: key -> label.
histtrend_opt = {
    'corr': 'Correlation',
    'mmc': 'MMC',
    'corrmmc': 'Correlation+MMC',
    'corr2mmc': 'Correlation+2*MMC',
    'corr_pct': 'Correlation Percentile',
    'mmc_pct': 'MMC Percentile',
    'cmavg_pct': 'Correlation+MMC Average Percentile',
    'c2mavg_pct': 'Correlation+2*MMC Average Percentile',
}

# Model-evaluation metrics: column key -> label.
model_eval_opt = {
    'corr_sharpe': 'Correlation Sharpe',
    'mmc_sharpe': 'MMC Sharpe',
    'corrmmc_sharpe': 'Correlation+MMC Sharpe',
    'corr2mmc_sharpe': 'Correlation+2*MMC Sharpe',
    'corr_mean': 'Avg. Correlation',
    'mmc_mean': 'Avg. MMC',
    'count': 'Number of Rounds',
    'corrmmc_mean': 'Avg. Correlation+MMC',
    'corr2mmc_mean': 'Avg. Correlation+2*MMC',
    'corr_pct_mean': 'Avg. Correlation Percentile',
    'mmc_pct_mean': 'Avg. MMC Percentile',
    'cmavg_pct_mean': 'Avg. Correlation+MMC Percentile',
    'c2mavg_pct_mean': 'Avg. Correlation+2*MMC Percentile',
    'id_corr_sharpe': 'Daily Score corr sharpe',
    'id_mmc_sharpe': 'Daily Score mmc sharpe',
    'id_corrmmc_sharpe': 'Daily Score corrmmc sharpe',
}

# Time windows for the stake overview plot: key -> label.
stakeoverview_plot_opt = {
    '1month': '1 Month',
    '3month': '3 Months',
    '1year': '1 Year',
    'all': 'Display all available data',
}

# Portfolio slots offered by the sidebar radio: key -> label.
port_model_selection_opt = {
    'left': 'Left Portfolio',
    'right': 'Right Portfolio',
    # 'overview':'Model Overview'
}


project_utils.reload_project()

# Global height settings read by the page functions.
height_exp = st.sidebar.expander('Plots and tables setting', expanded=False)
with height_exp:
    max_height = st.slider('Please choose the height for plots', 100, 1000, 400, 50)
    max_table_height = st.slider('Please choose the height for tables', 100, 1000, 500, 50)

st.title('Numerai Dashboard')

show_content()
project_tools/numerapi_utils.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numerapi
2
+ from numerapi import utils
3
+ from project_tools import project_config, project_utils
4
+ from typing import List, Dict
5
+ import pandas as pd
6
+ import numpy as np
7
+
8
+ napi = numerapi.NumerAPI()
9
+
10
+
11
+ # def get_round
12
+
13
+
14
def get_model_history(model):
    """Build the daily stake and profit-and-loss history of one model.

    Args:
        model: model name passed to ``napi.daily_user_performances``.

    Returns:
        pandas.DataFrame with the columns ``model``, ``date``,
        ``current_stake``, ``floating_stake``, ``payoutPending``,
        ``floating_pl`` and ``realised_pl``, in the row order returned by
        the API.
    """
    res = pd.DataFrame.from_dict(napi.daily_user_performances(model))
    for col in ('payoutPending', 'payoutSettled', 'stakeValue'):
        res[col] = res[col].astype(np.float64)

    # Both P/L series are running totals accumulated from the last row upwards.
    # The unused ``deltaRatio`` column the function used to compute was dropped:
    # it never reached the returned frame.
    res['realised_pl'] = project_utils.series_reverse_cumsum(res['payoutSettled'])
    res['floating_pl'] = project_utils.series_reverse_cumsum(res['payoutPending']) - res['realised_pl']
    res['current_stake'] = res['stakeValue'] - res['floating_pl']
    res = res.rename(columns={'stakeValue': 'floating_stake'})

    res['model'] = model
    cols = ['model', 'date', 'current_stake', 'floating_stake', 'payoutPending', 'floating_pl', 'realised_pl']
    return res[cols]
34
+
35
+
36
def get_portfolio_overview(models, onlylatest=True):
    """Collect the stake history of several models into one DataFrame.

    Models whose history cannot be loaded are reported on stdout and skipped.

    Args:
        models: iterable of model names.
        onlylatest: when ``True`` keep only the first row (label 0) of each
            model's history and sort the result by ``floating_pl``
            descending; when ``False`` keep every row.

    Returns:
        pandas.DataFrame with a fresh integer index, or ``None`` when no
        model history could be loaded.
    """
    frames = []
    for m in models:
        print(f'extracting information for model {m}')
        try:
            history = get_model_history(m)
        except Exception as err:
            # Best effort per model; unlike a bare ``except`` this lets
            # KeyboardInterrupt/SystemExit through and reports the cause.
            print(f'no information for model {m} is available ({err!r})')
            continue
        frames.append(history.loc[0:0] if onlylatest else history)

    if len(frames) == 0:
        return None

    res_df = pd.concat(frames, axis=0)
    res_df['date'] = res_df['date'].dt.date
    if onlylatest:
        return res_df.sort_values(by='floating_pl', ascending=False).reset_index(drop=True)
    return res_df.reset_index(drop=True)
57
+
58
+
59
+
60
+
61
+
62
+
63
def get_competitions(tournament=8):
    """Fetch the tournament rounds through a raw GraphQL query.

    Args:
        tournament (int, optional): tournament ID sent as the query
            variable, defaults to 8.

    Returns:
        list of dicts: one dict per round with the queried fields
        ``number``, ``resolveTime``, ``openTime``, ``resolvedGeneral`` and
        ``resolvedStaking``. ``openTime`` and ``resolveTime`` are passed
        through ``utils.parse_datetime_string``.
    """
    rounds_query = '''
        query($tournament: Int!) {
          rounds(tournament: $tournament) {
            number
            resolveTime
            openTime
            resolvedGeneral
            resolvedStaking
          }
        }
    '''
    response = napi.raw_query(rounds_query, {'tournament': tournament})
    rounds = response['data']['rounds']

    # Field name -> numerapi parser applied to it on every round.
    field_parsers = (
        ("openTime", utils.parse_datetime_string),
        ("resolveTime", utils.parse_datetime_string),
        ("prizePoolNmr", utils.parse_float_string),
        ("prizePoolUsd", utils.parse_float_string),
    )
    for entry in rounds:
        for field, parser in field_parsers:
            utils.replace(entry, field, parser)
    return rounds
121
+
122
+
123
def daily_submissions_performances(username: str) -> List[Dict]:
    """Fetch the daily performance entries of a user's submissions.

    Args:
        username (str): user name sent as the query variable.

    Returns:
        list of dicts: the ``dailySubmissionPerformances`` entries of the
        user's ``v2UserProfile`` for which at least one of ``correlation``,
        ``fnc`` or ``mmc`` is truthy. Each entry holds the queried fields
        ``date``, ``correlation``, ``corrPercentile``, ``roundNumber``,
        ``mmc``, ``mmcPercentile``, ``fnc``, ``fncPercentile`` and
        ``correlationWithMetamodel``; ``date`` is passed through
        ``utils.parse_datetime_string``.
    """
    query = """
        query($username: String!) {
          v2UserProfile(username: $username) {
            dailySubmissionPerformances {
              date
              correlation
              corrPercentile
              roundNumber
              mmc
              mmcPercentile
              fnc
              fncPercentile
              correlationWithMetamodel
            }
          }
        }
    """
    profile = napi.raw_query(query, {'username': username})['data']['v2UserProfile']

    scored = []
    for entry in profile['dailySubmissionPerformances']:
        # convert the date string to a python object
        utils.replace(entry, "date", utils.parse_datetime_string)
        # entries without any score are dropped
        if any((entry['correlation'], entry['fnc'], entry['mmc'])):
            scored.append(entry)
    return scored
176
+
177
+
178
+
179
+
180
+
181
def get_round_model_performance(roundNumber: int, model: str):
    """Fetch one model's multipliers, stake and last daily entry for a round.

    Args:
        roundNumber (int): round to query.
        model (str): model name, sent as the ``username`` query variable.

    Returns:
        dict: ``model``, ``roundNumber``, ``corrMultiplier``,
        ``mmcMultiplier`` and ``selectedStakeValue``, followed by every field
        of the last element of ``roundDailyPerformances``.
    """
    query = """
        query($roundNumber: Int!, $username: String!) {
          roundSubmissionPerformance(roundNumber: $roundNumber, username: $username) {
            corrMultiplier
            mmcMultiplier
            roundDailyPerformances{
              correlation
              mmc
              corrPercentile
              mmcPercentile
              payoutPending
            }
            selectedStakeValue
          }
        }
    """
    variables = {'roundNumber': roundNumber, 'username': model}
    data = napi.raw_query(query, variables)['data']['roundSubmissionPerformance']
    # NOTE(review): the original flags an "issue with order" here; the last
    # list element is presumably the most recent day -- confirm.
    latest_performance = data['roundDailyPerformances'][-1]

    res = {
        'model': model,
        'roundNumber': roundNumber,
        'corrMultiplier': data['corrMultiplier'],
        'mmcMultiplier': data['mmcMultiplier'],
        'selectedStakeValue': data['selectedStakeValue'],
    }
    res.update(latest_performance)
    return res
210
+
211
+
212
+
213
+
214
def get_user_profile(username: str) -> Dict:
    """Fetch the raw ``v2UserProfile`` query result of a user.

    Unlike ``daily_submissions_performances`` this performs no unwrapping,
    date parsing or filtering: the ``data`` member of the GraphQL response is
    returned as is.

    Args:
        username (str): user name sent as the query variable.

    Returns:
        dict: the ``data`` part of the response. The daily entries sit under
        ``['v2UserProfile']['dailySubmissionPerformances']`` and carry the
        queried fields ``date``, ``correlation``, ``corrPercentile``,
        ``roundNumber``, ``mmc``, ``mmcPercentile``, ``fnc``,
        ``fncPercentile`` and ``correlationWithMetamodel``.
    """
    query = """
        query($username: String!) {
          v2UserProfile(username: $username) {
            dailySubmissionPerformances {
              date
              correlation
              corrPercentile
              roundNumber
              mmc
              mmcPercentile
              fnc
              fncPercentile
              correlationWithMetamodel
            }
          }
        }
    """
    arguments = {'username': username}
    return napi.raw_query(query, arguments)['data']
267
+
268
+
269
def download_dataset(filename: str, dest_path: str = None,
                     round_num: int = None) -> None:
    """Download a dataset file to disk.

    The download URL is obtained through the ``dataset`` GraphQL query and
    fetched with ``utils.download_file`` (progress bars enabled).

    Args:
        filename (str): file to be downloaded
        dest_path (str, optional): complete path where the file should be
            stored, defaults to the same name as the source file
        round_num (int, optional): tournament round sent as the ``round``
            query variable; ``None`` is sent when omitted

    Example:
        >>> download_dataset(filename)
    """
    target_path = filename if dest_path is None else dest_path

    query = """
        query ($filename: String!
               $round: Int) {
          dataset(filename: $filename
                  round: $round)
        }
    """
    variables = {'filename': filename, "round": round_num}

    url = napi.raw_query(query, variables)['data']['dataset']
    utils.download_file(url, target_path, show_progress_bars=True)
299
+
300
+
301
+
302
+ # function using V3UserProfile
303
+
304
def model_payout_history(model):
    """Fetch the per-round payout records of a model (``v3UserProfile``).

    Args:
        model: model name sent as the ``modelName`` query variable.

    Returns:
        pandas.DataFrame built from ``roundModelPerformances`` with the
        queried fields ``payout``, ``roundNumber``, ``roundResolved``,
        ``roundResolveTime``, ``corrMultiplier``, ``mmcMultiplier`` and
        ``selectedStakeValue``; rows with a null ``payout`` are dropped and
        the index is renumbered.
    """
    query = """
        query($model: String!) {
          v3UserProfile(modelName: $model) {
            roundModelPerformances{
              payout
              roundNumber
              roundResolved
              roundResolveTime
              corrMultiplier
              mmcMultiplier
              selectedStakeValue
            }
            stakeValue
            nmrStaked
          }
        }
    """
    arguments = {'model': model}
    # Uses the module-level ``napi`` client like every other helper in this
    # module instead of building a new NumerAPI instance on each call.
    profile = napi.raw_query(query, arguments)['data']['v3UserProfile']
    payout_info = pd.DataFrame.from_dict(profile['roundModelPerformances'])
    return payout_info[payout_info['payout'].notna()].reset_index(drop=True)
328
+
329
+
330
def get_model_history_v3(model):
    """Build the per-round stake and profit-and-loss history of one model.

    Args:
        model: model name passed to ``model_payout_history``.

    Returns:
        pandas.DataFrame with the columns ``model``, ``date``,
        ``current_stake``, ``floating_stake``, ``payout``, ``floating_pl``,
        ``realised_pl``, ``roundResolved`` and ``roundNumber``.
    """
    # Work on a private copy; the former DataFrame.from_dict round-trip on an
    # object that already is a DataFrame served the same purpose.
    res = model_payout_history(model).copy()
    res['payout'] = res['payout'].astype(np.float64)
    res['current_stake'] = res['selectedStakeValue'].astype(np.float64)
    res['payout_cumsum'] = project_utils.series_reverse_cumsum(res['payout'])
    res['date'] = pd.to_datetime(res['roundResolveTime']).dt.date

    # Explicit comparisons keep rows with a missing flag out of both masks.
    resolved = res['roundResolved'] == True  # noqa: E712
    unresolved = res['roundResolved'] == False  # noqa: E712

    # Unresolved rounds carry the largest running payout total seen on
    # resolved rounds as their realised P/L.
    res['realised_pl'] = res['payout_cumsum']
    latest_realised_pl = res.loc[resolved, 'payout_cumsum'].max()
    res.loc[unresolved, 'realised_pl'] = latest_realised_pl

    # Start from a float column: filling an int column with float payouts is
    # an incompatible-dtype assignment that newer pandas versions deprecate.
    res['floating_pl'] = 0.0
    pending_payouts = res.loc[unresolved, 'payout'].values
    res.loc[unresolved, 'floating_pl'] = pending_payouts[::-1].cumsum()[::-1]

    res['model'] = model
    res['floating_stake'] = res['current_stake'] + res['floating_pl']
    cols = ['model', 'date', 'current_stake', 'floating_stake', 'payout', 'floating_pl', 'realised_pl',
            'roundResolved', 'roundNumber']
    return res[cols]
354
+
355
+
356
+
357
+
358
+
359
+
project_tools/project_class.py DELETED
File without changes
project_tools/project_config.py CHANGED
@@ -5,3 +5,39 @@ sys.path.append(os.path.dirname(os.getcwd()))
5
  DATETIME_FORMAT1 = '%Y%m%d%H%M'
6
  DATETIME_FORMAT2 = '%Y/%m/%d %H:%M'
7
  DATETIME_FORMAT3 = '%Y-%m-%d'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  DATETIME_FORMAT1 = '%Y%m%d%H%M'
6
  DATETIME_FORMAT2 = '%Y/%m/%d %H:%M'
7
  DATETIME_FORMAT3 = '%Y-%m-%d'
8
+
9
# Model-name groups offered by the dashboard.
MODEL_NAMES = [
    'yxbot', 'yxbot2', 'sforest_baihu', 'stree_qinlong', 'flyingbus_mcv6',
    'starry_night', 'fish_and_chips', 'rogue_planet', 'three_body_problem',
    'grinning_cat', 'schrodingers_cat', 'omega_weapon', 'ifirit',
    'dark_bahamut', 'wen_score', 'qinlong', 'baihu', 'marlboro',
    'hell_cerberus', 'fuxi', 'roci_fuxi', 'kupo_mcv7', 'yxbot_mcv2',
    'yxbot_mcv10',
]

NEW_MODEL_NAMES = [
    'yxbot3_m15', 'yxbot4_m23', 'yxbot5', 'yxbot6_m16', 'yxbot7_m17',
    'yxbot_a10b8', 'yxbot9_m24', 'yxbot_a10', 'yxbot_a10xu', 'yxbot_a10bk',
    'yxbot_a11', 'yxbot_a12', 'yxbot_ultima_weapon', 'yxbot_valkyrie',
    'yxbot_bearmate', 'yxbot_dracula', 'yxbot_a13', 'yxbot_a14',
    'yxbot15_zhuque', 'yxbot_redhare', 'yxbot_a15', 'yxbot18_m25',
    'yxbot11_x302',
]

# flyingbus

TOP_LB = [
    'mdl3', 'nescience', 'sapphirescipionyx', 'quantaquetzalcoatlus',
    'anna13', 'mercuryai', 'uuazed6', 'rosetta', 'sinookas',
]

TP3M = [
    'ageonsen', 'davebaty', 'wallingford_nut', 'filipstefano2', 'davat6',
    'lions', 'wsw', 'lottery_of_babylon', 'kup_choy_n', 'pinky_and_the_brain',
]

TP1Y = [
    'hiryuu', 'victoria', 'benben11', 'usigma7', 'crystal_sphere',
    'era__mix__2000', 'rgb_alpha', 'smokh', 'shoukaku', 'stables', 'deepnum',
    'botarai', 'zuikaku', 'kond',
]

ARBITRAGE_MODELS = [
    'arbitrage', 'arbitrage2', 'arbitrage3', 'arbitrage4',
    'leverage', 'leverage2', 'leverage3',
    'culebracapital', 'culebracapital2', 'culebracapital3',
]

IAAI_MODELS = [
    'ia_ai', 'the_aijoe4', 'i_like_the_coin_08', 'i_like_the_coin_09',
    'i_like_the_coin_10',
]

# 'restrading', then 'restrading2' .. 'restrading9'
RESTRADE_MODELS = ['restrading'] + [f'restrading{i}' for i in range(2, 10)]

BENCHMARK_MODELS = ['integration_test', 'i_like_the_coin_01']  # 'budbot_7'] #'integration_test_7'

# 'mcv', then 'mcv2' .. 'mcv13'
MCV_MODELS = ['mcv'] + [f'mcv{i}' for i in range(2, 14)]

# 'mcv14' .. 'mcv50'
MCV_NEW_MODELS = [f'mcv{i}' for i in range(14, 51)]

# Data locations and sources.
DASHBOARD_MODEL_RESULT_FILE = '../feature_data/dashboard_model_result.pkl'
NUMERATI_URL = 'https://raw.githubusercontent.com/woobe/numerati/master/data.csv'
NUMERATI_FILE = '../feature_data/numerati_data.pkl'
FEATURE_PATH = '../feature_data/'
project_tools/project_utils.py CHANGED
@@ -1,31 +1,39 @@
1
  import numpy as np
2
  import pandas as pd
3
- import random
4
  import os
5
- from os import listdir
6
- from os.path import isfile, join, isdir
7
- import cv2
8
  import pickle
9
- import sys
10
  import time
11
  from contextlib import contextmanager
12
  from importlib import reload
13
- # from datetime import datetime
14
- from shutil import copyfile, move
15
  import re
16
- from pathlib import Path
17
-
18
- from project_tools import project_config, project_utils
19
-
20
- from shutil import copyfile, move
21
- import gc
22
  import glob
23
- from multiprocessing import Pool
24
- from functools import partial
25
  import matplotlib.pyplot as plt
26
- import traceback
27
- import json
 
 
 
 
 
 
 
28
  import datetime
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
 
31
  def get_time_string():
@@ -46,8 +54,7 @@ def reload_project():
46
  """
47
  reload(project_config)
48
  reload(project_utils)
49
- reload(project_class)
50
-
51
 
52
  @contextmanager
53
  def timer(name):
@@ -234,6 +241,16 @@ def empty_folder(path):
234
  os.remove(f)
235
 
236
 
 
 
 
 
 
 
 
 
 
 
237
 
238
  def rmse(y_true, y_pred):
239
  """
@@ -639,6 +656,151 @@ def try_divide(x, y, val=0.0):
639
  return val
640
 
641
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
642
 
643
 
644
 
 
1
  import numpy as np
2
  import pandas as pd
 
3
  import os
 
 
 
4
  import pickle
 
5
  import time
6
  from contextlib import contextmanager
7
  from importlib import reload
 
 
8
  import re
9
+ from project_tools import project_config, project_utils, numerapi_utils
 
 
 
 
 
10
  import glob
 
 
11
  import matplotlib.pyplot as plt
12
+ import seaborn as sns
13
+ from random import randint, random
14
+ import itertools
15
+ import scipy
16
+ from scipy.stats import ks_2samp
17
+ from sklearn.metrics import log_loss, roc_auc_score, accuracy_score, mean_squared_error
18
+ from sklearn.preprocessing import MinMaxScaler, StandardScaler
19
+ from sklearn.pipeline import make_pipeline
20
+ from sklearn import linear_model
21
  import datetime
22
+ import json
23
+ from collections import OrderedDict
24
+ from os import listdir
25
+ from os.path import isfile, join, isdir
26
+ import glob
27
+ import numerapi
28
+ import itertools
29
+ import io
30
+ import requests
31
+ from pathlib import Path
32
+ from scipy.stats.mstats import gmean
33
+ from typing import List, Dict
34
+
35
+
36
+ napi = numerapi.NumerAPI() #verbosity="info")
37
 
38
 
39
  def get_time_string():
 
54
  """
55
  reload(project_config)
56
  reload(project_utils)
57
+ reload(numerapi_utils)
 
58
 
59
  @contextmanager
60
  def timer(name):
 
241
  os.remove(f)
242
 
243
 
244
def rescale(n, range1, range2):
    """Map ``n`` linearly from ``range1`` onto ``range2``.

    ``n`` is first clamped to ``range1``, so the result never leaves
    ``range2``.

    Args:
        n: value to convert.
        range1: ``(low, high)`` bounds of the source scale.
        range2: ``(low, high)`` bounds of the target scale.

    Returns:
        The position of the clamped ``n`` expressed on the target scale.
    """
    src_low, src_high = range1[0], range1[1]
    dst_low, dst_high = range2[0], range2[1]

    clamped = src_high if n > src_high else n
    clamped = src_low if clamped < src_low else clamped

    src_span = src_high - src_low
    dst_span = dst_high - dst_low
    return (dst_span * (clamped - src_low) / src_span) + dst_low
252
+
253
+
254
 
255
  def rmse(y_true, y_pred):
256
  """
 
656
  return val
657
 
658
 
659
def series_reverse_cumsum(a):
    """Return the cumulative sum of ``a`` accumulated from its last element.

    Missing values count as 0. Element ``i`` of the result is the sum of
    ``a[i:]``.

    Args:
        a: pandas Series of numbers.

    Returns:
        Array with the same length as ``a``.
    """
    filled = a.fillna(0).values
    running_from_end = filled[::-1].cumsum()
    return running_from_end[::-1]
661
+
662
+
663
+
664
+ #### NumerDash specific functions ###
665
+
666
def calculate_rounddailysharpe_dashboard(df, lastround, earliest_round, score='corr'):
    """Tabulate the per-round sharpe of a daily score for every model.

    Args:
        df: DataFrame with the columns ``model``, ``group``, ``roundNumber``
            and the column named by ``score``.
        lastround: last round number to include.
        earliest_round: first round number to include.
        score: name of the score column to evaluate.

    Returns:
        pandas.DataFrame with one row per ``model``/``group``: the columns
        ``model``, ``group``, ``sos`` (``get_array_sharpe`` over the round
        sharpes), ``avg_sharpe`` (their mean) and one column per round
        number in reversed pivot order. Rows are sorted by ``sos``
        descending.
    """
    # Label of the intermediate sharpe column. It only lives between the
    # rename and the pivot, so it is derived from ``score``; the former
    # if/elif chain left ``target`` unbound for any score it did not list.
    target = f'{score}_sharpe'
    mean_feat = 'avg_sharpe'
    sos_feat = 'sos'

    in_window = (df['roundNumber'] >= earliest_round) & (df['roundNumber'] <= lastround)
    per_round = (
        df[in_window]
        .groupby(['model', 'roundNumber', 'group'])[score]
        .apply(lambda x: get_array_sharpe(x))
        .reset_index(drop=False)
        .rename(columns={score: target})
        .sort_values('roundNumber', ascending=False)
    )

    res = per_round.pivot(index=['model', 'group'], columns='roundNumber', values=target)
    res.columns.name = ''
    cols = [i for i in res.columns[::-1]]
    res = res[cols]
    res[mean_feat] = res[cols].mean(axis=1)
    res[sos_feat] = res[cols].apply(lambda x: get_array_sharpe(x), axis=1)
    res = res.drop_duplicates(keep='first').sort_values(by=sos_feat, ascending=False)
    res.reset_index(drop=False, inplace=True)
    return res[['model', 'group', sos_feat, mean_feat] + cols]
699
+
700
+
701
+
702
def groupby_agg_execution(agg_recipies, df, verbose=True):
    """
    Run a list of group-by aggregation recipes against ``df``.

    :param agg_recipies: iterable of ``(groupby_cols, features, aggs)`` tuples.
        ``groupby_cols`` is a list of column names to group by, ``features`` a
        list of columns to aggregate, and ``aggs`` a list whose items are either
        an aggregation accepted by pandas ``agg`` or a one-entry dict
        ``{name: aggregation}`` that supplies the name used in the output column
    :param df: DataFrame to aggregate
    :param verbose: print the name of every statistic as it is generated
    :return: dict mapping ``'_'.join(groupby_cols)`` to a DataFrame holding the
        group-by columns plus one column per aggregate, named
        ``<groupby key>_<feature>_<agg name>`` (``<groupby key>_count`` for
        ``count``, which therefore is shared by all features of a recipe)
    """
    result_dfs = dict()
    for groupby_cols, features, aggs in agg_recipies:
        group_object = df.groupby(groupby_cols)
        groupby_key = '_'.join(groupby_cols)
        if groupby_key not in result_dfs:
            result_dfs[groupby_key] = pd.DataFrame()
        for feature in features:
            for agg in aggs:
                if isinstance(agg, dict):
                    agg_name = list(agg)[0]
                    agg_func = agg[agg_name]
                else:
                    agg_name = agg_func = agg
                if agg_name == 'count':
                    groupby_aggregate_name = '{}_{}'.format(groupby_key, agg_name)
                else:
                    groupby_aggregate_name = '{}_{}_{}'.format(groupby_key, feature, agg_name)
                if verbose:
                    print(f'generating statistic {groupby_aggregate_name}')
                groupby_res_df = group_object[feature].agg(agg_func).reset_index(drop=False)
                groupby_res_df = groupby_res_df.rename(columns={feature: groupby_aggregate_name})
                # Detect "nothing stored yet" by the absence of columns rather
                # than rows: with a zero-row input the row test stayed true and
                # every aggregate replaced the previous ones.
                if result_dfs[groupby_key].shape[1] == 0:
                    result_dfs[groupby_key] = groupby_res_df
                else:
                    result_dfs[groupby_key][groupby_aggregate_name] = groupby_res_df[groupby_aggregate_name]
    return result_dfs
730
+
731
+
732
def get_latest_round_id():
    """
    Return the number of the latest round as an int.

    The round number is read from the first entry returned by
    ``numerapi_utils.get_competitions()``. If that fails for any reason, the
    maximum ``roundNumber`` found in the locally stored dashboard result file
    (``project_config.DASHBOARD_MODEL_RESULT_FILE``) is used instead.

    :return: latest round number
    """
    try:
        all_competitions = numerapi_utils.get_competitions()
        latest_comp_id = all_competitions[0]['number']
    except Exception:
        # Exception instead of a bare except, so that KeyboardInterrupt and
        # SystemExit are not swallowed by the fallback.
        print('calling api unsuccessulf, using downloaded data to get the latest round')
        local_data = load_data(project_config.DASHBOARD_MODEL_RESULT_FILE)
        latest_comp_id = local_data['roundNumber'].max()
    return int(latest_comp_id)
742
+
743
# Module-level value: get_latest_round_id() is called once, when this
# statement executes, and the result is kept in latest_round.
+ latest_round = get_latest_round_id()
744
+
745
+
746
+
747
+
748
def update_numerati_data(url=project_config.NUMERATI_URL, save_path=project_config.FEATURE_PATH, timeout=60):
    """
    Download the numerati CSV and store it as a pickle.

    :param url: address of the CSV file to download
    :param save_path: directory in which ``numerati_data.pkl`` is written
    :param timeout: seconds to wait for the server before giving up, so a
        stalled connection cannot block the caller forever
    :return: the downloaded data as a DataFrame
    :raises requests.HTTPError: if the server answers with an error status
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as CSV and
    # overwriting the stored data with it.
    response.raise_for_status()
    data = pd.read_csv(io.StringIO(response.content.decode('utf-8')))
    save_file = os.path.join(save_path, 'numerati_data.pkl')
    pickle_data(save_file, data)
    return data
754
+
755
+
756
+
757
+
758
def get_model_group(model_name):
    """
    Map a model name to the dashboard group it belongs to.

    The model lists in ``project_config`` are checked in a fixed order and the
    first list containing ``model_name`` decides the group.

    :param model_name: name of the model to classify
    :return: group label, ``'other'`` when no list contains the model
    """
    own_models = project_config.MODEL_NAMES + project_config.NEW_MODEL_NAMES
    if model_name in own_models:
        return 'yx'

    # (project_config attribute, group label) pairs in order of precedence.
    # The 'mm' group (project_config.MM_MODELS) is currently disabled.
    ordered_groups = (
        ('TOP_LB', 'top_corr'),
        ('IAAI_MODELS', 'iaai'),
        ('ARBITRAGE_MODELS', 'arbitrage'),
        ('MCV_MODELS', 'mcv'),
        ('BENCHMARK_MODELS', 'benchmark'),
        ('TP3M', 'top_3m'),
        ('TP1Y', 'top_1y'),
    )
    for config_attr, group_label in ordered_groups:
        if model_name in getattr(project_config, config_attr):
            return group_label
    return 'other'
779
+
780
+
781
def _config_file_ctime_string(path_attr):
    """Format the UTC ctime of the file named by ``project_config.<path_attr>``; 'NA' on any failure."""
    try:
        ctime = os.path.getctime(getattr(project_config, path_attr))
        # Timezone-aware conversion with tzinfo dropped afterwards: yields the
        # same naive UTC datetime as the deprecated datetime.utcfromtimestamp().
        stamp = datetime.datetime.fromtimestamp(ctime, tz=datetime.timezone.utc).replace(tzinfo=None)
        return stamp.strftime(project_config.DATETIME_FORMAT2)
    except Exception as e:
        # Best effort: report the problem and let the caller show 'NA'.
        print(e)
        return 'NA'


def get_dashboard_data_status():
    """
    Report when the dashboard data files were last written.

    The timestamps are taken from ``os.path.getctime`` (creation time on
    Windows, last metadata change on Unix), expressed in UTC and formatted with
    ``project_config.DATETIME_FORMAT2``.

    :return: tuple ``(dashboard_data_tstr, nmtd_tstr)`` for
        ``project_config.DASHBOARD_MODEL_RESULT_FILE`` and
        ``project_config.NUMERATI_FILE``; an entry is ``'NA'`` when its
        timestamp cannot be determined
    """
    dashboard_data_tstr = _config_file_ctime_string('DASHBOARD_MODEL_RESULT_FILE')
    nmtd_tstr = _config_file_ctime_string('NUMERATI_FILE')
    return dashboard_data_tstr, nmtd_tstr
797
+
798
+
799
+
800
+
801
+
802
+
803
+
804
 
805
 
806
 
x302_dev ADDED
@@ -0,0 +1 @@
 
 
1
+ ../x302_sp/numerdash/