cyberosa committed on
Commit
f842047
·
1 Parent(s): 30394cf

new data and only open markets for extreme cases

Browse files
app.py CHANGED
@@ -1,10 +1,9 @@
1
- from datetime import datetime, timedelta
2
  import gradio as gr
3
- import matplotlib.pyplot as plt
4
  import pandas as pd
5
- import seaborn as sns
6
  import duckdb
7
  import logging
 
8
  from tabs.tokens_votes_dist import (
9
  get_based_tokens_distribution,
10
  get_based_votes_distribution,
@@ -57,7 +56,7 @@ def prepare_data():
57
  return df
58
 
59
 
60
- def get_extreme_cases(live_fpmms: pd.DataFrame):
61
  """Function to return the id of the best and worst case according to the dist gap metric"""
62
  # select markets with more than 1 sample
63
  samples_per_market = (
@@ -70,48 +69,56 @@ def get_extreme_cases(live_fpmms: pd.DataFrame):
70
  live_fpmms["id"].isin(markets_with_multiple_samples)
71
  ]
72
  selected_markets.sort_values(by="dist_gap_perc", ascending=False, inplace=True)
73
- return selected_markets.iloc[-1].id, selected_markets.iloc[0].id
 
 
 
 
 
74
 
75
 
76
  demo = gr.Blocks()
77
  markets_data = prepare_data()
 
78
 
79
  with demo:
80
  gr.HTML("<h1>Olas Predict Live Markets </h1>")
81
  gr.Markdown("This app shows the distributions of predictions on the live markets.")
82
- best_market_id, worst_market_id = get_extreme_cases(markets_data)
 
 
83
  with gr.Tabs():
84
  with gr.TabItem("💹 Probability distributions of live markets"):
85
  with gr.Row():
86
  gr.Markdown("Best case: a market with a low gap between distributions")
87
  with gr.Row():
88
- gr.Markdown(f"Market id = {best_market_id}")
89
  with gr.Row():
90
  with gr.Column(min_width=350):
91
  gr.Markdown("# Evolution of outcomes probability based on tokens")
92
  best_market_tokens_dist = get_based_tokens_distribution(
93
- best_market_id, markets_data
94
  )
95
  with gr.Column(min_width=350):
96
  gr.Markdown("# Evolution of outcomes probability based on votes")
97
  best_market_votes_dist = get_based_votes_distribution(
98
- best_market_id, markets_data
99
  )
100
 
101
  with gr.Row():
102
  gr.Markdown("Worst case: a market with a high distribution gap metric")
103
  with gr.Row():
104
- gr.Markdown(f"Market id = {worst_market_id}")
105
 
106
  with gr.Row():
107
  with gr.Column(min_width=350):
108
  # gr.Markdown("# Evolution of outcomes probability based on tokens")
109
  worst_market_tokens_dist = get_based_tokens_distribution(
110
- worst_market_id, markets_data
111
  )
112
  with gr.Column(min_width=350):
113
  worst_market_votes_dist = get_based_votes_distribution(
114
- worst_market_id, markets_data
115
  )
116
 
117
  with gr.TabItem("📏 Distribution gap metric"):
 
1
+ from datetime import datetime
2
  import gradio as gr
 
3
  import pandas as pd
 
4
  import duckdb
5
  import logging
6
+ from typing import Tuple
7
  from tabs.tokens_votes_dist import (
8
  get_based_tokens_distribution,
9
  get_based_votes_distribution,
 
56
  return df
57
 
58
 
59
+ def get_extreme_cases(live_fpmms: pd.DataFrame) -> Tuple:
60
  """Function to return the id of the best and worst case according to the dist gap metric"""
61
  # select markets with more than 1 sample
62
  samples_per_market = (
 
69
  live_fpmms["id"].isin(markets_with_multiple_samples)
70
  ]
71
  selected_markets.sort_values(by="dist_gap_perc", ascending=False, inplace=True)
72
+ return (
73
+ selected_markets.iloc[-1].id,
74
+ selected_markets.iloc[-1].dist_gap_perc,
75
+ selected_markets.iloc[0].id,
76
+ selected_markets.iloc[0].dist_gap_perc,
77
+ )
78
 
79
 
80
  demo = gr.Blocks()
81
  markets_data = prepare_data()
82
+ live_markets_data = markets_data.loc[markets_data["open"] == True]
83
 
84
  with demo:
85
  gr.HTML("<h1>Olas Predict Live Markets </h1>")
86
  gr.Markdown("This app shows the distributions of predictions on the live markets.")
87
+ best_market_id, best_gap, worst_market_id, worst_gap = get_extreme_cases(
88
+ live_markets_data
89
+ )
90
  with gr.Tabs():
91
  with gr.TabItem("💹 Probability distributions of live markets"):
92
  with gr.Row():
93
  gr.Markdown("Best case: a market with a low gap between distributions")
94
  with gr.Row():
95
+ gr.Markdown(f"Market id = {best_market_id} Dist gap = {best_gap}")
96
  with gr.Row():
97
  with gr.Column(min_width=350):
98
  gr.Markdown("# Evolution of outcomes probability based on tokens")
99
  best_market_tokens_dist = get_based_tokens_distribution(
100
+ best_market_id, live_markets_data
101
  )
102
  with gr.Column(min_width=350):
103
  gr.Markdown("# Evolution of outcomes probability based on votes")
104
  best_market_votes_dist = get_based_votes_distribution(
105
+ best_market_id, live_markets_data
106
  )
107
 
108
  with gr.Row():
109
  gr.Markdown("Worst case: a market with a high distribution gap metric")
110
  with gr.Row():
111
+ gr.Markdown(f"Market id = {worst_market_id} Dist gap = {worst_gap}")
112
 
113
  with gr.Row():
114
  with gr.Column(min_width=350):
115
  # gr.Markdown("# Evolution of outcomes probability based on tokens")
116
  worst_market_tokens_dist = get_based_tokens_distribution(
117
+ worst_market_id, live_markets_data
118
  )
119
  with gr.Column(min_width=350):
120
  worst_market_votes_dist = get_based_votes_distribution(
121
+ worst_market_id, live_markets_data
122
  )
123
 
124
  with gr.TabItem("📏 Distribution gap metric"):
live_data/markets_live_data.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e0451385c1de2891fe1ab7fff8b3e5a5f797f351b2ca799266e65adec57a9b9
3
- size 34357
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ae654e0654ae908aff28be1cac0ebdff765f65e6c442dc94df1a2c801319c2a
3
+ size 39132
live_data/markets_live_data_sample.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8592b33790b8209ebd8de3ac3108e76f25a3e02fc5720dc0a8984fe47e09cad1
3
- size 27990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e0451385c1de2891fe1ab7fff8b3e5a5f797f351b2ca799266e65adec57a9b9
3
+ size 34357
notebooks/analysis_of_markets_data.ipynb CHANGED
@@ -82,7 +82,7 @@
82
  " <td>14.76</td>\n",
83
  " <td>NaN</td>\n",
84
  " <td>2024-07-31 18:06:59</td>\n",
85
- " <td>2</td>\n",
86
  " </tr>\n",
87
  " <tr>\n",
88
  " <th>1</th>\n",
@@ -105,7 +105,7 @@
105
  " <td>47.84</td>\n",
106
  " <td>NaN</td>\n",
107
  " <td>2024-07-31 18:06:59</td>\n",
108
- " <td>3</td>\n",
109
  " </tr>\n",
110
  " <tr>\n",
111
  " <th>2</th>\n",
@@ -128,7 +128,7 @@
128
  " <td>43.07</td>\n",
129
  " <td>NaN</td>\n",
130
  " <td>2024-07-31 18:06:59</td>\n",
131
- " <td>6</td>\n",
132
  " </tr>\n",
133
  " <tr>\n",
134
  " <th>3</th>\n",
@@ -140,7 +140,7 @@
140
  " <td>[4808284238922480369, 10190745298156651455]</td>\n",
141
  " <td>Will SpaceX launch Falcon 9 rocket on 3 August...</td>\n",
142
  " <td>1722442019</td>\n",
143
- " <td>True</td>\n",
144
  " <td>27</td>\n",
145
  " <td>4.98</td>\n",
146
  " <td>62.96</td>\n",
@@ -151,7 +151,7 @@
151
  " <td>32.06</td>\n",
152
  " <td>NaN</td>\n",
153
  " <td>2024-07-31 18:06:59</td>\n",
154
- " <td>7</td>\n",
155
  " </tr>\n",
156
  " <tr>\n",
157
  " <th>4</th>\n",
@@ -174,7 +174,7 @@
174
  " <td>50.32</td>\n",
175
  " <td>NaN</td>\n",
176
  " <td>2024-07-31 18:06:59</td>\n",
177
- " <td>8</td>\n",
178
  " </tr>\n",
179
  " </tbody>\n",
180
  "</table>\n",
@@ -206,7 +206,7 @@
206
  "0 Will the new AI-powered upgrade for the Philip... 1722442019 False \n",
207
  "1 Will Harvey Weinstein recover from Covid-19 an... 1722442019 False \n",
208
  "2 Will Tesla's net income increase in the third ... 1722442019 False \n",
209
- "3 Will SpaceX launch Falcon 9 rocket on 3 August... 1722442019 True \n",
210
  "4 Will the wildfire in California be under contr... 1722442019 False \n",
211
  "\n",
212
  " total_trades dist_gap_perc votes_first_outcome_perc \\\n",
@@ -224,11 +224,11 @@
224
  "4 52.46 Yes No 49.68 \n",
225
  "\n",
226
  " second_token_perc mean_trade_size sample_datetime market_id \n",
227
- "0 14.76 NaN 2024-07-31 18:06:59 2 \n",
228
- "1 47.84 NaN 2024-07-31 18:06:59 3 \n",
229
- "2 43.07 NaN 2024-07-31 18:06:59 6 \n",
230
- "3 32.06 NaN 2024-07-31 18:06:59 7 \n",
231
- "4 50.32 NaN 2024-07-31 18:06:59 8 "
232
  ]
233
  },
234
  "execution_count": 3,
@@ -243,7 +243,7 @@
243
  },
244
  {
245
  "cell_type": "code",
246
- "execution_count": 156,
247
  "metadata": {},
248
  "outputs": [
249
  {
@@ -251,30 +251,32 @@
251
  "output_type": "stream",
252
  "text": [
253
  "<class 'pandas.core.frame.DataFrame'>\n",
254
- "RangeIndex: 126 entries, 0 to 125\n",
255
- "Data columns (total 18 columns):\n",
256
- " # Column Non-Null Count Dtype \n",
257
- "--- ------ -------------- ----- \n",
258
- " 0 creationTimestamp 126 non-null object \n",
259
- " 1 id 126 non-null object \n",
260
- " 2 liquidityMeasure 126 non-null object \n",
261
- " 3 liquidityParameter 126 non-null object \n",
262
- " 4 openingTimestamp 126 non-null object \n",
263
- " 5 outcomeTokenAmounts 126 non-null object \n",
264
- " 6 title 126 non-null object \n",
265
- " 7 sample_timestamp 126 non-null int64 \n",
266
- " 8 open 126 non-null bool \n",
267
- " 9 total_trades 126 non-null int64 \n",
268
- " 10 dist_gap_perc 126 non-null float64\n",
269
- " 11 votes_first_outcome_perc 126 non-null float64\n",
270
- " 12 votes_second_outcome_perc 126 non-null float64\n",
271
- " 13 first_outcome 126 non-null object \n",
272
- " 14 second_outcome 126 non-null object \n",
273
- " 15 first_token_perc 126 non-null float64\n",
274
- " 16 second_token_perc 126 non-null float64\n",
275
- " 17 mean_trade_size 42 non-null float64\n",
276
- "dtypes: bool(1), float64(6), int64(2), object(9)\n",
277
- "memory usage: 17.0+ KB\n"
 
 
278
  ]
279
  }
280
  ],
@@ -308,19 +310,19 @@
308
  },
309
  {
310
  "cell_type": "code",
311
- "execution_count": 158,
312
  "metadata": {},
313
  "outputs": [
314
  {
315
  "data": {
316
  "text/plain": [
317
  "open\n",
318
- "True 96\n",
319
- "False 30\n",
320
  "Name: count, dtype: int64"
321
  ]
322
  },
323
- "execution_count": 158,
324
  "metadata": {},
325
  "output_type": "execute_result"
326
  }
 
82
  " <td>14.76</td>\n",
83
  " <td>NaN</td>\n",
84
  " <td>2024-07-31 18:06:59</td>\n",
85
+ " <td>2.0</td>\n",
86
  " </tr>\n",
87
  " <tr>\n",
88
  " <th>1</th>\n",
 
105
  " <td>47.84</td>\n",
106
  " <td>NaN</td>\n",
107
  " <td>2024-07-31 18:06:59</td>\n",
108
+ " <td>3.0</td>\n",
109
  " </tr>\n",
110
  " <tr>\n",
111
  " <th>2</th>\n",
 
128
  " <td>43.07</td>\n",
129
  " <td>NaN</td>\n",
130
  " <td>2024-07-31 18:06:59</td>\n",
131
+ " <td>6.0</td>\n",
132
  " </tr>\n",
133
  " <tr>\n",
134
  " <th>3</th>\n",
 
140
  " <td>[4808284238922480369, 10190745298156651455]</td>\n",
141
  " <td>Will SpaceX launch Falcon 9 rocket on 3 August...</td>\n",
142
  " <td>1722442019</td>\n",
143
+ " <td>False</td>\n",
144
  " <td>27</td>\n",
145
  " <td>4.98</td>\n",
146
  " <td>62.96</td>\n",
 
151
  " <td>32.06</td>\n",
152
  " <td>NaN</td>\n",
153
  " <td>2024-07-31 18:06:59</td>\n",
154
+ " <td>7.0</td>\n",
155
  " </tr>\n",
156
  " <tr>\n",
157
  " <th>4</th>\n",
 
174
  " <td>50.32</td>\n",
175
  " <td>NaN</td>\n",
176
  " <td>2024-07-31 18:06:59</td>\n",
177
+ " <td>8.0</td>\n",
178
  " </tr>\n",
179
  " </tbody>\n",
180
  "</table>\n",
 
206
  "0 Will the new AI-powered upgrade for the Philip... 1722442019 False \n",
207
  "1 Will Harvey Weinstein recover from Covid-19 an... 1722442019 False \n",
208
  "2 Will Tesla's net income increase in the third ... 1722442019 False \n",
209
+ "3 Will SpaceX launch Falcon 9 rocket on 3 August... 1722442019 False \n",
210
  "4 Will the wildfire in California be under contr... 1722442019 False \n",
211
  "\n",
212
  " total_trades dist_gap_perc votes_first_outcome_perc \\\n",
 
224
  "4 52.46 Yes No 49.68 \n",
225
  "\n",
226
  " second_token_perc mean_trade_size sample_datetime market_id \n",
227
+ "0 14.76 NaN 2024-07-31 18:06:59 2.0 \n",
228
+ "1 47.84 NaN 2024-07-31 18:06:59 3.0 \n",
229
+ "2 43.07 NaN 2024-07-31 18:06:59 6.0 \n",
230
+ "3 32.06 NaN 2024-07-31 18:06:59 7.0 \n",
231
+ "4 50.32 NaN 2024-07-31 18:06:59 8.0 "
232
  ]
233
  },
234
  "execution_count": 3,
 
243
  },
244
  {
245
  "cell_type": "code",
246
+ "execution_count": 4,
247
  "metadata": {},
248
  "outputs": [
249
  {
 
251
  "output_type": "stream",
252
  "text": [
253
  "<class 'pandas.core.frame.DataFrame'>\n",
254
+ "RangeIndex: 168 entries, 0 to 167\n",
255
+ "Data columns (total 20 columns):\n",
256
+ " # Column Non-Null Count Dtype \n",
257
+ "--- ------ -------------- ----- \n",
258
+ " 0 creationTimestamp 168 non-null object \n",
259
+ " 1 id 168 non-null object \n",
260
+ " 2 liquidityMeasure 168 non-null int64 \n",
261
+ " 3 liquidityParameter 168 non-null object \n",
262
+ " 4 openingTimestamp 168 non-null object \n",
263
+ " 5 outcomeTokenAmounts 168 non-null object \n",
264
+ " 6 title 168 non-null object \n",
265
+ " 7 sample_timestamp 168 non-null int64 \n",
266
+ " 8 open 168 non-null bool \n",
267
+ " 9 total_trades 168 non-null int64 \n",
268
+ " 10 dist_gap_perc 168 non-null float64 \n",
269
+ " 11 votes_first_outcome_perc 168 non-null float64 \n",
270
+ " 12 votes_second_outcome_perc 168 non-null float64 \n",
271
+ " 13 first_outcome 168 non-null object \n",
272
+ " 14 second_outcome 168 non-null object \n",
273
+ " 15 first_token_perc 168 non-null float64 \n",
274
+ " 16 second_token_perc 168 non-null float64 \n",
275
+ " 17 mean_trade_size 84 non-null float64 \n",
276
+ " 18 sample_datetime 126 non-null datetime64[ns]\n",
277
+ " 19 market_id 126 non-null float64 \n",
278
+ "dtypes: bool(1), datetime64[ns](1), float64(7), int64(3), object(8)\n",
279
+ "memory usage: 25.2+ KB\n"
280
  ]
281
  }
282
  ],
 
310
  },
311
  {
312
  "cell_type": "code",
313
+ "execution_count": 5,
314
  "metadata": {},
315
  "outputs": [
316
  {
317
  "data": {
318
  "text/plain": [
319
  "open\n",
320
+ "False 110\n",
321
+ "True 58\n",
322
  "Name: count, dtype: int64"
323
  ]
324
  },
325
+ "execution_count": 5,
326
  "metadata": {},
327
  "output_type": "execute_result"
328
  }
scripts/live_markets_data.py CHANGED
@@ -292,7 +292,6 @@ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
292
 
293
  if __name__ == "__main__":
294
 
295
- logger = logging.getLogger(__name__)
296
  logger.setLevel(logging.DEBUG)
297
  # stream handler and formatter
298
  stream_handler = logging.StreamHandler()
 
292
 
293
  if __name__ == "__main__":
294
 
 
295
  logger.setLevel(logging.DEBUG)
296
  # stream handler and formatter
297
  stream_handler = logging.StreamHandler()
scripts/live_traders_data.py CHANGED
@@ -12,9 +12,8 @@ from utils import (
12
  )
13
  from utils import SUBGRAPH_API_KEY, _to_content
14
  from queries import omen_trader_votes_query
15
- from live_markets_data import logger
16
 
17
- #logger = logging.getLogger(__name__)
18
 
19
  headers = {
20
  "Accept": "application/json, multipart/mixed",
@@ -108,6 +107,7 @@ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
108
  """Function to update only the information related with the current timestamp"""
109
 
110
  logger.info("Adding votes distribution per market")
 
111
  # Iterate over the markets
112
  for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
113
  # update the trades for this market and at this specific current_timestamp
 
12
  )
13
  from utils import SUBGRAPH_API_KEY, _to_content
14
  from queries import omen_trader_votes_query
 
15
 
16
+ logger = logging.getLogger(__name__)
17
 
18
  headers = {
19
  "Accept": "application/json, multipart/mixed",
 
107
  """Function to update only the information related with the current timestamp"""
108
 
109
  logger.info("Adding votes distribution per market")
110
+ fpmms["liquidityMeasure"] = fpmms["liquidityMeasure"].apply(lambda x: int(x))
111
  # Iterate over the markets
112
  for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
113
  # update the trades for this market and at this specific current_timestamp