cyberosa
committed on
Commit
·
f842047
1
Parent(s):
30394cf
new data and only open markets for extreme cases
Browse files
- app.py +19 -12
- live_data/markets_live_data.parquet +2 -2
- live_data/markets_live_data_sample.parquet +2 -2
- notebooks/analysis_of_markets_data.ipynb +43 -41
- scripts/live_markets_data.py +0 -1
- scripts/live_traders_data.py +2 -2
app.py
CHANGED
@@ -1,10 +1,9 @@
|
|
1 |
-
from datetime import datetime
|
2 |
import gradio as gr
|
3 |
-
import matplotlib.pyplot as plt
|
4 |
import pandas as pd
|
5 |
-
import seaborn as sns
|
6 |
import duckdb
|
7 |
import logging
|
|
|
8 |
from tabs.tokens_votes_dist import (
|
9 |
get_based_tokens_distribution,
|
10 |
get_based_votes_distribution,
|
@@ -57,7 +56,7 @@ def prepare_data():
|
|
57 |
return df
|
58 |
|
59 |
|
60 |
-
def get_extreme_cases(live_fpmms: pd.DataFrame):
|
61 |
"""Function to return the id of the best and worst case according to the dist gap metric"""
|
62 |
# select markets with more than 1 sample
|
63 |
samples_per_market = (
|
@@ -70,48 +69,56 @@ def get_extreme_cases(live_fpmms: pd.DataFrame):
|
|
70 |
live_fpmms["id"].isin(markets_with_multiple_samples)
|
71 |
]
|
72 |
selected_markets.sort_values(by="dist_gap_perc", ascending=False, inplace=True)
|
73 |
-
return
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
|
76 |
demo = gr.Blocks()
|
77 |
markets_data = prepare_data()
|
|
|
78 |
|
79 |
with demo:
|
80 |
gr.HTML("<h1>Olas Predict Live Markets </h1>")
|
81 |
gr.Markdown("This app shows the distributions of predictions on the live markets.")
|
82 |
-
best_market_id, worst_market_id = get_extreme_cases(
|
|
|
|
|
83 |
with gr.Tabs():
|
84 |
with gr.TabItem("💹 Probability distributions of live markets"):
|
85 |
with gr.Row():
|
86 |
gr.Markdown("Best case: a market with a low gap between distributions")
|
87 |
with gr.Row():
|
88 |
-
gr.Markdown(f"Market id = {best_market_id}")
|
89 |
with gr.Row():
|
90 |
with gr.Column(min_width=350):
|
91 |
gr.Markdown("# Evolution of outcomes probability based on tokens")
|
92 |
best_market_tokens_dist = get_based_tokens_distribution(
|
93 |
-
best_market_id,
|
94 |
)
|
95 |
with gr.Column(min_width=350):
|
96 |
gr.Markdown("# Evolution of outcomes probability based on votes")
|
97 |
best_market_votes_dist = get_based_votes_distribution(
|
98 |
-
best_market_id,
|
99 |
)
|
100 |
|
101 |
with gr.Row():
|
102 |
gr.Markdown("Worst case: a market with a high distribution gap metric")
|
103 |
with gr.Row():
|
104 |
-
gr.Markdown(f"Market id = {worst_market_id}")
|
105 |
|
106 |
with gr.Row():
|
107 |
with gr.Column(min_width=350):
|
108 |
# gr.Markdown("# Evolution of outcomes probability based on tokens")
|
109 |
worst_market_tokens_dist = get_based_tokens_distribution(
|
110 |
-
worst_market_id,
|
111 |
)
|
112 |
with gr.Column(min_width=350):
|
113 |
worst_market_votes_dist = get_based_votes_distribution(
|
114 |
-
worst_market_id,
|
115 |
)
|
116 |
|
117 |
with gr.TabItem("📏 Distribution gap metric"):
|
|
|
1 |
+
from datetime import datetime
|
2 |
import gradio as gr
|
|
|
3 |
import pandas as pd
|
|
|
4 |
import duckdb
|
5 |
import logging
|
6 |
+
from typing import Tuple
|
7 |
from tabs.tokens_votes_dist import (
|
8 |
get_based_tokens_distribution,
|
9 |
get_based_votes_distribution,
|
|
|
56 |
return df
|
57 |
|
58 |
|
59 |
+
def get_extreme_cases(live_fpmms: pd.DataFrame) -> Tuple:
|
60 |
"""Function to return the id of the best and worst case according to the dist gap metric"""
|
61 |
# select markets with more than 1 sample
|
62 |
samples_per_market = (
|
|
|
69 |
live_fpmms["id"].isin(markets_with_multiple_samples)
|
70 |
]
|
71 |
selected_markets.sort_values(by="dist_gap_perc", ascending=False, inplace=True)
|
72 |
+
return (
|
73 |
+
selected_markets.iloc[-1].id,
|
74 |
+
selected_markets.iloc[-1].dist_gap_perc,
|
75 |
+
selected_markets.iloc[0].id,
|
76 |
+
selected_markets.iloc[0].dist_gap_perc,
|
77 |
+
)
|
78 |
|
79 |
|
80 |
demo = gr.Blocks()
|
81 |
markets_data = prepare_data()
|
82 |
+
live_markets_data = markets_data.loc[markets_data["open"] == True]
|
83 |
|
84 |
with demo:
|
85 |
gr.HTML("<h1>Olas Predict Live Markets </h1>")
|
86 |
gr.Markdown("This app shows the distributions of predictions on the live markets.")
|
87 |
+
best_market_id, best_gap, worst_market_id, worst_gap = get_extreme_cases(
|
88 |
+
live_markets_data
|
89 |
+
)
|
90 |
with gr.Tabs():
|
91 |
with gr.TabItem("💹 Probability distributions of live markets"):
|
92 |
with gr.Row():
|
93 |
gr.Markdown("Best case: a market with a low gap between distributions")
|
94 |
with gr.Row():
|
95 |
+
gr.Markdown(f"Market id = {best_market_id} Dist gap = {best_gap}")
|
96 |
with gr.Row():
|
97 |
with gr.Column(min_width=350):
|
98 |
gr.Markdown("# Evolution of outcomes probability based on tokens")
|
99 |
best_market_tokens_dist = get_based_tokens_distribution(
|
100 |
+
best_market_id, live_markets_data
|
101 |
)
|
102 |
with gr.Column(min_width=350):
|
103 |
gr.Markdown("# Evolution of outcomes probability based on votes")
|
104 |
best_market_votes_dist = get_based_votes_distribution(
|
105 |
+
best_market_id, live_markets_data
|
106 |
)
|
107 |
|
108 |
with gr.Row():
|
109 |
gr.Markdown("Worst case: a market with a high distribution gap metric")
|
110 |
with gr.Row():
|
111 |
+
gr.Markdown(f"Market id = {worst_market_id} Dist gap = {worst_gap}")
|
112 |
|
113 |
with gr.Row():
|
114 |
with gr.Column(min_width=350):
|
115 |
# gr.Markdown("# Evolution of outcomes probability based on tokens")
|
116 |
worst_market_tokens_dist = get_based_tokens_distribution(
|
117 |
+
worst_market_id, live_markets_data
|
118 |
)
|
119 |
with gr.Column(min_width=350):
|
120 |
worst_market_votes_dist = get_based_votes_distribution(
|
121 |
+
worst_market_id, live_markets_data
|
122 |
)
|
123 |
|
124 |
with gr.TabItem("📏 Distribution gap metric"):
|
live_data/markets_live_data.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ae654e0654ae908aff28be1cac0ebdff765f65e6c442dc94df1a2c801319c2a
|
3 |
+
size 39132
|
live_data/markets_live_data_sample.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e0451385c1de2891fe1ab7fff8b3e5a5f797f351b2ca799266e65adec57a9b9
|
3 |
+
size 34357
|
notebooks/analysis_of_markets_data.ipynb
CHANGED
@@ -82,7 +82,7 @@
|
|
82 |
" <td>14.76</td>\n",
|
83 |
" <td>NaN</td>\n",
|
84 |
" <td>2024-07-31 18:06:59</td>\n",
|
85 |
-
" <td>2</td>\n",
|
86 |
" </tr>\n",
|
87 |
" <tr>\n",
|
88 |
" <th>1</th>\n",
|
@@ -105,7 +105,7 @@
|
|
105 |
" <td>47.84</td>\n",
|
106 |
" <td>NaN</td>\n",
|
107 |
" <td>2024-07-31 18:06:59</td>\n",
|
108 |
-
" <td>3</td>\n",
|
109 |
" </tr>\n",
|
110 |
" <tr>\n",
|
111 |
" <th>2</th>\n",
|
@@ -128,7 +128,7 @@
|
|
128 |
" <td>43.07</td>\n",
|
129 |
" <td>NaN</td>\n",
|
130 |
" <td>2024-07-31 18:06:59</td>\n",
|
131 |
-
" <td>6</td>\n",
|
132 |
" </tr>\n",
|
133 |
" <tr>\n",
|
134 |
" <th>3</th>\n",
|
@@ -140,7 +140,7 @@
|
|
140 |
" <td>[4808284238922480369, 10190745298156651455]</td>\n",
|
141 |
" <td>Will SpaceX launch Falcon 9 rocket on 3 August...</td>\n",
|
142 |
" <td>1722442019</td>\n",
|
143 |
-
" <td>
|
144 |
" <td>27</td>\n",
|
145 |
" <td>4.98</td>\n",
|
146 |
" <td>62.96</td>\n",
|
@@ -151,7 +151,7 @@
|
|
151 |
" <td>32.06</td>\n",
|
152 |
" <td>NaN</td>\n",
|
153 |
" <td>2024-07-31 18:06:59</td>\n",
|
154 |
-
" <td>7</td>\n",
|
155 |
" </tr>\n",
|
156 |
" <tr>\n",
|
157 |
" <th>4</th>\n",
|
@@ -174,7 +174,7 @@
|
|
174 |
" <td>50.32</td>\n",
|
175 |
" <td>NaN</td>\n",
|
176 |
" <td>2024-07-31 18:06:59</td>\n",
|
177 |
-
" <td>8</td>\n",
|
178 |
" </tr>\n",
|
179 |
" </tbody>\n",
|
180 |
"</table>\n",
|
@@ -206,7 +206,7 @@
|
|
206 |
"0 Will the new AI-powered upgrade for the Philip... 1722442019 False \n",
|
207 |
"1 Will Harvey Weinstein recover from Covid-19 an... 1722442019 False \n",
|
208 |
"2 Will Tesla's net income increase in the third ... 1722442019 False \n",
|
209 |
-
"3 Will SpaceX launch Falcon 9 rocket on 3 August... 1722442019
|
210 |
"4 Will the wildfire in California be under contr... 1722442019 False \n",
|
211 |
"\n",
|
212 |
" total_trades dist_gap_perc votes_first_outcome_perc \\\n",
|
@@ -224,11 +224,11 @@
|
|
224 |
"4 52.46 Yes No 49.68 \n",
|
225 |
"\n",
|
226 |
" second_token_perc mean_trade_size sample_datetime market_id \n",
|
227 |
-
"0 14.76 NaN 2024-07-31 18:06:59
|
228 |
-
"1 47.84 NaN 2024-07-31 18:06:59
|
229 |
-
"2 43.07 NaN 2024-07-31 18:06:59
|
230 |
-
"3 32.06 NaN 2024-07-31 18:06:59
|
231 |
-
"4 50.32 NaN 2024-07-31 18:06:59
|
232 |
]
|
233 |
},
|
234 |
"execution_count": 3,
|
@@ -243,7 +243,7 @@
|
|
243 |
},
|
244 |
{
|
245 |
"cell_type": "code",
|
246 |
-
"execution_count":
|
247 |
"metadata": {},
|
248 |
"outputs": [
|
249 |
{
|
@@ -251,30 +251,32 @@
|
|
251 |
"output_type": "stream",
|
252 |
"text": [
|
253 |
"<class 'pandas.core.frame.DataFrame'>\n",
|
254 |
-
"RangeIndex:
|
255 |
-
"Data columns (total
|
256 |
-
" # Column Non-Null Count Dtype
|
257 |
-
"--- ------ -------------- -----
|
258 |
-
" 0 creationTimestamp
|
259 |
-
" 1 id
|
260 |
-
" 2 liquidityMeasure
|
261 |
-
" 3 liquidityParameter
|
262 |
-
" 4 openingTimestamp
|
263 |
-
" 5 outcomeTokenAmounts
|
264 |
-
" 6 title
|
265 |
-
" 7 sample_timestamp
|
266 |
-
" 8 open
|
267 |
-
" 9 total_trades
|
268 |
-
" 10 dist_gap_perc
|
269 |
-
" 11 votes_first_outcome_perc
|
270 |
-
" 12 votes_second_outcome_perc
|
271 |
-
" 13 first_outcome
|
272 |
-
" 14 second_outcome
|
273 |
-
" 15 first_token_perc
|
274 |
-
" 16 second_token_perc
|
275 |
-
" 17 mean_trade_size
|
276 |
-
"
|
277 |
-
"
|
|
|
|
|
278 |
]
|
279 |
}
|
280 |
],
|
@@ -308,19 +310,19 @@
|
|
308 |
},
|
309 |
{
|
310 |
"cell_type": "code",
|
311 |
-
"execution_count":
|
312 |
"metadata": {},
|
313 |
"outputs": [
|
314 |
{
|
315 |
"data": {
|
316 |
"text/plain": [
|
317 |
"open\n",
|
318 |
-
"
|
319 |
-
"
|
320 |
"Name: count, dtype: int64"
|
321 |
]
|
322 |
},
|
323 |
-
"execution_count":
|
324 |
"metadata": {},
|
325 |
"output_type": "execute_result"
|
326 |
}
|
|
|
82 |
" <td>14.76</td>\n",
|
83 |
" <td>NaN</td>\n",
|
84 |
" <td>2024-07-31 18:06:59</td>\n",
|
85 |
+
" <td>2.0</td>\n",
|
86 |
" </tr>\n",
|
87 |
" <tr>\n",
|
88 |
" <th>1</th>\n",
|
|
|
105 |
" <td>47.84</td>\n",
|
106 |
" <td>NaN</td>\n",
|
107 |
" <td>2024-07-31 18:06:59</td>\n",
|
108 |
+
" <td>3.0</td>\n",
|
109 |
" </tr>\n",
|
110 |
" <tr>\n",
|
111 |
" <th>2</th>\n",
|
|
|
128 |
" <td>43.07</td>\n",
|
129 |
" <td>NaN</td>\n",
|
130 |
" <td>2024-07-31 18:06:59</td>\n",
|
131 |
+
" <td>6.0</td>\n",
|
132 |
" </tr>\n",
|
133 |
" <tr>\n",
|
134 |
" <th>3</th>\n",
|
|
|
140 |
" <td>[4808284238922480369, 10190745298156651455]</td>\n",
|
141 |
" <td>Will SpaceX launch Falcon 9 rocket on 3 August...</td>\n",
|
142 |
" <td>1722442019</td>\n",
|
143 |
+
" <td>False</td>\n",
|
144 |
" <td>27</td>\n",
|
145 |
" <td>4.98</td>\n",
|
146 |
" <td>62.96</td>\n",
|
|
|
151 |
" <td>32.06</td>\n",
|
152 |
" <td>NaN</td>\n",
|
153 |
" <td>2024-07-31 18:06:59</td>\n",
|
154 |
+
" <td>7.0</td>\n",
|
155 |
" </tr>\n",
|
156 |
" <tr>\n",
|
157 |
" <th>4</th>\n",
|
|
|
174 |
" <td>50.32</td>\n",
|
175 |
" <td>NaN</td>\n",
|
176 |
" <td>2024-07-31 18:06:59</td>\n",
|
177 |
+
" <td>8.0</td>\n",
|
178 |
" </tr>\n",
|
179 |
" </tbody>\n",
|
180 |
"</table>\n",
|
|
|
206 |
"0 Will the new AI-powered upgrade for the Philip... 1722442019 False \n",
|
207 |
"1 Will Harvey Weinstein recover from Covid-19 an... 1722442019 False \n",
|
208 |
"2 Will Tesla's net income increase in the third ... 1722442019 False \n",
|
209 |
+
"3 Will SpaceX launch Falcon 9 rocket on 3 August... 1722442019 False \n",
|
210 |
"4 Will the wildfire in California be under contr... 1722442019 False \n",
|
211 |
"\n",
|
212 |
" total_trades dist_gap_perc votes_first_outcome_perc \\\n",
|
|
|
224 |
"4 52.46 Yes No 49.68 \n",
|
225 |
"\n",
|
226 |
" second_token_perc mean_trade_size sample_datetime market_id \n",
|
227 |
+
"0 14.76 NaN 2024-07-31 18:06:59 2.0 \n",
|
228 |
+
"1 47.84 NaN 2024-07-31 18:06:59 3.0 \n",
|
229 |
+
"2 43.07 NaN 2024-07-31 18:06:59 6.0 \n",
|
230 |
+
"3 32.06 NaN 2024-07-31 18:06:59 7.0 \n",
|
231 |
+
"4 50.32 NaN 2024-07-31 18:06:59 8.0 "
|
232 |
]
|
233 |
},
|
234 |
"execution_count": 3,
|
|
|
243 |
},
|
244 |
{
|
245 |
"cell_type": "code",
|
246 |
+
"execution_count": 4,
|
247 |
"metadata": {},
|
248 |
"outputs": [
|
249 |
{
|
|
|
251 |
"output_type": "stream",
|
252 |
"text": [
|
253 |
"<class 'pandas.core.frame.DataFrame'>\n",
|
254 |
+
"RangeIndex: 168 entries, 0 to 167\n",
|
255 |
+
"Data columns (total 20 columns):\n",
|
256 |
+
" # Column Non-Null Count Dtype \n",
|
257 |
+
"--- ------ -------------- ----- \n",
|
258 |
+
" 0 creationTimestamp 168 non-null object \n",
|
259 |
+
" 1 id 168 non-null object \n",
|
260 |
+
" 2 liquidityMeasure 168 non-null int64 \n",
|
261 |
+
" 3 liquidityParameter 168 non-null object \n",
|
262 |
+
" 4 openingTimestamp 168 non-null object \n",
|
263 |
+
" 5 outcomeTokenAmounts 168 non-null object \n",
|
264 |
+
" 6 title 168 non-null object \n",
|
265 |
+
" 7 sample_timestamp 168 non-null int64 \n",
|
266 |
+
" 8 open 168 non-null bool \n",
|
267 |
+
" 9 total_trades 168 non-null int64 \n",
|
268 |
+
" 10 dist_gap_perc 168 non-null float64 \n",
|
269 |
+
" 11 votes_first_outcome_perc 168 non-null float64 \n",
|
270 |
+
" 12 votes_second_outcome_perc 168 non-null float64 \n",
|
271 |
+
" 13 first_outcome 168 non-null object \n",
|
272 |
+
" 14 second_outcome 168 non-null object \n",
|
273 |
+
" 15 first_token_perc 168 non-null float64 \n",
|
274 |
+
" 16 second_token_perc 168 non-null float64 \n",
|
275 |
+
" 17 mean_trade_size 84 non-null float64 \n",
|
276 |
+
" 18 sample_datetime 126 non-null datetime64[ns]\n",
|
277 |
+
" 19 market_id 126 non-null float64 \n",
|
278 |
+
"dtypes: bool(1), datetime64[ns](1), float64(7), int64(3), object(8)\n",
|
279 |
+
"memory usage: 25.2+ KB\n"
|
280 |
]
|
281 |
}
|
282 |
],
|
|
|
310 |
},
|
311 |
{
|
312 |
"cell_type": "code",
|
313 |
+
"execution_count": 5,
|
314 |
"metadata": {},
|
315 |
"outputs": [
|
316 |
{
|
317 |
"data": {
|
318 |
"text/plain": [
|
319 |
"open\n",
|
320 |
+
"False 110\n",
|
321 |
+
"True 58\n",
|
322 |
"Name: count, dtype: int64"
|
323 |
]
|
324 |
},
|
325 |
+
"execution_count": 5,
|
326 |
"metadata": {},
|
327 |
"output_type": "execute_result"
|
328 |
}
|
scripts/live_markets_data.py
CHANGED
@@ -292,7 +292,6 @@ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
|
|
292 |
|
293 |
if __name__ == "__main__":
|
294 |
|
295 |
-
logger = logging.getLogger(__name__)
|
296 |
logger.setLevel(logging.DEBUG)
|
297 |
# stream handler and formatter
|
298 |
stream_handler = logging.StreamHandler()
|
|
|
292 |
|
293 |
if __name__ == "__main__":
|
294 |
|
|
|
295 |
logger.setLevel(logging.DEBUG)
|
296 |
# stream handler and formatter
|
297 |
stream_handler = logging.StreamHandler()
|
scripts/live_traders_data.py
CHANGED
@@ -12,9 +12,8 @@ from utils import (
|
|
12 |
)
|
13 |
from utils import SUBGRAPH_API_KEY, _to_content
|
14 |
from queries import omen_trader_votes_query
|
15 |
-
from live_markets_data import logger
|
16 |
|
17 |
-
|
18 |
|
19 |
headers = {
|
20 |
"Accept": "application/json, multipart/mixed",
|
@@ -108,6 +107,7 @@ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
|
|
108 |
"""Function to update only the information related with the current timestamp"""
|
109 |
|
110 |
logger.info("Adding votes distribution per market")
|
|
|
111 |
# Iterate over the markets
|
112 |
for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
|
113 |
# update the trades for this market and at this specific current_timestamp
|
|
|
12 |
)
|
13 |
from utils import SUBGRAPH_API_KEY, _to_content
|
14 |
from queries import omen_trader_votes_query
|
|
|
15 |
|
16 |
+
logger = logging.getLogger(__name__)
|
17 |
|
18 |
headers = {
|
19 |
"Accept": "application/json, multipart/mixed",
|
|
|
107 |
"""Function to update only the information related with the current timestamp"""
|
108 |
|
109 |
logger.info("Adding votes distribution per market")
|
110 |
+
fpmms["liquidityMeasure"] = fpmms["liquidityMeasure"].apply(lambda x: int(x))
|
111 |
# Iterate over the markets
|
112 |
for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
|
113 |
# update the trades for this market and at this specific current_timestamp
|