cyberosa committed on
Commit
11a5d2b
·
1 Parent(s): 37fdecc

new price weighted distribution

Browse files
app.py CHANGED
@@ -4,16 +4,14 @@ import pandas as pd
4
  import duckdb
5
  import logging
6
 
7
- from tabs.tokens_votes_dist import (
8
- get_based_tokens_distribution,
9
- get_based_votes_distribution,
10
  get_extreme_cases,
11
  )
12
  from tabs.dist_gap import (
13
  get_distribution_plot,
14
  get_correlation_map,
15
  get_kde_with_trades,
16
- get_regplot_with_mean_trade_size,
17
  )
18
 
19
 
@@ -69,7 +67,7 @@ with demo:
69
  live_markets_data
70
  )
71
  with gr.Tabs():
72
- with gr.TabItem("💹 Probability distributions of live markets"):
73
  with gr.Row():
74
  gr.Markdown("Best case: a market with a low gap between distributions")
75
  with gr.Row():
@@ -77,16 +75,19 @@ with demo:
77
  f"Market id = {best_market_id} Dist gap = {round(best_gap,2)}"
78
  )
79
  with gr.Row():
80
- with gr.Column(min_width=350):
81
- gr.Markdown("# Evolution of outcomes probability based on tokens")
82
- best_market_tokens_dist = get_based_tokens_distribution(
83
- best_market_id, live_markets_data
84
- )
85
- with gr.Column(min_width=350):
86
- gr.Markdown("# Evolution of outcomes probability based on votes")
87
- best_market_votes_dist = get_based_votes_distribution(
88
- best_market_id, live_markets_data
89
- )
 
 
 
90
 
91
  with gr.Row():
92
  gr.Markdown("Worst case: a market with a high distribution gap metric")
@@ -96,21 +97,25 @@ with demo:
96
  )
97
 
98
  with gr.Row():
99
- with gr.Column(min_width=350):
100
- # gr.Markdown("# Evolution of outcomes probability based on tokens")
101
- worst_market_tokens_dist = get_based_tokens_distribution(
102
- worst_market_id, live_markets_data
103
- )
104
- with gr.Column(min_width=350):
105
- worst_market_votes_dist = get_based_votes_distribution(
106
- worst_market_id, live_markets_data
107
- )
108
-
109
- with gr.TabItem("📏 Distribution gap metric"):
 
 
 
 
110
  # remove samples with no trades
111
  with gr.Row():
112
  gr.Markdown(
113
- "This metric measures the difference between the probability distribution based on the tokens distribution and the one based on the votes distribution"
114
  )
115
  with gr.Row():
116
  gr.Markdown("# Density distribution")
 
4
  import duckdb
5
  import logging
6
 
7
+ from olas_predict_live_markets.tabs.tokens_pwc_dist import (
 
 
8
  get_extreme_cases,
9
  )
10
  from tabs.dist_gap import (
11
  get_distribution_plot,
12
  get_correlation_map,
13
  get_kde_with_trades,
14
+ get_dist_gap_time_evolution,
15
  )
16
 
17
 
 
67
  live_markets_data
68
  )
69
  with gr.Tabs():
70
+ with gr.TabItem("💹 Probability distributions of some markets"):
71
  with gr.Row():
72
  gr.Markdown("Best case: a market with a low gap between distributions")
73
  with gr.Row():
 
75
  f"Market id = {best_market_id} Dist gap = {round(best_gap,2)}"
76
  )
77
  with gr.Row():
78
+ # with gr.Column(min_width=350):
79
+ # gr.Markdown("# Evolution of outcomes probability based on tokens")
80
+ # best_market_tokens_dist = get_based_tokens_distribution(
81
+ # best_market_id, live_markets_data
82
+ # )
83
+ # with gr.Column(min_width=350):
84
+ # gr.Markdown("# Evolution of outcomes probability based on votes")
85
+ # best_market_votes_dist = get_based_votes_distribution(
86
+ # best_market_id, live_markets_data
87
+ # )
88
+ best_case = get_dist_gap_time_evolution(
89
+ best_market_id, live_markets_data
90
+ )
91
 
92
  with gr.Row():
93
  gr.Markdown("Worst case: a market with a high distribution gap metric")
 
97
  )
98
 
99
  with gr.Row():
100
+ # with gr.Column(min_width=350):
101
+ # # gr.Markdown("# Evolution of outcomes probability based on tokens")
102
+ # worst_market_tokens_dist = get_based_tokens_distribution(
103
+ # worst_market_id, live_markets_data
104
+ # )
105
+ # with gr.Column(min_width=350):
106
+ # worst_market_votes_dist = get_based_votes_distribution(
107
+ # worst_market_id, live_markets_data
108
+ # )
109
+
110
+ worst_case = get_dist_gap_time_evolution(
111
+ worst_market_id, live_markets_data
112
+ )
113
+
114
+ with gr.TabItem("📏 Distribution gap metric for all markets"):
115
  # remove samples with no trades
116
  with gr.Row():
117
  gr.Markdown(
118
+ "This metric measures the difference between the probability distribution based on the tokens distribution and the one based on the price weighted distribution"
119
  )
120
  with gr.Row():
121
  gr.Markdown("# Density distribution")
live_data/markets_live_data.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3827de48ab0e0bbc2b0ab8b141e2b815e6d7e28bfb183ed7c4dc3b52b5fe07d4
3
- size 78693
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3283dcea94638dee3b949a1cbdb90b7acdf8c50aa80c398888ed57525c07158
3
+ size 27358
live_data/{markets_live_data_sample.parquet → markets_live_data_with_votes.parquet} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bedc1873d9d7019ec3c6f394b3e625bf04543d1b00bd943a2b7ca7c39c90091b
3
- size 72466
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3827de48ab0e0bbc2b0ab8b141e2b815e6d7e28bfb183ed7c4dc3b52b5fe07d4
3
+ size 78693
notebooks/analysis_of_markets_data.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/analysis_of_markets_data_with_votes.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
scripts/live_markets_data.py CHANGED
@@ -204,12 +204,15 @@ def get_answer(fpmm: pd.Series) -> str:
204
 
205
 
206
  def get_first_token_perc(row):
 
207
  if row["total_tokens"] == 0.0:
208
  return 0
 
209
  return 100.0 - round((row["token_first_amount"] / row["total_tokens"]) * 100, 2)
210
 
211
 
212
  def get_second_token_perc(row):
 
213
  if row["total_tokens"] == 0.0:
214
  return 0
215
  return 100.0 - round((row["token_second_amount"] / row["total_tokens"]) * 100, 2)
@@ -286,13 +289,13 @@ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
286
 
287
  logger.info("transforming and updating previous data")
288
  fpmms = transform_fpmms(fpmms, filename, current_timestamp)
289
- logger.debug(fpmms.info())
290
 
291
  logger.info("Adding trading information")
292
- add_trading_info(fpmms, current_timestamp)
293
 
294
  logger.info("saving the data")
295
- logger.debug(fpmms.info())
296
  if filename:
297
  fpmms.to_parquet(DATA_DIR / filename, index=False)
298
 
 
204
 
205
 
206
def get_first_token_perc(row):
    """Return the percentage assigned to the first outcome from the bought tokens.

    NOTE(review): the value is 100 minus the first token's share of the total
    token amount — presumably because in an FPMM the implied probability of an
    outcome is inverse to its token balance; confirm against the market model.

    :param row: dataframe row with "total_tokens" and "token_first_amount".
    :return: percentage as a float in [0, 100]; 0.0 when no tokens were bought.
    """
    if row["total_tokens"] == 0.0:
        # Return a float for type consistency with the regular branch below.
        return 0.0

    return 100.0 - round((row["token_first_amount"] / row["total_tokens"]) * 100, 2)
212
 
213
 
214
def get_second_token_perc(row):
    """Return the percentage assigned to the second outcome from the bought tokens.

    NOTE(review): mirrors get_first_token_perc — 100 minus the second token's
    share of the total token amount; presumably the FPMM inverse-balance rule.
    Confirm against the market model.

    :param row: dataframe row with "total_tokens" and "token_second_amount".
    :return: percentage as a float in [0, 100]; 0.0 when no tokens were bought.
    """
    if row["total_tokens"] == 0.0:
        # Return a float for type consistency with the regular branch below.
        return 0.0
    return 100.0 - round((row["token_second_amount"] / row["total_tokens"]) * 100, 2)
 
289
 
290
  logger.info("transforming and updating previous data")
291
  fpmms = transform_fpmms(fpmms, filename, current_timestamp)
292
+ # logger.debug(fpmms.info())
293
 
294
  logger.info("Adding trading information")
295
+ add_trading_info(fpmms, current_timestamp, logger)
296
 
297
  logger.info("saving the data")
298
+ # logger.debug(fpmms.info())
299
  if filename:
300
  fpmms.to_parquet(DATA_DIR / filename, index=False)
301
 
scripts/live_traders_data.py CHANGED
@@ -13,7 +13,7 @@ from utils import (
13
  from utils import SUBGRAPH_API_KEY, _to_content
14
  from queries import omen_trader_votes_query
15
 
16
- logger = logging.getLogger(__name__)
17
 
18
  headers = {
19
  "Accept": "application/json, multipart/mixed",
@@ -22,7 +22,7 @@ headers = {
22
 
23
 
24
  def _query_omen_xdai_subgraph(
25
- fpmm_id: str,
26
  ) -> dict[str, Any]:
27
  """Query the subgraph."""
28
  omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
@@ -66,7 +66,7 @@ def _query_omen_xdai_subgraph(
66
  return all_results
67
 
68
 
69
- def transform_trades(trades_json: dict) -> pd.DataFrame:
70
  # convert to dataframe
71
  logger.info("transforming trades")
72
  df = pd.DataFrame(trades_json["data"]["fpmmTrades"])
@@ -91,7 +91,7 @@ def transform_trades(trades_json: dict) -> pd.DataFrame:
91
 
92
 
93
 
94
- def compute_votes_distribution(market_trades: pd.DataFrame):
95
  """Function to compute the distribution of votes for the trades of a market"""
96
  logger.info("Computing the votes distribution")
97
  total_trades = len(market_trades)
@@ -103,7 +103,24 @@ def compute_votes_distribution(market_trades: pd.DataFrame):
103
  return (100 - percentage_index_1), percentage_index_1
104
 
105
 
106
- def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  """Function to update only the information related with the current timestamp"""
108
 
109
  logger.info("Adding votes distribution per market")
@@ -120,33 +137,32 @@ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
120
 
121
  logger.info(f"Adding trades information for the market {market_id}")
122
  market_trades_json = _query_omen_xdai_subgraph(
123
- fpmm_id=market_id,
124
  )
125
- market_trades = transform_trades(market_trades_json)
126
  fpmms.at[i,"total_trades"] = len(market_trades)
127
 
128
  if len(market_trades) > 0:
129
  # adding average trade size
130
  market_trades["collateralAmountUSD"] = market_trades.collateralAmountUSD.apply(lambda x: round(float(x),3))
131
  mean_trade_size = market_trades.collateralAmountUSD.mean()
132
- first_outcome, second_outcome = compute_votes_distribution(market_trades)
 
133
  else:
134
  logger.info("No trades for this market")
135
  mean_trade_size = 0.0
 
136
  first_outcome, second_outcome = 50.0, 50.0
137
  fpmms.at[i,"mean_trade_size"] = mean_trade_size
 
138
  logger.info(
139
- f"first outcome votes ={first_outcome}, second outcome votes = {second_outcome}"
140
  )
141
- fpmms.at[i,"votes_first_outcome_perc"] = first_outcome
142
- fpmms.at[i,"votes_second_outcome_perc"] = second_outcome
143
  metric = abs(fpmm["first_token_perc"] - first_outcome)
144
  logger.info(f"metric for this market {metric}")
145
  fpmms.at[i,"dist_gap_perc"] = metric
146
  logger.debug("Dataset after adding trading info")
147
  logger.debug(fpmms.head())
148
  return
149
-
150
-
151
- if __name__ == "__main__":
152
- print("collecting votes distribution")
 
13
  from utils import SUBGRAPH_API_KEY, _to_content
14
  from queries import omen_trader_votes_query
15
 
16
+
17
 
18
  headers = {
19
  "Accept": "application/json, multipart/mixed",
 
22
 
23
 
24
  def _query_omen_xdai_subgraph(
25
+ fpmm_id: str, logger
26
  ) -> dict[str, Any]:
27
  """Query the subgraph."""
28
  omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
 
66
  return all_results
67
 
68
 
69
+ def transform_trades(trades_json: dict, logger) -> pd.DataFrame:
70
  # convert to dataframe
71
  logger.info("transforming trades")
72
  df = pd.DataFrame(trades_json["data"]["fpmmTrades"])
 
91
 
92
 
93
 
94
+ def compute_votes_distribution(market_trades: pd.DataFrame, logger):
95
  """Function to compute the distribution of votes for the trades of a market"""
96
  logger.info("Computing the votes distribution")
97
  total_trades = len(market_trades)
 
103
  return (100 - percentage_index_1), percentage_index_1
104
 
105
 
106
def compute_price_weighted_perc(market_trades: pd.DataFrame, logger):
    """Compute the price-weighted distribution as a percentage per outcome.

    Each trade is weighted by its USD collateral amount, so large bets move
    the distribution more than small ones.

    :param market_trades: trades of a single market; must contain the columns
        "outcomeIndex" (0 or 1) and "collateralAmountUSD" (numeric).
    :param logger: logger used for debug traces.
    :return: tuple (perc_outcome_0, perc_outcome_1), summing to 100.
    """
    logger.info("Computing the price weighted distribution")
    # trades for outcome 0
    trades_outcome_0 = market_trades.loc[market_trades["outcomeIndex"] == 0]
    logger.debug(f"Total trades for outcome 0 = {len(trades_outcome_0)}")
    # trades for outcome 1
    trades_outcome_1 = market_trades.loc[market_trades["outcomeIndex"] == 1]
    logger.debug(f"Total trades for outcome 1 = {len(trades_outcome_1)}")
    total_usd_outcome_0 = sum(trades_outcome_0.collateralAmountUSD)
    total_usd_outcome_1 = sum(trades_outcome_1.collateralAmountUSD)
    total_usd = total_usd_outcome_0 + total_usd_outcome_1
    if total_usd == 0:
        # Guard against ZeroDivisionError: a market can have trades but zero
        # USD volume; fall back to an uninformative 50/50 split.
        logger.debug("No USD volume traded, returning a 50/50 split")
        return 50.0, 50.0
    percentage_pwc_outcome_0 = round((total_usd_outcome_0 / total_usd) * 100, 2)
    logger.debug(f"total amount for outcome 0 = {total_usd_outcome_0}")
    logger.debug(f"total usd = {total_usd}")
    return percentage_pwc_outcome_0, 100 - percentage_pwc_outcome_0
122
+
123
+ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int, logger) -> None:
124
  """Function to update only the information related with the current timestamp"""
125
 
126
  logger.info("Adding votes distribution per market")
 
137
 
138
  logger.info(f"Adding trades information for the market {market_id}")
139
  market_trades_json = _query_omen_xdai_subgraph(
140
+ fpmm_id=market_id, logger=logger
141
  )
142
+ market_trades = transform_trades(market_trades_json, logger)
143
  fpmms.at[i,"total_trades"] = len(market_trades)
144
 
145
  if len(market_trades) > 0:
146
  # adding average trade size
147
  market_trades["collateralAmountUSD"] = market_trades.collateralAmountUSD.apply(lambda x: round(float(x),3))
148
  mean_trade_size = market_trades.collateralAmountUSD.mean()
149
+ total_bet_amount = sum(market_trades.collateralAmountUSD)
150
+ first_outcome, second_outcome = compute_price_weighted_perc(market_trades, logger)
151
  else:
152
  logger.info("No trades for this market")
153
  mean_trade_size = 0.0
154
+ total_bet_amount = 0.0
155
  first_outcome, second_outcome = 50.0, 50.0
156
  fpmms.at[i,"mean_trade_size"] = mean_trade_size
157
+ fpmms.at[i,"total_bet_amount"] = total_bet_amount
158
  logger.info(
159
+ f"first outcome pwc ={first_outcome}, second outcome pwc = {second_outcome}"
160
  )
161
+ fpmms.at[i,"price_weighted_first_outcome_perc"] = first_outcome
162
+ fpmms.at[i,"price_weighted_second_outcome_perc"] = second_outcome
163
  metric = abs(fpmm["first_token_perc"] - first_outcome)
164
  logger.info(f"metric for this market {metric}")
165
  fpmms.at[i,"dist_gap_perc"] = metric
166
  logger.debug("Dataset after adding trading info")
167
  logger.debug(fpmms.head())
168
  return
 
 
 
 
tabs/dist_gap.py CHANGED
@@ -5,6 +5,34 @@ import seaborn as sns
5
  from seaborn import FacetGrid
6
  import plotly.express as px
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  def get_top_best_behaviour_markets(markets_data: pd.DataFrame):
10
  """Function to paint the top markets with the lowest metric of distribution gap"""
@@ -51,6 +79,7 @@ def get_correlation_map(markets_data: pd.DataFrame):
51
  "dist_gap_perc",
52
  "liquidityMeasure",
53
  "mean_trade_size",
 
54
  ]
55
  data = markets_data[columns_of_interest]
56
 
 
5
  from seaborn import FacetGrid
6
  import plotly.express as px
7
 
8
HEIGHT = 600
WIDTH = 1000


def get_dist_gap_time_evolution(market_id: str, all_markets: pd.DataFrame):
    """Plot the time evolution of the distribution gap metric for one market.

    The gap ("dist_gap_perc") is the distance between the token-based
    distribution and the price-weighted one, sampled over time.

    :param market_id: id of the market to plot.
    :param all_markets: dataframe with all sampled live markets; must contain
        "id", "sample_datetime", "dist_gap_perc", "total_trades" and
        "total_bet_amount" columns.
    :return: a gradio LinePlot, one point per sample timestamp.
    """
    sns.set_style("darkgrid")
    # Work on a copy: assigning into a .loc slice of the caller's dataframe
    # triggers pandas' SettingWithCopyWarning and may silently not stick.
    selected_market = all_markets.loc[all_markets["id"] == market_id].copy()
    # gr.LinePlot expects string axis values / column labels.
    selected_market["sample_datetime"] = selected_market["sample_datetime"].astype(str)
    selected_market.columns = selected_market.columns.astype(str)

    return gr.LinePlot(
        value=selected_market,
        x="sample_datetime",
        y="dist_gap_perc",
        y_title="Distribution gap in %",
        interactive=True,
        show_actions_button=True,
        tooltip=[
            "sample_datetime",
            "dist_gap_perc",
            "total_trades",
            "total_bet_amount",
        ],
        height=HEIGHT,
        width=WIDTH,
    )
35
+
36
 
37
  def get_top_best_behaviour_markets(markets_data: pd.DataFrame):
38
  """Function to paint the top markets with the lowest metric of distribution gap"""
 
79
  "dist_gap_perc",
80
  "liquidityMeasure",
81
  "mean_trade_size",
82
+ "total_bet_amount",
83
  ]
84
  data = markets_data[columns_of_interest]
85
 
tabs/{tokens_votes_dist.py → tokens_pwc_dist.py} RENAMED
File without changes