cyberosa committed
Commit a134d9b · 1 Parent(s): 3305162

initial files and scripts. App under construction
.gitignore ADDED
@@ -0,0 +1,162 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ .DS_Store
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
README.md CHANGED
@@ -1,6 +1,6 @@
  ---
  title: Olas Predict Live Markets
- emoji: 🐠
+ emoji: 💹
  colorFrom: gray
  colorTo: red
  sdk: gradio
app.py ADDED
@@ -0,0 +1,60 @@
+ from datetime import datetime, timedelta
+ import gradio as gr
+ import matplotlib.pyplot as plt
+ import pandas as pd
+ import seaborn as sns
+ import duckdb
+ import logging
+
+
+ def get_logger():
+     logger = logging.getLogger(__name__)
+     logger.setLevel(logging.DEBUG)
+     # stream handler and formatter
+     stream_handler = logging.StreamHandler()
+     stream_handler.setLevel(logging.DEBUG)
+     formatter = logging.Formatter(
+         "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+     )
+     stream_handler.setFormatter(formatter)
+     logger.addHandler(stream_handler)
+     return logger
+
+
+ logger = get_logger()
+
+
+ def prepare_data():
+     """
+     Get all data from the parquet files
+     """
+     logger.info("Getting all data")
+     con = duckdb.connect(":memory:")
+
+     # Query to fetch the live markets data
+     query = f"""
+     SELECT *
+     FROM read_parquet('./live_data/markets_live_data.parquet')
+     """
+     df = con.execute(query).fetchdf()
+     return df
+
+
+ demo = gr.Blocks()
+ markets_data = prepare_data()
+
+ with demo:
+     gr.HTML("<h1>Olas Predict Live Markets </h1>")
+     gr.Markdown("This app shows the distributions of predictions on the live markets.")
+
+     with gr.Tabs():
+         with gr.TabItem("💹Probability distributions"):
+             with gr.Row():
+                 gr.Markdown("# Daily probability distribution of live markets")
+             with gr.Row():
+                 # TODO
+                 print("WIP")
+                 gr.Markdown("Under construction (WIP)")
+                 # daily_distributions = plot_daily_market_distributions(markets_data)
+
+ demo.queue(default_concurrency_limit=40).launch()
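
The probability-distributions tab is still a stub: the call to plot_daily_market_distributions is commented out above. Below is a minimal sketch of what such a helper could look like, assuming the columns produced by scripts/live_markets_data.py (creationTimestamp, first_token_perc). The function name matches the commented-out call, but its body is an illustration, not part of this commit.

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


def plot_daily_market_distributions(markets_data: pd.DataFrame) -> gr.Plot:
    """Sketch: box-plot of first-outcome token percentages per market creation day."""
    data = markets_data.copy()
    # creationTimestamp arrives from the subgraph as a unix timestamp (string or int)
    data["creation_day"] = pd.to_datetime(
        pd.to_numeric(data["creationTimestamp"]), unit="s"
    ).dt.date
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.boxplot(data=data, x="creation_day", y="first_token_perc", ax=ax)
    ax.set_xlabel("Market creation day")
    ax.set_ylabel("First outcome token %")
    return gr.Plot(value=fig)

Inside the second gr.Row() it would replace the print("WIP") placeholder with daily_distributions = plot_daily_market_distributions(markets_data).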
live_data/analysis_of_markets_data.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
live_data/markets_live_data.parquet ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a878df63a59bc40ef9c4df0b3a2a87b5d9a66bc533962dff98547fef256348e0
+ size 22146
live_data/markets_live_data_sample.parquet ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7f78eff92f06483f12d9acf36494488732e39c22098ce2f3e21e6d44efb88af
+ size 25464
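
Both parquet files are checked in as Git LFS pointers, so only the oid/size metadata appears in the diff. Once the LFS objects are pulled, the data can be inspected directly; a quick check (illustrative, run from the repository root):

import pandas as pd

# load the live markets snapshot and peek at its shape and columns
df = pd.read_parquet("live_data/markets_live_data.parquet")
print(df.shape)
print(df.columns.tolist())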
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ pandas==2.0.1
+ seaborn
+ matplotlib
+ huggingface-hub
+ pyarrow
+ requests
+ gradio==4.13.0
+ plotly
+ nbformat
+ pytz
+ duckdb
+ ipfshttpclient
scripts/live_markets_data.py ADDED
@@ -0,0 +1,298 @@
+ # -*- coding: utf-8 -*-
+ # ------------------------------------------------------------------------------
+ #
+ # Copyright 2024 Valory AG
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ # ------------------------------------------------------------------------------
+
+ import functools
+ import warnings
+ from typing import Optional, Generator, Callable
+ import os
+ import logging
+ import pandas as pd
+ from datetime import datetime, timedelta, UTC
+ import requests
+ from tqdm import tqdm
+ from typing import List, Dict
+ from live_traders_data import add_trading_info
+ from utils import (
+     SUBGRAPH_API_KEY,
+     measure_execution_time,
+     OMEN_SUBGRAPH_URL,
+     CREATOR,
+     BATCH_SIZE,
+     DATA_DIR,
+ )
+ from queries import (
+     FPMMS_WITH_TOKENS_QUERY,
+     ID_FIELD,
+     DATA_FIELD,
+     ANSWER_FIELD,
+     ANSWER_TIMESTAMP_FIELD,
+     QUERY_FIELD,
+     TITLE_FIELD,
+     OUTCOMES_FIELD,
+     OPENING_TIMESTAMP_FIELD,
+     CREATION_TIMESTAMP_FIELD,
+     LIQUIDITY_FIELD,
+     LIQUIDIY_MEASURE_FIELD,
+     TOKEN_AMOUNTS_FIELD,
+     ERROR_FIELD,
+     QUESTION_FIELD,
+     FPMMS_FIELD,
+ )
+
+ logging.basicConfig(level=logging.INFO)
+
+ ResponseItemType = List[Dict[str, str]]
+ SubgraphResponseType = Dict[str, ResponseItemType]
+
+
+ class RetriesExceeded(Exception):
+     """Exception to raise when retries are exceeded during data-fetching."""
+
+     def __init__(
+         self, msg="Maximum retries were exceeded while trying to fetch the data!"
+     ):
+         super().__init__(msg)
+
+
+ def hacky_retry(func: Callable, n_retries: int = 3) -> Callable:
+     """Create a hacky retry strategy.
+     Unfortunately, we cannot use `requests.packages.urllib3.util.retry.Retry`,
+     because the subgraph does not return the appropriate status codes in case of failure.
+     Instead, it always returns code 200. Thus, we raise exceptions manually inside `make_request`,
+     catch those exceptions in the hacky retry decorator and try again.
+     Finally, if the allowed number of retries is exceeded, we raise a custom `RetriesExceeded` exception.
+
+     :param func: the input request function.
+     :param n_retries: the maximum allowed number of retries.
+     :return: The request method with the hacky retry strategy applied.
+     """
+
+     @functools.wraps(func)
+     def wrapper_hacky_retry(*args, **kwargs) -> SubgraphResponseType:
+         """The wrapper for the hacky retry.
+
+         :return: a response dictionary.
+         """
+         retried = 0
+
+         while retried <= n_retries:
+             try:
+                 if retried > 0:
+                     warnings.warn(f"Retrying {retried}/{n_retries}...")
+
+                 return func(*args, **kwargs)
+             except (ValueError, ConnectionError) as e:
+                 warnings.warn(e.args[0])
+             finally:
+                 retried += 1
+
+         raise RetriesExceeded()
+
+     return wrapper_hacky_retry
+
+
+ @hacky_retry
+ def query_subgraph(url: str, query: str, key: str) -> SubgraphResponseType:
+     """Query a subgraph.
+
+     Args:
+         url: the subgraph's URL.
+         query: the query to be used.
+         key: the key to use in order to access the required data.
+
+     Returns:
+         a response dictionary.
+     """
+     content = {QUERY_FIELD: query}
+     headers = {
+         "Accept": "application/json",
+         "Content-Type": "application/json",
+     }
+     res = requests.post(url, json=content, headers=headers)
+
+     if res.status_code != 200:
+         raise ConnectionError(
+             "Something went wrong while trying to communicate with the subgraph "
+             f"(Error: {res.status_code})!\n{res.text}"
+         )
+
+     body = res.json()
+     if ERROR_FIELD in body.keys():
+         raise ValueError(f"The given query is not correct: {body[ERROR_FIELD]}")
+
+     data = body.get(DATA_FIELD, {}).get(key, None)
+     if data is None:
+         raise ValueError(f"Unknown error encountered!\nRaw response: \n{body}")
+
+     return data
+
+
+ def fpmms_fetcher(current_timestamp: int) -> Generator[ResponseItemType, int, None]:
+     """An indefinite fetcher for the FPMMs."""
+     omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
+     logging.debug(f"omen_subgraph = {omen_subgraph}")
+     while True:
+         fpmm_id = yield
+         fpmms_query = FPMMS_WITH_TOKENS_QUERY.substitute(
+             creator=CREATOR,
+             fpmm_id=fpmm_id,
+             current_timestamp=current_timestamp,
+             fpmms_field=FPMMS_FIELD,
+             first=BATCH_SIZE,
+             id_field=ID_FIELD,
+             answer_timestamp_field=ANSWER_TIMESTAMP_FIELD,
+             question_field=QUESTION_FIELD,
+             outcomes_field=OUTCOMES_FIELD,
+             title_field=TITLE_FIELD,
+             opening_timestamp_field=OPENING_TIMESTAMP_FIELD,
+             creation_timestamp_field=CREATION_TIMESTAMP_FIELD,
+             liquidity_field=LIQUIDITY_FIELD,
+             liquidity_measure_field=LIQUIDIY_MEASURE_FIELD,
+             token_amounts_field=TOKEN_AMOUNTS_FIELD,
+         )
+         logging.debug(f"Executing query {fpmms_query}")
+         yield query_subgraph(omen_subgraph, fpmms_query, FPMMS_FIELD)
+
+
+ def fetch_fpmms(current_timestamp: int) -> pd.DataFrame:
+     """Fetch all the fpmms of the creator."""
+     logging.info("Fetching all markets")
+     latest_id = ""
+     fpmms = []
+     fetcher = fpmms_fetcher(current_timestamp)
+     for _ in tqdm(fetcher, unit="fpmms", unit_scale=BATCH_SIZE):
+         batch = fetcher.send(latest_id)
+         if len(batch) == 0:
+             logging.debug("no data")
+             break
+
+         # TODO Add the incremental batching system from market creator
+         # prev_fpmms is the previous local file with the markets
+         # for fpmm in batch:
+         #     if fpmm["id"] not in fpmms or "trades" not in prev_fpmms[fpmm["id"]]:
+         #         prev_fpmms[fpmm["id"]] = fpmm
+         logging.debug(f"length of the data received = {len(batch)}")
+         latest_id = batch[-1].get(ID_FIELD, "")
+         if latest_id == "":
+             raise ValueError(f"Unexpected data format retrieved: {batch}")
+
+         fpmms.extend(batch)
+
+     logging.info("Finished collecting data")
+     return pd.DataFrame(fpmms)
+
+
+ def get_answer(fpmm: pd.Series) -> str:
+     """Get an answer from its index, using Series of an FPMM."""
+     return fpmm[QUESTION_FIELD][OUTCOMES_FIELD][fpmm[ANSWER_FIELD]]
+
+
+ def get_first_token_perc(row):
+     if row["total_tokens"] == 0.0:
+         return 0
+     return round((row["token_first_amount"] / row["total_tokens"]) * 100, 2)
+
+
+ def get_second_token_perc(row):
+     if row["total_tokens"] == 0.0:
+         return 0
+     return round((row["token_second_amount"] / row["total_tokens"]) * 100, 2)
+
+
+ def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int) -> pd.DataFrame:
+     """Transform an FPMMS dataframe."""
+
+     # prepare the new ones
+     # Add current timestamp
+     fpmms["tokens_timestamp"] = current_timestamp
+     fpmms["open"] = True
+     fpmms["total_trades"] = 0
+     fpmms["dist_gap_perc"] = 0.0
+     fpmms["votes_first_outcome_perc"] = 0.0
+     fpmms["votes_second_outcome_perc"] = 0.0
+
+     # computation of token distributions
+     fpmms["token_first_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[0]))
+     fpmms["token_second_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[1]))
+     fpmms["total_tokens"] = fpmms.apply(
+         lambda x: x.token_first_amount + x.token_second_amount, axis=1
+     )
+     fpmms["first_token_perc"] = fpmms.apply(lambda x: get_first_token_perc(x), axis=1)
+     fpmms["second_token_perc"] = fpmms.apply(lambda x: get_second_token_perc(x), axis=1)
+     fpmms.drop(
+         columns=["token_first_amount", "token_second_amount", "total_tokens"],
+         inplace=True,
+     )
+     # previous file to update?
+     old_fpmms = None
+     if os.path.exists(DATA_DIR / filename):
+         old_fpmms = pd.read_parquet(DATA_DIR / filename)
+
+     if old_fpmms is not None:
+         # update which markets are not open anymore
+         open_markets = list(fpmms.id.unique())
+         logging.info("Updating market status of old markets")
+         open_mask = old_fpmms["id"].isin(open_markets)
+         old_fpmms.loc[~open_mask, "open"] = False
+
+         # now concatenate
+         logging.info("Appending new data to previous data")
+         return pd.concat([old_fpmms, fpmms], axis=0, ignore_index=True)
+
+     return fpmms
+
+
+ @measure_execution_time
+ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
+     """Fetch, process, store and return the markets as a Dataframe."""
+
+     logging.info("fetching new markets information")
+     current_timestamp = int(datetime.now(UTC).timestamp())
+     fpmms = fetch_fpmms(current_timestamp)
+     logging.debug("New collected data")
+     logging.debug(fpmms.head())
+
+     logging.info("transforming and updating previous data")
+     fpmms = transform_fpmms(fpmms, filename, current_timestamp)
+     logging.debug(fpmms.info())
+
+     logging.info("Adding trading information")
+     add_trading_info(fpmms, current_timestamp)
+
+     logging.info("saving the data")
+     print(fpmms.info())
+     if filename:
+         fpmms.to_parquet(DATA_DIR / filename, index=False)
+
+     return fpmms
+
+
+ if __name__ == "__main__":
+
+     logger = logging.getLogger(__name__)
+     logger.setLevel(logging.DEBUG)
+     # stream handler and formatter
+     stream_handler = logging.StreamHandler()
+     stream_handler.setLevel(logging.DEBUG)
+     formatter = logging.Formatter(
+         "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+     )
+     stream_handler.setFormatter(formatter)
+     logger.addHandler(stream_handler)
+     compute_distributions("markets_live_data.parquet")
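
The fpmms_fetcher / fetch_fpmms pair uses a slightly unusual generator handshake: each loop iteration first advances the generator to the `fpmm_id = yield` line, then `send(latest_id)` resumes it so it can build the query and yield the next batch. The toy below reproduces only that control flow with fake in-memory pages (no subgraph involved); it is an illustration, not code from this commit.

from typing import Generator, List

FAKE_PAGES = {"": ["m1", "m2"], "m2": ["m3"], "m3": []}  # cursor -> next batch


def toy_fetcher() -> Generator[List[str], str, None]:
    """Mimics fpmms_fetcher: wait for a cursor, then yield the next batch."""
    while True:
        cursor = yield            # iteration (next) stops here
        yield FAKE_PAGES[cursor]  # send(cursor) resumes and produces the batch


def toy_fetch_all() -> List[str]:
    latest_id = ""
    collected: List[str] = []
    fetcher = toy_fetcher()
    for _ in fetcher:             # advances the generator to `cursor = yield`
        batch = fetcher.send(latest_id)
        if not batch:
            break
        latest_id = batch[-1]     # last id becomes the cursor for the next page
        collected.extend(batch)
    return collected


print(toy_fetch_all())  # ['m1', 'm2', 'm3']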
scripts/live_traders_data.py ADDED
@@ -0,0 +1,169 @@
+ import requests
+ import logging
+ import pandas as pd
+ from collections import defaultdict
+ from typing import Any, Optional
+ from tqdm import tqdm
+ from utils import (
+     OMEN_SUBGRAPH_URL,
+     CREATOR,
+     BATCH_SIZE,
+     DATA_DIR,
+ )
+ from utils import SUBGRAPH_API_KEY, _to_content
+ from queries import omen_trader_votes_query
+
+
+ headers = {
+     "Accept": "application/json, multipart/mixed",
+     "Content-Type": "application/json",
+ }
+
+ logging.basicConfig(level=logging.INFO)
+
+
+ def _query_omen_xdai_subgraph(
+     fpmm_id: str,
+ ) -> dict[str, Any]:
+     """Query the subgraph."""
+     omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
+     logging.info(f"omen_subgraph = {omen_subgraph}")
+     grouped_results = defaultdict(list)
+     id_gt = ""
+
+     while True:
+         query = omen_trader_votes_query.substitute(
+             fpmm_creator=CREATOR.lower(),
+             first=BATCH_SIZE,
+             id_gt=id_gt,
+             fpmm_id=fpmm_id,
+         )
+         logging.debug(f"query for the omen to collect trades {query}")
+         content_json = _to_content(query)
+
+         res = requests.post(omen_subgraph, headers=headers, json=content_json)
+         result_json = res.json()
+         user_trades = result_json.get("data", {}).get("fpmmTrades", [])
+
+         if not user_trades:
+             break
+
+         for trade in user_trades:
+             fpmm_id = trade.get("fpmm", {}).get("id")
+             grouped_results[fpmm_id].append(trade)
+
+         id_gt = user_trades[len(user_trades) - 1]["id"]
+
+     all_results = {
+         "data": {
+             "fpmmTrades": [
+                 trade
+                 for trades_list in grouped_results.values()
+                 for trade in trades_list
+             ]
+         }
+     }
+
+     return all_results
+
+
+ def transform_trades(trades_json: dict) -> pd.DataFrame:
+     # convert to dataframe
+     logging.info("transforming trades")
+     df = pd.DataFrame(trades_json["data"]["fpmmTrades"])
+     if len(df) == 0:
+         logging.warning("No trades for this market")
+         return df
+
+     # print(df.info())
+
+     # convert creator to address
+     df["trade_creator"] = df["creator"].apply(lambda x: x["id"])
+
+     # normalize fpmm column
+     fpmm = pd.json_normalize(df["fpmm"])
+     fpmm.columns = [f"fpmm.{col}" for col in fpmm.columns]
+     df = pd.concat([df, fpmm], axis=1)
+
+     # drop fpmm column
+     df.drop(["fpmm"], axis=1, inplace=True)
+
+     # convert into int
+     df.outcomeIndex = pd.to_numeric(df.outcomeIndex, errors="coerce")
+     return df
+
+
+ def compute_from_timestamp_value(
+     fpmm_id: str, opening_timestamp: int, fpmms: pd.DataFrame
+ ) -> Optional[int]:
+     """Function to find the latest timestamp registered for a specific market"""
+     try:
+         market_data = fpmms.loc[fpmms["id"] == fpmm_id]
+         # how many previous samples do we have?
+         if len(market_data) == 1:
+             # take the opening Timestamp of the Market
+             return opening_timestamp
+         timestamps = sorted(market_data.tokens_timestamp.values)
+         # the last value is the current timestamp so we need to take the previous one
+         return timestamps[-2]
+     except Exception as e:
+         logging.error(
+             f"Error when trying to get the from timestamp value of the market id {fpmm_id}"
+         )
+         return None
+
+
+ def compute_votes_distribution(market_trades: pd.DataFrame):
+     """Function to compute the distribution of votes for the trades of a market"""
+     total_trades = len(market_trades)
+     print(f"The total number of trades is {total_trades}")
+     # outcomeIndex is always 1 or 0?
+     sum_outcome_index_1 = sum(market_trades.outcomeIndex)
+     print(f"The total number of votes for index 1 is {sum_outcome_index_1}")
+     logging.info(f"The total number of votes for index 1 is {sum_outcome_index_1}")
+     percentage_index_1 = round((sum_outcome_index_1 / total_trades) * 100, 2)
+     return (100 - percentage_index_1), percentage_index_1
+
+
+ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
+     """Function to update only the information related with the current timestamp"""
+     # Iterate over the markets
+     logging.info("Adding votes distribution per market")
+
+     for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
+         # update the trades for this market and at this specific current_timestamp
+         logging.debug(f"current timestamp = {current_timestamp} and market timestamp={fpmm['tokens_timestamp']}")
+         to_update = fpmm["open"] and fpmm["tokens_timestamp"] == current_timestamp
+         if not to_update:  # jump closed markets or old data
+             logging.debug("Jumping this row")
+             continue
+         market_id = fpmm["id"]
+
+         logging.info(f"Adding information for the market {market_id}")
+         market_trades_json = _query_omen_xdai_subgraph(
+             fpmm_id=market_id,
+         )
+         market_trades = transform_trades(market_trades_json)
+         if len(market_trades) == 0:
+             logging.info("No trades for this market")
+             continue
+         # to compute the votes distribution
+         logging.info("Computing the votes distribution")
+         fpmms.at[i, "total_trades"] = len(market_trades)
+         first_outcome, second_outcome = compute_votes_distribution(market_trades)
+         logging.info(
+             f"first outcome votes ={first_outcome}, second outcome votes = {second_outcome}"
+         )
+         # TODO Why these numbers are wrong? DEBUG HERE (iterrows yields copies, so write back through fpmms.at)
+         fpmms.at[i, "votes_first_outcome_perc"] = first_outcome
+         fpmms.at[i, "votes_second_outcome_perc"] = second_outcome
+         metric = abs(fpmm["first_token_perc"] - first_outcome)
+         logging.info(f"metric for this market {metric}")
+         fpmms.at[i, "dist_gap_perc"] = metric
+     logging.debug("Dataset after adding trading info")
+     logging.debug(fpmms.head())
+     return
+
+
+ if __name__ == "__main__":
+     print("collecting votes distribution")
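
compute_votes_distribution treats every Buy trade as one vote and relies on outcomeIndex being 0 or 1, so the column sum is the vote count for the second outcome. A quick sanity check on a toy dataframe (illustrative only):

import pandas as pd

from live_traders_data import compute_votes_distribution

# one vote for outcome 0, three for outcome 1
trades = pd.DataFrame({"outcomeIndex": [0, 1, 1, 1]})
first_perc, second_perc = compute_votes_distribution(trades)
print(first_perc, second_perc)  # 25.0 75.0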
scripts/queries.py ADDED
@@ -0,0 +1,117 @@
+ # -*- coding: utf-8 -*-
+ # ------------------------------------------------------------------------------
+ #
+ # Copyright 2024 Valory AG
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ # ------------------------------------------------------------------------------
+
+ from string import Template
+
+ FPMMS_FIELD = "fixedProductMarketMakers"
+ QUERY_FIELD = "query"
+ ERROR_FIELD = "errors"
+ DATA_FIELD = "data"
+ ID_FIELD = "id"
+ ANSWER_FIELD = "currentAnswer"
+ QUESTION_FIELD = "question"
+ OUTCOMES_FIELD = "outcomes"
+ TITLE_FIELD = "title"
+ ANSWER_TIMESTAMP_FIELD = "currentAnswerTimestamp"
+ OPENING_TIMESTAMP_FIELD = "openingTimestamp"
+ RESOLUTION_TIMESTAMP_FIELD = "resolutionTimestamp"
+ CREATION_TIMESTAMP_FIELD = "creationTimestamp"
+ LIQUIDITY_FIELD = "liquidityParameter"
+ LIQUIDIY_MEASURE_FIELD = "liquidityMeasure"
+ TOKEN_AMOUNTS_FIELD = "outcomeTokenAmounts"
+
+ FPMMS_WITH_TOKENS_QUERY = Template(
+     """
+     {
+       ${fpmms_field}(
+         where: {
+           creator: "${creator}",
+           id_gt: "${fpmm_id}",
+           isPendingArbitration: false
+           currentAnswer: null
+           openingTimestamp_gt:${current_timestamp}
+         },
+         orderBy: ${id_field}
+         orderDirection: asc
+         first: ${first}
+       ){
+         ${id_field}
+         ${question_field} {
+           ${outcomes_field}
+           ${answer_timestamp_field}
+           answers{
+             answer
+           }
+         }
+         ${title_field}
+         ${opening_timestamp_field}
+         ${creation_timestamp_field}
+         ${liquidity_field}
+         ${liquidity_measure_field}
+         ${token_amounts_field}
+       }
+     }
+     """
+ )
+
+
+ omen_trader_votes_query = Template(
+     """
+     {
+       fpmmTrades(
+         where: {
+           type: Buy,
+           fpmm_: {
+             creator: "${fpmm_creator}",
+             id: "${fpmm_id}",
+           },
+           id_gt: "${id_gt}"
+         }
+         first: ${first}
+         orderBy: id
+         orderDirection: asc
+       ) {
+         id
+         title
+         collateralToken
+         outcomeTokenMarginalPrice
+         oldOutcomeTokenMarginalPrice
+         type
+         creator {
+           id
+         }
+         creationTimestamp
+         collateralAmount
+         collateralAmountUSD
+         feeAmount
+         outcomeIndex
+         outcomeTokensTraded
+         transactionHash
+         fpmm {
+           id
+           outcomes
+           title
+           condition {
+             id
+           }
+         }
+       }
+     }
+     """
+ )
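
Both queries are plain string.Template objects, so callers render them with substitute before posting to the subgraph, as scripts/live_markets_data.py and scripts/live_traders_data.py do. A small rendering example (the market id and cursor values are placeholders):

from queries import omen_trader_votes_query

rendered = omen_trader_votes_query.substitute(
    fpmm_creator="0x89c5cc945dd550bcffb72fe42bff002429f46fec",  # CREATOR.lower() from utils.py
    fpmm_id="0x0000000000000000000000000000000000000000",  # placeholder market id
    id_gt="",  # empty cursor fetches the first page
    first=1000,
)
print(rendered)  # GraphQL text, ready to be wrapped by utils._to_content()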
scripts/utils.py ADDED
@@ -0,0 +1,50 @@
+ from string import Template
+ from pathlib import Path
+ import sys
+ import json
+ import os
+ import time
+ from tqdm import tqdm
+ from typing import List, Any, Optional, Union
+ import numpy as np
+ import pandas as pd
+ import gc
+ import re
+ from dataclasses import dataclass
+ from enum import Enum
+ from json.decoder import JSONDecodeError
+
+
+ CREATOR = "0x89c5cc945dd550BcFfb72Fe42BfF002429F46Fec"
+ BATCH_SIZE = 1000
+ # OMEN_SUBGRAPH = "https://api.thegraph.com/subgraphs/name/protofire/omen-xdai"
+ OMEN_SUBGRAPH_URL = Template(
+     """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
+ )
+ SCRIPTS_DIR = Path(__file__).parent
+ ROOT_DIR = SCRIPTS_DIR.parent
+ DATA_DIR = ROOT_DIR / "live_data"
+ MAX_UINT_HEX = "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ SUBGRAPH_API_KEY = os.environ.get("SUBGRAPH_API_KEY", None)
+
+
+ def measure_execution_time(func):
+     def wrapper(*args, **kwargs):
+         start_time = time.time()
+         result = func(*args, **kwargs)
+         end_time = time.time()
+         execution_time = end_time - start_time
+         print(f"Execution time: {execution_time:.6f} seconds")
+         return result
+
+     return wrapper
+
+
+ def _to_content(q: str) -> dict[str, Any]:
+     """Wrap the given query string in the JSON payload expected by the subgraph, i.e., under the `query` key with empty variables and extensions."""
+     finalized_query = {
+         "query": q,
+         "variables": None,
+         "extensions": {"headers": None},
+     }
+     return finalized_query
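
A short usage sketch tying these helpers together; it assumes SUBGRAPH_API_KEY is set in the environment, and the one-item GraphQL query is only for illustration:

import requests

from utils import OMEN_SUBGRAPH_URL, SUBGRAPH_API_KEY, _to_content, measure_execution_time


@measure_execution_time
def ping_subgraph() -> int:
    """Send a tiny query just to check connectivity and timing."""
    url = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
    payload = _to_content("{ fixedProductMarketMakers(first: 1) { id } }")
    response = requests.post(url, json=payload, timeout=30)
    return response.status_code


if __name__ == "__main__":
    print(ping_subgraph())  # prints the execution time, then the HTTP status code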