cyberosa committed
Commit a134d9b · 1 Parent(s): 3305162

initial files and scripts. App under construction
.gitignore ADDED
@@ -0,0 +1,162 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ .DS_Store
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
README.md CHANGED
@@ -1,6 +1,6 @@
  ---
  title: Olas Predict Live Markets
- emoji: 🐠
+ emoji: 💹
  colorFrom: gray
  colorTo: red
  sdk: gradio
app.py ADDED
@@ -0,0 +1,60 @@
+ from datetime import datetime, timedelta
+ import gradio as gr
+ import matplotlib.pyplot as plt
+ import pandas as pd
+ import seaborn as sns
+ import duckdb
+ import logging
+
+
+ def get_logger():
+     logger = logging.getLogger(__name__)
+     logger.setLevel(logging.DEBUG)
+     # stream handler and formatter
+     stream_handler = logging.StreamHandler()
+     stream_handler.setLevel(logging.DEBUG)
+     formatter = logging.Formatter(
+         "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+     )
+     stream_handler.setFormatter(formatter)
+     logger.addHandler(stream_handler)
+     return logger
+
+
+ logger = get_logger()
+
+
+ def prepare_data():
+     """
+     Get all data from the parquet files
+     """
+     logger.info("Getting all data")
+     con = duckdb.connect(":memory:")
+
+     # Query to fetch the live markets data
+     query = f"""
+     SELECT *
+     FROM read_parquet('./live_data/markets_live_data.parquet')
+     """
+     df = con.execute(query).fetchdf()
+     return df
+
+
+ demo = gr.Blocks()
+ markets_data = prepare_data()
+
+ with demo:
+     gr.HTML("<h1>Olas Predict Live Markets </h1>")
+     gr.Markdown("This app shows the distributions of predictions on the live markets.")
+
+     with gr.Tabs():
+         with gr.TabItem("💹Probability distributions"):
+             with gr.Row():
+                 gr.Markdown("# Daily probability distribution of live markets")
+             with gr.Row():
+                 # TODO
+                 print("WIP")
+                 gr.Markdown("Under construction (WIP)")
+                 # daily_distributions = plot_daily_market_distributions(markets_data)
+
+ demo.queue(default_concurrency_limit=40).launch()
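
The probability-distributions tab is still a stub: the call to plot_daily_market_distributions is commented out above. Below is a minimal sketch of what such a helper could look like, assuming the columns produced by scripts/live_markets_data.py (creationTimestamp, first_token_perc). The function name matches the commented-out call, but its body is an illustration, not part of this commit.

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


def plot_daily_market_distributions(markets_data: pd.DataFrame) -> gr.Plot:
    """Sketch: box-plot of first-outcome token percentages per market creation day."""
    data = markets_data.copy()
    # creationTimestamp arrives from the subgraph as a unix timestamp (string or int)
    data["creation_day"] = pd.to_datetime(
        pd.to_numeric(data["creationTimestamp"]), unit="s"
    ).dt.date
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.boxplot(data=data, x="creation_day", y="first_token_perc", ax=ax)
    ax.set_xlabel("Market creation day")
    ax.set_ylabel("First outcome token %")
    return gr.Plot(value=fig)

Inside the second gr.Row() it would replace the print("WIP") placeholder with daily_distributions = plot_daily_market_distributions(markets_data).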
live_data/analysis_of_markets_data.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
live_data/markets_live_data.parquet ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a878df63a59bc40ef9c4df0b3a2a87b5d9a66bc533962dff98547fef256348e0
+ size 22146
live_data/markets_live_data_sample.parquet ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7f78eff92f06483f12d9acf36494488732e39c22098ce2f3e21e6d44efb88af
+ size 25464
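
Both parquet files are checked in as Git LFS pointers, so only the oid/size metadata appears in the diff. Once the LFS objects are pulled, the data can be inspected directly; a quick check (illustrative, run from the repository root):

import pandas as pd

# load the live markets snapshot and peek at its shape and columns
df = pd.read_parquet("live_data/markets_live_data.parquet")
print(df.shape)
print(df.columns.tolist())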
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ pandas==2.0.1
+ seaborn
+ matplotlib
+ huggingface-hub
+ pyarrow
+ requests
+ gradio==4.13.0
+ plotly
+ nbformat
+ pytz
+ duckdb
+ ipfshttpclient
scripts/live_markets_data.py ADDED
@@ -0,0 +1,298 @@
+ # -*- coding: utf-8 -*-
+ # ------------------------------------------------------------------------------
+ #
+ # Copyright 2024 Valory AG
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ # ------------------------------------------------------------------------------
+
+ import functools
+ import warnings
+ from typing import Optional, Generator, Callable
+ import os
+ import logging
+ import pandas as pd
+ from datetime import datetime, timedelta, UTC
+ import requests
+ from tqdm import tqdm
+ from typing import List, Dict
+ from live_traders_data import add_trading_info
+ from utils import (
+     SUBGRAPH_API_KEY,
+     measure_execution_time,
+     OMEN_SUBGRAPH_URL,
+     CREATOR,
+     BATCH_SIZE,
+     DATA_DIR,
+ )
+ from queries import (
+     FPMMS_WITH_TOKENS_QUERY,
+     ID_FIELD,
+     DATA_FIELD,
+     ANSWER_FIELD,
+     ANSWER_TIMESTAMP_FIELD,
+     QUERY_FIELD,
+     TITLE_FIELD,
+     OUTCOMES_FIELD,
+     OPENING_TIMESTAMP_FIELD,
+     CREATION_TIMESTAMP_FIELD,
+     LIQUIDITY_FIELD,
+     LIQUIDIY_MEASURE_FIELD,
+     TOKEN_AMOUNTS_FIELD,
+     ERROR_FIELD,
+     QUESTION_FIELD,
+     FPMMS_FIELD,
+ )
+
+ logging.basicConfig(level=logging.INFO)
+
+ ResponseItemType = List[Dict[str, str]]
+ SubgraphResponseType = Dict[str, ResponseItemType]
+
+
+ class RetriesExceeded(Exception):
+     """Exception to raise when retries are exceeded during data-fetching."""
+
+     def __init__(
+         self, msg="Maximum retries were exceeded while trying to fetch the data!"
+     ):
+         super().__init__(msg)
+
+
+ def hacky_retry(func: Callable, n_retries: int = 3) -> Callable:
+     """Create a hacky retry strategy.
+     Unfortunately, we cannot use `requests.packages.urllib3.util.retry.Retry`,
+     because the subgraph does not return the appropriate status codes in case of failure.
+     Instead, it always returns code 200. Thus, we raise exceptions manually inside `make_request`,
+     catch those exceptions in the hacky retry decorator and try again.
+     Finally, if the allowed number of retries is exceeded, we raise a custom `RetriesExceeded` exception.
+
+     :param func: the input request function.
+     :param n_retries: the maximum allowed number of retries.
+     :return: The request method with the hacky retry strategy applied.
+     """
+
+     @functools.wraps(func)
+     def wrapper_hacky_retry(*args, **kwargs) -> SubgraphResponseType:
+         """The wrapper for the hacky retry.
+
+         :return: a response dictionary.
+         """
+         retried = 0
+
+         while retried <= n_retries:
+             try:
+                 if retried > 0:
+                     warnings.warn(f"Retrying {retried}/{n_retries}...")
+
+                 return func(*args, **kwargs)
+             except (ValueError, ConnectionError) as e:
+                 warnings.warn(e.args[0])
+             finally:
+                 retried += 1
+
+         raise RetriesExceeded()
+
+     return wrapper_hacky_retry
+
+
+ @hacky_retry
+ def query_subgraph(url: str, query: str, key: str) -> SubgraphResponseType:
+     """Query a subgraph.
+
+     Args:
+         url: the subgraph's URL.
+         query: the query to be used.
+         key: the key to use in order to access the required data.
+
+     Returns:
+         a response dictionary.
+     """
+     content = {QUERY_FIELD: query}
+     headers = {
+         "Accept": "application/json",
+         "Content-Type": "application/json",
+     }
+     res = requests.post(url, json=content, headers=headers)
+
+     if res.status_code != 200:
+         raise ConnectionError(
+             "Something went wrong while trying to communicate with the subgraph "
+             f"(Error: {res.status_code})!\n{res.text}"
+         )
+
+     body = res.json()
+     if ERROR_FIELD in body.keys():
+         raise ValueError(f"The given query is not correct: {body[ERROR_FIELD]}")
+
+     data = body.get(DATA_FIELD, {}).get(key, None)
+     if data is None:
+         raise ValueError(f"Unknown error encountered!\nRaw response: \n{body}")
+
+     return data
+
+
+ def fpmms_fetcher(current_timestamp: int) -> Generator[ResponseItemType, int, None]:
+     """An indefinite fetcher for the FPMMs."""
+     omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
+     logging.debug(f"omen_subgraph = {omen_subgraph}")
+     while True:
+         fpmm_id = yield
+         fpmms_query = FPMMS_WITH_TOKENS_QUERY.substitute(
+             creator=CREATOR,
+             fpmm_id=fpmm_id,
+             current_timestamp=current_timestamp,
+             fpmms_field=FPMMS_FIELD,
+             first=BATCH_SIZE,
+             id_field=ID_FIELD,
+             answer_timestamp_field=ANSWER_TIMESTAMP_FIELD,
+             question_field=QUESTION_FIELD,
+             outcomes_field=OUTCOMES_FIELD,
+             title_field=TITLE_FIELD,
+             opening_timestamp_field=OPENING_TIMESTAMP_FIELD,
+             creation_timestamp_field=CREATION_TIMESTAMP_FIELD,
+             liquidity_field=LIQUIDITY_FIELD,
+             liquidity_measure_field=LIQUIDIY_MEASURE_FIELD,
+             token_amounts_field=TOKEN_AMOUNTS_FIELD,
+         )
+         logging.debug(f"Executing query {fpmms_query}")
+         yield query_subgraph(omen_subgraph, fpmms_query, FPMMS_FIELD)
+
+
+ def fetch_fpmms(current_timestamp: int) -> pd.DataFrame:
+     """Fetch all the fpmms of the creator."""
+     logging.info("Fetching all markets")
+     latest_id = ""
+     fpmms = []
+     fetcher = fpmms_fetcher(current_timestamp)
+     for _ in tqdm(fetcher, unit="fpmms", unit_scale=BATCH_SIZE):
+         batch = fetcher.send(latest_id)
+         if len(batch) == 0:
+             logging.debug("no data")
+             break
+
+         # TODO Add the incremental batching system from market creator
+         # prev_fpmms is the previous local file with the markets
+         # for fpmm in batch:
+         #     if fpmm["id"] not in fpmms or "trades" not in prev_fpmms[fpmm["id"]]:
+         #         prev_fpmms[fpmm["id"]] = fpmm
+         logging.debug(f"length of the data received = {len(batch)}")
+         latest_id = batch[-1].get(ID_FIELD, "")
+         if latest_id == "":
+             raise ValueError(f"Unexpected data format retrieved: {batch}")
+
+         fpmms.extend(batch)
+
+     logging.info("Finished collecting data")
+     return pd.DataFrame(fpmms)
+
+
+ def get_answer(fpmm: pd.Series) -> str:
+     """Get an answer from its index, using Series of an FPMM."""
+     return fpmm[QUESTION_FIELD][OUTCOMES_FIELD][fpmm[ANSWER_FIELD]]
+
+
+ def get_first_token_perc(row):
+     if row["total_tokens"] == 0.0:
+         return 0
+     return round((row["token_first_amount"] / row["total_tokens"]) * 100, 2)
+
+
+ def get_second_token_perc(row):
+     if row["total_tokens"] == 0.0:
+         return 0
+     return round((row["token_second_amount"] / row["total_tokens"]) * 100, 2)
+
+
+ def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int) -> pd.DataFrame:
+     """Transform an FPMMS dataframe."""
+
+     # prepare the new ones
+     # Add current timestamp
+     fpmms["tokens_timestamp"] = current_timestamp
+     fpmms["open"] = True
+     fpmms["total_trades"] = 0
+     fpmms["dist_gap_perc"] = 0.0
+     fpmms["votes_first_outcome_perc"] = 0.0
+     fpmms["votes_second_outcome_perc"] = 0.0
+
+     # computation of token distributions
+     fpmms["token_first_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[0]))
+     fpmms["token_second_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[1]))
+     fpmms["total_tokens"] = fpmms.apply(
+         lambda x: x.token_first_amount + x.token_second_amount, axis=1
+     )
+     fpmms["first_token_perc"] = fpmms.apply(lambda x: get_first_token_perc(x), axis=1)
+     fpmms["second_token_perc"] = fpmms.apply(lambda x: get_second_token_perc(x), axis=1)
+     fpmms.drop(
+         columns=["token_first_amount", "token_second_amount", "total_tokens"],
+         inplace=True,
+     )
+     # previous file to update?
+     old_fpmms = None
+     if os.path.exists(DATA_DIR / filename):
+         old_fpmms = pd.read_parquet(DATA_DIR / filename)
+
+     if old_fpmms is not None:
+         # update which markets are not open anymore
+         open_markets = list(fpmms.id.unique())
+         logging.info("Updating market status of old markets")
+         open_mask = old_fpmms["id"].isin(open_markets)
+         old_fpmms.loc[~open_mask, "open"] = False
+
+         # now concatenate
+         logging.info("Appending new data to previous data")
+         return pd.concat([old_fpmms, fpmms], axis=0, ignore_index=True)
+
+     return fpmms
+
+
+ @measure_execution_time
+ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
+     """Fetch, process, store and return the markets as a Dataframe."""
+
+     logging.info("fetching new markets information")
+     current_timestamp = int(datetime.now(UTC).timestamp())
+     fpmms = fetch_fpmms(current_timestamp)
+     logging.debug("New collected data")
+     logging.debug(fpmms.head())
+
+     logging.info("transforming and updating previous data")
+     fpmms = transform_fpmms(fpmms, filename, current_timestamp)
+     logging.debug(fpmms.info())
+
+     logging.info("Adding trading information")
+     add_trading_info(fpmms, current_timestamp)
+
+     logging.info("saving the data")
+     print(fpmms.info())
+     if filename:
+         fpmms.to_parquet(DATA_DIR / filename, index=False)
+
+     return fpmms
+
+
+ if __name__ == "__main__":
+
+     logger = logging.getLogger(__name__)
+     logger.setLevel(logging.DEBUG)
+     # stream handler and formatter
+     stream_handler = logging.StreamHandler()
+     stream_handler.setLevel(logging.DEBUG)
+     formatter = logging.Formatter(
+         "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+     )
+     stream_handler.setFormatter(formatter)
+     logger.addHandler(stream_handler)
+     compute_distributions("markets_live_data.parquet")
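
The fpmms_fetcher / fetch_fpmms pair uses a slightly unusual generator handshake: each loop iteration first advances the generator to the `fpmm_id = yield` line, then `send(latest_id)` resumes it so it can build the query and yield the next batch. The toy below reproduces only that control flow with fake in-memory pages (no subgraph involved); it is an illustration, not code from this commit.

from typing import Generator, List

FAKE_PAGES = {"": ["m1", "m2"], "m2": ["m3"], "m3": []}  # cursor -> next batch


def toy_fetcher() -> Generator[List[str], str, None]:
    """Mimics fpmms_fetcher: wait for a cursor, then yield the next batch."""
    while True:
        cursor = yield            # iteration (next) stops here
        yield FAKE_PAGES[cursor]  # send(cursor) resumes and produces the batch


def toy_fetch_all() -> List[str]:
    latest_id = ""
    collected: List[str] = []
    fetcher = toy_fetcher()
    for _ in fetcher:             # advances the generator to `cursor = yield`
        batch = fetcher.send(latest_id)
        if not batch:
            break
        latest_id = batch[-1]     # last id becomes the cursor for the next page
        collected.extend(batch)
    return collected


print(toy_fetch_all())  # ['m1', 'm2', 'm3']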
scripts/live_traders_data.py ADDED
@@ -0,0 +1,169 @@
+ import requests
+ import logging
+ import pandas as pd
+ from collections import defaultdict
+ from typing import Any, Optional
+ from tqdm import tqdm
+ from utils import (
+     OMEN_SUBGRAPH_URL,
+     CREATOR,
+     BATCH_SIZE,
+     DATA_DIR,
+ )
+ from utils import SUBGRAPH_API_KEY, _to_content
+ from queries import omen_trader_votes_query
+
+
+ headers = {
+     "Accept": "application/json, multipart/mixed",
+     "Content-Type": "application/json",
+ }
+
+ logging.basicConfig(level=logging.INFO)
+
+
+ def _query_omen_xdai_subgraph(
+     fpmm_id: str,
+ ) -> dict[str, Any]:
+     """Query the subgraph."""
+     omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
+     logging.info(f"omen_subgraph = {omen_subgraph}")
+     grouped_results = defaultdict(list)
+     id_gt = ""
+
+     while True:
+         query = omen_trader_votes_query.substitute(
+             fpmm_creator=CREATOR.lower(),
+             first=BATCH_SIZE,
+             id_gt=id_gt,
+             fpmm_id=fpmm_id,
+         )
+         logging.debug(f"query for the omen to collect trades {query}")
+         content_json = _to_content(query)
+
+         res = requests.post(omen_subgraph, headers=headers, json=content_json)
+         result_json = res.json()
+         user_trades = result_json.get("data", {}).get("fpmmTrades", [])
+
+         if not user_trades:
+             break
+
+         for trade in user_trades:
+             fpmm_id = trade.get("fpmm", {}).get("id")
+             grouped_results[fpmm_id].append(trade)
+
+         id_gt = user_trades[len(user_trades) - 1]["id"]
+
+     all_results = {
+         "data": {
+             "fpmmTrades": [
+                 trade
+                 for trades_list in grouped_results.values()
+                 for trade in trades_list
+             ]
+         }
+     }
+
+     return all_results
+
+
+ def transform_trades(trades_json: dict) -> pd.DataFrame:
+     # convert to dataframe
+     logging.info("transforming trades")
+     df = pd.DataFrame(trades_json["data"]["fpmmTrades"])
+     if len(df) == 0:
+         logging.warning("No trades for this market")
+         return df
+
+     # print(df.info())
+
+     # convert creator to address
+     df["trade_creator"] = df["creator"].apply(lambda x: x["id"])
+
+     # normalize fpmm column
+     fpmm = pd.json_normalize(df["fpmm"])
+     fpmm.columns = [f"fpmm.{col}" for col in fpmm.columns]
+     df = pd.concat([df, fpmm], axis=1)
+
+     # drop fpmm column
+     df.drop(["fpmm"], axis=1, inplace=True)
+
+     # convert into int
+     df.outcomeIndex = pd.to_numeric(df.outcomeIndex, errors="coerce")
+     return df
+
+
+ def compute_from_timestamp_value(
+     fpmm_id: str, opening_timestamp: int, fpmms: pd.DataFrame
+ ) -> Optional[int]:
+     """Function to find the latest timestamp registered for a specific market"""
+     try:
+         market_data = fpmms.loc[fpmms["id"] == fpmm_id]
+         # how many previous samples do we have?
+         if len(market_data) == 1:
+             # take the opening Timestamp of the Market
+             return opening_timestamp
+         timestamps = sorted(market_data.tokens_timestamp.values)
+         # the last value is the current timestamp so we need to take the previous one
+         return timestamps[-2]
+     except Exception as e:
+         logging.error(
+             f"Error when trying to get the from timestamp value of the market id {fpmm_id}"
+         )
+         return None
+
+
+ def compute_votes_distribution(market_trades: pd.DataFrame):
+     """Function to compute the distribution of votes for the trades of a market"""
+     total_trades = len(market_trades)
+     print(f"The total number of trades is {total_trades}")
+     # outcomeIndex is always 1 or 0?
+     sum_outcome_index_1 = sum(market_trades.outcomeIndex)
+     print(f"The total number of votes for index 1 is {sum_outcome_index_1}")
+     logging.info(f"The total number of votes for index 1 is {sum_outcome_index_1}")
+     percentage_index_1 = round((sum_outcome_index_1 / total_trades) * 100, 2)
+     return (100 - percentage_index_1), percentage_index_1
+
+
+ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
+     """Function to update only the information related with the current timestamp"""
+     # Iterate over the markets
+     logging.info("Adding votes distribution per market")
+
+     for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
+         # update the trades for this market and at this specific current_timestamp
+         logging.debug(f"current timestamp = {current_timestamp} and market timestamp={fpmm['tokens_timestamp']}")
+         to_update = fpmm["open"] and fpmm["tokens_timestamp"] == current_timestamp
+         if not to_update:  # jump closed markets or old data
+             logging.debug("Jumping this row")
+             continue
+         market_id = fpmm["id"]
+
+         logging.info(f"Adding information for the market {market_id}")
+         market_trades_json = _query_omen_xdai_subgraph(
+             fpmm_id=market_id,
+         )
+         market_trades = transform_trades(market_trades_json)
+         if len(market_trades) == 0:
+             logging.info("No trades for this market")
+             continue
+         # to compute the votes distribution
+         logging.info("Computing the votes distribution")
+         fpmms.at[i, "total_trades"] = len(market_trades)
+         first_outcome, second_outcome = compute_votes_distribution(market_trades)
+         logging.info(
+             f"first outcome votes ={first_outcome}, second outcome votes = {second_outcome}"
+         )
+         # TODO Why these numbers are wrong? DEBUG HERE (iterrows yields copies, so write back through fpmms.at)
+         fpmms.at[i, "votes_first_outcome_perc"] = first_outcome
+         fpmms.at[i, "votes_second_outcome_perc"] = second_outcome
+         metric = abs(fpmm["first_token_perc"] - first_outcome)
+         logging.info(f"metric for this market {metric}")
+         fpmms.at[i, "dist_gap_perc"] = metric
+     logging.debug("Dataset after adding trading info")
+     logging.debug(fpmms.head())
+     return
+
+
+ if __name__ == "__main__":
+     print("collecting votes distribution")
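
compute_votes_distribution treats every Buy trade as one vote and relies on outcomeIndex being 0 or 1, so the column sum is the vote count for the second outcome. A quick sanity check on a toy dataframe (illustrative only):

import pandas as pd

from live_traders_data import compute_votes_distribution

# one vote for outcome 0, three for outcome 1
trades = pd.DataFrame({"outcomeIndex": [0, 1, 1, 1]})
first_perc, second_perc = compute_votes_distribution(trades)
print(first_perc, second_perc)  # 25.0 75.0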
scripts/queries.py ADDED
@@ -0,0 +1,117 @@
+ # -*- coding: utf-8 -*-
+ # ------------------------------------------------------------------------------
+ #
+ # Copyright 2024 Valory AG
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ # ------------------------------------------------------------------------------
+
+ from string import Template
+
+ FPMMS_FIELD = "fixedProductMarketMakers"
+ QUERY_FIELD = "query"
+ ERROR_FIELD = "errors"
+ DATA_FIELD = "data"
+ ID_FIELD = "id"
+ ANSWER_FIELD = "currentAnswer"
+ QUESTION_FIELD = "question"
+ OUTCOMES_FIELD = "outcomes"
+ TITLE_FIELD = "title"
+ ANSWER_TIMESTAMP_FIELD = "currentAnswerTimestamp"
+ OPENING_TIMESTAMP_FIELD = "openingTimestamp"
+ RESOLUTION_TIMESTAMP_FIELD = "resolutionTimestamp"
+ CREATION_TIMESTAMP_FIELD = "creationTimestamp"
+ LIQUIDITY_FIELD = "liquidityParameter"
+ LIQUIDIY_MEASURE_FIELD = "liquidityMeasure"
+ TOKEN_AMOUNTS_FIELD = "outcomeTokenAmounts"
+
+ FPMMS_WITH_TOKENS_QUERY = Template(
+     """
+     {
+       ${fpmms_field}(
+         where: {
+           creator: "${creator}",
+           id_gt: "${fpmm_id}",
+           isPendingArbitration: false
+           currentAnswer: null
+           openingTimestamp_gt:${current_timestamp}
+         },
+         orderBy: ${id_field}
+         orderDirection: asc
+         first: ${first}
+       ){
+         ${id_field}
+         ${question_field} {
+           ${outcomes_field}
+           ${answer_timestamp_field}
+           answers{
+             answer
+           }
+         }
+         ${title_field}
+         ${opening_timestamp_field}
+         ${creation_timestamp_field}
+         ${liquidity_field}
+         ${liquidity_measure_field}
+         ${token_amounts_field}
+       }
+     }
+     """
+ )
+
+
+ omen_trader_votes_query = Template(
+     """
+     {
+       fpmmTrades(
+         where: {
+           type: Buy,
+           fpmm_: {
+             creator: "${fpmm_creator}",
+             id: "${fpmm_id}",
+           },
+           id_gt: "${id_gt}"
+         }
+         first: ${first}
+         orderBy: id
+         orderDirection: asc
+       ) {
+         id
+         title
+         collateralToken
+         outcomeTokenMarginalPrice
+         oldOutcomeTokenMarginalPrice
+         type
+         creator {
+           id
+         }
+         creationTimestamp
+         collateralAmount
+         collateralAmountUSD
+         feeAmount
+         outcomeIndex
+         outcomeTokensTraded
+         transactionHash
+         fpmm {
+           id
+           outcomes
+           title
+           condition {
+             id
+           }
+         }
+       }
+     }
+     """
+ )
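
Both queries are plain string.Template objects, so callers render them with substitute before posting to the subgraph, as scripts/live_markets_data.py and scripts/live_traders_data.py do. A small rendering example (the market id and cursor values are placeholders):

from queries import omen_trader_votes_query

rendered = omen_trader_votes_query.substitute(
    fpmm_creator="0x89c5cc945dd550bcffb72fe42bff002429f46fec",  # CREATOR.lower() from utils.py
    fpmm_id="0x0000000000000000000000000000000000000000",  # placeholder market id
    id_gt="",  # empty cursor fetches the first page
    first=1000,
)
print(rendered)  # GraphQL text, ready to be wrapped by utils._to_content()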
scripts/utils.py ADDED
@@ -0,0 +1,50 @@
+ from string import Template
+ from pathlib import Path
+ import sys
+ import json
+ import os
+ import time
+ from tqdm import tqdm
+ from typing import List, Any, Optional, Union
+ import numpy as np
+ import pandas as pd
+ import gc
+ import re
+ from dataclasses import dataclass
+ from enum import Enum
+ from json.decoder import JSONDecodeError
+
+
+ CREATOR = "0x89c5cc945dd550BcFfb72Fe42BfF002429F46Fec"
+ BATCH_SIZE = 1000
+ # OMEN_SUBGRAPH = "https://api.thegraph.com/subgraphs/name/protofire/omen-xdai"
+ OMEN_SUBGRAPH_URL = Template(
+     """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
+ )
+ SCRIPTS_DIR = Path(__file__).parent
+ ROOT_DIR = SCRIPTS_DIR.parent
+ DATA_DIR = ROOT_DIR / "live_data"
+ MAX_UINT_HEX = "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+ SUBGRAPH_API_KEY = os.environ.get("SUBGRAPH_API_KEY", None)
+
+
+ def measure_execution_time(func):
+     def wrapper(*args, **kwargs):
+         start_time = time.time()
+         result = func(*args, **kwargs)
+         end_time = time.time()
+         execution_time = end_time - start_time
+         print(f"Execution time: {execution_time:.6f} seconds")
+         return result
+
+     return wrapper
+
+
+ def _to_content(q: str) -> dict[str, Any]:
+     """Wrap the given query string in the JSON payload expected by the subgraph, i.e., under the `query` key with empty variables and extensions."""
+     finalized_query = {
+         "query": q,
+         "variables": None,
+         "extensions": {"headers": None},
+     }
+     return finalized_query
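
A short usage sketch tying these helpers together; it assumes SUBGRAPH_API_KEY is set in the environment, and the one-item GraphQL query is only for illustration:

import requests

from utils import OMEN_SUBGRAPH_URL, SUBGRAPH_API_KEY, _to_content, measure_execution_time


@measure_execution_time
def ping_subgraph() -> int:
    """Send a tiny query just to check connectivity and timing."""
    url = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
    payload = _to_content("{ fixedProductMarketMakers(first: 1) { id } }")
    response = requests.post(url, json=payload, timeout=30)
    return response.status_code


if __name__ == "__main__":
    print(ping_subgraph())  # prints the execution time, then the HTTP status code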