Spaces:

valory
/

olas_predict_live_markets

Running

App Files Community

cyberosa committed on Aug 5, 2024

Commit

f842047

1 Parent(s): 30394cf

new data and only open markets for extreme cases

Browse files

Files changed (6) hide show

app.py +19 -12
live_data/markets_live_data.parquet +2 -2
live_data/markets_live_data_sample.parquet +2 -2
notebooks/analysis_of_markets_data.ipynb +43 -41
scripts/live_markets_data.py +0 -1
scripts/live_traders_data.py +2 -2

app.py CHANGED Viewed

@@ -1,10 +1,9 @@
-from datetime import datetime, timedelta
 import gradio as gr
-import matplotlib.pyplot as plt
 import pandas as pd
-import seaborn as sns
 import duckdb
 import logging
 from tabs.tokens_votes_dist import (
     get_based_tokens_distribution,
     get_based_votes_distribution,
@@ -57,7 +56,7 @@ def prepare_data():
     return df
-def get_extreme_cases(live_fpmms: pd.DataFrame):
     """Function to return the id of the best and worst case according to the dist gap metric"""
     # select markets with more than 1 sample
     samples_per_market = (
@@ -70,48 +69,56 @@ def get_extreme_cases(live_fpmms: pd.DataFrame):
         live_fpmms["id"].isin(markets_with_multiple_samples)
     ]
     selected_markets.sort_values(by="dist_gap_perc", ascending=False, inplace=True)
-    return selected_markets.iloc[-1].id, selected_markets.iloc[0].id
 demo = gr.Blocks()
 markets_data = prepare_data()
 with demo:
     gr.HTML("<h1>Olas Predict Live Markets </h1>")
     gr.Markdown("This app shows the distributions of predictions on the live markets.")
-    best_market_id, worst_market_id = get_extreme_cases(markets_data)
     with gr.Tabs():
         with gr.TabItem("💹 Probability distributions of live markets"):
             with gr.Row():
                 gr.Markdown("Best case: a market with a low gap between distributions")
             with gr.Row():
-                gr.Markdown(f"Market id = {best_market_id}")
             with gr.Row():
                 with gr.Column(min_width=350):
                     gr.Markdown("# Evolution of outcomes probability based on tokens")
                     best_market_tokens_dist = get_based_tokens_distribution(
-                        best_market_id, markets_data
                     )
                 with gr.Column(min_width=350):
                     gr.Markdown("# Evolution of outcomes probability based on votes")
                     best_market_votes_dist = get_based_votes_distribution(
-                        best_market_id, markets_data
                     )
             with gr.Row():
                 gr.Markdown("Worst case: a market with a high distribution gap metric")
             with gr.Row():
-                gr.Markdown(f"Market id = {worst_market_id}")
             with gr.Row():
                 with gr.Column(min_width=350):
                     # gr.Markdown("# Evolution of outcomes probability based on tokens")
                     worst_market_tokens_dist = get_based_tokens_distribution(
-                        worst_market_id, markets_data
                     )
                 with gr.Column(min_width=350):
                     worst_market_votes_dist = get_based_votes_distribution(
-                        worst_market_id, markets_data
                     )
         with gr.TabItem("📏 Distribution gap metric"):

+from datetime import datetime
 import gradio as gr
 import pandas as pd
 import duckdb
 import logging
+from typing import Tuple
 from tabs.tokens_votes_dist import (
     get_based_tokens_distribution,
     get_based_votes_distribution,
     return df
+def get_extreme_cases(live_fpmms: pd.DataFrame) -> Tuple:
     """Function to return the id of the best and worst case according to the dist gap metric"""
     # select markets with more than 1 sample
     samples_per_market = (
         live_fpmms["id"].isin(markets_with_multiple_samples)
     ]
     selected_markets.sort_values(by="dist_gap_perc", ascending=False, inplace=True)
+    return (
+        selected_markets.iloc[-1].id,
+        selected_markets.iloc[-1].dist_gap_perc,
+        selected_markets.iloc[0].id,
+        selected_markets.iloc[0].dist_gap_perc,
+    )
 demo = gr.Blocks()
 markets_data = prepare_data()
+live_markets_data = markets_data.loc[markets_data["open"] == True]
 with demo:
     gr.HTML("<h1>Olas Predict Live Markets </h1>")
     gr.Markdown("This app shows the distributions of predictions on the live markets.")
+    best_market_id, best_gap, worst_market_id, worst_gap = get_extreme_cases(
+        live_markets_data
+    )
     with gr.Tabs():
         with gr.TabItem("💹 Probability distributions of live markets"):
             with gr.Row():
                 gr.Markdown("Best case: a market with a low gap between distributions")
             with gr.Row():
+                gr.Markdown(f"Market id = {best_market_id} Dist gap = {best_gap}")
             with gr.Row():
                 with gr.Column(min_width=350):
                     gr.Markdown("# Evolution of outcomes probability based on tokens")
                     best_market_tokens_dist = get_based_tokens_distribution(
+                        best_market_id, live_markets_data
                     )
                 with gr.Column(min_width=350):
                     gr.Markdown("# Evolution of outcomes probability based on votes")
                     best_market_votes_dist = get_based_votes_distribution(
+                        best_market_id, live_markets_data
                     )
             with gr.Row():
                 gr.Markdown("Worst case: a market with a high distribution gap metric")
             with gr.Row():
+                gr.Markdown(f"Market id = {worst_market_id} Dist gap = {worst_gap}")
             with gr.Row():
                 with gr.Column(min_width=350):
                     # gr.Markdown("# Evolution of outcomes probability based on tokens")
                     worst_market_tokens_dist = get_based_tokens_distribution(
+                        worst_market_id, live_markets_data
                     )
                 with gr.Column(min_width=350):
                     worst_market_votes_dist = get_based_votes_distribution(
+                        worst_market_id, live_markets_data
                     )
         with gr.TabItem("📏 Distribution gap metric"):

live_data/markets_live_data.parquet CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e0451385c1de2891fe1ab7fff8b3e5a5f797f351b2ca799266e65adec57a9b9
-size 34357

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ae654e0654ae908aff28be1cac0ebdff765f65e6c442dc94df1a2c801319c2a
+size 39132

live_data/markets_live_data_sample.parquet CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8592b33790b8209ebd8de3ac3108e76f25a3e02fc5720dc0a8984fe47e09cad1
-size 27990

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e0451385c1de2891fe1ab7fff8b3e5a5f797f351b2ca799266e65adec57a9b9
+size 34357

notebooks/analysis_of_markets_data.ipynb CHANGED Viewed

@@ -82,7 +82,7 @@
        "      <td>14.76</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
-       "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -105,7 +105,7 @@
        "      <td>47.84</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
-       "      <td>3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -128,7 +128,7 @@
        "      <td>43.07</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
-       "      <td>6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -140,7 +140,7 @@
        "      <td>[4808284238922480369, 10190745298156651455]</td>\n",
        "      <td>Will SpaceX launch Falcon 9 rocket on 3 August...</td>\n",
        "      <td>1722442019</td>\n",
-       "      <td>True</td>\n",
        "      <td>27</td>\n",
        "      <td>4.98</td>\n",
        "      <td>62.96</td>\n",
@@ -151,7 +151,7 @@
        "      <td>32.06</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
-       "      <td>7</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -174,7 +174,7 @@
        "      <td>50.32</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
-       "      <td>8</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -206,7 +206,7 @@
        "0  Will the new AI-powered upgrade for the Philip...        1722442019  False   \n",
        "1  Will Harvey Weinstein recover from Covid-19 an...        1722442019  False   \n",
        "2  Will Tesla's net income increase in the third ...        1722442019  False   \n",
-       "3  Will SpaceX launch Falcon 9 rocket on 3 August...        1722442019   True   \n",
        "4  Will the wildfire in California be under contr...        1722442019  False   \n",
        "\n",
        "   total_trades  dist_gap_perc  votes_first_outcome_perc  \\\n",
@@ -224,11 +224,11 @@
        "4                      52.46           Yes             No             49.68   \n",
        "\n",
        "   second_token_perc  mean_trade_size     sample_datetime  market_id  \n",
-       "0              14.76              NaN 2024-07-31 18:06:59          2  \n",
-       "1              47.84              NaN 2024-07-31 18:06:59          3  \n",
-       "2              43.07              NaN 2024-07-31 18:06:59          6  \n",
-       "3              32.06              NaN 2024-07-31 18:06:59          7  \n",
-       "4              50.32              NaN 2024-07-31 18:06:59          8  "
       ]
      },
      "execution_count": 3,
@@ -243,7 +243,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 156,
    "metadata": {},
    "outputs": [
     {
@@ -251,30 +251,32 @@
      "output_type": "stream",
      "text": [
       "<class 'pandas.core.frame.DataFrame'>\n",
-      "RangeIndex: 126 entries, 0 to 125\n",
-      "Data columns (total 18 columns):\n",
-      " #   Column                     Non-Null Count  Dtype  \n",
-      "---  ------                     --------------  -----  \n",
-      " 0   creationTimestamp          126 non-null    object \n",
-      " 1   id                         126 non-null    object \n",
-      " 2   liquidityMeasure           126 non-null    object \n",
-      " 3   liquidityParameter         126 non-null    object \n",
-      " 4   openingTimestamp           126 non-null    object \n",
-      " 5   outcomeTokenAmounts        126 non-null    object \n",
-      " 6   title                      126 non-null    object \n",
-      " 7   sample_timestamp           126 non-null    int64  \n",
-      " 8   open                       126 non-null    bool   \n",
-      " 9   total_trades               126 non-null    int64  \n",
-      " 10  dist_gap_perc              126 non-null    float64\n",
-      " 11  votes_first_outcome_perc   126 non-null    float64\n",
-      " 12  votes_second_outcome_perc  126 non-null    float64\n",
-      " 13  first_outcome              126 non-null    object \n",
-      " 14  second_outcome             126 non-null    object \n",
-      " 15  first_token_perc           126 non-null    float64\n",
-      " 16  second_token_perc          126 non-null    float64\n",
-      " 17  mean_trade_size            42 non-null     float64\n",
-      "dtypes: bool(1), float64(6), int64(2), object(9)\n",
-      "memory usage: 17.0+ KB\n"
      ]
     }
    ],
@@ -308,19 +310,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 158,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "open\n",
-       "True     96\n",
-       "False    30\n",
        "Name: count, dtype: int64"
       ]
      },
-     "execution_count": 158,
      "metadata": {},
      "output_type": "execute_result"
     }

        "      <td>14.76</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
+       "      <td>2.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>47.84</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
+       "      <td>3.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>43.07</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
+       "      <td>6.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>[4808284238922480369, 10190745298156651455]</td>\n",
        "      <td>Will SpaceX launch Falcon 9 rocket on 3 August...</td>\n",
        "      <td>1722442019</td>\n",
+       "      <td>False</td>\n",
        "      <td>27</td>\n",
        "      <td>4.98</td>\n",
        "      <td>62.96</td>\n",
        "      <td>32.06</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
+       "      <td>7.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>50.32</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
+       "      <td>8.0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "0  Will the new AI-powered upgrade for the Philip...        1722442019  False   \n",
        "1  Will Harvey Weinstein recover from Covid-19 an...        1722442019  False   \n",
        "2  Will Tesla's net income increase in the third ...        1722442019  False   \n",
+       "3  Will SpaceX launch Falcon 9 rocket on 3 August...        1722442019  False   \n",
        "4  Will the wildfire in California be under contr...        1722442019  False   \n",
        "\n",
        "   total_trades  dist_gap_perc  votes_first_outcome_perc  \\\n",
        "4                      52.46           Yes             No             49.68   \n",
        "\n",
        "   second_token_perc  mean_trade_size     sample_datetime  market_id  \n",
+       "0              14.76              NaN 2024-07-31 18:06:59        2.0  \n",
+       "1              47.84              NaN 2024-07-31 18:06:59        3.0  \n",
+       "2              43.07              NaN 2024-07-31 18:06:59        6.0  \n",
+       "3              32.06              NaN 2024-07-31 18:06:59        7.0  \n",
+       "4              50.32              NaN 2024-07-31 18:06:59        8.0  "
       ]
      },
      "execution_count": 3,
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "output_type": "stream",
      "text": [
       "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 168 entries, 0 to 167\n",
+      "Data columns (total 20 columns):\n",
+      " #   Column                     Non-Null Count  Dtype         \n",
+      "---  ------                     --------------  -----         \n",
+      " 0   creationTimestamp          168 non-null    object        \n",
+      " 1   id                         168 non-null    object        \n",
+      " 2   liquidityMeasure           168 non-null    int64         \n",
+      " 3   liquidityParameter         168 non-null    object        \n",
+      " 4   openingTimestamp           168 non-null    object        \n",
+      " 5   outcomeTokenAmounts        168 non-null    object        \n",
+      " 6   title                      168 non-null    object        \n",
+      " 7   sample_timestamp           168 non-null    int64         \n",
+      " 8   open                       168 non-null    bool          \n",
+      " 9   total_trades               168 non-null    int64         \n",
+      " 10  dist_gap_perc              168 non-null    float64       \n",
+      " 11  votes_first_outcome_perc   168 non-null    float64       \n",
+      " 12  votes_second_outcome_perc  168 non-null    float64       \n",
+      " 13  first_outcome              168 non-null    object        \n",
+      " 14  second_outcome             168 non-null    object        \n",
+      " 15  first_token_perc           168 non-null    float64       \n",
+      " 16  second_token_perc          168 non-null    float64       \n",
+      " 17  mean_trade_size            84 non-null     float64       \n",
+      " 18  sample_datetime            126 non-null    datetime64[ns]\n",
+      " 19  market_id                  126 non-null    float64       \n",
+      "dtypes: bool(1), datetime64[ns](1), float64(7), int64(3), object(8)\n",
+      "memory usage: 25.2+ KB\n"
      ]
     }
    ],
   },
   {
    "cell_type": "code",
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "open\n",
+       "False    110\n",
+       "True      58\n",
        "Name: count, dtype: int64"
       ]
      },
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }

scripts/live_markets_data.py CHANGED Viewed

@@ -292,7 +292,6 @@ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
 if __name__ == "__main__":
-    logger = logging.getLogger(__name__)
     logger.setLevel(logging.DEBUG)
     # stream handler and formatter
     stream_handler = logging.StreamHandler()

 if __name__ == "__main__":
     logger.setLevel(logging.DEBUG)
     # stream handler and formatter
     stream_handler = logging.StreamHandler()

scripts/live_traders_data.py CHANGED Viewed

@@ -12,9 +12,8 @@ from utils import (
 )
 from utils import SUBGRAPH_API_KEY, _to_content
 from queries import omen_trader_votes_query
-from live_markets_data import logger
-#logger = logging.getLogger(__name__)
 headers = {
     "Accept": "application/json, multipart/mixed",
@@ -108,6 +107,7 @@ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
     """Function to update only the information related with the current timestamp"""
     logger.info("Adding votes distribution per market")
     # Iterate over the markets
     for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
         # update the trades for this market and at this specific current_timestamp

 )
 from utils import SUBGRAPH_API_KEY, _to_content
 from queries import omen_trader_votes_query
+logger = logging.getLogger(__name__)
 headers = {
     "Accept": "application/json, multipart/mixed",
     """Function to update only the information related with the current timestamp"""
     logger.info("Adding votes distribution per market")
+    fpmms["liquidityMeasure"] = fpmms["liquidityMeasure"].apply(lambda x: int(x))
     # Iterate over the markets
     for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
         # update the trades for this market and at this specific current_timestamp