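"""Gradio app showing the distributions of predictions on Olas Predict live markets."""
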
import logging
from datetime import datetime

import duckdb
import gradio as gr
import pandas as pd

from tabs.tokens_votes_dist import (
    get_based_tokens_distribution,
    get_based_votes_distribution,
)


def get_logger():
    """Configure and return a module-level logger that logs to stderr."""
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    # stream handler with a timestamped formatter
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)
    return logger


logger = get_logger()


def prepare_data():
    """Load all live-market samples from the local parquet file."""
    logger.info("Getting all data")
    con = duckdb.connect(":memory:")

    # query to fetch the live markets data
    query = """
    SELECT *
    FROM read_parquet('./live_data/markets_live_data.parquet')
    """
    df = con.execute(query).fetchdf()
    # convert the unix timestamp of each sample into a datetime object
    df["sample_datetime"] = df["sample_timestamp"].apply(datetime.fromtimestamp)
    return df


def get_extreme_cases(live_fpmms: pd.DataFrame):
    """Return the ids of the best and worst markets according to the
    distribution gap metric (lowest and highest dist_gap_perc)."""
    # keep only markets with more than one sample
    samples_per_market = (
        live_fpmms[["id", "sample_timestamp"]].groupby("id").count().reset_index()
    )
    markets_with_multiple_samples = list(
        samples_per_market.loc[samples_per_market["sample_timestamp"] > 1, "id"].values
    )
    # copy so we don't mutate (and warn about) a slice of the original frame
    selected_markets = live_fpmms.loc[
        live_fpmms["id"].isin(markets_with_multiple_samples)
    ].copy()
    # sorted descending: the worst case (largest gap) first, the best case last
    selected_markets = selected_markets.sort_values(
        by="dist_gap_perc", ascending=False
    )
    return selected_markets.iloc[-1].id, selected_markets.iloc[0].id


# load the market data once at startup and build the Gradio UI
demo = gr.Blocks()
markets_data = prepare_data()

with demo:
    gr.HTML("<h1>Olas Predict Live Markets</h1>")
    gr.Markdown("This app shows the distributions of predictions on the live markets.")
    best_market_id, worst_market_id = get_extreme_cases(markets_data)
    with gr.Tabs():
        with gr.TabItem("💹 Probability distributions of live markets"):
            with gr.Row():
                gr.Markdown("# Evolution of outcomes probability based on tokens")

            with gr.Row():
                gr.Markdown("Best case: a market with a low distribution gap metric")
            with gr.Row():
                gr.Markdown(f"Market id = {best_market_id}")
            with gr.Row():
                best_market_tokens_dist = get_based_tokens_distribution(
                    best_market_id, markets_data
                )

            with gr.Row():
                gr.Markdown("Worst case: a market with a high distribution gap metric")
            with gr.Row():
                gr.Markdown(f"Market id = {worst_market_id}")

            with gr.Row():
                worst_market_tokens_dist = get_based_tokens_distribution(
                    worst_market_id, markets_data
                )

            with gr.Row():
                gr.Markdown("# Evolution of outcomes probability based on votes")

    demo.queue(default_concurrency_limit=40).launch()