File size: 4,943 Bytes
a134d9b
 
 
 
 
 
 
72f2521
 
 
 
a46bb55
 
 
 
 
a134d9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f497c67
 
 
a46bb55
 
 
 
a134d9b
 
 
35989d5
 
 
 
72f2521
35989d5
 
72f2521
35989d5
 
 
 
 
c9eef1d
35989d5
 
a134d9b
 
 
 
 
 
35989d5
a134d9b
35989d5
 
a46bb55
c9eef1d
72f2521
35989d5
a46bb55
 
 
 
 
 
 
 
 
35989d5
 
 
c9eef1d
72f2521
35989d5
 
a46bb55
 
 
 
 
 
 
 
 
35989d5
a46bb55
a134d9b
a46bb55
 
773f144
 
a46bb55
773f144
a46bb55
773f144
 
a46bb55
a134d9b
773f144
a46bb55
773f144
 
a46bb55
 
 
 
 
a134d9b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
from datetime import datetime, timedelta
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import duckdb
import logging
from tabs.tokens_votes_dist import (
    get_based_tokens_distribution,
    get_based_votes_distribution,
)
from tabs.dist_gap import (
    get_distribution_plot,
    get_correlation_map,
    get_kde_with_trades,
)


def get_logger():
    """Return this module's logger, configured to emit DEBUG records.

    The logger is set to DEBUG level and gets one ``logging.StreamHandler``
    (which writes to ``sys.stderr`` by default) using the
    ``time - name - level - message`` format.

    The function is idempotent: ``logging.getLogger`` returns the same logger
    object for a given name, so the handler is attached only on the first
    call. Attaching it on every call would print each record once per call.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    # Exact type check on purpose: subclasses such as FileHandler write
    # somewhere else and must not prevent the console handler from being added.
    has_stream_handler = any(
        type(handler) is logging.StreamHandler for handler in logger.handlers
    )
    if not has_stream_handler:
        # stream handler and formatter
        stream_handler = logging.StreamHandler()
        stream_handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)
    return logger


# Module-level logger shared by the functions below.
logger = get_logger()


def prepare_data():
    """
    Load all live markets samples from the parquet file.

    Reads ``./live_data/markets_live_data.parquet`` through an in-memory
    DuckDB connection and adds three helper columns:

    - ``sample_datetime``: ``sample_timestamp`` converted with
      ``datetime.fromtimestamp`` (naive, local-time datetimes).
    - ``opening_datetime``: ``openingTimestamp`` cast to ``int`` and converted
      the same way.
    - ``days_to_resolution``: whole days between ``sample_datetime`` and
      ``opening_datetime``.

    Returns:
        The resulting pandas DataFrame.
    """
    logger.info("Getting all data")

    # Query to fetch every stored sample of the live markets
    query = """
    SELECT *
    FROM read_parquet('./live_data/markets_live_data.parquet')
    """
    con = duckdb.connect(":memory:")
    try:
        df = con.execute(query).fetchdf()
    finally:
        # Release the connection even if reading the parquet file fails.
        con.close()

    df["sample_datetime"] = df["sample_timestamp"].apply(
        lambda x: datetime.fromtimestamp(x)
    )
    # int() is applied first, so the stored value may be a numeric string.
    df["opening_datetime"] = df["openingTimestamp"].apply(
        lambda x: datetime.fromtimestamp(int(x))
    )
    df["days_to_resolution"] = (df["opening_datetime"] - df["sample_datetime"]).dt.days
    return df


def get_extreme_cases(live_fpmms: pd.DataFrame) -> tuple:
    """Return the ids of the best and worst markets according to the dist gap metric.

    Only markets with more than one sample are considered. Among their
    samples, the best case is the one with the lowest ``dist_gap_perc`` and
    the worst case the one with the highest.

    Args:
        live_fpmms: samples of the live markets. Must contain the columns
            ``id``, ``sample_timestamp`` and ``dist_gap_perc``.

    Returns:
        A ``(best_market_id, worst_market_id)`` tuple.

    Raises:
        IndexError: if no market has more than one sample with a non-null
            ``dist_gap_perc``.
    """
    # select markets with more than 1 sample
    samples_per_market = live_fpmms.groupby("id")["sample_timestamp"].count()
    markets_with_multiple_samples = samples_per_market.index[samples_per_market > 1]
    selected_markets = live_fpmms.loc[
        live_fpmms["id"].isin(markets_with_multiple_samples)
    ]

    # Rows without a gap value cannot be ranked. sort_values would place them
    # last, where they would be wrongly reported as the best case.
    # Sorting into a new frame (not inplace) avoids mutating a slice of the input.
    ranked_markets = selected_markets.dropna(subset=["dist_gap_perc"]).sort_values(
        by="dist_gap_perc", ascending=False
    )
    if ranked_markets.empty:
        raise IndexError(
            "No market with more than one sample and a valid dist_gap_perc value"
        )

    # Descending order: last row has the smallest gap, first row the largest.
    return ranked_markets["id"].iloc[-1], ranked_markets["id"].iloc[0]


# Build the Gradio app at import time: the data is loaded once and every
# component below is created in the order in which it appears in the layout.
demo = gr.Blocks()
markets_data = prepare_data()

with demo:
    gr.HTML("<h1>Olas Predict Live Markets </h1>")
    gr.Markdown("This app shows the distributions of predictions on the live markets.")
    # Ids of the markets with the lowest and highest dist gap metric.
    best_market_id, worst_market_id = get_extreme_cases(markets_data)
    with gr.Tabs():
        # Tab 1: tokens-based vs votes-based distributions for the two extreme markets.
        with gr.TabItem("💹 Probability distributions of live markets"):
            with gr.Row():
                gr.Markdown("Best case: a market with a low gap between distributions")
            with gr.Row():
                gr.Markdown(f"Market id = {best_market_id}")
            with gr.Row():
                # NOTE(review): the helpers from tabs.tokens_votes_dist presumably
                # create and return a Gradio plot component inside the current
                # column; the returned values are not used here - confirm.
                with gr.Column(scale=1, min_width=300):
                    # gr.Markdown("# Evolution of outcomes probability based on tokens")
                    best_market_tokens_dist = get_based_tokens_distribution(
                        best_market_id, markets_data
                    )
                with gr.Column(scale=2, min_width=300):
                    best_market_votes_dist = get_based_votes_distribution(
                        best_market_id, markets_data
                    )

            with gr.Row():
                gr.Markdown("Worst case: a market with a high distribution gap metric")
            with gr.Row():
                gr.Markdown(f"Market id = {worst_market_id}")

            with gr.Row():
                with gr.Column(scale=1, min_width=300):
                    # gr.Markdown("# Evolution of outcomes probability based on tokens")
                    worst_market_tokens_dist = get_based_tokens_distribution(
                        worst_market_id, markets_data
                    )
                with gr.Column(scale=2, min_width=300):
                    worst_market_votes_dist = get_based_votes_distribution(
                        worst_market_id, markets_data
                    )

        # Tab 2: plots about the distribution gap metric over all loaded samples.
        with gr.TabItem("📏 Distribution gap metric"):
            with gr.Row():
                gr.Markdown(
                    "This metric measures the difference between the probability distribution based on the tokens distribution and the one based on the votes distribution"
                )
            with gr.Row():
                gr.Markdown("# Density distribution")
            with gr.Row():
                kde_plot = get_distribution_plot(markets_data)

            with gr.Row():
                gr.Markdown("# Relationship with number of trades")

            with gr.Row():
                kde_trades_plot = get_kde_with_trades(markets_data)

            with gr.Row():
                gr.Markdown("# Correlation analysis between variables")

            with gr.Row():
                correlation_plot = get_correlation_map(markets_data)

    # Enable the request queue (default_concurrency_limit=40) and start the app.
    # NOTE(review): launch() is called while still inside the `with demo:` context
    # and without an `if __name__ == "__main__":` guard, so importing this module
    # starts the server - confirm this is intentional.
    demo.queue(default_concurrency_limit=40).launch()