File size: 4,943 Bytes
a134d9b 72f2521 a46bb55 a134d9b f497c67 a46bb55 a134d9b 35989d5 72f2521 35989d5 72f2521 35989d5 c9eef1d 35989d5 a134d9b 35989d5 a134d9b 35989d5 a46bb55 c9eef1d 72f2521 35989d5 a46bb55 35989d5 c9eef1d 72f2521 35989d5 a46bb55 35989d5 a46bb55 a134d9b a46bb55 773f144 a46bb55 773f144 a46bb55 773f144 a46bb55 a134d9b 773f144 a46bb55 773f144 a46bb55 a134d9b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
from datetime import datetime, timedelta
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import duckdb
import logging
from tabs.tokens_votes_dist import (
get_based_tokens_distribution,
get_based_votes_distribution,
)
from tabs.dist_gap import (
get_distribution_plot,
get_correlation_map,
get_kde_with_trades,
)
def get_logger():
    """Return this module's logger, configured for DEBUG stream output.

    The logger is looked up by ``__name__`` and its level is set to
    ``logging.DEBUG``. A ``StreamHandler`` with a timestamped formatter is
    attached only when the logger has no handler yet, so calling this
    function more than once does not make every record print several times.

    Returns:
        logging.Logger: the configured module logger.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    # logging.getLogger returns the same object for the same name, so an
    # unconditional addHandler would stack one more handler per call.
    if not logger.handlers:
        stream_handler = logging.StreamHandler()
        stream_handler.setLevel(logging.DEBUG)
        stream_handler.setFormatter(
            logging.Formatter(
                "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
            )
        )
        logger.addHandler(stream_handler)

    return logger


logger = get_logger()
def prepare_data():
    """Load the live markets samples and add derived datetime columns.

    Reads every row of ``./live_data/markets_live_data.parquet`` through an
    in-memory DuckDB connection and adds three columns:

    * ``sample_datetime``: ``sample_timestamp`` converted with
      ``datetime.fromtimestamp`` (naive datetime in the local time zone).
    * ``opening_datetime``: ``openingTimestamp`` cast to ``int`` and
      converted the same way.
    * ``days_to_resolution``: the ``.dt.days`` component of
      ``opening_datetime - sample_datetime``.

    Returns:
        pandas.DataFrame: the parquet rows plus the derived columns.
    """
    logger.info("Getting all data")

    # Plain string: the query has no placeholders, so no f-string is needed.
    query = """
    SELECT *
    FROM read_parquet('./live_data/markets_live_data.parquet')
    """

    con = duckdb.connect(":memory:")
    try:
        df = con.execute(query).fetchdf()
    finally:
        # Release the connection even when the query fails.
        con.close()

    # Element-wise conversion is kept on purpose: datetime.fromtimestamp
    # yields local time, which a vectorised pd.to_datetime(unit="s") would not.
    df["sample_datetime"] = df["sample_timestamp"].apply(
        lambda x: datetime.fromtimestamp(x)
    )
    df["opening_datetime"] = df["openingTimestamp"].apply(
        lambda x: datetime.fromtimestamp(int(x))
    )
    df["days_to_resolution"] = (df["opening_datetime"] - df["sample_datetime"]).dt.days
    return df
def get_extreme_cases(live_fpmms: pd.DataFrame):
    """Return the ids of the best and worst markets by the dist gap metric.

    Only markets with more than one non-null ``sample_timestamp`` are
    considered. Among their rows, the best case is the one with the lowest
    ``dist_gap_perc`` and the worst case the one with the highest.

    Args:
        live_fpmms: market samples with at least the columns ``id``,
            ``sample_timestamp`` and ``dist_gap_perc``. It is not modified.

    Returns:
        tuple: ``(best_market_id, worst_market_id)``.

    Raises:
        IndexError: if no market has more than one sample.
    """
    # Number of (non-null) samples recorded for each market id.
    samples_per_market = live_fpmms.groupby("id")["sample_timestamp"].count()
    multi_sample_ids = samples_per_market.index[samples_per_market > 1]
    selected_markets = live_fpmms.loc[live_fpmms["id"].isin(multi_sample_ids)]

    if selected_markets.empty:
        # Same exception type as the out-of-bounds iloc lookup would raise,
        # but with a message that names the actual cause.
        raise IndexError(
            "no market has more than one sample; cannot select extreme cases"
        )

    # Sort into a new frame: an in-place sort on a selection of the caller's
    # frame triggers pandas' SettingWithCopy warning.
    # NOTE(review): rows with a NaN dist_gap_perc sort last (pandas default)
    # and can therefore be returned as the "best" case -- confirm intended.
    ranked = selected_markets.sort_values(by="dist_gap_perc", ascending=False)

    # Read from the "id" column so the value keeps the column's own dtype.
    return ranked["id"].iloc[-1], ranked["id"].iloc[0]
# Top-level Gradio application: everything below runs at import time.
demo = gr.Blocks()
# Load the parquet-backed samples once; every plot below reads this frame.
markets_data = prepare_data()
# Components instantiated inside `with demo:` are laid out in creation order,
# so the statement order below defines the page layout.
with demo:
    gr.HTML("<h1>Olas Predict Live Markets </h1>")
    gr.Markdown("This app shows the distributions of predictions on the live markets.")
    # Ids of the markets with the lowest and highest dist_gap_perc.
    best_market_id, worst_market_id = get_extreme_cases(markets_data)
    with gr.Tabs():
        # Tab 1: token-based vs. vote-based distributions for the two extremes.
        with gr.TabItem("💹 Probability distributions of live markets"):
            with gr.Row():
                gr.Markdown("Best case: a market with a low gap between distributions")
            with gr.Row():
                gr.Markdown(f"Market id = {best_market_id}")
            with gr.Row():
                with gr.Column(scale=1, min_width=300):
                    # gr.Markdown("# Evolution of outcomes probability based on tokens")
                    # presumably returns a Gradio plot component -- defined in
                    # tabs.tokens_votes_dist, not visible here.
                    best_market_tokens_dist = get_based_tokens_distribution(
                        best_market_id, markets_data
                    )
                with gr.Column(scale=2, min_width=300):
                    best_market_votes_dist = get_based_votes_distribution(
                        best_market_id, markets_data
                    )
            with gr.Row():
                gr.Markdown("Worst case: a market with a high distribution gap metric")
            with gr.Row():
                gr.Markdown(f"Market id = {worst_market_id}")
            with gr.Row():
                with gr.Column(scale=1, min_width=300):
                    # gr.Markdown("# Evolution of outcomes probability based on tokens")
                    worst_market_tokens_dist = get_based_tokens_distribution(
                        worst_market_id, markets_data
                    )
                with gr.Column(scale=2, min_width=300):
                    worst_market_votes_dist = get_based_votes_distribution(
                        worst_market_id, markets_data
                    )
        # Tab 2: plots of the distribution gap metric over all samples.
        with gr.TabItem("📏 Distribution gap metric"):
            with gr.Row():
                gr.Markdown(
                    "This metric measures the difference between the probability distribution based on the tokens distribution and the one based on the votes distribution"
                )
            with gr.Row():
                gr.Markdown("# Density distribution")
            with gr.Row():
                kde_plot = get_distribution_plot(markets_data)
            with gr.Row():
                gr.Markdown("# Relationship with number of trades")
            with gr.Row():
                kde_trades_plot = get_kde_with_trades(markets_data)
            with gr.Row():
                gr.Markdown("# Correlation analysis between variables")
            with gr.Row():
                correlation_plot = get_correlation_map(markets_data)
# Enable request queuing with a default concurrency limit of 40, then start
# the server.
demo.queue(default_concurrency_limit=40).launch()
|