# Spaces: Sleeping (hosting-page status banner captured with the source; not part of the program)
from datetime import datetime, timedelta | |
import gradio as gr | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import seaborn as sns | |
import duckdb | |
import logging | |
from tabs.tokens_votes_dist import ( | |
get_based_tokens_distribution, | |
get_based_votes_distribution, | |
) | |
from tabs.dist_gap import ( | |
get_distribution_plot, | |
get_correlation_map, | |
get_kde_with_trades, | |
) | |
def get_logger():
    """Return this module's logger, configured to emit DEBUG records to a stream.

    The logger is looked up by ``__name__``, so every call returns the same
    object. The stream handler is therefore attached only when the logger has
    no handler yet; otherwise each additional call would stack another
    handler and every message would be printed multiple times.

    Returns:
        logging.Logger: the configured module logger.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    if not logger.handlers:
        # stream handler and formatter
        stream_handler = logging.StreamHandler()
        stream_handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)

    return logger


# Module-wide logger shared by the functions below.
logger = get_logger()
def prepare_data() -> pd.DataFrame:
    """
    Get all data from the live markets parquet file and add datetime columns.

    Reads ``./live_data/markets_live_data.parquet`` through an in-memory
    DuckDB connection and derives three columns:

    * ``sample_datetime``: ``sample_timestamp`` converted with
      ``datetime.fromtimestamp`` (naive local time).
    * ``opening_datetime``: ``openingTimestamp`` cast to ``int`` and converted
      the same way.
    * ``days_to_resolution``: whole days between ``opening_datetime`` and
      ``sample_datetime``.

    Returns:
        The full table as a DataFrame including the derived columns.
    """
    logger.info("Getting all data")

    # Query to fetch every row of the live markets data
    query = """
        SELECT *
        FROM read_parquet('./live_data/markets_live_data.parquet')
    """

    con = duckdb.connect(":memory:")
    try:
        df = con.execute(query).fetchdf()
    finally:
        # Release the connection even when the parquet file cannot be read.
        con.close()

    # datetime.fromtimestamp is kept (rather than a vectorised UTC conversion)
    # so the resulting values stay in local time exactly as before.
    df["sample_datetime"] = df["sample_timestamp"].apply(
        lambda x: datetime.fromtimestamp(x)
    )
    # NOTE(review): the int() cast suggests openingTimestamp is stored as a
    # string or float -- confirm against the parquet schema.
    df["opening_datetime"] = df["openingTimestamp"].apply(
        lambda x: datetime.fromtimestamp(int(x))
    )
    df["days_to_resolution"] = (df["opening_datetime"] - df["sample_datetime"]).dt.days
    return df
def get_extreme_cases(live_fpmms: pd.DataFrame):
    """Return the ids of the best and worst case according to the dist gap metric.

    Only markets with more than one (non-null) ``sample_timestamp`` are
    considered. Among their samples, the one with the lowest
    ``dist_gap_perc`` identifies the best market and the one with the highest
    ``dist_gap_perc`` identifies the worst market. Samples whose
    ``dist_gap_perc`` is missing are ignored, because a missing value sorts
    last and would otherwise be reported as the best case.

    Args:
        live_fpmms: samples of the live markets; must contain the columns
            ``id``, ``sample_timestamp`` and ``dist_gap_perc``. The frame is
            not modified.

    Returns:
        A ``(best_market_id, worst_market_id)`` tuple.

    Raises:
        IndexError: if no market has more than one sample with a valid
            ``dist_gap_perc``.
    """
    # Number of samples of each row's market, aligned with the input rows.
    samples_of_market = live_fpmms.groupby("id")["sample_timestamp"].transform("count")

    # select markets with more than 1 sample
    selected_markets = live_fpmms.loc[samples_of_market > 1]
    selected_markets = selected_markets.dropna(subset=["dist_gap_perc"])
    if selected_markets.empty:
        raise IndexError(
            "No market with more than one sample and a valid dist_gap_perc"
        )

    # Sort into a new frame: sorting the filtered slice in place triggers
    # pandas' chained-assignment warning.
    ranked = selected_markets.sort_values(by="dist_gap_perc", ascending=False)
    return ranked.iloc[-1]["id"], ranked.iloc[0]["id"]
# Load the market samples once at start-up; every plot below reads this frame.
markets_data = prepare_data()
# Markets with the lowest / highest distribution gap, shown as showcase examples.
best_market_id, worst_market_id = get_extreme_cases(markets_data)

with gr.Blocks() as demo:
    gr.HTML("<h1>Olas Predict Live Markets </h1>")
    gr.Markdown("This app shows the distributions of predictions on the live markets.")

    with gr.Tabs():
        # NOTE(review): the leading characters of both tab labels look like
        # mis-encoded text (possibly emoji) -- confirm against the original file.
        with gr.TabItem("๐น Probability distributions of live markets"):
            # --- Best case -------------------------------------------------
            with gr.Row():
                gr.Markdown("Best case: a market with a low gap between distributions")
            with gr.Row():
                gr.Markdown(f"Market id = {best_market_id}")
            with gr.Row():
                with gr.Column(min_width=350):
                    gr.Markdown("# Evolution of outcomes probability based on tokens")
                    best_market_tokens_dist = get_based_tokens_distribution(
                        best_market_id, markets_data
                    )
                with gr.Column(min_width=350):
                    gr.Markdown("# Evolution of outcomes probability based on votes")
                    best_market_votes_dist = get_based_votes_distribution(
                        best_market_id, markets_data
                    )

            # --- Worst case (columns carry no headings) --------------------
            with gr.Row():
                gr.Markdown("Worst case: a market with a high distribution gap metric")
            with gr.Row():
                gr.Markdown(f"Market id = {worst_market_id}")
            with gr.Row():
                with gr.Column(min_width=350):
                    worst_market_tokens_dist = get_based_tokens_distribution(
                        worst_market_id, markets_data
                    )
                with gr.Column(min_width=350):
                    worst_market_votes_dist = get_based_votes_distribution(
                        worst_market_id, markets_data
                    )

        with gr.TabItem("๐ Distribution gap metric"):
            with gr.Row():
                gr.Markdown(
                    "This metric measures the difference between the probability "
                    "distribution based on the tokens distribution and the one "
                    "based on the votes distribution"
                )

            with gr.Row():
                gr.Markdown("# Density distribution")
            with gr.Row():
                kde_plot = get_distribution_plot(markets_data)

            with gr.Row():
                gr.Markdown("# Relationship with number of trades")
            with gr.Row():
                kde_trades_plot = get_kde_with_trades(markets_data)

            with gr.Row():
                gr.Markdown("# Correlation analysis between variables")
            with gr.Row():
                correlation_plot = get_correlation_map(markets_data)

# Enable request queuing (up to 40 concurrent workers per event), then serve.
demo.queue(default_concurrency_limit=40)
demo.launch()