from datetime import datetime, timedelta
import logging

import duckdb
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from tabs.tokens_votes_dist import (
    get_based_tokens_distribution,
    get_based_votes_distribution,
)
from tabs.dist_gap import (
    get_distribution_plot,
    get_correlation_map,
    get_kde_with_trades,
)


def get_logger() -> logging.Logger:
    """Return this module's logger, configured to emit DEBUG records to a stream.

    The stream handler is attached only when the logger has no handlers yet,
    so calling this function more than once does not duplicate log lines.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    if not logger.handlers:
        # stream handler and formatter
        stream_handler = logging.StreamHandler()
        stream_handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        )
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)
    return logger


logger = get_logger()


def prepare_data() -> pd.DataFrame:
    """Load the live markets samples from the parquet file.

    Reads ``./live_data/markets_live_data.parquet`` through an in-memory
    DuckDB connection and adds three derived columns:

    * ``sample_datetime``: ``sample_timestamp`` converted with
      ``datetime.fromtimestamp``.
    * ``opening_datetime``: ``openingTimestamp`` cast to ``int`` and converted
      with ``datetime.fromtimestamp``.
    * ``days_to_resolution``: whole days between ``sample_datetime`` and
      ``opening_datetime``.

    Returns:
        The loaded dataframe including the derived columns.
    """
    logger.info("Getting all data")
    con = duckdb.connect(":memory:")
    # Query to fetch the live markets data
    query = """
    SELECT *
    FROM read_parquet('./live_data/markets_live_data.parquet')
    """
    try:
        df = con.execute(query).fetchdf()
    finally:
        # Release the connection even when the query fails.
        con.close()

    # datetime.fromtimestamp yields naive datetimes in the local timezone;
    # both columns use the same conversion, so their difference is consistent.
    df["sample_datetime"] = df["sample_timestamp"].apply(
        lambda x: datetime.fromtimestamp(x)
    )
    df["opening_datetime"] = df["openingTimestamp"].apply(
        lambda x: datetime.fromtimestamp(int(x))
    )
    df["days_to_resolution"] = (df["opening_datetime"] - df["sample_datetime"]).dt.days
    return df


def get_extreme_cases(live_fpmms: pd.DataFrame) -> tuple:
    """Return the ids of the best and worst market according to the dist gap metric.

    Only markets with more than one sample are considered. Their rows are
    ranked by ``dist_gap_perc`` in descending order; the last row gives the
    best case and the first row gives the worst case.

    Args:
        live_fpmms: Samples of the live markets. The columns ``id``,
            ``sample_timestamp`` and ``dist_gap_perc`` are read.

    Returns:
        A ``(best_market_id, worst_market_id)`` tuple.

    Raises:
        IndexError: If no market has more than one sample.
    """
    # select markets with more than 1 sample
    samples_per_market = live_fpmms.groupby("id")["sample_timestamp"].count()
    markets_with_multiple_samples = samples_per_market[samples_per_market > 1].index
    selected_markets = live_fpmms.loc[
        live_fpmms["id"].isin(markets_with_multiple_samples)
    ]
    # Sort into a new frame: sorting the .loc slice in place triggers pandas'
    # SettingWithCopyWarning.
    # NOTE(review): rows with a missing dist_gap_perc sort last by default and
    # would therefore be reported as the best case; confirm this is acceptable.
    ranked_markets = selected_markets.sort_values(by="dist_gap_perc", ascending=False)
    return ranked_markets.iloc[-1]["id"], ranked_markets.iloc[0]["id"]


demo = gr.Blocks()
markets_data = prepare_data()

with demo:
    # NOTE(review): the title looks like it lost its surrounding HTML markup
    # (e.g. a heading tag); confirm the intended markup and restore it.
    gr.HTML("\n\nOlas Predict Live Markets\n\n")
    gr.Markdown("This app shows the distributions of predictions on the live markets.")
    best_market_id, worst_market_id = get_extreme_cases(markets_data)

    with gr.Tabs():
        with gr.TabItem("💹 Probability distributions of live markets"):
            with gr.Row():
                gr.Markdown("Best case: a market with a low gap between distributions")
            with gr.Row():
                gr.Markdown(f"Market id = {best_market_id}")
            with gr.Row():
                with gr.Column(scale=1, min_width=300):
                    best_market_tokens_dist = get_based_tokens_distribution(
                        best_market_id, markets_data
                    )
                with gr.Column(scale=2, min_width=300):
                    best_market_votes_dist = get_based_votes_distribution(
                        best_market_id, markets_data
                    )

            with gr.Row():
                gr.Markdown("Worst case: a market with a high distribution gap metric")
            with gr.Row():
                gr.Markdown(f"Market id = {worst_market_id}")
            with gr.Row():
                with gr.Column(scale=1, min_width=300):
                    worst_market_tokens_dist = get_based_tokens_distribution(
                        worst_market_id, markets_data
                    )
                with gr.Column(scale=2, min_width=300):
                    worst_market_votes_dist = get_based_votes_distribution(
                        worst_market_id, markets_data
                    )

        with gr.TabItem("📏 Distribution gap metric"):
            with gr.Row():
                gr.Markdown(
                    "This metric measures the difference between the probability distribution based on the tokens distribution and the one based on the votes distribution"
                )
            with gr.Row():
                gr.Markdown("# Density distribution")
            with gr.Row():
                kde_plot = get_distribution_plot(markets_data)

            with gr.Row():
                gr.Markdown("# Relationship with number of trades")
            with gr.Row():
                kde_trades_plot = get_kde_with_trades(markets_data)

            with gr.Row():
                gr.Markdown("# Correlation analysis between variables")
            with gr.Row():
                correlation_plot = get_correlation_map(markets_data)

demo.queue(default_concurrency_limit=40).launch()