import logging
from datetime import datetime, timedelta

import duckdb
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from tabs.tokens_votes_dist import (
    get_based_tokens_distribution,
    get_based_votes_distribution,
)


def get_logger():
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    # stream handler and formatter
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)
    return logger


logger = get_logger()


def prepare_data():
    """Load all market samples from the parquet file and add a readable datetime column."""
    logger.info("Getting all data")
    con = duckdb.connect(":memory:")

    # Query to fetch the live markets data
    query = """
        SELECT *
        FROM read_parquet('./live_data/markets_live_data.parquet')
    """
    df = con.execute(query).fetchdf()
    df["sample_datetime"] = df["sample_timestamp"].apply(
        lambda x: datetime.fromtimestamp(x)
    )
    return df


def get_extreme_cases(live_fpmms: pd.DataFrame):
    """Return the ids of the best and worst markets according to the dist gap metric."""
    # keep only markets with more than one sample
    samples_per_market = (
        live_fpmms[["id", "sample_timestamp"]].groupby("id").count().reset_index()
    )
    markets_with_multiple_samples = list(
        samples_per_market.loc[samples_per_market["sample_timestamp"] > 1, "id"].values
    )
    # .copy() avoids pandas' SettingWithCopyWarning when sorting in place below
    selected_markets = live_fpmms.loc[
        live_fpmms["id"].isin(markets_with_multiple_samples)
    ].copy()
    selected_markets.sort_values(by="dist_gap_perc", ascending=False, inplace=True)
    # after the descending sort, the last row has the smallest gap (best case)
    # and the first row has the largest gap (worst case)
    return selected_markets.iloc[-1].id, selected_markets.iloc[0].id


demo = gr.Blocks()
markets_data = prepare_data()

with demo:
    gr.HTML("
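
# --- Usage sketch (illustrative, not part of the app above) ---
# A minimal sketch of how prepare_data() and get_extreme_cases() compose,
# assuming the parquet file exists at ./live_data/markets_live_data.parquet
# and contains the columns referenced above (id, sample_timestamp,
# dist_gap_perc). The variable names below are hypothetical; the snippet is
# commented out because the gr.HTML(...) call above is truncated in this file.
#
# markets_df = prepare_data()
# best_market_id, worst_market_id = get_extreme_cases(markets_df)
# logger.info("best market: %s, worst market: %s", best_market_id, worst_market_id)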