import logging
from datetime import datetime, timedelta

import duckdb
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from tabs.tokens_votes_dist import (
    get_based_tokens_distribution,
    get_based_votes_distribution,
)


def get_logger():
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    # stream handler and formatter
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)
    return logger


logger = get_logger()


def prepare_data():
    """Load all market samples from the parquet file and add a readable datetime column."""
    logger.info("Getting all data")
    con = duckdb.connect(":memory:")

    # Query to fetch the live markets data
    query = """
        SELECT *
        FROM read_parquet('./live_data/markets_live_data.parquet')
    """
    df = con.execute(query).fetchdf()
    df["sample_datetime"] = df["sample_timestamp"].apply(
        lambda x: datetime.fromtimestamp(x)
    )
    return df


def get_extreme_cases(live_fpmms: pd.DataFrame):
    """Return the ids of the best and worst markets according to the dist gap metric."""
    # keep only markets with more than one sample
    samples_per_market = (
        live_fpmms[["id", "sample_timestamp"]].groupby("id").count().reset_index()
    )
    markets_with_multiple_samples = list(
        samples_per_market.loc[samples_per_market["sample_timestamp"] > 1, "id"].values
    )
    # .copy() avoids pandas' SettingWithCopyWarning when sorting in place below
    selected_markets = live_fpmms.loc[
        live_fpmms["id"].isin(markets_with_multiple_samples)
    ].copy()
    selected_markets.sort_values(by="dist_gap_perc", ascending=False, inplace=True)
    # after the descending sort, the last row has the smallest gap (best case)
    # and the first row has the largest gap (worst case)
    return selected_markets.iloc[-1].id, selected_markets.iloc[0].id


demo = gr.Blocks()
markets_data = prepare_data()

with demo:
    gr.HTML("
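
# --- Usage sketch (illustrative, not part of the app above) ---
# A minimal sketch of how prepare_data() and get_extreme_cases() compose,
# assuming the parquet file exists at ./live_data/markets_live_data.parquet
# and contains the columns referenced above (id, sample_timestamp,
# dist_gap_perc). The variable names below are hypothetical; the snippet is
# commented out because the gr.HTML(...) call above is truncated in this file.
#
# markets_df = prepare_data()
# best_market_id, worst_market_id = get_extreme_cases(markets_df)
# logger.info("best market: %s, worst market: %s", best_market_id, worst_market_id)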