Spaces:

valory
/

olas_predict_live_markets

Sleeping

cyberosa

Adjusting graph configurations

17301f4 9 months ago

4.99 kB

	from datetime import datetime, timedelta
	import gradio as gr
	import matplotlib.pyplot as plt
	import pandas as pd
	import seaborn as sns
	import duckdb
	import logging
	from tabs.tokens_votes_dist import (
	get_based_tokens_distribution,
	get_based_votes_distribution,
	)
	from tabs.dist_gap import (
	get_distribution_plot,
	get_correlation_map,
	get_kde_with_trades,
	)


	def get_logger():
	    """Return this module's logger, set up to emit DEBUG records to a stream.

	    The logger is looked up by ``__name__`` and its level is set to DEBUG.
	    A single ``StreamHandler`` (DEBUG level, timestamp/name/level/message
	    format) is attached the first time the function runs.

	    Returns:
	        logging.Logger: the configured logger.
	    """
	    logger = logging.getLogger(__name__)
	    logger.setLevel(logging.DEBUG)

	    # logging.getLogger() hands back the same object for a given name, so
	    # attaching a handler on every call would print each record once per
	    # call. Only install the handler when the logger has none yet.
	    if not logger.handlers:
	        stream_handler = logging.StreamHandler()
	        stream_handler.setLevel(logging.DEBUG)
	        stream_handler.setFormatter(
	            logging.Formatter(
	                "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
	            )
	        )
	        logger.addHandler(stream_handler)
	    return logger


	# Module-level logger, created once when the module is imported; prepare_data() logs through it.
	logger = get_logger()


	def prepare_data():
	    """Load the live-markets samples and add datetime helper columns.

	    Reads every row of ``./live_data/markets_live_data.parquet`` (the path is
	    relative to the working directory) through an in-memory DuckDB
	    connection, then derives three columns:

	    * ``sample_datetime``: ``sample_timestamp`` passed to
	      ``datetime.fromtimestamp``.
	    * ``opening_datetime``: ``openingTimestamp`` cast to ``int`` and passed
	      to ``datetime.fromtimestamp``.
	    * ``days_to_resolution``: the ``.dt.days`` component of
	      ``opening_datetime - sample_datetime``.

	    ``datetime.fromtimestamp`` is called without a ``tz`` argument, so both
	    datetime columns are naive values in the machine's local timezone.

	    Returns:
	        pandas.DataFrame: the parquet contents plus the derived columns.
	    """
	    logger.info("Getting all data")

	    # Query to fetch all the samples of the live markets
	    query = """
	    SELECT *
	    FROM read_parquet('./live_data/markets_live_data.parquet')
	    """
	    con = duckdb.connect(":memory:")
	    try:
	        df = con.execute(query).fetchdf()
	    finally:
	        # The result is fully materialised as a DataFrame (or the query
	        # failed); either way the connection is no longer needed.
	        con.close()

	    df["sample_datetime"] = df["sample_timestamp"].apply(
	        lambda x: datetime.fromtimestamp(x)
	    )
	    # NOTE(review): the int() cast suggests openingTimestamp is not stored as
	    # a number (e.g. a string) - confirm against the parquet schema.
	    df["opening_datetime"] = df["openingTimestamp"].apply(
	        lambda x: datetime.fromtimestamp(int(x))
	    )
	    df["days_to_resolution"] = (df["opening_datetime"] - df["sample_datetime"]).dt.days
	    return df


	def get_extreme_cases(live_fpmms: pd.DataFrame) -> tuple:
	    """Return the ids of the best and worst market according to the dist gap metric.

	    Only markets with more than one sample (more than one non-null
	    ``sample_timestamp`` for the same ``id``) are considered. Rows whose
	    ``dist_gap_perc`` is missing are ignored, so that a NaN can never be
	    picked as the "lowest gap" just because NaNs sort last.

	    Args:
	        live_fpmms: market samples; the columns ``id``, ``sample_timestamp``
	            and ``dist_gap_perc`` are read. The frame is not modified.

	    Returns:
	        tuple: ``(best_market_id, worst_market_id)`` - the id of the row with
	        the lowest ``dist_gap_perc`` and the id of the row with the highest.

	    Raises:
	        IndexError: if no row belongs to a multi-sample market with a
	            non-missing ``dist_gap_perc``.
	    """
	    # select markets with more than 1 sample
	    sample_counts = live_fpmms.groupby("id")["sample_timestamp"].count()
	    multi_sample_ids = sample_counts.index[sample_counts > 1]
	    candidates = live_fpmms.loc[live_fpmms["id"].isin(multi_sample_ids)]

	    # NaN gaps carry no ranking information; sort_values would place them
	    # at the end, i.e. exactly where the "best" row is read from.
	    candidates = candidates.dropna(subset=["dist_gap_perc"])
	    if candidates.empty:
	        raise IndexError(
	            "no market with more than one sample and a valid dist_gap_perc"
	        )

	    # Sort into a new frame instead of in place: `candidates` is derived from
	    # the caller's frame and an in-place sort on it triggers pandas'
	    # SettingWithCopyWarning.
	    ranked = candidates.sort_values(by="dist_gap_perc", ascending=False)
	    return ranked["id"].iloc[-1], ranked["id"].iloc[0]


	def _markdown_row(text: str) -> None:
	    """Add a row containing a single Markdown component to the active layout."""
	    with gr.Row():
	        gr.Markdown(text)


	# The Blocks container is created first, then the market samples are loaded
	# once; every tab below is built from this same DataFrame.
	demo = gr.Blocks()
	markets_data = prepare_data()

	with demo:
	    gr.HTML("<h1>Olas Predict Live Markets </h1>")
	    gr.Markdown("This app shows the distributions of predictions on the live markets.")

	    # Ids of the markets with the lowest / highest distribution gap.
	    best_market_id, worst_market_id = get_extreme_cases(markets_data)

	    with gr.Tabs():
	        # ---- Tab 1: outcome probabilities of the two extreme markets ----
	        with gr.TabItem("💹 Probability distributions of live markets"):
	            _markdown_row("Best case: a market with a low gap between distributions")
	            _markdown_row(f"Market id = {best_market_id}")

	            # Tokens-based and votes-based views, side by side.
	            # NOTE(review): the get_* helpers come from the tabs package and
	            # presumably create the plot component in the current layout
	            # context - confirm there.
	            with gr.Row():
	                with gr.Column(min_width=350):
	                    gr.Markdown("# Evolution of outcomes probability based on tokens")
	                    best_market_tokens_dist = get_based_tokens_distribution(
	                        best_market_id, markets_data
	                    )
	                with gr.Column(min_width=350):
	                    gr.Markdown("# Evolution of outcomes probability based on votes")
	                    best_market_votes_dist = get_based_votes_distribution(
	                        best_market_id, markets_data
	                    )

	            _markdown_row("Worst case: a market with a high distribution gap metric")
	            _markdown_row(f"Market id = {worst_market_id}")

	            with gr.Row():
	                with gr.Column(min_width=350):
	                    # The heading for this column is currently disabled:
	                    # gr.Markdown("# Evolution of outcomes probability based on tokens")
	                    worst_market_tokens_dist = get_based_tokens_distribution(
	                        worst_market_id, markets_data
	                    )
	                with gr.Column(min_width=350):
	                    worst_market_votes_dist = get_based_votes_distribution(
	                        worst_market_id, markets_data
	                    )

	        # ---- Tab 2: the distribution gap metric over all samples ----
	        with gr.TabItem("📏 Distribution gap metric"):
	            _markdown_row(
	                "This metric measures the difference between the probability distribution based on the tokens distribution and the one based on the votes distribution"
	            )

	            _markdown_row("# Density distribution")
	            with gr.Row():
	                kde_plot = get_distribution_plot(markets_data)

	            _markdown_row("# Relationship with number of trades")
	            with gr.Row():
	                kde_trades_plot = get_kde_with_trades(markets_data)

	            _markdown_row("# Correlation analysis between variables")
	            with gr.Row():
	                correlation_plot = get_correlation_map(markets_data)

	# Enable request queuing (default concurrency limit of 40 per event) and start the app.
	demo.queue(default_concurrency_limit=40).launch()