cyberosa
filter very old markets from the mean graph
1e8b30d
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, UTC, date
import plotly.express as px
# Shared figure size passed as `height`/`width` to the per-market gap plots
# below (presumably pixels — confirm against the gradio/plotly versions in use).
HEIGHT = 300
WIDTH = 600
def get_dist_gap_time_evolution(
    market_id: str, all_markets: pd.DataFrame
) -> gr.LinePlot:
    """Paint the evolution in time of the distribution gap of a single market.

    The gap is the distance between the tokens distribution and the price
    weighted distribution, read from the ``dist_gap_perc`` column.

    Args:
        market_id: value matched against the ``id`` column of ``all_markets``.
        all_markets: samples of all markets; the ``id``, ``sample_date`` and
            ``dist_gap_perc`` columns are read here, and the tooltip also
            references ``sample_datetime``, ``total_trades`` and
            ``total_bet_amount``.

    Returns:
        A ``gr.LinePlot`` of ``dist_gap_perc`` over ``sample_date`` for the
        selected market.
    """
    sns.set_style("darkgrid")
    # Take an explicit copy: the rows come from a filtered view of the caller's
    # frame, and assigning a column on it would otherwise trigger pandas'
    # SettingWithCopyWarning (chained assignment).
    selected_market = all_markets.loc[all_markets["id"] == market_id].copy()
    # The line plot receives the x axis values and the column labels as strings.
    selected_market["sample_date"] = selected_market["sample_date"].astype(str)
    selected_market.columns = selected_market.columns.astype(str)

    return gr.LinePlot(
        value=selected_market,
        x="sample_date",
        y="dist_gap_perc",
        y_title="Distribution gap in %",
        interactive=True,
        show_actions_button=True,
        tooltip=[
            "sample_datetime",
            "dist_gap_perc",
            "total_trades",
            "total_bet_amount",
        ],
        height=HEIGHT,
        width=WIDTH,
    )
def get_dist_gap_timeline_plotly(market_id: str, all_markets: pd.DataFrame) -> gr.Plot:
    """Build a plotly timeline of the distribution gap of a single market.

    Rows of ``all_markets`` whose ``id`` equals ``market_id`` are plotted as a
    line of ``dist_gap_perc`` over ``sample_date`` and wrapped in a ``gr.Plot``.
    """
    belongs_to_market = all_markets["id"] == market_id
    market_samples = all_markets.loc[belongs_to_market]

    timeline = px.line(market_samples, x="sample_date", y="dist_gap_perc")
    # Axis titles and figure size are set in a single layout update.
    timeline.update_layout(
        xaxis_title="Day of the sample",
        yaxis_title="Distribution gap in %",
        width=WIDTH,
        height=HEIGHT,
    )
    timeline.update_xaxes(tickformat="%b-%d-%Y")
    return gr.Plot(value=timeline)
def get_avg_gap_time_evolution_grouped_markets(
    all_markets: pd.DataFrame, cutoff_date: date = date(2024, 1, 1)
) -> gr.Plot:
    """Plot the mean distribution gap per sample day, one line per creation date.

    Args:
        all_markets: samples of all markets; the ``opening_datetime``,
            ``creationTimestamp``, ``sample_date`` and ``dist_gap_perc``
            columns are read here.
        cutoff_date: markets created on or before this date are left out of
            the graph. Defaults to 2024-01-01, the value previously hard-coded.

    Returns:
        A ``gr.Plot`` wrapping a plotly line chart of the mean ``dist_gap_perc``
        over ``sample_date``, coloured by market creation date.
    """
    # Keep only the markets whose opening datetime is still in the future.
    # NOTE(review): `current` is timezone-naive, so `opening_datetime` is
    # presumably naive as well — confirm against the data loader.
    current = pd.Timestamp("today")
    # Explicit copy: new columns are added below, and writing them onto a
    # filtered view of the caller's frame triggers SettingWithCopyWarning.
    recent_markets = all_markets.loc[all_markets["opening_datetime"] > current].copy()

    # creationTimestamp is parsed as epoch seconds (interpreted in local time).
    recent_markets["creation_datetime"] = recent_markets["creationTimestamp"].apply(
        lambda x: datetime.fromtimestamp(int(x))
    )
    recent_markets["creation_date"] = pd.to_datetime(
        recent_markets["creation_datetime"]
    ).dt.date

    # Filter out very old markets so they do not clutter the mean graph.
    recent_markets = recent_markets[recent_markets["creation_date"] > cutoff_date]

    avg_dist_gap_perc = (
        recent_markets.groupby(["sample_date", "creation_date"])["dist_gap_perc"]
        .mean()
        .reset_index()
        .rename(columns={"dist_gap_perc": "mean_dist_gap_perc"})
    )
    # Cast to string so plotly treats each creation date as a discrete series.
    avg_dist_gap_perc["creation_date"] = avg_dist_gap_perc["creation_date"].astype(str)

    fig = px.line(
        avg_dist_gap_perc,
        x="sample_date",
        y="mean_dist_gap_perc",
        color="creation_date",
    )
    fig.update_layout(
        xaxis_title="Day the samples were collected",
        yaxis_title="Mean dist gap percentage (%)",
    )
    fig.update_xaxes(tickformat="%b-%d-%Y")
    return gr.Plot(value=fig)
def get_top_best_behaviour_markets(markets_data: pd.DataFrame):
    """Show the top markets with the lowest metric of distribution gap.

    Args:
        markets_data: market samples; the ``title``, ``sample_datetime`` and
            ``dist_gap_perc`` columns are read here.

    Returns:
        A ``gr.DataFrame`` with the 5 rows having the smallest
        ``dist_gap_perc``, smallest first.
    """
    # Ascending order: the best-behaving markets are the ones with the LOWEST
    # gap. Sorting in descending order returned the worst markets instead.
    sorted_data = markets_data.sort_values(by="dist_gap_perc", ascending=True)
    top_best_markets = sorted_data[["title", "sample_datetime", "dist_gap_perc"]].head(
        5
    )
    return gr.DataFrame(top_best_markets)
def get_distribution_plot(markets_data: pd.DataFrame) -> gr.Plot:
    """Paint the density plot of the metric distribution gap percentage.

    A kernel density estimate (KDE) plot is a method for visualizing the
    distribution of observations in a dataset, analogous to a histogram. KDE
    represents the data using a continuous probability density curve in one or
    more dimensions.

    Args:
        markets_data: market samples; the ``dist_gap_perc`` column is plotted.

    Returns:
        A ``gr.Plot`` wrapping the matplotlib figure with the KDE curve.
    """
    sns.set_theme(palette="viridis")
    # Build a standalone Figure instead of going through plt.figure(): pyplot
    # keeps every figure it creates alive until it is explicitly closed, so
    # each call used to leak one figure in a long-running app.
    fig = plt.Figure(figsize=(10, 5))
    ax = fig.subplots()
    sns.kdeplot(markets_data, x="dist_gap_perc", fill=True, ax=ax)
    # TODO Add title and labels
    return gr.Plot(value=fig)
def get_kde_with_trades(markets_data: pd.DataFrame) -> gr.Plot:
    """Paint the density plot of the metric in terms of the number of trades.

    Args:
        markets_data: market samples; ``dist_gap_perc`` is drawn on the x axis
            and ``total_trades`` on the y axis.

    Returns:
        A ``gr.Plot`` wrapping the matplotlib figure with the bivariate KDE.
    """
    # Draw on a dedicated figure/axes. Without an explicit `ax`, seaborn paints
    # on pyplot's current axes, so this plot ended up on top of whatever
    # another function had drawn before.
    fig = plt.Figure()
    ax = fig.subplots()
    sns.kdeplot(
        markets_data, x="dist_gap_perc", y="total_trades", fill=True, ax=ax
    )
    ax.set_ylabel("Total number of trades per market")
    return gr.Plot(value=fig)
def get_kde_with_total_bet_amount(markets_data: pd.DataFrame) -> gr.Plot:
    """Paint the density plot of the metric in terms of the total bet amount.

    Args:
        markets_data: market samples; ``dist_gap_perc`` is drawn on the x axis
            and ``total_bet_amount`` on the y axis.

    Returns:
        A ``gr.Plot`` wrapping the matplotlib figure with the bivariate KDE.
    """
    # Draw on a dedicated figure/axes. Without an explicit `ax`, seaborn paints
    # on pyplot's current axes, so this plot ended up on top of whatever
    # another function had drawn before.
    fig = plt.Figure()
    ax = fig.subplots()
    sns.kdeplot(
        markets_data, x="dist_gap_perc", y="total_bet_amount", fill=True, ax=ax
    )
    ax.set_ylabel("Total bet amount per market")
    return gr.Plot(value=fig)
def get_regplot_with_mean_trade_size(markets_data: pd.DataFrame) -> gr.Plot:
    """Plot data and a linear regression fit between the metric and the mean trade size.

    Args:
        markets_data: market samples; ``dist_gap_perc`` is drawn on the x axis
            and ``mean_trade_size`` on the y axis.

    Returns:
        A ``gr.Plot`` wrapping the matplotlib figure with the scatter points
        and the fitted regression line.
    """
    # Draw on a dedicated figure/axes. Without an explicit `ax`, seaborn paints
    # on pyplot's current axes, so this plot ended up on top of whatever
    # another function had drawn before.
    fig = plt.Figure()
    ax = fig.subplots()
    sns.regplot(markets_data, x="dist_gap_perc", y="mean_trade_size", ax=ax)
    ax.set_ylabel("Mean trade size in USD")
    return gr.Plot(value=fig)
def get_correlation_map(markets_data: pd.DataFrame) -> gr.Plot:
    """Paint the correlation between different variables as a heatmap.

    Args:
        markets_data: market samples; the ``total_trades``, ``dist_gap_perc``,
            ``liquidityMeasure``, ``mean_trade_size`` and ``total_bet_amount``
            columns are correlated with each other.

    Returns:
        A ``gr.Plot`` wrapping the matplotlib figure with the annotated
        correlation heatmap.
    """
    columns_of_interest = [
        "total_trades",
        "dist_gap_perc",
        "liquidityMeasure",
        "mean_trade_size",
        "total_bet_amount",
    ]
    # Compute the correlation matrix
    correlation_matrix = markets_data[columns_of_interest].corr()

    # Draw on a dedicated figure/axes. Without an explicit `ax`, seaborn paints
    # on pyplot's current axes, so the heatmap (and its colorbar) was stacked on
    # top of whatever another function had drawn before.
    fig = plt.Figure()
    ax = fig.subplots()
    sns.heatmap(
        correlation_matrix,
        annot=True,  # Show the correlation values
        cmap="coolwarm",  # Color scheme
        vmin=-1,
        vmax=1,  # Set the range of values
        center=0,  # Center the colormap at 0
        square=True,  # Make each cell square-shaped
        linewidths=0.5,  # Add lines between cells
        cbar_kws={"shrink": 0.8},  # Adjust the size of the colorbar
        ax=ax,
    )
    ax.set_title("Correlation Heatmap")
    # Keep the y-axis labels horizontal for better readability
    ax.tick_params(axis="y", labelrotation=0)
    fig.tight_layout()
    return gr.Plot(value=fig)