cyberosa
daily mech requests per market graphs
e3d589e
import pandas as pd
import gradio as gr
import numpy as np
from tabs.metrics import tool_metric_choices
import plotly.express as px
# Pixel dimensions applied to every figure built in this module.
HEIGHT: int = 600
WIDTH: int = 1000

# Fixed colour (CSS colour name) per mech tool, so that a given tool is drawn
# with the same colour in every chart that passes this mapping to plotly.
tools_palette: dict[str, str] = {
    "prediction-request-reasoning": "darkorchid",
    "claude-prediction-offline": "rebeccapurple",
    "prediction-request-reasoning-claude": "slateblue",
    "prediction-request-rag-claude": "steelblue",
    "prediction-online": "darkcyan",
    "prediction-offline": "mediumaquamarine",
    "claude-prediction-online": "mediumseagreen",
    "prediction-online-sme": "yellowgreen",
    "prediction-url-cot-claude": "gold",
    "prediction-offline-sme": "orange",
    "prediction-request-rag": "chocolate",
}
def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
    """Prepare the raw tool requests for the per-market graphs.

    The returned frame has:

    * ``request_time`` parsed as UTC timestamps,
    * ``request_date`` holding the calendar date of each request,
    * ``request_month_year_week`` holding the first day of the request's week
      formatted as ``%b-%d-%Y`` (for example ``Jan-01-2024``),
    * every input row duplicated with ``market_creator`` set to ``"all"`` so
      that an overall total can be plotted next to the individual markets.

    Args:
        tools: Requests data with at least a ``request_time`` column.

    Returns:
        A new DataFrame sorted by ``request_time`` with twice as many rows as
        the input. The input frame is not modified.
    """
    request_time = pd.to_datetime(tools["request_time"], utc=True)
    # ``assign`` returns a new frame; writing the columns directly onto
    # ``tools`` would leak the parsed values into the caller's DataFrame.
    tools = tools.assign(
        request_time=request_time,
        request_date=request_time.dt.date,
    )
    tools = tools.sort_values(by="request_time", ascending=True, kind="stable")

    # Periods carry no timezone; dropping it explicitly (the values are already
    # UTC) gives the same week boundaries without pandas' timezone warning.
    week_start = (
        tools["request_time"].dt.tz_localize(None).dt.to_period("W").dt.start_time
    )
    tools["request_month_year_week"] = week_start.dt.strftime("%b-%d-%Y")

    # Duplicate every request under the synthetic "all" market creator.
    tools_all = tools.copy(deep=True)
    tools_all["market_creator"] = "all"
    tools = pd.concat([tools, tools_all], ignore_index=True)

    # A stable sort keeps each original row ahead of its "all" duplicate, so
    # the row order is deterministic even though both share a timestamp.
    return tools.sort_values(by="request_time", ascending=True, kind="stable")
def get_overall_winning_rate_by_market(wins_df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the winning data per week and market creator.

    For every ``(request_month_year_week, market_creator)`` pair, in order of
    first appearance, the ``"0"`` column is summed into ``losses``, the ``"1"``
    column is summed into ``wins``, ``win_perc`` is averaged and
    ``total_request`` is summed.
    """
    per_week_and_market = wins_df.groupby(
        ["request_month_year_week", "market_creator"], sort=False
    )
    aggregated = per_week_and_market.agg(
        losses=("0", "sum"),
        wins=("1", "sum"),
        win_perc=("win_perc", "mean"),
        total_request=("total_request", "sum"),
    )
    return aggregated.reset_index()
def sort_key(date_str):
    """Turn a ``Mon-DD-YYYY`` label (e.g. ``Jan-08-2024``) into a sortable tuple.

    Returns ``(year, month, day)`` as integers so that week labels can be
    ordered chronologically instead of alphabetically. Raises ``ValueError``
    when the label does not have three ``-``-separated parts, when the month
    abbreviation is unknown, or when day/year are not integers.
    """
    month_abbreviations = "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split()
    month_text, day_text, year_text = date_str.split("-")
    # Position in the list is zero-based; calendar months start at 1.
    month_number = 1 + month_abbreviations.index(month_text)
    day_number = int(day_text)
    year_number = int(year_text)
    return (year_number, month_number, day_number)
def integrated_plot_tool_winnings_overall_per_market_by_week(
    winning_df: pd.DataFrame,
    winning_selector: str = "Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %",
) -> gr.Plot:
    """Build the weekly grouped bar chart of a winning metric per market creator.

    Args:
        winning_df: Winning data, aggregated here per week and market creator
            through ``get_overall_winning_rate_by_market``.
        winning_selector: Display name of the metric. It is looked up in
            ``tool_metric_choices`` to find the column to plot and is also
            used as the y-axis title.

    Returns:
        A ``gr.Plot`` wrapping the plotly figure.
    """
    # Translate the metric's display name into the column that holds it.
    metric_column = tool_metric_choices.get(winning_selector)
    weekly_wins = get_overall_winning_rate_by_market(winning_df)

    # Week labels are strings, so order them chronologically via sort_key and
    # pin that order with an ordered categorical before sorting the rows.
    week_order = sorted(
        weekly_wins["request_month_year_week"].unique(), key=sort_key
    )
    weekly_wins["request_month_year_week"] = pd.Categorical(
        weekly_wins["request_month_year_week"],
        categories=week_order,
        ordered=True,
    )
    weekly_wins = weekly_wins.sort_values("request_month_year_week")

    bar_options = dict(
        x="request_month_year_week",
        y=metric_column,
        color="market_creator",
        barmode="group",
        color_discrete_sequence=["purple", "goldenrod", "darkgreen"],
        category_orders={
            "market_creator": ["pearl", "quickstart", "all"],
            "request_month_year_week": week_order,
        },
    )
    fig = px.bar(weekly_wins, **bar_options)
    fig.update_layout(
        xaxis_title="Week",
        yaxis_title=winning_selector,
        legend=dict(yanchor="top", y=0.5),
        width=WIDTH,
        height=HEIGHT,
    )
    fig.update_xaxes(tickformat="%b %d\n%Y")
    return gr.Plot(value=fig)
def integrated_tool_winnings_by_tool_per_market(
    wins_df: pd.DataFrame, tool: str
) -> gr.Plot:
    """Build the weekly grouped bar chart of ``win_perc`` for a single tool.

    Args:
        wins_df: Winning data with at least the ``tool``,
            ``request_month_year_week``, ``market_creator`` and ``win_perc``
            columns. It is not modified.
        tool: Name of the tool whose rows are plotted.

    Returns:
        A ``gr.Plot`` wrapping the plotly figure, one bar group per week and
        one bar per market creator.
    """
    # Copy the filtered rows: the column assignment below would otherwise be
    # made on a slice of ``wins_df`` and trigger SettingWithCopyWarning.
    tool_wins_df = wins_df[wins_df["tool"] == tool].copy()

    # Week labels are strings; order them chronologically via sort_key.
    sorted_categories = sorted(
        tool_wins_df["request_month_year_week"].unique(), key=sort_key
    )
    # Pin that order with an ordered categorical.
    tool_wins_df["request_month_year_week"] = pd.Categorical(
        tool_wins_df["request_month_year_week"],
        categories=sorted_categories,
        ordered=True,
    )
    # Sort the frame that is actually plotted, so its rows follow the
    # chronological category order.
    tool_wins_df = tool_wins_df.sort_values("request_month_year_week")

    fig = px.bar(
        tool_wins_df,
        x="request_month_year_week",
        y="win_perc",
        color="market_creator",
        barmode="group",
        color_discrete_sequence=["purple", "goldenrod", "darkgreen"],
        category_orders={
            "market_creator": ["pearl", "quickstart", "all"],
            "request_month_year_week": sorted_categories,
        },
    )
    fig.update_layout(
        xaxis_title="Week",
        yaxis_title="Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %",
        legend=dict(yanchor="top", y=0.5),
    )
    fig.update_layout(width=WIDTH, height=HEIGHT)
    fig.update_xaxes(tickformat="%b %d\n%Y")
    return gr.Plot(value=fig)
def get_daily_mech_requests(
    daily_mech_req_df: pd.DataFrame, market_creator: str
) -> gr.Plot:
    """Build the bar chart of daily mech requests per tool for one market creator.

    Args:
        daily_mech_req_df: Daily totals with the ``market_creator``,
            ``request_date``, ``tool`` and ``total_mech_requests`` columns,
            holding at most one row per ``(request_date, tool)`` pair for a
            given market creator (``pivot`` raises ``ValueError`` otherwise).
        market_creator: ``"pearl"`` selects the pearl rows; any other value
            selects the ``"quickstart"`` rows.

    Returns:
        A ``gr.Plot`` wrapping the plotly figure, one bar segment per tool
        and day, coloured through ``tools_palette``.
    """
    # Only two views exist: everything that is not "pearl" means "quickstart".
    selected_creator = "pearl" if market_creator == "pearl" else "quickstart"
    daily_mech_req_per_tool = daily_mech_req_df.loc[
        daily_mech_req_df["market_creator"] == selected_creator,
        ["request_date", "tool", "total_mech_requests"],
    ]
    pivoted = daily_mech_req_per_tool.pivot(
        index="request_date", columns="tool", values="total_mech_requests"
    )

    # Put the busiest tools first by reordering whole columns. Sorting each
    # row's values independently and relabelling them with a single column
    # order would attribute a day's counts to the wrong tools.
    tool_order = (
        pivoted.sum(axis=0).sort_values(ascending=False, kind="stable").index
    )
    ordered = pivoted.loc[:, tool_order]

    # Back to long format: one row per (request_date, tool), which is the
    # layout px.bar expects.
    sorted_long = ordered.reset_index().melt(
        id_vars=["request_date"], var_name="tool", value_name="total_mech_requests"
    )
    fig = px.bar(
        sorted_long,
        x="request_date",
        y="total_mech_requests",
        color="tool",
        color_discrete_map=tools_palette,
    )
    fig.update_layout(
        xaxis_title="Day of the request",
        yaxis_title="Total daily mech requests",
    )
    fig.update_layout(width=WIDTH, height=HEIGHT)
    return gr.Plot(value=fig)