"""Data preparation and plot builders for the mech tools metrics."""

import pandas as pd
import gradio as gr
import numpy as np
from tabs.metrics import tool_metric_choices
import plotly.express as px

HEIGHT = 600
WIDTH = 1000

# Fixed colour per tool so a tool keeps the same colour across plots.
tools_palette = {
    "prediction-request-reasoning": "darkorchid",
    "claude-prediction-offline": "rebeccapurple",
    "prediction-request-reasoning-claude": "slateblue",
    "prediction-request-rag-claude": "steelblue",
    "prediction-online": "darkcyan",
    "prediction-offline": "mediumaquamarine",
    "claude-prediction-online": "mediumseagreen",
    "prediction-online-sme": "yellowgreen",
    "prediction-url-cot-claude": "gold",
    "prediction-offline-sme": "orange",
    "prediction-request-rag": "chocolate",
}

# English month abbreviations in calendar order, as produced by the
# "%b-%d-%Y" week labels built in ``prepare_tools``.
_MONTH_ABBREVIATIONS = (
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
)

# Display order and matching colours of the market creators in grouped bars.
_MARKET_CREATOR_ORDER = ["pearl", "quickstart", "all"]
_MARKET_CREATOR_COLORS = ["purple", "goldenrod", "darkgreen"]

_WEEK_COLUMN = "request_month_year_week"


def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
    """Add date/week columns and append an aggregated "all" market copy.

    The returned frame contains every input row twice: once with its original
    ``market_creator`` and once with ``market_creator`` set to ``"all"``. It
    is sorted by ``request_time`` ascending and gains two columns:
    ``request_date`` (calendar date) and ``request_month_year_week`` (start of
    the week period formatted as ``"%b-%d-%Y"``).

    The input frame is not modified.

    Args:
        tools: Frame with at least a ``request_time`` column parseable by
            ``pd.to_datetime``.

    Returns:
        A new, enriched and sorted DataFrame.
    """
    # ``assign`` returns a new frame, so the caller's DataFrame is untouched.
    tools = tools.assign(
        request_time=pd.to_datetime(tools["request_time"], utc=True)
    )
    tools["request_date"] = tools["request_time"].dt.date
    tools = tools.sort_values(by="request_time", ascending=True)

    # Periods carry no timezone; dropping the (UTC) tz explicitly avoids the
    # pandas warning about timezone information being lost in ``to_period``.
    week_start = (
        tools["request_time"].dt.tz_localize(None).dt.to_period("W").dt.start_time
    )
    tools[_WEEK_COLUMN] = week_start.dt.strftime("%b-%d-%Y")

    # Duplicate every row under the synthetic "all" market so the totals can
    # be plotted next to the individual market creators.
    tools_all = tools.copy(deep=True)
    tools_all["market_creator"] = "all"

    combined = pd.concat([tools, tools_all], ignore_index=True)
    return combined.sort_values(by="request_time", ascending=True)


def get_overall_winning_rate_by_market(wins_df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the per-tool winning data by week and market creator.

    Args:
        wins_df: Frame with columns ``request_month_year_week``,
            ``market_creator``, ``"0"``, ``"1"``, ``win_perc`` and
            ``total_request``.

    Returns:
        One row per (week, market creator) in order of first appearance, with
        columns ``losses`` (sum of ``"0"``), ``wins`` (sum of ``"1"``),
        ``win_perc`` (mean) and ``total_request`` (sum).
    """
    grouped = wins_df.groupby([_WEEK_COLUMN, "market_creator"], sort=False)
    return grouped.agg(
        losses=("0", "sum"),
        wins=("1", "sum"),
        win_perc=("win_perc", "mean"),
        total_request=("total_request", "sum"),
    ).reset_index()


def sort_key(date_str):
    """Return a chronological ``(year, month, day)`` key for a week label.

    Args:
        date_str: Label of the form ``"Mon-DD-YYYY"`` with an English
            three-letter month abbreviation, e.g. ``"Jan-05-2024"``.

    Returns:
        Tuple ``(year, month_number, day)`` of ints.

    Raises:
        ValueError: If the label does not have three ``-`` separated parts,
            the month abbreviation is unknown, or day/year are not integers.
    """
    month, day, year = date_str.split("-")
    month_num = _MONTH_ABBREVIATIONS.index(month) + 1
    return (int(year), month_num, int(day))


def _weekly_bar_by_market(data: pd.DataFrame, y_column, yaxis_title: str) -> gr.Plot:
    """Build a grouped weekly bar chart with one bar per market creator.

    Args:
        data: Frame with ``request_month_year_week``, ``market_creator`` and
            the column named by ``y_column``. It is not modified.
        y_column: Name of the column plotted on the y axis; passed to
            ``px.bar`` unchanged.
        yaxis_title: Title shown on the y axis.

    Returns:
        A ``gr.Plot`` wrapping the figure.
    """
    # The week labels are strings, so they must be ordered chronologically
    # by hand rather than alphabetically.
    week_order = sorted(data[_WEEK_COLUMN].unique(), key=sort_key)

    # Work on a copy: ``data`` may be a filtered view of the caller's frame.
    plot_df = data.copy()
    plot_df[_WEEK_COLUMN] = pd.Categorical(
        plot_df[_WEEK_COLUMN], categories=week_order, ordered=True
    )
    plot_df = plot_df.sort_values(_WEEK_COLUMN)

    fig = px.bar(
        plot_df,
        x=_WEEK_COLUMN,
        y=y_column,
        color="market_creator",
        barmode="group",
        color_discrete_sequence=_MARKET_CREATOR_COLORS,
        category_orders={
            "market_creator": _MARKET_CREATOR_ORDER,
            _WEEK_COLUMN: week_order,
        },
    )
    fig.update_layout(
        xaxis_title="Week",
        yaxis_title=yaxis_title,
        legend=dict(yanchor="top", y=0.5),
        width=WIDTH,
        height=HEIGHT,
    )
    fig.update_xaxes(tickformat="%b %d\n%Y")
    return gr.Plot(value=fig)


def integrated_plot_tool_winnings_overall_per_market_by_week(
    winning_df: pd.DataFrame,
    winning_selector: str = "Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %",
) -> gr.Plot:
    """Plot the overall weekly winning metric per market creator.

    Args:
        winning_df: Per-tool winning data accepted by
            ``get_overall_winning_rate_by_market``.
        winning_selector: Metric display name; it is looked up in
            ``tool_metric_choices`` to obtain the column to plot and is also
            used as the y-axis title.

    Returns:
        A ``gr.Plot`` with the grouped bar chart.
    """
    # NOTE(review): an unknown selector yields ``None`` here, which is passed
    # on to plotly as-is -- confirm callers only use keys of
    # ``tool_metric_choices``.
    column_name = tool_metric_choices.get(winning_selector)
    wins_df = get_overall_winning_rate_by_market(winning_df)
    return _weekly_bar_by_market(wins_df, column_name, winning_selector)


def integrated_tool_winnings_by_tool_per_market(
    wins_df: pd.DataFrame, tool: str
) -> gr.Plot:
    """Plot the weekly ``win_perc`` of a single tool per market creator.

    Args:
        wins_df: Frame with columns ``tool``, ``request_month_year_week``,
            ``market_creator`` and ``win_perc``. It is not modified.
        tool: Value of the ``tool`` column to keep.

    Returns:
        A ``gr.Plot`` with the grouped bar chart.
    """
    # The helper copies the filtered rows before adding the categorical
    # column and sorts that filtered frame (not the unfiltered input).
    tool_wins_df = wins_df[wins_df["tool"] == tool]
    return _weekly_bar_by_market(
        tool_wins_df,
        "win_perc",
        "Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %",
    )


def get_daily_mech_requests(
    daily_mech_req_df: pd.DataFrame, market_creator: str
) -> gr.Plot:
    """Plot the daily mech requests per tool for one market creator.

    Args:
        daily_mech_req_df: Frame with columns ``market_creator``,
            ``request_date``, ``tool`` and ``total_mech_requests``, holding at
            most one row per (date, tool) within a market creator.
        market_creator: ``"pearl"`` selects the pearl market; any other value
            selects ``"quickstart"``.

    Returns:
        A ``gr.Plot`` with one bar per day, coloured by tool.

    Raises:
        ValueError: Raised by ``DataFrame.pivot`` if a (date, tool) pair
            occurs more than once for the selected market creator.
    """
    selected_market = "pearl" if market_creator == "pearl" else "quickstart"
    market_rows = daily_mech_req_df.loc[
        daily_mech_req_df["market_creator"] == selected_market,
        ["request_date", "tool", "total_mech_requests"],
    ]

    pivoted = market_rows.pivot(
        index="request_date", columns="tool", values="total_mech_requests"
    )

    # Order the tools by their overall volume (largest first). Every value
    # stays under its own tool column; sorting each day's values separately
    # and reusing the first day's labels would attribute counts to the wrong
    # tools on any day whose ranking differs from the first one.
    tool_order = (
        pivoted.sum(axis=0).sort_values(ascending=False, kind="stable").index.tolist()
    )
    long_df = (
        pivoted[tool_order]
        .reset_index()
        .melt(
            id_vars=["request_date"],
            var_name="tool",
            value_name="total_mech_requests",
        )
    )

    fig = px.bar(
        long_df,
        x="request_date",
        y="total_mech_requests",
        color="tool",
        color_discrete_map=tools_palette,
    )
    fig.update_layout(
        xaxis_title="Day of the request",
        yaxis_title="Total daily mech requests",
        width=WIDTH,
        height=HEIGHT,
    )
    return gr.Plot(value=fig)