weekly-analysis / app.py
arshy's picture
initial commit
0869b01
raw
history blame
6.24 kB
import gradio as gr
import pandas as pd
# Tool request records, loaded once at import time. Later code reads the
# `tool`, `error`, `prompt_response` and `request_month_year_week` columns.
tools = pd.read_csv("./data/tools.csv")
# Root Gradio container; the page layout is attached to it further down.
demo = gr.Blocks()
# Tools included in the error statistics. The list is used to filter the
# request table and as the choices of the tool dropdown; its first entry is
# the dropdown's default selection.
INC_TOOLS = [
    'prediction-online',
    'prediction-offline',
    'claude-prediction-online',
    'claude-prediction-offline',
    'prediction-offline-sme',
    'prediction-online-sme',
    'prediction-request-rag',
    'prediction-request-reasoning',
    'prediction-url-cot-claude',
    'prediction-request-rag-claude',
    'prediction-request-reasoning-claude',
]
def set_error(row):
    """Return the error flag for one request row as a plain ``bool``.

    Args:
        row: Object exposing ``error`` and ``prompt_response`` attributes
            (for example a DataFrame row passed by ``apply(axis=1)``).

    Returns:
        ``row.error`` when it is already a boolean flag; otherwise ``True``
        when the row has no prompt response and ``False`` when it has one.
    """
    if row.error in (True, False):
        return bool(row.error)

    response = row.prompt_response
    # Empty CSV cells are loaded as NaN, and NaN is truthy, so a plain
    # ``not response`` check would treat a missing response as present.
    # Check for NaN/None explicitly, then fall back to the emptiness test.
    return bool(pd.isna(response)) or not response
def get_error_data():
    """Compute weekly error counts and error percentage per included tool.

    Reads the module-level ``tools`` frame, keeps the rows whose ``tool`` is
    listed in ``INC_TOOLS``, normalises the ``error`` flag with ``set_error``
    and counts the rows per (tool, week, error flag).

    Returns:
        A DataFrame with one row per (tool, week) holding ``tool``,
        ``request_month_year_week``, the columns ``False`` and ``True``
        (row counts without / with an error), ``error_perc`` (share of
        erroring rows, in percent) and ``total_requests``.
    """
    # Copy the filtered rows: adding a column to a filtered slice of
    # ``tools`` raises SettingWithCopyWarning and is not guaranteed to stick.
    tools_inc = tools[tools['tool'].isin(INC_TOOLS)].copy()
    tools_inc['error'] = tools_inc.apply(set_error, axis=1)

    counts = (
        tools_inc
        .groupby(['tool', 'request_month_year_week', 'error'])
        .size()
        .unstack()
        .fillna(0)
    )
    # ``unstack`` only creates columns for outcomes that actually occur, so
    # data with no errors at all (or only errors) would otherwise fail with
    # a KeyError on the lookups below.
    for outcome in (False, True):
        if outcome not in counts.columns:
            counts[outcome] = 0.0

    error = counts.reset_index()
    error['error_perc'] = (error[True] / (error[False] + error[True])) * 100
    error['total_requests'] = error[False] + error[True]
    return error
def get_error_data_all(error):
    """Collapse the per-tool weekly error table into one row per week.

    Args:
        error: DataFrame with the columns ``request_month_year_week``,
            ``total_requests``, ``False`` and ``True``.

    Returns:
        A DataFrame with the weekly sums of those three count columns plus
        ``error_perc`` rounded to four decimals; every column label is
        converted to ``str``.
    """
    sum_spec = {'total_requests': 'sum', False: 'sum', True: 'sum'}
    weekly = error.groupby('request_month_year_week').agg(sum_spec)
    weekly = weekly.reset_index()

    error_share = weekly[True] / weekly['total_requests']
    weekly['error_perc'] = error_share * 100

    # The boolean column labels become the strings 'False' and 'True'.
    weekly.columns = weekly.columns.astype(str)
    weekly['error_perc'] = weekly['error_perc'].map(lambda pct: round(pct, 4))
    return weekly
# Per-tool weekly error table, built once at import time; the dropdown
# choices and both plot callbacks below read from it.
error = get_error_data()
# The same counts summed over all tools (one row per week); this is the data
# behind the overall error plot.
error_all = get_error_data_all(error)
def _plot_ready(frame):
    """Return a copy of *frame* prepared for plotting.

    Column labels are converted to strings (the table carries the boolean
    labels ``False`` / ``True``) and ``error_perc`` is rounded to four
    decimals. Working on a copy keeps the shared ``error`` table untouched
    and avoids pandas' SettingWithCopyWarning on filtered slices.
    """
    prepared = frame.copy()
    prepared.columns = prepared.columns.astype(str)
    prepared['error_perc'] = prepared['error_perc'].apply(lambda x: round(x, 4))
    return prepared


def update_tool_plot(selected_tool):
    """Build the weekly error plot update for the tool chosen in the dropdown.

    Args:
        selected_tool: Value of the tool dropdown.

    Returns:
        A ``gr.BarPlot`` whose settings and data are applied to the per-tool
        plot component (week on the x axis, error percentage on the y axis).
    """
    filtered_data = _plot_ready(error[error['tool'] == selected_tool])
    # The output component is a BarPlot, so the update is built as one too.
    return gr.BarPlot(
        title="Error Percentage",
        x_title="Week",
        y_title="Error Percentage",
        x="request_month_year_week",
        y="error_perc",
        value=filtered_data,
    )


def update_week_plot(selected_week):
    """Build the per-tool error plot update for the week chosen in the dropdown.

    Args:
        selected_week: Value of the week dropdown.

    Returns:
        A ``gr.BarPlot`` whose settings and data are applied to the per-week
        plot component (tool on the x axis, error percentage on the y axis).
    """
    filtered_data = _plot_ready(
        error[error['request_month_year_week'] == selected_week]
    )
    return gr.BarPlot(
        title="Error Percentage",
        x_title="Tool",
        y_title="Error Percentage",
        x="tool",
        y="error_perc",
        value=filtered_data,
    )


# Page layout: an error dashboard tab (overall, per-tool and per-week plots)
# and an "About" tab.
with demo:
    gr.HTML("<h1>Olas Predict Actual Performance</h1>")
    gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")

    with gr.Tabs():
        with gr.TabItem("🔥 Error Dashboard"):
            # Overall error percentage per week, across all included tools.
            with gr.Row():
                gr.Markdown("# Plot showing overall error")
            with gr.Row():
                with gr.Column():
                    gr.BarPlot(
                        value=error_all,
                        x="request_month_year_week",
                        y="error_perc",
                        title="Error Percentage",
                        x_title="Week",
                        y_title="Error Percentage",
                        height=800,
                        show_label=True,
                        interactive=True,
                        show_actions_button=True,
                        tooltip=["request_month_year_week", "error_perc"],
                    )

            # Weekly error percentage of a single, selectable tool.
            with gr.Row():
                gr.Markdown("# Plot showing error by tool")
            with gr.Row():
                sel_tool = gr.Dropdown(
                    label="Select a tool",
                    choices=INC_TOOLS,
                    value=INC_TOOLS[0],
                )
            with gr.Row():
                plot_tool_error = gr.BarPlot(
                    title="Error Percentage",
                    x_title="Week",
                    y_title="Error Percentage",
                    show_label=True,
                    interactive=True,
                    show_actions_button=True,
                    tooltip=["request_month_year_week", "error_perc"],
                    width=800,
                )

            # Error percentage of every tool within a single, selectable week.
            with gr.Row():
                gr.Markdown("# Plot showing error by week")
            with gr.Row():
                # Sorted ascending; the last entry is preselected below
                # (the latest week, assuming the labels sort chronologically).
                choices = sorted(error['request_month_year_week'].unique().tolist())
                sel_week = gr.Dropdown(
                    label="Select a week",
                    choices=choices,
                    # Guard against an empty table instead of raising IndexError.
                    value=choices[-1] if choices else None,
                )
            with gr.Row():
                plot_week_error = gr.BarPlot(
                    title="Error Percentage",
                    x_title="Tool",
                    y_title="Error Percentage",
                    show_label=True,
                    interactive=True,
                    show_actions_button=True,
                    tooltip=["tool", "error_perc"],
                    width=800,
                )

            # Redraw a plot whenever its dropdown changes.
            sel_tool.change(update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
            sel_week.change(update_week_plot, inputs=sel_week, outputs=plot_week_error)
            # ``change`` does not fire for the preselected values, so also
            # draw both plots once when the page loads.
            demo.load(update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
            demo.load(update_week_plot, inputs=sel_week, outputs=plot_week_error)
            # The former trailing ``with gr.Row(): <component name>`` blocks
            # were dropped: a bare name does not re-render a component, so
            # they only produced empty rows.

        with gr.TabItem("ℹ️ About"):
            with gr.Accordion("About the Benchmark"):
                gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")
# Enable request queuing with a default concurrency limit of 40, then start
# the app.
demo.queue(default_concurrency_limit=40).launch()