"""Gradio dashboard showing weekly error rates of Olas Predict tools.

Reads ``./data/tools.csv``, computes per-tool and overall weekly error
percentages, and serves them as bar plots.
"""
import gradio as gr
import pandas as pd

tools = pd.read_csv("./data/tools.csv")

demo = gr.Blocks()

# Tools included in the error statistics; every other tool is filtered out.
INC_TOOLS = [
    'prediction-online',
    'prediction-offline',
    'claude-prediction-online',
    'claude-prediction-offline',
    'prediction-offline-sme',
    'prediction-online-sme',
    'prediction-request-rag',
    'prediction-request-reasoning',
    'prediction-url-cot-claude',
    'prediction-request-rag-claude',
    'prediction-request-reasoning-claude',
]


def set_error(row):
    """Return whether a request row counts as an error.

    Args:
        row: A row exposing ``error`` and ``prompt_response`` attributes.

    Returns:
        ``bool(row.error)`` when ``row.error`` is already True/False;
        otherwise True exactly when ``prompt_response`` is missing or empty.
    """
    if row.error in [True, False]:
        return bool(row.error)
    response = row.prompt_response
    # Empty CSV cells are loaded as NaN, which is truthy, so a plain
    # ``not response`` would never flag a missing response.
    return bool(pd.isna(response) or not response)


def get_error_data():
    """Build per-tool, per-week request counts and error percentages.

    Returns:
        A DataFrame with columns ``tool``, ``request_month_year_week``,
        ``False`` (successful requests), ``True`` (failed requests),
        ``error_perc`` and ``total_requests``.
    """
    # Copy so that adding the ``error`` column does not write into a slice
    # of the module-level ``tools`` frame.
    tools_inc = tools[tools['tool'].isin(INC_TOOLS)].copy()
    tools_inc['error'] = tools_inc.apply(set_error, axis=1)
    counts = (
        tools_inc.groupby(['tool', 'request_month_year_week', 'error'])
        .size()
        .unstack()
        .fillna(0)
        # Guarantee both outcome columns exist even if the data contains
        # only successes or only failures.
        .reindex(columns=[False, True], fill_value=0.0)
        .reset_index()
    )
    counts['error_perc'] = (counts[True] / (counts[False] + counts[True])) * 100
    counts['total_requests'] = counts[False] + counts[True]
    return counts


def get_error_data_all(error):
    """Aggregate the per-tool error table into one row per week.

    Args:
        error: DataFrame as returned by ``get_error_data``.

    Returns:
        A DataFrame with string column names holding the weekly totals and
        ``error_perc`` rounded to 4 decimal places.
    """
    error_total = (
        error.groupby('request_month_year_week')
        .agg({'total_requests': 'sum', False: 'sum', True: 'sum'})
        .reset_index()
    )
    error_total['error_perc'] = (
        (error_total[True] / error_total['total_requests']) * 100
    ).round(4)
    # The plot component needs string column names; True/False are bools.
    error_total.columns = error_total.columns.astype(str)
    return error_total


error = get_error_data()
error_all = get_error_data_all(error)


def _prepare_plot_data(frame):
    """Return a plot-ready copy: string column names, rounded ``error_perc``."""
    plot_data = frame.copy()
    plot_data.columns = plot_data.columns.astype(str)
    plot_data['error_perc'] = plot_data['error_perc'].round(4)
    return plot_data


def update_tool_plot(selected_tool):
    """Return the weekly error bar plot update for ``selected_tool``."""
    filtered_data = _prepare_plot_data(error[error['tool'] == selected_tool])
    return gr.BarPlot(
        title="Error Percentage",
        x_title="Week",
        y_title="Error Percentage",
        x="request_month_year_week",
        y="error_perc",
        value=filtered_data,
    )


def update_week_plot(selected_week):
    """Return the per-tool error bar plot update for ``selected_week``."""
    filtered_data = _prepare_plot_data(
        error[error['request_month_year_week'] == selected_week]
    )
    return gr.BarPlot(
        title="Error Percentage",
        x_title="Tool",
        y_title="Error Percentage",
        x="tool",
        y="error_perc",
        value=filtered_data,
    )


with demo:
    # NOTE(review): this literal looks like it lost its HTML markup (e.g. a
    # heading tag) somewhere upstream - confirm the intended tags.
    gr.HTML("\n\nOlas Predict Actual Performance\n\n")
    gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")

    with gr.Tabs():
        with gr.TabItem("🔥 Error Dashboard"):
            with gr.Row():
                gr.Markdown("# Plot showing overall error")
            with gr.Row():
                with gr.Column():
                    gr.BarPlot(
                        value=error_all,
                        x="request_month_year_week",
                        y="error_perc",
                        title="Error Percentage",
                        x_title="Week",
                        y_title="Error Percentage",
                        height=800,
                        show_label=True,
                        interactive=True,
                        show_actions_button=True,
                        tooltip=["request_month_year_week", "error_perc"],
                    )

            with gr.Row():
                gr.Markdown("# Plot showing error by tool")
            with gr.Row():
                sel_tool = gr.Dropdown(
                    label="Select a tool",
                    choices=INC_TOOLS,
                    value=INC_TOOLS[0],
                )
            with gr.Row():
                plot_tool_error = gr.BarPlot(
                    title="Error Percentage",
                    x_title="Week",
                    y_title="Error Percentage",
                    show_label=True,
                    interactive=True,
                    show_actions_button=True,
                    tooltip=["request_month_year_week", "error_perc"],
                    width=800,
                )

            with gr.Row():
                gr.Markdown("# Plot showing error by week")
            with gr.Row():
                # Ascending sort, so the last entry is the default selection
                # (presumably the latest week, if the labels sort
                # chronologically - verify against the data).
                choices = sorted(error['request_month_year_week'].unique().tolist())
                sel_week = gr.Dropdown(
                    label="Select a week",
                    choices=choices,
                    # Avoid an IndexError when the data set is empty.
                    value=choices[-1] if choices else None,
                )
            with gr.Row():
                plot_week_error = gr.BarPlot(
                    title="Error Percentage",
                    x_title="Tool",
                    y_title="Error Percentage",
                    show_label=True,
                    interactive=True,
                    show_actions_button=True,
                    tooltip=["tool", "error_perc"],
                    width=800,
                )

            sel_tool.change(update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
            sel_week.change(update_week_plot, inputs=sel_week, outputs=plot_week_error)

            # The dropdowns start with a value but ``change`` only fires on
            # user interaction, so fill both plots once on page load.
            demo.load(update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
            demo.load(update_week_plot, inputs=sel_week, outputs=plot_week_error)

        with gr.TabItem("ℹ️ About"):
            with gr.Accordion("About the Benchmark"):
                gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")

demo.queue(default_concurrency_limit=40).launch()