# Spaces: Sleeping
import gradio as gr
import pandas as pd

# Tool request data backing the dashboard; read once when the module loads.
tools = pd.read_csv("./data/tools.csv")
# The trades profitability dataset is currently not loaded:
# all_trades = pd.read_csv('./data/all_trades_profitability.csv')

# Root Blocks container; the layout is attached to it further down the file.
demo = gr.Blocks()
# Names of the tools included in the error statistics. The same list feeds the
# row filter in get_error_data() and the choices of the tool dropdown.
INC_TOOLS = [
    "prediction-online",
    "prediction-offline",
    "claude-prediction-online",
    "claude-prediction-offline",
    "prediction-offline-sme",
    "prediction-online-sme",
    "prediction-request-rag",
    "prediction-request-reasoning",
    "prediction-url-cot-claude",
    "prediction-request-rag-claude",
    "prediction-request-reasoning-claude",
]
def set_error(row):
    """Return the error flag for one row of the tools data.

    Args:
        row: Object exposing ``error`` and ``prompt_response`` attributes
            (a DataFrame row when used with ``DataFrame.apply(axis=1)``).

    Returns:
        ``row.error`` unchanged when it already compares equal to ``True`` or
        ``False``; otherwise ``True`` if ``row.prompt_response`` is falsy and
        ``False`` if it is truthy.
    """
    already_flagged = row.error in [True, False]
    if already_flagged:
        return row.error
    # No usable flag: a falsy (e.g. empty) response counts as an error.
    # NOTE(review): NaN is truthy, so a NaN prompt_response yields False here;
    # confirm that is the intended treatment of missing responses.
    return not row.prompt_response
def get_error_data():
    """Count failed and successful requests per tool and week.

    Reads the module-level ``tools`` frame, keeps the rows whose ``tool`` is
    listed in ``INC_TOOLS``, normalises their ``error`` column with
    ``set_error`` and counts rows per (tool, week, error) combination.

    Returns:
        DataFrame with one row per (tool, request_month_year_week) and the
        columns ``tool``, ``request_month_year_week``, ``False`` (rows without
        error), ``True`` (rows with error), ``error_perc`` (share of error
        rows, in percent) and ``total_requests``.
    """
    # Work on a copy: the boolean filter returns a slice of `tools`, and
    # adding a column to that slice is a chained assignment.
    tools_inc = tools[tools['tool'].isin(INC_TOOLS)].copy()
    tools_inc['error'] = tools_inc.apply(set_error, axis=1)

    error = (
        tools_inc.groupby(['tool', 'request_month_year_week', 'error'])
        .size()
        .unstack()
        .fillna(0)
        .reset_index()
    )

    # unstack() only creates a column for outcomes that occur in the data, so
    # a data set with no errors at all (or nothing but errors) would otherwise
    # fail with a KeyError on the lookups below.
    for outcome in (False, True):
        if outcome not in error.columns:
            error[outcome] = 0

    error['error_perc'] = (error[True] / (error[False] + error[True])) * 100
    error['total_requests'] = error[False] + error[True]
    return error
def get_error_data_all(error):
    """Aggregate the per-tool error counts into one row per week.

    Args:
        error: DataFrame with the columns ``request_month_year_week``,
            ``total_requests``, ``False`` and ``True``.

    Returns:
        DataFrame with the weekly sums of ``total_requests``, ``False`` and
        ``True`` plus ``error_perc`` (percentage of error rows, rounded to
        4 decimal places). All column labels are converted to strings.
    """
    weekly_sums = error.groupby('request_month_year_week').agg(
        {'total_requests': 'sum', False: 'sum', True: 'sum'}
    )
    error_total = weekly_sums.reset_index()

    error_share = error_total[True] / error_total['total_requests']
    error_total['error_perc'] = error_share * 100

    # The boolean labels False/True become the strings 'False'/'True'.
    error_total.columns = error_total.columns.astype(str)

    # Keep 4 decimal places of the percentage.
    error_total['error_perc'] = error_total['error_perc'].apply(
        lambda pct: round(pct, 4)
    )
    return error_total
# Computed once at import time and shared by the layout and the callbacks:
# `error` holds the per-tool, per-week counts; `error_all` is the weekly
# roll-up across tools.
error = get_error_data()
error_all = get_error_data_all(error)

# Preview of the weekly roll-up on stdout.
print(error_all.head())
# Dashboard layout: a weekly error-rate line plot across all tools, a per-tool
# line plot driven by a tool dropdown, and a per-week bar plot driven by a
# week dropdown.
with demo:
    gr.HTML("<h1>Olas Predict Actual Performance</h1>")
    gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")

    with gr.Tabs():
        with gr.TabItem("🔥 Error Dashboard"):
            with gr.Row():
                gr.Markdown("This plot shows the percentage of requests that resulted in an error.")

            with gr.Row():
                # Weekly error percentage across all included tools.
                with gr.Column():
                    gr.LinePlot(
                        value=error_all,
                        x="request_month_year_week",
                        y="error_perc",
                        title="Error Percentage",
                        x_title="Week",
                        y_title="Error Percentage",
                        height=400,
                        show_label=True
                    )

            gr.Markdown("This plot shows the percentage of requests that resulted in an error.")

            def _plot_frame(subset):
                """Return a plot-ready copy of a filtered slice of `error`."""
                # Copy first: `subset` is a slice of the shared `error` frame
                # and must not be modified in place.
                frame = subset.copy()
                # The False/True count columns get string labels
                # (presumably required by the plotting backend; confirm).
                frame.columns = frame.columns.astype(str)
                frame['error_perc'] = frame['error_perc'].round(4)
                return frame

            def _tool_frame(selected_tool):
                """Rows of `error` for one tool, one row per week."""
                return _plot_frame(error[error['tool'] == selected_tool])

            def _week_frame(selected_week):
                """Rows of `error` for one week, one row per tool."""
                return _plot_frame(error[error['request_month_year_week'] == selected_week])

            default_tool = "prediction-online"
            default_week = error['request_month_year_week'].iloc[-1]

            # Dropdown for selecting the tool
            sel_tool = gr.Dropdown(
                value=default_tool,
                choices=INC_TOOLS,
                label="Select a tool"
            )
            # Created with render=False so it can be placed below both
            # dropdowns; pre-populated for the default tool so the plot is
            # not blank before the first selection.
            plot_tool_error = gr.LinePlot(
                value=_tool_frame(default_tool),
                x="request_month_year_week",
                y="error_perc",
                title="Error Percentage",
                x_title="Week",
                y_title="Error Percentage",
                render=False
            )

            # Dropdown for selecting the week
            sel_week = gr.Dropdown(
                value=default_week,
                choices=error['request_month_year_week'].unique().tolist(),
                label="Select a week"
            )
            plot_week_error = gr.BarPlot(
                value=_week_frame(default_week),
                x="tool",
                y="error_perc",
                title="Error Percentage",
                x_title="Tool",
                y_title="Error Percentage",
                render=False
            )

            def update_tool_plot(selected_tool):
                """Return the per-week error plot update for the chosen tool."""
                filtered_data = _tool_frame(selected_tool)
                print(filtered_data.head())
                # Plot components take a DataFrame plus the names of the x/y
                # columns; a plain {"x": ..., "y": ...} dict is not rendered
                # as data. Returning a component updates the output's
                # properties.
                return gr.LinePlot(
                    value=filtered_data,
                    x="request_month_year_week",
                    y="error_perc",
                )

            def update_week_plot(selected_week):
                """Return the per-tool error plot update for the chosen week."""
                filtered_data = _week_frame(selected_week)
                print(filtered_data.head())
                return gr.BarPlot(
                    value=filtered_data,
                    x="tool",
                    y="error_perc",
                )

            sel_tool.change(fn=update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
            sel_week.change(fn=update_week_plot, inputs=sel_week, outputs=plot_week_error)

            with gr.Row():
                plot_tool_error.render()

            with gr.Row():
                plot_week_error.render()

        with gr.TabItem("ℹ️ About"):
            with gr.Accordion("About the Benchmark", open=False):
                gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")

demo.queue(default_concurrency_limit=40).launch()