Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -166,29 +166,45 @@ def process_and_visualize(file_content):
|
|
166 |
|
167 |
output_text = f"Average Performance per Model:\n{averages.sort_values(by='Average Performance').to_string()}"
|
168 |
|
169 |
-
return output_text, image_avg, image_line, image_heatmap, image_boxplot, plotly_avg,
|
170 |
|
171 |
if __name__ == "__main__":
|
172 |
|
173 |
task_names = ['tinyArc', 'tinyHellaswag', 'tinyMMLU', 'tinyTruthfulQA', 'tinyTruthfulQA_mc1', 'tinyWinogrande']
|
174 |
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
gr.Image(label="Matplotlib Performance Distribution Boxplot"),
|
184 |
-
gr.HTML(label="Plotly Average Performance Chart"),
|
185 |
-
gr.TabbedInterface(
|
186 |
-
[gr.HTML(f"Plotly {task} Chart") for task in task_names]
|
187 |
-
)
|
188 |
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
|
167 |
output_text = f"Average Performance per Model:\n{averages.sort_values(by='Average Performance').to_string()}"
|
168 |
|
169 |
+
return output_text, image_avg, image_line, image_heatmap, image_boxplot, plotly_avg, plotly_tasks
|
170 |
|
171 |
if __name__ == "__main__":
|
172 |
|
173 |
task_names = ['tinyArc', 'tinyHellaswag', 'tinyMMLU', 'tinyTruthfulQA', 'tinyTruthfulQA_mc1', 'tinyWinogrande']
|
174 |
|
175 |
+
with gr.Blocks(title="LLM Benchmark Visualizer") as demo:
|
176 |
+
gr.Markdown("Upload your LLM benchmark data and visualize the results.")
|
177 |
+
|
178 |
+
with gr.Row():
|
179 |
+
input_text = gr.Textbox(lines=10, label="Paste your data here")
|
180 |
+
|
181 |
+
with gr.Row():
|
182 |
+
output_text = gr.Textbox(label="Average Performance per Model")
|
|
|
|
|
|
|
|
|
|
|
183 |
|
184 |
+
with gr.Row():
|
185 |
+
with gr.Column():
|
186 |
+
image_avg = gr.Image(label="Matplotlib Average Performance Chart")
|
187 |
+
image_line = gr.Image(label="Matplotlib Task Performance Line Chart")
|
188 |
+
with gr.Column():
|
189 |
+
image_heatmap = gr.Image(label="Matplotlib Task Performance Heatmap")
|
190 |
+
image_boxplot = gr.Image(label="Matplotlib Performance Distribution Boxplot")
|
191 |
+
with gr.Row():
|
192 |
+
plotly_avg = gr.HTML(label="Plotly Average Performance Chart")
|
193 |
+
|
194 |
+
with gr.Row():
|
195 |
+
with gr.TabbedInterface([gr.HTML(value=f"", label=f"Plotly {task} Chart") for task in task_names], label="Task Charts"):
|
196 |
+
pass
|
197 |
+
|
198 |
+
input_text.change(
|
199 |
+
fn=process_and_visualize,
|
200 |
+
inputs=input_text,
|
201 |
+
outputs=[output_text, image_avg, image_line, image_heatmap, image_boxplot, plotly_avg],
|
202 |
+
)
|
203 |
+
|
204 |
+
def update_tabs(file_content):
|
205 |
+
_, _, _, _, _, _, plotly_tasks = process_and_visualize(file_content)
|
206 |
+
return {task: gr.HTML(value=html) for task, html in plotly_tasks.items()}
|
207 |
+
|
208 |
+
input_text.change(fn=update_tabs, inputs=input_text, outputs=[])
|
209 |
+
|
210 |
+
demo.launch(share=True)
|