Quick fix.
Browse files
- README.md +1 -0
- app.py +14 -16
- requirements.txt +1 -1
README.md
CHANGED
@@ -4,6 +4,7 @@ emoji: 🥇
|
|
4 |
colorFrom: green
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
|
|
7 |
app_file: app.py
|
8 |
pinned: true
|
9 |
license: mit
|
|
|
4 |
colorFrom: green
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.41.0
|
8 |
app_file: app.py
|
9 |
pinned: true
|
10 |
license: mit
|
app.py
CHANGED
@@ -56,12 +56,21 @@ def init_leaderboard(dataframe, default_selection=["Model", "pass@1", "pass@5",
|
|
56 |
)
|
57 |
|
58 |
# Gradio interface
|
59 |
-
models = df['Model'].unique().tolist()
|
60 |
-
scenarios = df['Scenario'].unique().tolist()
|
61 |
|
62 |
-
|
|
|
|
|
|
|
63 |
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
gr.Markdown("# 🏆 WebApp1K Models Leaderboard")
|
66 |
gr.Markdown(
|
67 |
"## [Discord](https://discord.gg/3qpAbWC7) " +
|
@@ -70,21 +79,10 @@ with demo:
|
|
70 |
"[Github](https://github.com/onekq/WebApp1k) " +
|
71 |
"[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)")
|
72 |
|
73 |
-
# Initialize leaderboard with the complete DataFrame
|
74 |
-
duo_complete_pass_at_k = duo_df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
|
75 |
-
'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean()
|
76 |
-
}, index=['pass@1'])).reset_index()
|
77 |
-
|
78 |
-
complete_pass_at_k = df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
|
79 |
-
'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean(),
|
80 |
-
'pass@5': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 5).mean(),
|
81 |
-
'pass@10': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 10).mean()
|
82 |
-
}, index=['pass@1', 'pass@5', 'pass@10'])).reset_index()
|
83 |
-
|
84 |
gr.Markdown("# WebApp1K-Duo ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-Duo-React))")
|
85 |
duo_leaderboard = init_leaderboard(duo_complete_pass_at_k, default_selection = ["Model", "pass@1"], height=400)
|
86 |
gr.Markdown("# WebApp1K ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-React))")
|
87 |
-
|
88 |
|
89 |
# Launch the Gradio interface
|
90 |
demo.launch()
|
|
|
56 |
)
|
57 |
|
58 |
# Gradio interface
|
59 |
+
#models = df['Model'].unique().tolist()
|
60 |
+
#scenarios = df['Scenario'].unique().tolist()
|
61 |
|
62 |
+
# Initialize leaderboard with the complete DataFrame
|
63 |
+
duo_complete_pass_at_k = duo_df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
|
64 |
+
'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean()
|
65 |
+
}, index=['pass@1'])).reset_index()
|
66 |
|
67 |
+
complete_pass_at_k = df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
|
68 |
+
'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean(),
|
69 |
+
'pass@5': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 5).mean(),
|
70 |
+
'pass@10': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 10).mean()
|
71 |
+
}, index=['pass@1', 'pass@5', 'pass@10'])).reset_index()
|
72 |
+
|
73 |
+
with gr.Blocks() as demo:
|
74 |
gr.Markdown("# 🏆 WebApp1K Models Leaderboard")
|
75 |
gr.Markdown(
|
76 |
"## [Discord](https://discord.gg/3qpAbWC7) " +
|
|
|
79 |
"[Github](https://github.com/onekq/WebApp1k) " +
|
80 |
"[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)")
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
gr.Markdown("# WebApp1K-Duo ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-Duo-React))")
|
83 |
duo_leaderboard = init_leaderboard(duo_complete_pass_at_k, default_selection = ["Model", "pass@1"], height=400)
|
84 |
gr.Markdown("# WebApp1K ([Benchmark](https://huggingface.co/datasets/onekq-ai/WebApp1K-React))")
|
85 |
+
leaderboard = init_leaderboard(complete_pass_at_k, default_selection = [], height=800)
|
86 |
|
87 |
# Launch the Gradio interface
|
88 |
demo.launch()
|
requirements.txt
CHANGED
@@ -3,7 +3,7 @@ black
|
|
3 |
datasets
|
4 |
gradio
|
5 |
gradio[oauth]
|
6 |
-
gradio_leaderboard
|
7 |
gradio_client
|
8 |
huggingface-hub>=0.18.0
|
9 |
matplotlib
|
|
|
3 |
datasets
|
4 |
gradio
|
5 |
gradio[oauth]
|
6 |
+
gradio_leaderboard>=0.0.9
|
7 |
gradio_client
|
8 |
huggingface-hub>=0.18.0
|
9 |
matplotlib
|