Commit c1fc4e2 · consolidate
Ahmed Ahmed committed
1 parent: 466c93b
app.py CHANGED
@@ -36,17 +36,38 @@ def init_leaderboard(dataframe):
         ],
     )
 
+def refresh_leaderboard():
+    """Refresh leaderboard data from disk"""
+    try:
+        # Download latest results
+        snapshot_download(
+            repo_id=RESULTS_REPO,
+            local_dir=EVAL_RESULTS_PATH,
+            repo_type="dataset",
+            tqdm_class=None,
+            etag_timeout=30,
+            token=TOKEN
+        )
+    except Exception as e:
+        print(f"Error refreshing results: {e}")
+
+    # Get fresh leaderboard data
+    df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
+    return init_leaderboard(df)
+
 def run_perplexity_test(model_name, revision, precision):
     """Run perplexity evaluation on demand."""
     if not model_name:
-        return "Please enter a model name."
+        return "Please enter a model name.", None
 
     success, result = run_dynamic_perplexity_eval(model_name, revision, precision)
 
     if success:
-        return f"✅ Perplexity evaluation completed!\nPerplexity: {result:.4f}"
+        # Get updated leaderboard
+        new_leaderboard = refresh_leaderboard()
+        return f"✅ Perplexity evaluation completed!\nPerplexity: {result:.4f}", new_leaderboard
     else:
-        return f"❌ Evaluation failed: {result}"
+        return f"❌ Evaluation failed: {result}", None
 
 # Initialize results repository and directory
 try:
@@ -110,7 +131,7 @@ with demo:
     test_button.click(
         run_perplexity_test,
         [model_name, revision, precision],
-        result
+        [result, leaderboard]
     )
 
 demo.queue(default_concurrency_limit=5).launch()
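For reference, the new refresh_leaderboard relies on huggingface_hub.snapshot_download to pull the latest results before rebuilding the dataframe. A standalone sketch of that step follows; RESULTS_REPO, EVAL_RESULTS_PATH, and TOKEN are placeholders standing in for the Space's configuration, which this commit does not show.

# Standalone sketch of the refresh step; the config names are placeholders.
from huggingface_hub import snapshot_download

RESULTS_REPO = "my-org/results"        # placeholder: dataset repo holding eval results
EVAL_RESULTS_PATH = "./eval-results"   # placeholder: local directory for the snapshot
TOKEN = None                           # placeholder: HF token if the repo is private

def pull_latest_results():
    try:
        snapshot_download(
            repo_id=RESULTS_REPO,
            local_dir=EVAL_RESULTS_PATH,
            repo_type="dataset",   # results live in a dataset repo, not a model repo
            tqdm_class=None,       # suppress progress bars in the Space logs
            etag_timeout=30,       # give up on slow Hub metadata requests
            token=TOKEN,
        )
    except Exception as e:
        # Same policy as the commit: log and fall back to what is already on disk
        print(f"Error refreshing results: {e}")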
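The change to test_button.click works because Gradio maps a handler's returned tuple element-wise onto its outputs list, so run_perplexity_test can update both the status text and the leaderboard from one event. Below is a minimal, self-contained sketch of the same wiring, not the Space's actual code: gr.Dataframe stands in for whatever component init_leaderboard returns, and fake_eval stubs run_dynamic_perplexity_eval.

# Minimal sketch of the two-output wiring; the eval and leaderboard are stubs.
import gradio as gr
import pandas as pd

def fake_eval(model_name):
    # Stub for run_dynamic_perplexity_eval: returns (success, perplexity-or-error)
    return True, 12.3456

def run_perplexity_test(model_name):
    if not model_name:
        return "Please enter a model name.", None
    success, result = fake_eval(model_name)
    if success:
        fresh = pd.DataFrame({"model": [model_name], "perplexity": [result]})
        # First tuple element -> status textbox, second -> leaderboard dataframe
        return f"✅ Perplexity: {result:.4f}", fresh
    return f"❌ Evaluation failed: {result}", None

with gr.Blocks() as demo:
    model_name = gr.Textbox(label="Model name")
    test_button = gr.Button("Run perplexity test")
    result = gr.Textbox(label="Result")
    leaderboard = gr.Dataframe(label="Leaderboard")
    # Two outputs: the tuple returned by the handler fills them in order
    test_button.click(run_perplexity_test, [model_name], [result, leaderboard])

demo.queue(default_concurrency_limit=5).launch()

Note that every return path yields a two-element tuple matching the shape of the outputs list, which mirrors the commit's handler.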