Ahmed Ahmed committed
Commit c1fc4e2 · 1 Parent(s): 466c93b

consolidate

Files changed (1)
  1. app.py +25 -4
app.py CHANGED
@@ -36,17 +36,38 @@ def init_leaderboard(dataframe):
         ],
     )
 
+def refresh_leaderboard():
+    """Refresh leaderboard data from disk"""
+    try:
+        # Download latest results
+        snapshot_download(
+            repo_id=RESULTS_REPO,
+            local_dir=EVAL_RESULTS_PATH,
+            repo_type="dataset",
+            tqdm_class=None,
+            etag_timeout=30,
+            token=TOKEN
+        )
+    except Exception as e:
+        print(f"Error refreshing results: {e}")
+
+    # Get fresh leaderboard data
+    df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
+    return init_leaderboard(df)
+
 def run_perplexity_test(model_name, revision, precision):
     """Run perplexity evaluation on demand."""
     if not model_name:
-        return "Please enter a model name."
+        return "Please enter a model name.", None
 
     success, result = run_dynamic_perplexity_eval(model_name, revision, precision)
 
     if success:
-        return f"✅ Perplexity evaluation completed!\nPerplexity: {result:.4f}\n\nResults have been saved and will appear in the leaderboard shortly."
+        # Get updated leaderboard
+        new_leaderboard = refresh_leaderboard()
+        return f"✅ Perplexity evaluation completed!\nPerplexity: {result:.4f}", new_leaderboard
     else:
-        return f"❌ Evaluation failed: {result}"
 
 # Initialize results repository and directory
 try:
@@ -110,7 +131,7 @@ with demo:
     test_button.click(
         run_perplexity_test,
         [model_name, revision, precision],
-        result
+        [result, leaderboard]
    )
 
     demo.queue(default_concurrency_limit=5).launch()
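Since run_perplexity_test now returns a two-element tuple, the click event must list two output components so Gradio can map the tuple onto them positionally. A minimal sketch of that wiring, using a plain gr.Dataframe as a stand-in for the real leaderboard component and a hypothetical fake_eval function in place of the app's evaluator:

import gradio as gr

def fake_eval(model_name):
    # Stand-in for run_perplexity_test: returns (status message, table rows).
    if not model_name:
        return "Please enter a model name.", None  # no table data on failure, mirroring the commit
    rows = [[model_name, 12.34]]  # pretend perplexity result
    return f"✅ Evaluation completed for {model_name}", rows

with gr.Blocks() as demo:
    model_name = gr.Textbox(label="Model name")
    test_button = gr.Button("Run perplexity test")
    result = gr.Markdown()
    leaderboard = gr.Dataframe(headers=["model", "perplexity"])

    # A 2-tuple return value is mapped positionally onto the two outputs.
    test_button.click(fake_eval, [model_name], [result, leaderboard])

demo.queue(default_concurrency_limit=5).launch()

The component types differ from the real app, which rebuilds its leaderboard via refresh_leaderboard() and init_leaderboard(), but the tuple-to-outputs mapping is the pattern the commit relies on.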