cache-management

#959
by alozowski HF staff - opened
Files changed (1) hide show
  1. app.py +16 -4
app.py CHANGED
@@ -60,8 +60,12 @@ NEW_DATA_ON_LEADERBOARD = True
60
  LEADERBOARD_DF = None
61
 
62
  def restart_space():
63
- API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
64
-
 
 
 
 
65
 
66
  def time_diff_wrapper(func):
67
  def wrapper(*args, **kwargs):
@@ -129,6 +133,9 @@ def get_latest_data_queue():
129
 
130
  def init_space():
131
  """Initializes the application space, loading only necessary data."""
 
 
 
132
  if DO_FULL_INIT:
133
  # These downloads only occur on full initialization
134
  try:
@@ -443,13 +450,16 @@ webhooks_server = enable_space_ci_and_return_server(ui=main_block)
443
  # Add webhooks
444
  @webhooks_server.add_webhook
445
  def update_leaderboard(payload: WebhookPayload) -> None:
446
- """Redownloads the leaderboard dataset each time it updates"""
447
  if payload.repo.type == "dataset" and payload.event.action == "update":
448
  global NEW_DATA_ON_LEADERBOARD
449
  if NEW_DATA_ON_LEADERBOARD:
 
450
  return
 
451
  NEW_DATA_ON_LEADERBOARD = True
452
 
 
453
  datasets.load_dataset(
454
  AGGREGATED_REPO,
455
  "default",
@@ -458,6 +468,7 @@ def update_leaderboard(payload: WebhookPayload) -> None:
458
  download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
459
  verification_mode="no_checks"
460
  )
 
461
 
462
  # The below code is not used at the moment, as we can manage the queue file locally
463
  LAST_UPDATE_QUEUE = datetime.datetime.now()
@@ -477,5 +488,6 @@ def update_queue(payload: WebhookPayload) -> None:
477
  webhooks_server.launch()
478
 
479
  scheduler = BackgroundScheduler()
480
- scheduler.add_job(restart_space, "interval", hours=3) # restarted every 3h as backup in case automatic updates are not working
 
481
  scheduler.start()
 
60
  LEADERBOARD_DF = None
61
 
62
  def restart_space():
63
+ try:
64
+ logging.info(f"Attempting to restart space with repo ID: {REPO_ID}")
65
+ API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
66
+ logging.info("Space restarted successfully.")
67
+ except Exception as e:
68
+ logging.error(f"Failed to restart space: {e}")
69
 
70
  def time_diff_wrapper(func):
71
  def wrapper(*args, **kwargs):
 
133
 
134
  def init_space():
135
  """Initializes the application space, loading only necessary data."""
136
+ global NEW_DATA_ON_LEADERBOARD
137
+ NEW_DATA_ON_LEADERBOARD = True # Ensure new data is always pulled on restart
138
+
139
  if DO_FULL_INIT:
140
  # These downloads only occur on full initialization
141
  try:
 
450
  # Add webhooks
451
  @webhooks_server.add_webhook
452
  def update_leaderboard(payload: WebhookPayload) -> None:
453
+ """Redownloads the leaderboard dataset each time it updates."""
454
  if payload.repo.type == "dataset" and payload.event.action == "update":
455
  global NEW_DATA_ON_LEADERBOARD
456
  if NEW_DATA_ON_LEADERBOARD:
457
+ logging.info("Leaderboard data is already marked for update, skipping...")
458
  return
459
+ logging.info("New data detected, downloading updated leaderboard dataset.")
460
  NEW_DATA_ON_LEADERBOARD = True
461
 
462
+ # Download the latest version of the dataset
463
  datasets.load_dataset(
464
  AGGREGATED_REPO,
465
  "default",
 
468
  download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
469
  verification_mode="no_checks"
470
  )
471
+ logging.info("Leaderboard dataset successfully downloaded.")
472
 
473
  # The below code is not used at the moment, as we can manage the queue file locally
474
  LAST_UPDATE_QUEUE = datetime.datetime.now()
 
488
  webhooks_server.launch()
489
 
490
  scheduler = BackgroundScheduler()
491
+ scheduler.add_job(restart_space, "interval", hours=1) # Restart every 1h
492
+ logging.info("Scheduler initialized to restart space every 1 hour.")
493
  scheduler.start()