Raymond Weitekamp committed on
Commit
cafb905
·
1 Parent(s): 85e0f0a

fix: properly handle OAuth tokens and checkbox states for private datasets - Use oauth_token.token consistently for private dataset operations - Respect private_checkbox state - Add validation for destination selection - Follow HF OAuth docs pattern - Remove profile.token usage

Browse files
Files changed (1) hide show
  1. app.py +63 -60
app.py CHANGED
@@ -388,7 +388,7 @@ def create_gradio_interface():
388
  ):
389
  """Handle submission using separate credentials:
390
  - For public dataset updates, the master token is loaded from .env.
391
- - For private dataset updates, the user's token is used."""
392
  print(f"Debug - Initial params:")
393
  print(f"Text: {text[:50]}")
394
  image = upload_image if upload_image is not None else None
@@ -456,71 +456,74 @@ def create_gradio_interface():
456
  dataset.push_to_hub(public_repo_id, split="train", token=master_token)
457
  os.remove(temp_path_public)
458
 
459
- # Private dataset submission using user's credentials
460
- if oauth_token is None or not hasattr(oauth_token, 'token'):
461
- # Try to get token from profile if oauth_token is not available
462
- if not profile or not hasattr(profile, 'token'):
463
  raise gr.Error("Authentication token is missing. Please log in again.")
464
- token = profile.token
465
- else:
466
- token = oauth_token.token
467
-
468
- private_repo_id = f"{user_state.username}/handwriting-ocr-private"
469
- filename_private = f"{timestamp}_private.png"
470
- temp_path_private = os.path.join(temp_dir, filename_private)
471
- stripped_image.save(temp_path_private)
472
-
473
- try:
474
- # Initialize HfApi with the token
475
- hf_api = HfApi(token=token)
476
 
477
- try:
478
- # Try to get dataset info first
479
- hf_api.dataset_info(private_repo_id)
480
- except Exception:
481
- # Create repo if it doesn't exist
482
- hf_api.create_repo(
483
- repo_id=private_repo_id,
484
- repo_type="dataset",
485
- private=True,
486
- token=token # Explicitly pass token here
487
- )
488
-
489
- features = datasets.Features({
490
- 'text': datasets.Value('string'),
491
- 'image': datasets.Image(),
492
- 'timestamp': datasets.Value('string')
493
- })
494
 
495
  try:
496
- # Load dataset with explicit token
497
- dataset = datasets.load_dataset(private_repo_id, split="train", token=token)
498
- except Exception:
499
- # If dataset doesn't exist yet, create an empty one
500
- dataset = datasets.Dataset.from_dict({
501
- 'text': [],
502
- 'image': [],
503
- 'timestamp': []
504
- }, features=features)
505
-
506
- # Add the new item using add_item, just like the public dataset
507
- dataset = dataset.add_item({
508
- 'text': text,
509
- 'image': temp_path_private,
510
- 'timestamp': timestamp
511
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
 
513
- # Push to hub with explicit token
514
- dataset.push_to_hub(
515
- private_repo_id,
516
- split="train",
517
- token=token,
518
- private=True
519
- )
520
- os.remove(temp_path_private)
521
 
522
- except Exception as e:
523
- raise gr.Error(f"Failed to save to private dataset: {str(e)}")
 
524
 
525
  new_text = collector.get_random_text_block(max_words)
526
  return None, new_text, collector.get_leaderboard()
 
388
  ):
389
  """Handle submission using separate credentials:
390
  - For public dataset updates, the master token is loaded from .env.
391
+ - For private dataset updates, the user's OAuth token is used."""
392
  print(f"Debug - Initial params:")
393
  print(f"Text: {text[:50]}")
394
  image = upload_image if upload_image is not None else None
 
456
  dataset.push_to_hub(public_repo_id, split="train", token=master_token)
457
  os.remove(temp_path_public)
458
 
459
+ # Private dataset submission using user's OAuth token
460
+ if private_checkbox: # Only proceed with private dataset if checkbox is checked
461
+ if oauth_token is None:
 
462
  raise gr.Error("Authentication token is missing. Please log in again.")
 
 
 
 
 
 
 
 
 
 
 
 
463
 
464
+ if not hasattr(oauth_token, 'token') or not oauth_token.token:
465
+ raise gr.Error("Invalid OAuth token. Please log in again with the required scopes (write-repos, manage-repos).")
466
+
467
+ private_repo_id = f"{user_state.username}/handwriting-ocr-private"
468
+ filename_private = f"{timestamp}_private.png"
469
+ temp_path_private = os.path.join(temp_dir, filename_private)
470
+ stripped_image.save(temp_path_private)
 
 
 
 
 
 
 
 
 
 
471
 
472
  try:
473
+ # Initialize HfApi with the OAuth token
474
+ hf_api = HfApi(token=oauth_token.token)
475
+
476
+ try:
477
+ # Try to get dataset info first
478
+ hf_api.dataset_info(private_repo_id)
479
+ except Exception:
480
+ # Create repo if it doesn't exist
481
+ hf_api.create_repo(
482
+ repo_id=private_repo_id,
483
+ repo_type="dataset",
484
+ private=True,
485
+ token=oauth_token.token # Explicitly pass token here
486
+ )
487
+
488
+ features = datasets.Features({
489
+ 'text': datasets.Value('string'),
490
+ 'image': datasets.Image(),
491
+ 'timestamp': datasets.Value('string')
492
+ })
493
+
494
+ try:
495
+ # Load dataset with explicit token
496
+ dataset = datasets.load_dataset(private_repo_id, split="train", token=oauth_token.token)
497
+ except Exception:
498
+ # If dataset doesn't exist yet, create an empty one
499
+ dataset = datasets.Dataset.from_dict({
500
+ 'text': [],
501
+ 'image': [],
502
+ 'timestamp': []
503
+ }, features=features)
504
+
505
+ # Add the new item
506
+ dataset = dataset.add_item({
507
+ 'text': text,
508
+ 'image': temp_path_private,
509
+ 'timestamp': timestamp
510
+ })
511
+
512
+ # Push to hub with explicit token
513
+ dataset.push_to_hub(
514
+ private_repo_id,
515
+ split="train",
516
+ token=oauth_token.token,
517
+ private=True
518
+ )
519
+ os.remove(temp_path_private)
520
 
521
+ except Exception as e:
522
+ raise gr.Error(f"Failed to save to private dataset: {str(e)}")
 
 
 
 
 
 
523
 
524
+ # Ensure at least one checkbox is selected
525
+ if not public_checkbox and not private_checkbox:
526
+ raise gr.Error("Please select at least one dataset (public or private) to save to.")
527
 
528
  new_text = collector.get_random_text_block(max_words)
529
  return None, new_text, collector.get_leaderboard()