Raymond Weitekamp committed on
Commit
cafb905
·
1 Parent(s): 85e0f0a

fix: properly handle OAuth tokens and checkbox states for private datasets - Use oauth_token.token consistently for private dataset operations - Respect private_checkbox state - Add validation for destination selection - Follow HF OAuth docs pattern - Remove profile.token usage

Browse files
Files changed (1) hide show
  1. app.py +63 -60
app.py CHANGED
@@ -388,7 +388,7 @@ def create_gradio_interface():
388
  ):
389
  """Handle submission using separate credentials:
390
  - For public dataset updates, the master token is loaded from .env.
391
- - For private dataset updates, the user's token is used."""
392
  print(f"Debug - Initial params:")
393
  print(f"Text: {text[:50]}")
394
  image = upload_image if upload_image is not None else None
@@ -456,71 +456,74 @@ def create_gradio_interface():
456
  dataset.push_to_hub(public_repo_id, split="train", token=master_token)
457
  os.remove(temp_path_public)
458
 
459
- # Private dataset submission using user's credentials
460
- if oauth_token is None or not hasattr(oauth_token, 'token'):
461
- # Try to get token from profile if oauth_token is not available
462
- if not profile or not hasattr(profile, 'token'):
463
  raise gr.Error("Authentication token is missing. Please log in again.")
464
- token = profile.token
465
- else:
466
- token = oauth_token.token
467
-
468
- private_repo_id = f"{user_state.username}/handwriting-ocr-private"
469
- filename_private = f"{timestamp}_private.png"
470
- temp_path_private = os.path.join(temp_dir, filename_private)
471
- stripped_image.save(temp_path_private)
472
-
473
- try:
474
- # Initialize HfApi with the token
475
- hf_api = HfApi(token=token)
476
 
477
- try:
478
- # Try to get dataset info first
479
- hf_api.dataset_info(private_repo_id)
480
- except Exception:
481
- # Create repo if it doesn't exist
482
- hf_api.create_repo(
483
- repo_id=private_repo_id,
484
- repo_type="dataset",
485
- private=True,
486
- token=token # Explicitly pass token here
487
- )
488
-
489
- features = datasets.Features({
490
- 'text': datasets.Value('string'),
491
- 'image': datasets.Image(),
492
- 'timestamp': datasets.Value('string')
493
- })
494
 
495
  try:
496
- # Load dataset with explicit token
497
- dataset = datasets.load_dataset(private_repo_id, split="train", token=token)
498
- except Exception:
499
- # If dataset doesn't exist yet, create an empty one
500
- dataset = datasets.Dataset.from_dict({
501
- 'text': [],
502
- 'image': [],
503
- 'timestamp': []
504
- }, features=features)
505
-
506
- # Add the new item using add_item, just like the public dataset
507
- dataset = dataset.add_item({
508
- 'text': text,
509
- 'image': temp_path_private,
510
- 'timestamp': timestamp
511
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
 
513
- # Push to hub with explicit token
514
- dataset.push_to_hub(
515
- private_repo_id,
516
- split="train",
517
- token=token,
518
- private=True
519
- )
520
- os.remove(temp_path_private)
521
 
522
- except Exception as e:
523
- raise gr.Error(f"Failed to save to private dataset: {str(e)}")
 
524
 
525
  new_text = collector.get_random_text_block(max_words)
526
  return None, new_text, collector.get_leaderboard()
 
388
  ):
389
  """Handle submission using separate credentials:
390
  - For public dataset updates, the master token is loaded from .env.
391
+ - For private dataset updates, the user's OAuth token is used."""
392
  print(f"Debug - Initial params:")
393
  print(f"Text: {text[:50]}")
394
  image = upload_image if upload_image is not None else None
 
456
  dataset.push_to_hub(public_repo_id, split="train", token=master_token)
457
  os.remove(temp_path_public)
458
 
459
+ # Private dataset submission using user's OAuth token
460
+ if private_checkbox: # Only proceed with private dataset if checkbox is checked
461
+ if oauth_token is None:
 
462
  raise gr.Error("Authentication token is missing. Please log in again.")
 
 
 
 
 
 
 
 
 
 
 
 
463
 
464
+ if not hasattr(oauth_token, 'token') or not oauth_token.token:
465
+ raise gr.Error("Invalid OAuth token. Please log in again with the required scopes (write-repos, manage-repos).")
466
+
467
+ private_repo_id = f"{user_state.username}/handwriting-ocr-private"
468
+ filename_private = f"{timestamp}_private.png"
469
+ temp_path_private = os.path.join(temp_dir, filename_private)
470
+ stripped_image.save(temp_path_private)
 
 
 
 
 
 
 
 
 
 
471
 
472
  try:
473
+ # Initialize HfApi with the OAuth token
474
+ hf_api = HfApi(token=oauth_token.token)
475
+
476
+ try:
477
+ # Try to get dataset info first
478
+ hf_api.dataset_info(private_repo_id)
479
+ except Exception:
480
+ # Create repo if it doesn't exist
481
+ hf_api.create_repo(
482
+ repo_id=private_repo_id,
483
+ repo_type="dataset",
484
+ private=True,
485
+ token=oauth_token.token # Explicitly pass token here
486
+ )
487
+
488
+ features = datasets.Features({
489
+ 'text': datasets.Value('string'),
490
+ 'image': datasets.Image(),
491
+ 'timestamp': datasets.Value('string')
492
+ })
493
+
494
+ try:
495
+ # Load dataset with explicit token
496
+ dataset = datasets.load_dataset(private_repo_id, split="train", token=oauth_token.token)
497
+ except Exception:
498
+ # If dataset doesn't exist yet, create an empty one
499
+ dataset = datasets.Dataset.from_dict({
500
+ 'text': [],
501
+ 'image': [],
502
+ 'timestamp': []
503
+ }, features=features)
504
+
505
+ # Add the new item
506
+ dataset = dataset.add_item({
507
+ 'text': text,
508
+ 'image': temp_path_private,
509
+ 'timestamp': timestamp
510
+ })
511
+
512
+ # Push to hub with explicit token
513
+ dataset.push_to_hub(
514
+ private_repo_id,
515
+ split="train",
516
+ token=oauth_token.token,
517
+ private=True
518
+ )
519
+ os.remove(temp_path_private)
520
 
521
+ except Exception as e:
522
+ raise gr.Error(f"Failed to save to private dataset: {str(e)}")
 
 
 
 
 
 
523
 
524
+ # Ensure at least one checkbox is selected
525
+ if not public_checkbox and not private_checkbox:
526
+ raise gr.Error("Please select at least one dataset (public or private) to save to.")
527
 
528
  new_text = collector.get_random_text_block(max_words)
529
  return None, new_text, collector.get_leaderboard()