Spaces:

rawwerks
/

handwriting-ocr

Runtime error

App Files Files Community

Raymond Weitekamp committed on Feb 9

Commit

32a0510

1 Parent(s): b840d3e

progress - need to test live now

Browse files

Files changed (3) hide show

app.py +122 -74
requirements.txt +2 -1
test_app.py +5 -5

app.py CHANGED Viewed

@@ -1,4 +1,6 @@
 import gradio as gr
 # Import statements that should only run once
 if gr.NO_RELOAD:
     import random
@@ -8,6 +10,7 @@ if gr.NO_RELOAD:
     from typing import Optional
     from PIL import Image  # Needed for working with PIL images
     import datasets
 # The list of sentences from our previous conversation.
 sentences = [
@@ -63,6 +66,17 @@ sentences = [
     "This additional section outlines today's most influential datasets and benchmarks, highlighting how they continue to shape the development of handwriting OCR systems."
 ]
 class OCRDataCollector:
     def __init__(self):
         self.collected_pairs = []
@@ -98,17 +112,30 @@ class OCRDataCollector:
 def strip_metadata(image: Image.Image) -> Image.Image:
     """
-    Helper function to strip all metadata from the provided PIL Image.
-    This creates a new image with the same pixel data but no additional info.
     """
     data = list(image.getdata())
     stripped_image = Image.new(image.mode, image.size)
     stripped_image.putdata(data)
     return stripped_image
 def create_gradio_interface():
     collector = OCRDataCollector()
     with gr.Blocks() as demo:
         gr.Markdown("# Handwriting OCR Dataset Creator")
@@ -120,15 +147,31 @@ def create_gradio_interface():
                 pass
             with gr.Column(scale=2, min_width=200):
                 login_btn = gr.LoginButton(elem_id="login_btn")
                 user_info = gr.Markdown(
                     value="<center>Please log in with your Hugging Face account to contribute to the dataset.</center>",
                     elem_id="user_info"
                 )
-                profile_state = gr.JSON(visible=False, elem_id="profile_state")
             with gr.Column(scale=1):
                 pass
-        # Instructions (always visible)
         gr.Markdown(
             "### Step 2: Read the text. "
             "You will be shown between 1 and 5 consecutive sentences. Please handwrite them on paper and upload an image of your handwriting. "
@@ -136,7 +179,6 @@ def create_gradio_interface():
             "If you wish to skip the current text, click 'Skip'."
         )
-        # Main interface elements (initially visible)
         text_box = gr.Textbox(
             value=collector.current_text_block,
             label="Text to Handwrite",
@@ -161,16 +203,13 @@ def create_gradio_interface():
             elem_id="regenerate_btn"
         )
-        # Step 3 section
         gr.Markdown("### Step 3: Upload an image of your handwritten version of the text")
-        # Message that changes based on login state
         upload_info = gr.Markdown(
             value="You must be logged in to do this, to help us prevent spam submissions",
             elem_id="upload_info"
         )
-        # Image upload and related components
         image_input = gr.Image(
             type="pil",
             label="Upload Handwritten Image",
@@ -205,9 +244,10 @@ def create_gradio_interface():
         with gr.Row(visible=False) as button_row:
             submit_btn = gr.Button("Submit", elem_id="submit_btn")
-        def update_ui_visibility(profile: gr.OAuthProfile | None) -> dict:
-            """Update visibility of UI elements based on login state"""
-            is_logged_in = profile is not None
             message = "Please upload your handwritten image of the text below." if is_logged_in else "You must be logged in to do this, to help us prevent spam submissions"
             return {
@@ -217,101 +257,109 @@ def create_gradio_interface():
                 button_row: gr.update(visible=is_logged_in)
             }
-        def update_user_info(profile: Optional[dict]) -> tuple[str, dict]:
-            if profile is None:
-                return "<center>Please log in with your Hugging Face account to contribute to the dataset.</center>", {}
-            return f"<center>Logged in as: {profile['username']}</center>", {"username": profile["username"]}
-        def handle_submit(profile, private_checkbox, public_checkbox, image, text, max_words):
-            if not profile or "username" not in profile:
                 raise gr.Error("Please log in to use this application")
-            username = profile["username"]
-            # Common processing: strip metadata, get timestamp, create features, and setup temp directory.
             stripped_image = strip_metadata(image)
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-            features = datasets.Features({
-                'text': datasets.Value('string'),
-                'image': datasets.Image(),
-                'timestamp': datasets.Value('string')
-            })
-            temp_dir = "temp"
-            os.makedirs(temp_dir, exist_ok=True)
-            # Define targets based on checkboxes: each entry is (dataset_type, repo_id, suffix, privacy_flag)
             targets = []
             if public_checkbox:
                 targets.append(("public", "rawwerks/handwriting-ocr-all", "_public", False))
-            if private_checkbox:
-                targets.append(("private", f"{username}/handwriting-ocr-private", "_private", True))
-            # Loop over each target, pushing the dataset with shared logic.
             for ds_type, repo_id, suffix, is_private in targets:
                 try:
                     collector.hf_api.dataset_info(repo_id)
-                except Exception as e:
                     collector.hf_api.create_repo(repo_id, repo_type="dataset", private=is_private)
                 filename = f"{timestamp}{suffix}.png"
                 temp_path = os.path.join(temp_dir, filename)
                 stripped_image.save(temp_path)
-                dataset_dict = {
-                    'text': [text],
-                    'image': [temp_path],
-                    'timestamp': [timestamp]
-                }
-                dataset = datasets.Dataset.from_dict(dataset_dict, features=features)
-                dataset.push_to_hub(repo_id)
-                os.remove(temp_path)
-                collector.collected_pairs.append({
-                    "text": text,
-                    "image": image,
-                    "timestamp": timestamp,
-                    "username": username,
-                    "dataset": ds_type
                 })
             new_text = collector.get_random_text_block(max_words)
             return None, new_text
-        def handle_regenerate(profile, text, max_words):
-            # Remove the login check - allow anyone to regenerate text
-            return collector.get_random_text_block(max_words)
-        # On load, update both the display message and the hidden profile state.
-        demo.load(
-            fn=update_user_info,
-            inputs=None,
-            outputs=[user_info, profile_state]
-        )
-        # Update UI when login state changes
-        demo.load(
-            fn=update_ui_visibility,
-            inputs=None,
-            outputs=[
-                upload_info,
-                image_input,
-                dataset_options,
-                button_row
-            ]
-        )
-        # Bind the submit and skip actions
         submit_btn.click(
             fn=handle_submit,
             inputs=[
-                profile_state, private_checkbox, public_checkbox,
-                image_input, text_box, max_words_slider
             ],
             outputs=[image_input, text_box]
         )
         regenerate_btn.click(
             fn=handle_regenerate,
-            inputs=[profile_state, text_box, max_words_slider],
             outputs=text_box
         )

 import gradio as gr
+from pydantic import BaseModel, Field
+from typing import Optional, Any
 # Import statements that should only run once
 if gr.NO_RELOAD:
     import random
     from typing import Optional
     from PIL import Image  # Needed for working with PIL images
     import datasets
+    import numpy as np  # Added to help handle numpy array images
 # The list of sentences from our previous conversation.
 sentences = [
     "This additional section outlines today's most influential datasets and benchmarks, highlighting how they continue to shape the development of handwriting OCR systems."
 ]
+class SubmissionData(BaseModel):
+    """Pydantic model describing the fields of one handwriting submission.
+
+    NOTE(review): no handler visible in this diff constructs this model;
+    confirm it is used elsewhere or remove it.
+    """
+    # The text block the user was asked to handwrite (required).
+    text: str = Field(..., description="Text to be handwritten")
+    # Typed as Any, so no validation is applied to the profile object.
+    profile: Any = Field(..., description="Gradio OAuth profile")
+    # Optional; defaults to None when no image has been uploaded.
+    image: Optional[Image.Image] = Field(None, description="Uploaded handwritten image")
+    # Constrained to the inclusive range 1..201.
+    max_words: int = Field(..., ge=1, le=201, description="Maximum number of words")
+    public_checkbox: bool = Field(..., description="Submit to public dataset")
+    model_config = {
+        "arbitrary_types_allowed": True  # Allow PIL.Image.Image type
+    }
 class OCRDataCollector:
     def __init__(self):
         self.collected_pairs = []
 def strip_metadata(image: Image.Image) -> Image.Image:
     """
+    Return a copy of ``image`` that carries pixel data only.
+
+    The pixels are copied into a freshly created image of the same mode and
+    size, so nothing attached to the source object other than its pixels
+    (and, for palette images, its colour table) is carried over.
+
+    Args:
+        image: The PIL image to clean.
+
+    Returns:
+        A new PIL image with the same mode, size and pixel values.
+
+    Raises:
+        gr.Error: If ``image`` is None.
     """
+    if image is None:
+        raise gr.Error("No valid image provided")
+    # Create a new image with the same pixel data but no metadata
     data = list(image.getdata())
     stripped_image = Image.new(image.mode, image.size)
     stripped_image.putdata(data)
+    # Palette-mode pixels are indices into a colour table. Without copying the
+    # table, the new image would be rendered with the default palette and the
+    # colours would change. getpalette() returns None for non-palette images.
+    palette = image.getpalette()
+    if palette is not None:
+        stripped_image.putpalette(palette)
     return stripped_image
+class UserState:
+    """Login status derived from the most recently processed OAuth profile.
+
+    NOTE(review): create_gradio_interface builds a single instance and the
+    event handlers close over it, so it looks shared by every visitor's
+    session rather than held per user. Confirm; if so, ``is_logged_in`` and
+    ``username`` reflect whichever profile was processed last.
+    """
+    def __init__(self):
+        self.is_logged_in = False
+        self.username = None
+    def update_from_profile(self, profile: gr.OAuthProfile | None) -> None:
+        """Refresh ``username`` and ``is_logged_in`` from ``profile``."""
+        # getattr covers both a missing profile (None) and a profile object
+        # that has no username; either case counts as logged out.
+        name = getattr(profile, "username", None)
+        self.is_logged_in = name is not None
+        self.username = name
 def create_gradio_interface():
     collector = OCRDataCollector()
+    user_state = UserState()
     with gr.Blocks() as demo:
         gr.Markdown("# Handwriting OCR Dataset Creator")
                 pass
             with gr.Column(scale=2, min_width=200):
                 login_btn = gr.LoginButton(elem_id="login_btn")
+                # Activate the login button so OAuth is correctly initialized.
+                login_btn.activate()
                 user_info = gr.Markdown(
                     value="<center>Please log in with your Hugging Face account to contribute to the dataset.</center>",
                     elem_id="user_info"
                 )
+                # Create a hidden state component to store the OAuth profile.
+                profile_state = gr.State()
             with gr.Column(scale=1):
                 pass
+        # Update user info based on the OAuth profile.
+        def update_user_info(profile: gr.OAuthProfile | None) -> str:
+            """Build the centred status line shown below the login button."""
+            username = getattr(profile, "username", None) if profile else None
+            # Guard clause: no profile or an empty username means logged out.
+            if not username:
+                return "<center>Please log in with your Hugging Face account to contribute to the dataset.</center>"
+            return f"<center>Logged in as: {username}</center>"
+        demo.load(update_user_info, inputs=None, outputs=user_info)
+        # Store the OAuth profile in the hidden state.
+        def store_profile(profile: gr.OAuthProfile | None) -> gr.OAuthProfile | None:
+            """Return ``profile`` unchanged so a load event can write it to an output component."""
+            return profile
+        demo.load(store_profile, inputs=None, outputs=profile_state)
         gr.Markdown(
             "### Step 2: Read the text. "
             "You will be shown between 1 and 5 consecutive sentences. Please handwrite them on paper and upload an image of your handwriting. "
             "If you wish to skip the current text, click 'Skip'."
         )
         text_box = gr.Textbox(
             value=collector.current_text_block,
             label="Text to Handwrite",
             elem_id="regenerate_btn"
         )
         gr.Markdown("### Step 3: Upload an image of your handwritten version of the text")
         upload_info = gr.Markdown(
             value="You must be logged in to do this, to help us prevent spam submissions",
             elem_id="upload_info"
         )
         image_input = gr.Image(
             type="pil",
             label="Upload Handwritten Image",
         with gr.Row(visible=False) as button_row:
             submit_btn = gr.Button("Submit", elem_id="submit_btn")
+        # Update user state when profile changes
+        def update_user_state(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None = None, *args):
+            user_state.update_from_profile(profile)
+            is_logged_in = user_state.is_logged_in
             message = "Please upload your handwritten image of the text below." if is_logged_in else "You must be logged in to do this, to help us prevent spam submissions"
             return {
                 button_row: gr.update(visible=is_logged_in)
             }
+        # Load initial state and update UI visibility
+        demo.load(update_user_state, inputs=profile_state, outputs=[upload_info, image_input, dataset_options, button_row])
+        def handle_submit(
+            text: str,
+            image: Image.Image,
+            max_words: int,
+            public_checkbox: bool,
+            collector: OCRDataCollector | None = None,
+            *args
+        ):
+            """Validate a submission, append it to the target datasets, and return new text.
+
+            The metadata-stripped image is always appended to the logged-in
+            user's private dataset and, when ``public_checkbox`` is set, to the
+            shared public dataset as well. Login status and username are read
+            from the enclosing ``user_state``.
+
+            Args:
+                text: The text block the user was asked to handwrite.
+                image: The uploaded handwriting image (PIL).
+                max_words: Word limit passed on when drawing the next text block.
+                public_checkbox: Whether to also push to the public dataset.
+                collector: Object providing ``hf_api`` and ``get_random_text_block``.
+                *args: Extra positional values; ignored.
+
+            Returns:
+                ``(None, new_text)``, matching the ``[image_input, text_box]`` outputs.
+
+            Raises:
+                gr.Error: If the collector is missing, the user is not logged
+                    in, or ``image`` is not a PIL image.
+            """
+            print("Debug - Initial params:")
+            print(f"Text: {text[:50]}")
+            print(f"Image type: {type(image)}")
+            print(f"Max words: {max_words}")
+            print(f"Public checkbox: {public_checkbox}")
+            print(f"Collector type: {type(collector)}")
+            if collector is None:
+                raise gr.Error("Internal error: OCR collector not initialized")
+            if not user_state.is_logged_in:
                 raise gr.Error("Please log in to use this application")
+            if not isinstance(image, Image.Image):
+                raise gr.Error("Please upload a valid image before submitting")
+            # Strip metadata from validated image
             stripped_image = strip_metadata(image)
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            # Define targets based on checkboxes
             targets = []
             if public_checkbox:
                 targets.append(("public", "rawwerks/handwriting-ocr-all", "_public", False))
+            targets.append(("private", f"{user_state.username}/handwriting-ocr-private", "_private", True))
+            temp_dir = "temp"
+            os.makedirs(temp_dir, exist_ok=True)
+            # Define features to properly handle image files. The schema is the
+            # same for every target, so it is built once outside the loop.
+            features = datasets.Features({
+                'text': datasets.Value('string'),
+                'image': datasets.Image(),
+                'timestamp': datasets.Value('string')
+            })
             for ds_type, repo_id, suffix, is_private in targets:
                 try:
                     collector.hf_api.dataset_info(repo_id)
+                except Exception:
                     collector.hf_api.create_repo(repo_id, repo_type="dataset", private=is_private)
                 filename = f"{timestamp}{suffix}.png"
                 temp_path = os.path.join(temp_dir, filename)
+                try:
+                    stripped_image.save(temp_path)
+                    try:
+                        # Try to load existing dataset
+                        dataset = datasets.load_dataset(repo_id, split="train")
+                    except FileNotFoundError:
+                        # Only a missing/empty dataset may start from scratch.
+                        # Any other failure (e.g. a network error) must
+                        # propagate, otherwise the push below would replace the
+                        # existing rows with this single item.
+                        # NOTE(review): relies on `datasets` signalling a
+                        # missing or empty repo with FileNotFoundError
+                        # subclasses; confirm for the pinned version.
+                        dataset = datasets.Dataset.from_dict({
+                            'text': [],
+                            'image': [],
+                            'timestamp': []
+                        }, features=features)
+                    # Add the new item
+                    dataset = dataset.add_item({
+                        'text': text,
+                        'image': temp_path,
+                        'timestamp': timestamp
+                    })
+                    # Push updates to hub
+                    dataset.push_to_hub(repo_id, split="train")
+                finally:
+                    # Remove the temp file even when saving, loading or pushing fails.
+                    if os.path.exists(temp_path):
+                        os.remove(temp_path)
             new_text = collector.get_random_text_block(max_words)
             return None, new_text
+        # Submit button click handler with simplified inputs
         submit_btn.click(
             fn=handle_submit,
             inputs=[
+                text_box,          # Text to handwrite
+                image_input,       # Uploaded image
+                max_words_slider,  # Max words
+                public_checkbox,   # Public dataset option
+                gr.State(collector)  # Pass the collector instance
             ],
             outputs=[image_input, text_box]
         )
+        def handle_regenerate(text, max_words):
+            """Return a new random text block limited by ``max_words``.
+
+            ``text`` (the current text box value) is accepted because it is
+            wired in as an input, but it is not used.
+            """
+            # Allow anyone to regenerate text regardless of login status.
+            return collector.get_random_text_block(max_words)
         regenerate_btn.click(
             fn=handle_regenerate,
+            inputs=[text_box, max_words_slider],
             outputs=text_box
         )

requirements.txt CHANGED Viewed

@@ -5,4 +5,5 @@ pytest>=7.0.0
 pytest-playwright>=0.4.0
 pytest-asyncio>=0.23.0
 playwright>=1.40.0
-datasets>=2.16.0

 pytest-playwright>=0.4.0
 pytest-asyncio>=0.23.0
 playwright>=1.40.0
+datasets>=2.16.0
+pydantic>=2.6.1

test_app.py CHANGED Viewed

@@ -10,7 +10,7 @@ def collector():
 def test_get_random_text_block(collector):
     # Test that we get a non-empty string
-    text_block = collector.get_random_text_block()
     assert isinstance(text_block, str)
     assert len(text_block) > 0
@@ -18,13 +18,13 @@ def test_get_random_text_block(collector):
     assert any(sentence in text_block for sentence in sentences)
     # Test that we get different blocks (probabilistic, but very likely)
-    blocks = [collector.get_random_text_block() for _ in range(5)]
     assert len(set(blocks)) > 1, "Random blocks should be different"
 def test_skip_text(collector):
     # Test that we get a different text block when skipping
-    current_text = collector.get_random_text_block()
-    new_text = collector.get_random_text_block()
     assert isinstance(new_text, str)
     assert len(new_text) > 0
@@ -39,7 +39,7 @@ def test_submit_image(collector):
     test_image = Image.fromarray(img_array)
     # Test the current text block
-    current_text = collector.get_random_text_block()
     # Test submission with valid image
     new_text = collector.submit_image(test_image, current_text)

 def test_get_random_text_block(collector):
     # Test that we get a non-empty string
+    text_block = collector.get_random_text_block(max_words=50)
     assert isinstance(text_block, str)
     assert len(text_block) > 0
     assert any(sentence in text_block for sentence in sentences)
     # Test that we get different blocks (probabilistic, but very likely)
+    blocks = [collector.get_random_text_block(max_words=50) for _ in range(5)]
     assert len(set(blocks)) > 1, "Random blocks should be different"
 def test_skip_text(collector):
     # Test that we get a different text block when skipping
+    current_text = collector.get_random_text_block(max_words=50)
+    new_text = collector.get_random_text_block(max_words=50)
     assert isinstance(new_text, str)
     assert len(new_text) > 0
     test_image = Image.fromarray(img_array)
     # Test the current text block
+    current_text = collector.get_random_text_block(max_words=50)
     # Test submission with valid image
     new_text = collector.submit_image(test_image, current_text)