Spaces:

rawwerks
/

handwriting-ocr

Runtime error

App Files Files Community

Raymond Weitekamp committed on Feb 9

Commit

47b06ca

1 Parent(s): 491aa62

refactor: improve leaderboard layout and heading text

Browse files

Files changed (2) hide show

app.py +77 -25
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ if gr.NO_RELOAD:
     from PIL import Image  # Needed for working with PIL images
     import datasets
     import numpy as np  # Added to help handle numpy array images
 # Load environment variables from .env if available.
 from dotenv import load_dotenv
@@ -84,19 +85,43 @@ class SubmissionData(BaseModel):
 class OCRDataCollector:
     def __init__(self):
         self.collected_pairs = []
         self.current_text_block = self.get_random_text_block(201)  # Default max words
         self.hf_api = HfApi()
     def get_random_text_block(self, max_words: int):
-        block_length = random.randint(1, 5)
-        start_index = random.randint(0, len(sentences) - block_length)
-        block = " ".join(sentences[start_index:start_index + block_length])
         # Truncate to max_words if necessary
         words = block.split()
         if len(words) > max_words:
             block = " ".join(words[:max_words])
         return block
     def submit_image(self, image, text_block, username: Optional[str] = None):
@@ -122,12 +147,43 @@ class OCRDataCollector:
                 if item['user'] != 'anonymous':
                     user_counts[item['user']] = user_counts.get(item['user'], 0) + 1
-            # Sort by count (descending) and format for display
-            leaderboard = sorted(user_counts.items(), key=lambda x: x[1], reverse=True)
-            return [(f"🏆 {i+1}. {user}", count) for i, (user, count) in enumerate(leaderboard)]
         except Exception as e:
             print(f"Error fetching leaderboard: {e}")
-            return []
 def strip_metadata(image: Image.Image) -> Image.Image:
     """
@@ -158,21 +214,24 @@ def create_gradio_interface():
     with gr.Blocks() as demo:
         gr.Markdown("# Handwriting OCR Dataset Creator")
-        gr.Markdown("## After almost 100 years, handwriting recognition still sucks. Together, we can change that.")
         # Add leaderboard section at the top
         with gr.Row():
-            with gr.Column():
-                gr.Markdown("### 🏆 Top Contributors")
                 leaderboard = gr.Dataframe(
-                    headers=["User", "Contributions"],
                     value=collector.get_leaderboard(),
                     elem_id="leaderboard",
                     visible=True,
-                    interactive=False
                 )
-                refresh_btn = gr.Button("🔄 Refresh Leaderboard", elem_id="refresh_btn")
         gr.Markdown("### Step 1: Log in with your Hugging Face account to use this app.")
         # Login section - centered
         with gr.Row():
@@ -303,6 +362,9 @@ def create_gradio_interface():
         # Load initial state and update UI visibility
         demo.load(update_user_state, inputs=profile_state, outputs=[upload_info, image_input, dataset_options, button_row])
         def handle_submit(
             text: str,
@@ -451,16 +513,6 @@ def create_gradio_interface():
             outputs=text_box
         )
-        # Add leaderboard refresh handler
-        def refresh_leaderboard():
-            return collector.get_leaderboard()
-        refresh_btn.click(
-            fn=refresh_leaderboard,
-            inputs=[],
-            outputs=[leaderboard]
-        )
     return demo
 if __name__ == "__main__":

     from PIL import Image  # Needed for working with PIL images
     import datasets
     import numpy as np  # Added to help handle numpy array images
+    import pandas as pd  # Added for pandas DataFrame
 # Load environment variables from .env if available.
 from dotenv import load_dotenv
 class OCRDataCollector:
     def __init__(self):
         self.collected_pairs = []
+        self.last_text_block = None
         self.current_text_block = self.get_random_text_block(201)  # Default max words
         self.hf_api = HfApi()
     def get_random_text_block(self, max_words: int):
         """Pick a random text block that differs from the previously returned one.

         A block is 1-5 consecutive entries of the module-level ``sentences``
         list joined by single spaces and truncated to at most ``max_words``
         whitespace-separated words. The chosen block is stored in
         ``self.last_text_block`` so the next call can avoid repeating it.

         Args:
             max_words: Maximum number of words to keep in the returned block.

         Returns:
             The selected (possibly truncated) text block.

         Raises:
             ValueError: If ``sentences`` is empty.
         """
         total = len(sentences)
         if total == 0:
             raise ValueError("No sentences available to build a text block")

         def truncate(text):
             # Only rebuild the string when it is actually too long, so short
             # blocks keep their original whitespace.
             words = text.split()
             if len(words) > max_words:
                 return " ".join(words[:max_words])
             return text

         max_attempts = 10  # Bounded retries: a tiny sentence list may keep colliding.
         for _ in range(max_attempts):
             # Cap the block length so the start-index range is never empty
             # when there are fewer than five sentences.
             block_length = random.randint(1, min(5, total))
             start_index = random.randint(0, total - block_length)
             block = truncate(" ".join(sentences[start_index:start_index + block_length]))
             if block != self.last_text_block:
                 self._last_start_index = start_index
                 self.last_text_block = block
                 return block

         # Random draws kept repeating the previous block. Walk forward from the
         # remembered start position (instead of re-deriving it by parsing the
         # previous text, which fails for truncated blocks or sentences that do
         # not end in a single '.') and take the first sentence that differs.
         previous_start = getattr(self, "_last_start_index", -1)
         for offset in range(1, total + 1):
             next_start = (previous_start + offset) % total
             block = truncate(sentences[next_start])
             if block != self.last_text_block:
                 break
         # If every candidate is identical to the previous block (e.g. a single
         # sentence, or max_words == 0), the last candidate is returned as-is.
         self._last_start_index = next_start
         self.last_text_block = block
         return block
     def submit_image(self, image, text_block, username: Optional[str] = None):
                 if item['user'] != 'anonymous':
                     user_counts[item['user']] = user_counts.get(item['user'], 0) + 1
+            # Create a pandas DataFrame for better styling
+            df = pd.DataFrame(user_counts.items(), columns=['Username', 'Contributions'])
+            df['Rank'] = range(1, len(df) + 1)
+            df['Medal'] = df['Rank'].apply(lambda x: "🏆" if x == 1 else "🥈" if x == 2 else "🥉" if x == 3 else "👏")
+            # Reorder columns
+            df = df[['Rank', 'Medal', 'Username', 'Contributions']]
+            # Style the DataFrame
+            styled_df = df.style\
+                .set_properties(**{
+                    'text-align': 'center',
+                    'font-size': '16px',
+                    'padding': '10px',
+                    'border': '1px solid #ddd'
+                })\
+                .set_table_styles([
+                    {'selector': 'th', 'props': [
+                        ('background-color', '#f4f4f4'),
+                        ('color', '#333'),
+                        ('font-weight', 'bold'),
+                        ('text-align', 'center'),
+                        ('padding', '12px'),
+                        ('border', '1px solid #ddd')
+                    ]},
+                    {'selector': 'tr:nth-of-type(odd)', 'props': [
+                        ('background-color', '#f9f9f9')
+                    ]},
+                    {'selector': 'tr:hover', 'props': [
+                        ('background-color', '#f5f5f5')
+                    ]}
+                ])
+            return styled_df
         except Exception as e:
             print(f"Error fetching leaderboard: {e}")
+            return pd.DataFrame(columns=['Rank', 'Medal', 'Username', 'Contributions'])
 def strip_metadata(image: Image.Image) -> Image.Image:
     """
     with gr.Blocks() as demo:
         gr.Markdown("# Handwriting OCR Dataset Creator")
+        gr.Markdown("## After almost 100 years of research, handwriting recognition still sucks. Together, we can change that.")
         # Add leaderboard section at the top
+        gr.Markdown("### 🏆 Top Contributors", show_label=False)
         with gr.Row():
+            with gr.Column(scale=1):
+                pass
+            with gr.Column(scale=2, min_width=400):
                 leaderboard = gr.Dataframe(
                     value=collector.get_leaderboard(),
                     elem_id="leaderboard",
                     visible=True,
+                    interactive=False,
+                    show_label=False
                 )
+            with gr.Column(scale=1):
+                pass
         gr.Markdown("### Step 1: Log in with your Hugging Face account to use this app.")
         # Login section - centered
         with gr.Row():
         # Load initial state and update UI visibility
         demo.load(update_user_state, inputs=profile_state, outputs=[upload_info, image_input, dataset_options, button_row])
+        # Also load leaderboard on page load
+        demo.load(fn=lambda: collector.get_leaderboard(), outputs=leaderboard)
         def handle_submit(
             text: str,
             outputs=text_box
         )
     return demo
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -7,4 +7,5 @@ pytest-asyncio>=0.23.0
 playwright>=1.40.0
 datasets>=2.16.0
 pydantic>=2.6.1
-python-dotenv>=1.0.0

 playwright>=1.40.0
 datasets>=2.16.0
 pydantic>=2.6.1
+python-dotenv>=1.0.0
+pandas>=2.0.0