Raymond Weitekamp committed on
Commit
491aa62
·
1 Parent(s): 8388b94

feat: add leaderboard for top contributors

Browse files
Files changed (1) hide show
  1. app.py +41 -2
app.py CHANGED
@@ -113,6 +113,21 @@ class OCRDataCollector:
113
  def skip_text(self, text_block, username: Optional[str] = None):
114
  return self.get_random_text_block(201)
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  def strip_metadata(image: Image.Image) -> Image.Image:
118
  """
@@ -144,6 +159,20 @@ def create_gradio_interface():
144
  with gr.Blocks() as demo:
145
  gr.Markdown("# Handwriting OCR Dataset Creator")
146
  gr.Markdown("## After almost 100 years, handwriting recognition still sucks. Together, we can change that.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  gr.Markdown("### Step 1: Log in with your Hugging Face account to use this app.")
148
  # Login section - centered
149
  with gr.Row():
@@ -394,7 +423,7 @@ def create_gradio_interface():
394
  os.remove(temp_path_private)
395
 
396
  new_text = collector.get_random_text_block(max_words)
397
- return None, new_text
398
 
399
  # Submit button click handler with simplified inputs
400
  submit_btn.click(
@@ -409,7 +438,7 @@ def create_gradio_interface():
409
  gr.State(None), # Profile will be filled by Gradio
410
  gr.State(None) # Token will be filled by Gradio
411
  ],
412
- outputs=[image_input, text_box]
413
  )
414
 
415
  def handle_regenerate(text, max_words):
@@ -422,6 +451,16 @@ def create_gradio_interface():
422
  outputs=text_box
423
  )
424
 
 
 
 
 
 
 
 
 
 
 
425
  return demo
426
 
427
  if __name__ == "__main__":
 
113
  def skip_text(self, text_block, username: Optional[str] = None):
114
  return self.get_random_text_block(201)
115
 
116
def get_leaderboard(self):
    """Return contributor rankings for the leaderboard table.

    Loads the ``train`` split of ``rawwerks/handwriting-ocr-all``, counts
    rows per ``user`` value (skipping ``'anonymous'`` and missing/empty
    values) and ranks users by contribution count, highest first.

    Returns:
        A list of ``(label, count)`` tuples, one per user, where ``label``
        is ``"🏆 <rank>. <user>"`` with a 1-based rank. An empty list is
        returned if loading or counting fails for any reason.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    try:
        dataset = datasets.load_dataset("rawwerks/handwriting-ocr-all", split="train")
        # Read only the "user" column instead of iterating full rows, so the
        # remaining fields of every record are never materialized.
        user_counts = Counter(
            user
            for user in dataset["user"]
            # A row without a username cannot be credited to anyone.
            if user and user != "anonymous"
        )
        # most_common() orders by count descending; ties keep first-seen
        # order, matching a stable sort over insertion order.
        return [
            (f"🏆 {rank}. {user}", count)
            for rank, (user, count) in enumerate(user_counts.most_common(), start=1)
        ]
    except Exception as e:
        # Deliberate best-effort boundary: a failure to fetch the dataset
        # yields an empty leaderboard rather than an error in the caller.
        print(f"Error fetching leaderboard: {e}")
        return []
131
 
132
  def strip_metadata(image: Image.Image) -> Image.Image:
133
  """
 
159
  with gr.Blocks() as demo:
160
  gr.Markdown("# Handwriting OCR Dataset Creator")
161
  gr.Markdown("## After almost 100 years, handwriting recognition still sucks. Together, we can change that.")
162
+
163
+ # Add leaderboard section at the top
164
+ with gr.Row():
165
+ with gr.Column():
166
+ gr.Markdown("### 🏆 Top Contributors")
167
+ leaderboard = gr.Dataframe(
168
+ headers=["User", "Contributions"],
169
+ value=collector.get_leaderboard(),
170
+ elem_id="leaderboard",
171
+ visible=True,
172
+ interactive=False
173
+ )
174
+ refresh_btn = gr.Button("🔄 Refresh Leaderboard", elem_id="refresh_btn")
175
+
176
  gr.Markdown("### Step 1: Log in with your Hugging Face account to use this app.")
177
  # Login section - centered
178
  with gr.Row():
 
423
  os.remove(temp_path_private)
424
 
425
  new_text = collector.get_random_text_block(max_words)
426
+ return None, new_text, collector.get_leaderboard()
427
 
428
  # Submit button click handler with simplified inputs
429
  submit_btn.click(
 
438
  gr.State(None), # Profile will be filled by Gradio
439
  gr.State(None) # Token will be filled by Gradio
440
  ],
441
+ outputs=[image_input, text_box, leaderboard]
442
  )
443
 
444
  def handle_regenerate(text, max_words):
 
451
  outputs=text_box
452
  )
453
 
454
+ # Add leaderboard refresh handler
455
+ def refresh_leaderboard():
456
+ return collector.get_leaderboard()
457
+
458
+ refresh_btn.click(
459
+ fn=refresh_leaderboard,
460
+ inputs=[],
461
+ outputs=[leaderboard]
462
+ )
463
+
464
  return demo
465
 
466
  if __name__ == "__main__":