Raymond Weitekamp committed on
Commit
a345416
·
1 Parent(s): 2412b6e

Add OAuth authentication to protect the interface and track user contributions

Browse files
Files changed (3) hide show
  1. README.md +11 -2
  2. app.py +47 -15
  3. requirements.txt +2 -1
README.md CHANGED
@@ -1,10 +1,11 @@
1
  ---
2
- title: Handwriting OCR Dataset Collection
3
  emoji: ✍️
4
  colorFrom: blue
5
  colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 5.15.0
 
8
  app_file: app.py
9
  pinned: false
10
  short_description: Collect handwritten text samples for OCR training
@@ -13,6 +14,14 @@ tags:
13
  - handwriting
14
  - dataset
15
  - computer-vision
 
 
 
 
 
 
 
 
16
  ---
17
 
18
  # Handwriting OCR Dataset Collection
 
1
  ---
2
+ title: Handwriting OCR Data Collection
3
  emoji: ✍️
4
  colorFrom: blue
5
  colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 3.40.0
8
+ python_version: 3.10.6
9
  app_file: app.py
10
  pinned: false
11
  short_description: Collect handwritten text samples for OCR training
 
14
  - handwriting
15
  - dataset
16
  - computer-vision
17
+
18
+ hf_oauth: true
19
+ hf_oauth_expiration_minutes: 480
20
+ hf_oauth_scopes:
21
+ - read-repos
22
+ - write-repos
23
+ - manage-repos
24
+ - inference-api
25
  ---
26
 
27
  # Handwriting OCR Dataset Collection
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
2
  import random
3
  import os
4
  from datetime import datetime
 
 
5
 
6
  # The list of sentences from our previous conversation.
7
  sentences = [
@@ -61,6 +63,7 @@ class OCRDataCollector:
61
  def __init__(self):
62
  self.collected_pairs = []
63
  self.current_text_block = self.get_random_text_block()
 
64
 
65
  def get_random_text_block(self):
66
  block_length = random.randint(1, 5)
@@ -68,13 +71,18 @@ class OCRDataCollector:
68
  block = " ".join(sentences[start_index:start_index + block_length])
69
  return block
70
 
71
- def submit_image(self, image, text_block):
72
- if image is not None:
73
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
74
- self.collected_pairs.append({"text": text_block, "image": image, "timestamp": timestamp})
 
 
 
 
 
75
  return self.get_random_text_block()
76
 
77
- def skip_text(self, text_block):
78
  return self.get_random_text_block()
79
 
80
  def create_gradio_interface():
@@ -82,30 +90,54 @@ def create_gradio_interface():
82
 
83
  with gr.Blocks() as demo:
84
  gr.Markdown("## Crowdsourcing Handwriting OCR Dataset")
85
- gr.Markdown("You will be shown between 1 and 5 consecutive sentences. Please handwrite them on paper and upload an image of your handwriting. If you wish to skip the current text, click 'Skip'.")
86
-
87
- text_box = gr.Textbox(value=collector.current_text_block, label="Text to Handwrite", interactive=False)
88
- image_input = gr.Image(type="pil", label="Upload Handwritten Image", sources=["upload"])
89
 
90
  with gr.Row():
91
- submit_btn = gr.Button("Submit")
92
- skip_btn = gr.Button("Skip")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  submit_btn.click(
95
- fn=collector.submit_image,
96
  inputs=[image_input, text_box],
97
  outputs=text_box
98
  )
99
 
100
  skip_btn.click(
101
- fn=collector.skip_text,
102
- inputs=text_box,
103
  outputs=text_box
104
  )
105
-
106
 
107
  return demo
108
 
109
  if __name__ == "__main__":
110
  demo = create_gradio_interface()
111
- demo.launch()
 
2
  import random
3
  import os
4
  from datetime import datetime
5
+ from huggingface_hub import HfApi
6
+ from typing import Optional
7
 
8
  # The list of sentences from our previous conversation.
9
  sentences = [
 
63
  def __init__(self):
64
  self.collected_pairs = []
65
  self.current_text_block = self.get_random_text_block()
66
+ self.hf_api = HfApi()
67
 
68
  def get_random_text_block(self):
69
  block_length = random.randint(1, 5)
 
71
  block = " ".join(sentences[start_index:start_index + block_length])
72
  return block
73
 
74
+ def submit_image(self, image, text_block, username: Optional[str] = None):
75
+ if image is not None and username:
76
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
77
+ self.collected_pairs.append({
78
+ "text": text_block,
79
+ "image": image,
80
+ "timestamp": timestamp,
81
+ "username": username
82
+ })
83
  return self.get_random_text_block()
84
 
85
+ def skip_text(self, text_block, username: Optional[str] = None):
86
  return self.get_random_text_block()
87
 
88
  def create_gradio_interface():
 
90
 
91
  with gr.Blocks() as demo:
92
  gr.Markdown("## Crowdsourcing Handwriting OCR Dataset")
 
 
 
 
93
 
94
  with gr.Row():
95
+ user_info = gr.Markdown("")
96
+
97
+ def update_user_info(request: gr.Request):
98
+ if request.username:
99
+ return f"Logged in as: {request.username}", gr.update(visible=True)
100
+ return "Please log in with your Hugging Face account to contribute to the dataset.", gr.update(visible=False)
101
+
102
+ with gr.Column(visible=False) as main_interface:
103
+ gr.Markdown("You will be shown between 1 and 5 consecutive sentences. Please handwrite them on paper and upload an image of your handwriting. If you wish to skip the current text, click 'Skip'.")
104
+
105
+ text_box = gr.Textbox(value=collector.current_text_block, label="Text to Handwrite", interactive=False)
106
+ image_input = gr.Image(type="pil", label="Upload Handwritten Image", sources=["upload"])
107
+
108
+ with gr.Row():
109
+ submit_btn = gr.Button("Submit")
110
+ skip_btn = gr.Button("Skip")
111
+
112
+ def check_login(request: gr.Request):
113
+ if request.username is None:
114
+ raise gr.Error("Please log in to use this application")
115
+ return request.username
116
+
117
+ def protected_submit(image, text_block, request: gr.Request):
118
+ username = check_login(request)
119
+ return collector.submit_image(image, text_block, username)
120
+
121
+ def protected_skip(text_block, request: gr.Request):
122
+ username = check_login(request)
123
+ return collector.skip_text(text_block, username)
124
+
125
+ demo.load(update_user_info, outputs=[user_info, main_interface])
126
 
127
  submit_btn.click(
128
+ fn=protected_submit,
129
  inputs=[image_input, text_box],
130
  outputs=text_box
131
  )
132
 
133
  skip_btn.click(
134
+ fn=protected_skip,
135
+ inputs=[text_box],
136
  outputs=text_box
137
  )
 
138
 
139
  return demo
140
 
141
  if __name__ == "__main__":
142
  demo = create_gradio_interface()
143
+ demo.launch(auth_message="Please login with your Hugging Face account to contribute to the dataset.")
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
- gradio>=3.50.2
 
2
  Pillow>=10.0.0
3
  pytest>=7.0.0
4
  pytest-playwright>=0.4.0
 
1
+ gradio>=3.40.0
2
+ huggingface-hub>=0.19.0
3
  Pillow>=10.0.0
4
  pytest>=7.0.0
5
  pytest-playwright>=0.4.0