Spaces:
Runtime error
Runtime error
Raymond Weitekamp
committed on
Commit
·
a345416
1
Parent(s):
2412b6e
Add OAuth authentication to protect the interface and track user contributions
Browse files
- README.md +11 -2
- app.py +47 -15
- requirements.txt +2 -1
README.md
CHANGED
@@ -1,10 +1,11 @@
|
|
1 |
---
|
2 |
-
title: Handwriting OCR
|
3 |
emoji: ✍️
|
4 |
colorFrom: blue
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
short_description: Collect handwritten text samples for OCR training
|
@@ -13,6 +14,14 @@ tags:
|
|
13 |
- handwriting
|
14 |
- dataset
|
15 |
- computer-vision
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
---
|
17 |
|
18 |
# Handwriting OCR Dataset Collection
|
|
|
1 |
---
|
2 |
+
title: Handwriting OCR Data Collection
|
3 |
emoji: ✍️
|
4 |
colorFrom: blue
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.40.0
|
8 |
+
python_version: 3.10.6
|
9 |
app_file: app.py
|
10 |
pinned: false
|
11 |
short_description: Collect handwritten text samples for OCR training
|
|
|
14 |
- handwriting
|
15 |
- dataset
|
16 |
- computer-vision
|
17 |
+
|
18 |
+
hf_oauth: true
|
19 |
+
hf_oauth_expiration_minutes: 480
|
20 |
+
hf_oauth_scopes:
|
21 |
+
- read-repos
|
22 |
+
- write-repos
|
23 |
+
- manage-repos
|
24 |
+
- inference-api
|
25 |
---
|
26 |
|
27 |
# Handwriting OCR Dataset Collection
|
app.py
CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
|
|
2 |
import random
|
3 |
import os
|
4 |
from datetime import datetime
|
|
|
|
|
5 |
|
6 |
# The list of sentences from our previous conversation.
|
7 |
sentences = [
|
@@ -61,6 +63,7 @@ class OCRDataCollector:
|
|
61 |
def __init__(self):
|
62 |
self.collected_pairs = []
|
63 |
self.current_text_block = self.get_random_text_block()
|
|
|
64 |
|
65 |
def get_random_text_block(self):
|
66 |
block_length = random.randint(1, 5)
|
@@ -68,13 +71,18 @@ class OCRDataCollector:
|
|
68 |
block = " ".join(sentences[start_index:start_index + block_length])
|
69 |
return block
|
70 |
|
71 |
-
def submit_image(self, image, text_block):
|
72 |
-
if image is not None:
|
73 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
74 |
-
self.collected_pairs.append({
|
|
|
|
|
|
|
|
|
|
|
75 |
return self.get_random_text_block()
|
76 |
|
77 |
-
def skip_text(self, text_block):
|
78 |
return self.get_random_text_block()
|
79 |
|
80 |
def create_gradio_interface():
|
@@ -82,30 +90,54 @@ def create_gradio_interface():
|
|
82 |
|
83 |
with gr.Blocks() as demo:
|
84 |
gr.Markdown("## Crowdsourcing Handwriting OCR Dataset")
|
85 |
-
gr.Markdown("You will be shown between 1 and 5 consecutive sentences. Please handwrite them on paper and upload an image of your handwriting. If you wish to skip the current text, click 'Skip'.")
|
86 |
-
|
87 |
-
text_box = gr.Textbox(value=collector.current_text_block, label="Text to Handwrite", interactive=False)
|
88 |
-
image_input = gr.Image(type="pil", label="Upload Handwritten Image", sources=["upload"])
|
89 |
|
90 |
with gr.Row():
|
91 |
-
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
submit_btn.click(
|
95 |
-
fn=
|
96 |
inputs=[image_input, text_box],
|
97 |
outputs=text_box
|
98 |
)
|
99 |
|
100 |
skip_btn.click(
|
101 |
-
fn=
|
102 |
-
inputs=text_box,
|
103 |
outputs=text_box
|
104 |
)
|
105 |
-
|
106 |
|
107 |
return demo
|
108 |
|
109 |
if __name__ == "__main__":
|
110 |
demo = create_gradio_interface()
|
111 |
-
demo.launch()
|
|
|
2 |
import random
|
3 |
import os
|
4 |
from datetime import datetime
|
5 |
+
from huggingface_hub import HfApi
|
6 |
+
from typing import Optional
|
7 |
|
8 |
# The list of sentences from our previous conversation.
|
9 |
sentences = [
|
|
|
63 |
def __init__(self):
|
64 |
self.collected_pairs = []
|
65 |
self.current_text_block = self.get_random_text_block()
|
66 |
+
self.hf_api = HfApi()
|
67 |
|
68 |
def get_random_text_block(self):
|
69 |
block_length = random.randint(1, 5)
|
|
|
71 |
block = " ".join(sentences[start_index:start_index + block_length])
|
72 |
return block
|
73 |
|
74 |
+
def submit_image(self, image, text_block, username: Optional[str] = None):
|
75 |
+
if image is not None and username:
|
76 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
77 |
+
self.collected_pairs.append({
|
78 |
+
"text": text_block,
|
79 |
+
"image": image,
|
80 |
+
"timestamp": timestamp,
|
81 |
+
"username": username
|
82 |
+
})
|
83 |
return self.get_random_text_block()
|
84 |
|
85 |
+
def skip_text(self, text_block, username: Optional[str] = None):
|
86 |
return self.get_random_text_block()
|
87 |
|
88 |
def create_gradio_interface():
|
|
|
90 |
|
91 |
with gr.Blocks() as demo:
|
92 |
gr.Markdown("## Crowdsourcing Handwriting OCR Dataset")
|
|
|
|
|
|
|
|
|
93 |
|
94 |
with gr.Row():
|
95 |
+
user_info = gr.Markdown("")
|
96 |
+
|
97 |
+
def update_user_info(request: gr.Request):
|
98 |
+
if request.username:
|
99 |
+
return f"Logged in as: {request.username}", gr.update(visible=True)
|
100 |
+
return "Please log in with your Hugging Face account to contribute to the dataset.", gr.update(visible=False)
|
101 |
+
|
102 |
+
with gr.Column(visible=False) as main_interface:
|
103 |
+
gr.Markdown("You will be shown between 1 and 5 consecutive sentences. Please handwrite them on paper and upload an image of your handwriting. If you wish to skip the current text, click 'Skip'.")
|
104 |
+
|
105 |
+
text_box = gr.Textbox(value=collector.current_text_block, label="Text to Handwrite", interactive=False)
|
106 |
+
image_input = gr.Image(type="pil", label="Upload Handwritten Image", sources=["upload"])
|
107 |
+
|
108 |
+
with gr.Row():
|
109 |
+
submit_btn = gr.Button("Submit")
|
110 |
+
skip_btn = gr.Button("Skip")
|
111 |
+
|
112 |
+
def check_login(request: gr.Request):
|
113 |
+
if request.username is None:
|
114 |
+
raise gr.Error("Please log in to use this application")
|
115 |
+
return request.username
|
116 |
+
|
117 |
+
def protected_submit(image, text_block, request: gr.Request):
|
118 |
+
username = check_login(request)
|
119 |
+
return collector.submit_image(image, text_block, username)
|
120 |
+
|
121 |
+
def protected_skip(text_block, request: gr.Request):
|
122 |
+
username = check_login(request)
|
123 |
+
return collector.skip_text(text_block, username)
|
124 |
+
|
125 |
+
demo.load(update_user_info, outputs=[user_info, main_interface])
|
126 |
|
127 |
submit_btn.click(
|
128 |
+
fn=protected_submit,
|
129 |
inputs=[image_input, text_box],
|
130 |
outputs=text_box
|
131 |
)
|
132 |
|
133 |
skip_btn.click(
|
134 |
+
fn=protected_skip,
|
135 |
+
inputs=[text_box],
|
136 |
outputs=text_box
|
137 |
)
|
|
|
138 |
|
139 |
return demo
|
140 |
|
141 |
if __name__ == "__main__":
|
142 |
demo = create_gradio_interface()
|
143 |
+
demo.launch(auth_message="Please login with your Hugging Face account to contribute to the dataset.")
|
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
-
gradio>=3.
|
|
|
2 |
Pillow>=10.0.0
|
3 |
pytest>=7.0.0
|
4 |
pytest-playwright>=0.4.0
|
|
|
1 |
+
gradio>=3.40.0
|
2 |
+
huggingface-hub>=0.19.0
|
3 |
Pillow>=10.0.0
|
4 |
pytest>=7.0.0
|
5 |
pytest-playwright>=0.4.0
|