Spaces:
Runtime error
Runtime error
Raymond Weitekamp
committed on
Commit
·
7ca2071
1
Parent(s):
0a75f20
private works
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ import os
|
|
4 |
from datetime import datetime
|
5 |
from huggingface_hub import HfApi
|
6 |
from typing import Optional
|
|
|
7 |
|
8 |
# The list of sentences from our previous conversation.
|
9 |
sentences = [
|
@@ -85,6 +86,18 @@ class OCRDataCollector:
|
|
85 |
def skip_text(self, text_block, username: Optional[str] = None):
    """Skip the current prompt and hand back a replacement.

    Args:
        text_block: The text being skipped. Accepted but not used here.
        username: Name of the user who skipped. Accepted but not used here.

    Returns:
        Whatever ``self.get_random_text_block()`` returns.
    """
    replacement = self.get_random_text_block()
    return replacement
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
def create_gradio_interface():
|
89 |
collector = OCRDataCollector()
|
90 |
|
@@ -92,44 +105,109 @@ def create_gradio_interface():
|
|
92 |
gr.Markdown("## Crowdsourcing Handwriting OCR Dataset")
|
93 |
gr.LoginButton()
|
94 |
user_info = gr.Markdown()
|
|
|
|
|
95 |
|
96 |
-
gr.Markdown(
|
|
|
|
|
|
|
97 |
|
98 |
text_box = gr.Textbox(value=collector.current_text_block, label="Text to Handwrite", interactive=False, visible=False)
|
99 |
image_input = gr.Image(type="pil", label="Upload Handwritten Image", sources=["upload"], visible=False)
|
|
|
|
|
100 |
|
101 |
with gr.Row(visible=False) as button_row:
|
102 |
submit_btn = gr.Button("Submit")
|
103 |
skip_btn = gr.Button("Skip")
|
104 |
-
|
105 |
-
def update_user_info(profile: gr.OAuthProfile | None)
|
106 |
-
if profile is None:
|
107 |
-
return "Please log in with your Hugging Face account to contribute to the dataset."
|
108 |
-
return f"Logged in as: {profile.name}"
|
109 |
-
|
110 |
-
def handle_submit(profile: gr.OAuthProfile | None) -> str:
|
111 |
if profile is None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
raise gr.Error("Please log in to use this application")
|
113 |
-
|
|
|
|
|
114 |
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
raise gr.Error("Please log in to use this application")
|
118 |
-
return collector.skip_text(
|
119 |
|
120 |
def update_visibility(profile: gr.OAuthProfile | None):
    """Show the contribution widgets only when a user is logged in.

    Args:
        profile: The OAuth profile, or ``None`` when nobody is logged in.

    Returns:
        A list of three ``gr.update`` objects, all carrying the same
        ``visible`` flag (``True`` exactly when ``profile`` is not ``None``).
    """
    is_visible = profile is not None
    # One update per output component wired to this callback.
    return [gr.update(visible=is_visible) for _ in range(3)]
|
127 |
|
128 |
-
|
129 |
-
demo.load(
|
|
|
130 |
|
131 |
-
|
132 |
-
|
|
|
133 |
|
134 |
return demo
|
135 |
|
|
|
4 |
from datetime import datetime
|
5 |
from huggingface_hub import HfApi
|
6 |
from typing import Optional
|
7 |
+
from PIL import Image # Needed for working with PIL images
|
8 |
|
9 |
# The list of sentences from our previous conversation.
|
10 |
sentences = [
|
|
|
86 |
def skip_text(self, text_block, username: Optional[str] = None):
    """Skip the current prompt and hand back a replacement.

    Args:
        text_block: The text being skipped. Accepted but not used here.
        username: Name of the user who skipped. Accepted but not used here.

    Returns:
        Whatever ``self.get_random_text_block()`` returns.
    """
    replacement = self.get_random_text_block()
    return replacement
|
88 |
|
89 |
+
|
90 |
+
def strip_metadata(image: Image.Image) -> Image.Image:
    """Return a new image holding only the pixel data of ``image``.

    The result is rebuilt from the raw pixel buffer, so nothing stored in
    ``image.info`` (EXIF, ICC profile, text chunks, ...) is carried over.

    Args:
        image: The PIL image to sanitise. It is not modified.

    Returns:
        A new ``Image.Image`` with the same mode, size and pixels.
    """
    # Copy the pixels as one bytes buffer rather than a Python list with one
    # object per pixel; the list form is extremely memory-hungry for photos.
    stripped_image = Image.frombytes(image.mode, image.size, image.tobytes())

    # Palette-based images keep their colours in the palette, not in the
    # pixel indices, so the palette has to be copied for the pixels to keep
    # their meaning. The palette is colour data, not descriptive metadata.
    if image.mode in ("P", "PA"):
        palette = image.getpalette()
        if palette is not None:
            stripped_image.putpalette(palette)

    return stripped_image
|
99 |
+
|
100 |
+
|
101 |
def create_gradio_interface():
|
102 |
collector = OCRDataCollector()
|
103 |
|
|
|
105 |
gr.Markdown("## Crowdsourcing Handwriting OCR Dataset")
|
106 |
gr.LoginButton()
|
107 |
user_info = gr.Markdown()
|
108 |
+
# Hidden state to hold the user's OAuth profile as JSON.
|
109 |
+
profile_state = gr.JSON(visible=False)
|
110 |
|
111 |
+
gr.Markdown(
|
112 |
+
"You will be shown between 1 and 5 consecutive sentences. Please handwrite them on paper and upload an image of your handwriting. "
|
113 |
+
"If you wish to skip the current text, click 'Skip'."
|
114 |
+
)
|
115 |
|
116 |
text_box = gr.Textbox(value=collector.current_text_block, label="Text to Handwrite", interactive=False, visible=False)
|
117 |
image_input = gr.Image(type="pil", label="Upload Handwritten Image", sources=["upload"], visible=False)
|
118 |
+
# Toggle (using a radio button) for dataset choice.
|
119 |
+
dataset_radio = gr.Radio(choices=["Private", "Public"], label="Select Dataset", value="Private", visible=False)
|
120 |
|
121 |
with gr.Row(visible=False) as button_row:
|
122 |
submit_btn = gr.Button("Submit")
|
123 |
skip_btn = gr.Button("Skip")
|
124 |
+
|
125 |
+
def update_user_info(profile: gr.OAuthProfile | None):
    """Build the login status message and the hidden profile state.

    Args:
        profile: The OAuth profile, or ``None`` when nobody is logged in.

    Returns:
        A ``(message, state)`` tuple. ``state`` is ``{}`` when logged out and
        ``{"username": <profile.username>}`` when logged in.
    """
    if profile is not None:
        # Use the username provided by the profile.
        username = profile.username
        return f"Logged in as: {username}", {"username": username}
    return "Please log in with your Hugging Face account to contribute to the dataset.", {}
|
130 |
+
|
131 |
+
def handle_submit(profile, dataset_choice, image, text):
    """Store a submitted handwriting image in the selected dataset.

    For ``"Private"`` the image is stripped of metadata, uploaded as a PNG to
    the ``<username>/handwriting-ocr-private`` dataset repo (created as
    private if needed), and the submission is appended to
    ``collector.collected_pairs``. Any other choice is delegated to
    ``collector.submit_image``.

    Args:
        profile: Mapping holding the logged-in user's ``"username"``; empty
            or ``None`` when nobody is logged in.
        dataset_choice: Value of the dataset radio (``"Private"`` or
            ``"Public"``).
        image: The uploaded PIL image, or ``None`` if nothing was uploaded.
        text: The text block the user was asked to handwrite.

    Returns:
        ``(None, new_text)``: ``None`` clears the image input and
        ``new_text`` is the next text block to display.

    Raises:
        gr.Error: If the user is not logged in, or if the private path is
            chosen without an uploaded image.
    """
    import io

    if not profile or "username" not in profile:
        raise gr.Error("Please log in to use this application")
    username = profile["username"]

    if dataset_choice != "Private":
        # Fallback to public submission using the existing logic.
        new_text = collector.submit_image(image, text, username)
        return None, new_text

    # Without this guard a click on Submit with no upload fails inside
    # strip_metadata with an opaque AttributeError.
    if image is None:
        raise gr.Error("Please upload an image before submitting")

    # Use the username from the OAuth profile directly.
    repo_id = f"{username}/handwriting-ocr-private"

    # Remove all metadata for privacy.
    stripped_image = strip_metadata(image)

    # exist_ok makes this a no-op when the repo already exists, so no
    # catch-all except is needed to detect a missing repo.
    # NOTE(review): presumably collector.hf_api is an HfApi whose token may
    # write to the user's namespace - confirm.
    collector.hf_api.create_repo(
        repo_id, repo_type="dataset", private=True, exist_ok=True
    )

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{timestamp}.png"

    # Serialise in memory rather than through a shared temp/<timestamp>.png
    # file. Two submissions in the same second would share that path and
    # could upload one user's image to another user's repo; a failed upload
    # would also leave the file behind.
    buffer = io.BytesIO()
    stripped_image.save(buffer, format="PNG")

    # Upload the PNG bytes to the Hugging Face dataset repository.
    collector.hf_api.upload_file(
        path_or_fileobj=buffer.getvalue(),
        path_in_repo=filename,
        repo_id=repo_id,
        repo_type="dataset",
    )

    # Log the submission locally with an indicator for the private dataset.
    collector.collected_pairs.append({
        "text": text,
        "image": image,  # The original image is recorded locally.
        "timestamp": timestamp,
        "username": username,
        "dataset": "private",
    })

    new_text = collector.get_random_text_block()
    # Return a tuple to clear the image input (set to None) and update the text.
    return None, new_text
|
188 |
+
|
189 |
+
def handle_skip(profile, text):
    """Skip the current text block on behalf of the logged-in user.

    Args:
        profile: Mapping holding the logged-in user's ``"username"``; empty
            or ``None`` when nobody is logged in.
        text: The text block currently displayed.

    Returns:
        The result of ``collector.skip_text(text, username)``.

    Raises:
        gr.Error: If no username is available in ``profile``.
    """
    logged_in = bool(profile) and "username" in profile
    if not logged_in:
        raise gr.Error("Please log in to use this application")
    username = profile["username"]
    return collector.skip_text(text, username)
|
193 |
|
194 |
def update_visibility(profile: gr.OAuthProfile | None):
    """Show the contribution widgets only when a user is logged in.

    Args:
        profile: The OAuth profile, or ``None`` when nobody is logged in.

    Returns:
        A list of four ``gr.update`` objects, all carrying the same
        ``visible`` flag (``True`` exactly when ``profile`` is not ``None``).
    """
    is_visible = profile is not None
    # One update each for text_box, image_input, button_row and dataset_radio.
    return [gr.update(visible=is_visible) for _ in range(4)]
|
203 |
|
204 |
+
# On load, update both the display message and the hidden profile state.
|
205 |
+
demo.load(update_user_info, inputs=None, outputs=[user_info, profile_state])
|
206 |
+
demo.load(update_visibility, inputs=None, outputs=[text_box, image_input, button_row, dataset_radio])
|
207 |
|
208 |
+
# Bind the submit and skip actions with updated inputs.
|
209 |
+
submit_btn.click(handle_submit, inputs=[profile_state, dataset_radio, image_input, text_box], outputs=[image_input, text_box])
|
210 |
+
skip_btn.click(handle_skip, inputs=[profile_state, text_box], outputs=text_box)
|
211 |
|
212 |
return demo
|
213 |
|