Spaces:
Runtime error
Runtime error
Raymond Weitekamp
committed on
Commit
·
b840d3e
1
Parent(s):
99f73bb
have to test live
Browse files
app.py
CHANGED
@@ -226,61 +226,55 @@ def create_gradio_interface():
|
|
226 |
if not profile or "username" not in profile:
|
227 |
raise gr.Error("Please log in to use this application")
|
228 |
username = profile["username"]
|
229 |
-
|
230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
if private_checkbox:
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
try:
|
237 |
collector.hf_api.dataset_info(repo_id)
|
238 |
except Exception as e:
|
239 |
-
collector.hf_api.create_repo(repo_id, repo_type="dataset", private=
|
240 |
-
|
241 |
-
|
242 |
-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
243 |
-
filename = f"{timestamp}.png"
|
244 |
-
temp_dir = "temp"
|
245 |
-
os.makedirs(temp_dir, exist_ok=True)
|
246 |
temp_path = os.path.join(temp_dir, filename)
|
247 |
stripped_image.save(temp_path)
|
248 |
-
|
249 |
-
# Create a dataset dictionary with the image-text pair
|
250 |
-
features = datasets.Features({
|
251 |
-
'text': datasets.Value('string'),
|
252 |
-
'image': datasets.Image(),
|
253 |
-
'timestamp': datasets.Value('string')
|
254 |
-
})
|
255 |
-
|
256 |
dataset_dict = {
|
257 |
'text': [text],
|
258 |
'image': [temp_path],
|
259 |
'timestamp': [timestamp]
|
260 |
}
|
261 |
-
|
262 |
-
# Create the dataset and push to hub
|
263 |
dataset = datasets.Dataset.from_dict(dataset_dict, features=features)
|
264 |
dataset.push_to_hub(repo_id)
|
265 |
-
|
266 |
-
# Remove the temporary file
|
267 |
os.remove(temp_path)
|
268 |
-
|
269 |
-
# Log the submission locally
|
270 |
collector.collected_pairs.append({
|
271 |
"text": text,
|
272 |
"image": image,
|
273 |
"timestamp": timestamp,
|
274 |
"username": username,
|
275 |
-
"dataset":
|
276 |
})
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
elif public_checkbox:
|
281 |
-
# Fallback to public submission
|
282 |
-
new_text = collector.get_random_text_block(max_words)
|
283 |
-
return None, new_text
|
284 |
|
285 |
def handle_regenerate(profile, text, max_words):
|
286 |
# Remove the login check - allow anyone to regenerate text
|
|
|
226 |
if not profile or "username" not in profile:
|
227 |
raise gr.Error("Please log in to use this application")
|
228 |
username = profile["username"]
|
229 |
+
|
230 |
+
# Common processing: strip metadata, get timestamp, create features, and setup temp directory.
|
231 |
+
stripped_image = strip_metadata(image)
|
232 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
233 |
+
features = datasets.Features({
|
234 |
+
'text': datasets.Value('string'),
|
235 |
+
'image': datasets.Image(),
|
236 |
+
'timestamp': datasets.Value('string')
|
237 |
+
})
|
238 |
+
temp_dir = "temp"
|
239 |
+
os.makedirs(temp_dir, exist_ok=True)
|
240 |
+
|
241 |
+
# Define targets based on checkboxes: each entry is (dataset_type, repo_id, suffix, privacy_flag)
|
242 |
+
targets = []
|
243 |
+
if public_checkbox:
|
244 |
+
targets.append(("public", "rawwerks/handwriting-ocr-all", "_public", False))
|
245 |
if private_checkbox:
|
246 |
+
targets.append(("private", f"{username}/handwriting-ocr-private", "_private", True))
|
247 |
+
|
248 |
+
# Loop over each target, pushing the dataset with shared logic.
|
249 |
+
for ds_type, repo_id, suffix, is_private in targets:
|
250 |
try:
|
251 |
collector.hf_api.dataset_info(repo_id)
|
252 |
except Exception as e:
|
253 |
+
collector.hf_api.create_repo(repo_id, repo_type="dataset", private=is_private)
|
254 |
+
|
255 |
+
filename = f"{timestamp}{suffix}.png"
|
|
|
|
|
|
|
|
|
256 |
temp_path = os.path.join(temp_dir, filename)
|
257 |
stripped_image.save(temp_path)
|
258 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
259 |
dataset_dict = {
|
260 |
'text': [text],
|
261 |
'image': [temp_path],
|
262 |
'timestamp': [timestamp]
|
263 |
}
|
|
|
|
|
264 |
dataset = datasets.Dataset.from_dict(dataset_dict, features=features)
|
265 |
dataset.push_to_hub(repo_id)
|
|
|
|
|
266 |
os.remove(temp_path)
|
267 |
+
|
|
|
268 |
collector.collected_pairs.append({
|
269 |
"text": text,
|
270 |
"image": image,
|
271 |
"timestamp": timestamp,
|
272 |
"username": username,
|
273 |
+
"dataset": ds_type
|
274 |
})
|
275 |
+
|
276 |
+
new_text = collector.get_random_text_block(max_words)
|
277 |
+
return None, new_text
|
|
|
|
|
|
|
|
|
278 |
|
279 |
def handle_regenerate(profile, text, max_words):
|
280 |
# Remove the login check - allow anyone to regenerate text
|