Raymond Weitekamp committed on
Commit
b840d3e
·
1 Parent(s): 99f73bb

have to test live

Browse files
Files changed (1) hide show
  1. app.py +29 -35
app.py CHANGED
@@ -226,61 +226,55 @@ def create_gradio_interface():
226
  if not profile or "username" not in profile:
227
  raise gr.Error("Please log in to use this application")
228
  username = profile["username"]
229
- repo_id = f"{username}/handwriting-ocr-private"
230
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  if private_checkbox:
232
- # Remove all metadata for privacy
233
- stripped_image = strip_metadata(image)
234
-
235
- # Check if the dataset exists; if not, create it as private
236
  try:
237
  collector.hf_api.dataset_info(repo_id)
238
  except Exception as e:
239
- collector.hf_api.create_repo(repo_id, repo_type="dataset", private=True)
240
-
241
- # Save the stripped image to a temporary file
242
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
243
- filename = f"{timestamp}.png"
244
- temp_dir = "temp"
245
- os.makedirs(temp_dir, exist_ok=True)
246
  temp_path = os.path.join(temp_dir, filename)
247
  stripped_image.save(temp_path)
248
-
249
- # Create a dataset dictionary with the image-text pair
250
- features = datasets.Features({
251
- 'text': datasets.Value('string'),
252
- 'image': datasets.Image(),
253
- 'timestamp': datasets.Value('string')
254
- })
255
-
256
  dataset_dict = {
257
  'text': [text],
258
  'image': [temp_path],
259
  'timestamp': [timestamp]
260
  }
261
-
262
- # Create the dataset and push to hub
263
  dataset = datasets.Dataset.from_dict(dataset_dict, features=features)
264
  dataset.push_to_hub(repo_id)
265
-
266
- # Remove the temporary file
267
  os.remove(temp_path)
268
-
269
- # Log the submission locally
270
  collector.collected_pairs.append({
271
  "text": text,
272
  "image": image,
273
  "timestamp": timestamp,
274
  "username": username,
275
- "dataset": "private"
276
  })
277
-
278
- new_text = collector.get_random_text_block(max_words)
279
- return None, new_text
280
- elif public_checkbox:
281
- # Fallback to public submission
282
- new_text = collector.get_random_text_block(max_words)
283
- return None, new_text
284
 
285
  def handle_regenerate(profile, text, max_words):
286
  # Remove the login check - allow anyone to regenerate text
 
226
  if not profile or "username" not in profile:
227
  raise gr.Error("Please log in to use this application")
228
  username = profile["username"]
229
+
230
+ # Common processing: strip metadata, get timestamp, create features, and setup temp directory.
231
+ stripped_image = strip_metadata(image)
232
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
233
+ features = datasets.Features({
234
+ 'text': datasets.Value('string'),
235
+ 'image': datasets.Image(),
236
+ 'timestamp': datasets.Value('string')
237
+ })
238
+ temp_dir = "temp"
239
+ os.makedirs(temp_dir, exist_ok=True)
240
+
241
+ # Define targets based on checkboxes: each entry is (dataset_type, repo_id, suffix, privacy_flag)
242
+ targets = []
243
+ if public_checkbox:
244
+ targets.append(("public", "rawwerks/handwriting-ocr-all", "_public", False))
245
  if private_checkbox:
246
+ targets.append(("private", f"{username}/handwriting-ocr-private", "_private", True))
247
+
248
+ # Loop over each target, pushing the dataset with shared logic.
249
+ for ds_type, repo_id, suffix, is_private in targets:
250
  try:
251
  collector.hf_api.dataset_info(repo_id)
252
  except Exception as e:
253
+ collector.hf_api.create_repo(repo_id, repo_type="dataset", private=is_private)
254
+
255
+ filename = f"{timestamp}{suffix}.png"
 
 
 
 
256
  temp_path = os.path.join(temp_dir, filename)
257
  stripped_image.save(temp_path)
258
+
 
 
 
 
 
 
 
259
  dataset_dict = {
260
  'text': [text],
261
  'image': [temp_path],
262
  'timestamp': [timestamp]
263
  }
 
 
264
  dataset = datasets.Dataset.from_dict(dataset_dict, features=features)
265
  dataset.push_to_hub(repo_id)
 
 
266
  os.remove(temp_path)
267
+
 
268
  collector.collected_pairs.append({
269
  "text": text,
270
  "image": image,
271
  "timestamp": timestamp,
272
  "username": username,
273
+ "dataset": ds_type
274
  })
275
+
276
+ new_text = collector.get_random_text_block(max_words)
277
+ return None, new_text
 
 
 
 
278
 
279
  def handle_regenerate(profile, text, max_words):
280
  # Remove the login check - allow anyone to regenerate text