deenasun committed on
Commit
bf3d9ee
·
1 Parent(s): 03ba989

fix gradio interface

Browse files
Files changed (1) hide show
  1. app.py +107 -155
app.py CHANGED
@@ -23,7 +23,8 @@ R2_ACCESS_KEY_ID = os.environ.get("R2_ACCESS_KEY_ID")
23
  R2_SECRET_ACCESS_KEY = os.environ.get("R2_SECRET_ACCESS_KEY")
24
 
25
  # Validate that required environment variables are set
26
- if not all([R2_ASL_VIDEOS_URL, R2_ENDPOINT, R2_ACCESS_KEY_ID, R2_SECRET_ACCESS_KEY]):
 
27
  raise ValueError(
28
  "Missing required R2 environment variables. "
29
  "Please check your .env file."
@@ -54,15 +55,17 @@ s3 = session.client(
54
  )
55
 
56
  def clean_gloss_token(token):
57
- """
58
- Clean a gloss token by removing brackets, newlines, and extra whitespace
59
- """
60
- # Remove brackets and newlines
61
- cleaned = re.sub(r'[\[\]\n\r]', '', token)
 
 
62
  # Remove extra whitespace
63
  cleaned = re.sub(r'\s+', ' ', cleaned).strip()
64
- cleaned = cleaned.lower()
65
- return cleaned
66
 
67
 
68
  def upload_video_to_r2(video_path, bucket_name="asl-videos"):
@@ -84,8 +87,10 @@ def upload_video_to_r2(video_path, bucket_name="asl-videos"):
84
  )
85
 
86
  # Replace the endpoint with the domain for uploading
87
- public_domain = R2_ENDPOINT.replace('https://', '').split('.')[0]
88
- video_url = f"https://{public_domain}.r2.cloudflarestorage.com/{bucket_name}/{unique_filename}"
 
 
89
 
90
  print(f"Video uploaded to R2: {video_url}")
91
  public_video_url = f"{R2_ASL_VIDEOS_URL}/{unique_filename}"
@@ -150,52 +155,24 @@ def cleanup_temp_video(file_path):
150
  print(f"Error cleaning up file: {e}")
151
 
152
 
153
- def process_text_to_gloss(text):
154
- """
155
- Convert text directly to ASL gloss
156
- """
157
- try:
158
- # For text input, we can use a simpler approach or call the
159
- # document converter with a temporary text file
160
- import tempfile
161
-
162
- # Create a temporary text file
163
- with tempfile.NamedTemporaryFile(
164
- mode='w', suffix='.txt', delete=False
165
- ) as temp_file:
166
- temp_file.write(text)
167
- temp_file_path = temp_file.name
168
-
169
- # Use the existing document converter
170
- gloss = asl_converter.convert_document(temp_file_path)
171
-
172
- # Clean up the temporary file
173
- os.unlink(temp_file_path)
174
-
175
- return gloss
176
- except Exception as e:
177
- print(f"Error processing text: {e}")
178
- return None
179
-
180
-
181
  def process_input(input_data):
182
- """
183
- Process either text input or file upload
184
- input_data can be either a string (text) or a file object
185
- """
186
- if input_data is None:
187
- return None
188
-
189
- # Check if it's a file object (has .name attribute)
190
- if hasattr(input_data, 'name'):
191
- # It's a file upload
192
- print(f"Processing file: {input_data.name}")
193
- return asl_converter.convert_document(input_data.name)
 
 
194
  else:
195
- # It's text input
196
- print(f"Processing text input: "
197
- f"{input_data[:100]}...")
198
- return process_text_to_gloss(input_data)
199
 
200
 
201
  async def parse_vectorize_and_search_unified(input_data):
@@ -210,7 +187,7 @@ async def parse_vectorize_and_search_unified(input_data):
210
  return {
211
  "status": "error",
212
  "message": "Failed to process input"
213
- }, None, ""
214
 
215
  print("ASL", gloss)
216
 
@@ -264,44 +241,25 @@ async def parse_vectorize_and_search_unified(input_data):
264
  stitched_video_path = video_files[0]
265
 
266
  # Upload final video to R2 and get public URL
267
- final_video_url = None
268
  if stitched_video_path:
269
- final_video_url = upload_video_to_r2(stitched_video_path)
270
- # Clean up the local file after upload
271
- cleanup_temp_video(stitched_video_path)
272
 
273
  # Clean up individual video files after stitching
274
  for video_file in video_files:
275
  if video_file != stitched_video_path: # Don't delete the final output
276
  cleanup_temp_video(video_file)
277
 
278
- # Create download link HTML
279
- download_html = ""
280
- if final_video_url:
281
- download_html = f"""
282
- <div style="text-align: center; padding: 20px;">
283
- <h3>Download Your ASL Video</h3>
284
- <a href="{final_video_url}" download="asl_video.mp4"
285
- style="background-color: #4CAF50; color: white;
286
- padding: 12px 24px; text-decoration: none;
287
- border-radius: 4px; display: inline-block;">
288
- Download Video
289
- </a>
290
- <p style="margin-top: 10px; color: #666;">
291
- <small>Right-click and "Save As" if the download doesn't
292
- start automatically</small>
293
- </p>
294
- </div>
295
- """
296
-
297
  return {
298
  "status": "success",
299
  "videos": videos,
300
  "video_count": len(videos),
301
  "gloss": gloss,
302
  "cleaned_tokens": cleaned_tokens,
303
- "final_video_url": final_video_url
304
- }, final_video_url, download_html
305
 
306
 
307
  def parse_vectorize_and_search_unified_sync(input_data):
@@ -317,10 +275,35 @@ def predict_unified(input_data):
317
  return {
318
  "status": "error",
319
  "message": "Please provide text or upload a document"
320
- }, None, ""
321
 
322
  # Use the unified processing function
323
  result = parse_vectorize_and_search_unified_sync(input_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  return result
325
 
326
  except Exception as e:
@@ -328,90 +311,59 @@ def predict_unified(input_data):
328
  return {
329
  "status": "error",
330
  "message": f"An error occurred: {str(e)}"
331
- }, None, ""
332
 
333
 
334
  # Create the Gradio interface
335
  def create_interface():
336
  """Create and configure the Gradio interface"""
337
 
338
- with gr.Blocks(title=title) as demo:
339
- gr.Markdown(f"# {title}")
340
- gr.Markdown(description)
341
-
342
- with gr.Row():
343
- with gr.Column():
344
- # Input section
345
- gr.Markdown("## Input Options")
346
-
347
- # Text input
348
- gr.Markdown("### Option 1: Enter Text")
349
- text_input = gr.Textbox(
350
- label="Enter text to convert to ASL",
351
- placeholder="Type or paste your text here...",
352
- lines=5,
353
- max_lines=10
354
- )
355
-
356
- gr.Markdown("### Option 2: Upload Document")
357
- file_input = gr.File(
358
- label="Upload Document (pdf, txt, docx, or epub)",
359
- file_types=[".pdf", ".txt", ".docx", ".epub"]
360
- )
361
-
362
- # Processing options
363
- gr.Markdown("## Processing Options")
364
- use_r2 = gr.Checkbox(
365
- label="Use Cloud Storage (R2)",
366
- value=True,
367
- info=("Upload video to cloud storage for "
368
- "persistent access")
369
- )
370
-
371
- process_btn = gr.Button(
372
- "Generate ASL Video",
373
- variant="primary"
374
- )
375
-
376
- with gr.Column():
377
- # Output section
378
- gr.Markdown("## Results")
379
- json_output = gr.JSON(label="Processing Results")
380
- video_output = gr.Video(label="ASL Video Output")
381
- download_html = gr.HTML(label="Download Link")
382
-
383
- # Handle the processing
384
- def process_inputs(text, file, use_r2_storage):
385
- # Determine which input to use
386
- if text and text.strip():
387
- # Use text input
388
- input_data = text.strip()
389
- elif file is not None:
390
- # Use file input
391
- input_data = file
392
- else:
393
- # No input provided
394
- return {
395
- "status": "error",
396
- "message": "Please provide either text or upload a file"
397
- }, None, ""
398
-
399
- # Process using the unified function
400
- return predict_unified(input_data)
401
-
402
- process_btn.click(
403
- fn=process_inputs,
404
- inputs=[text_input, file_input, use_r2],
405
- outputs=[json_output, video_output, download_html]
406
- )
407
 
408
- # Footer
409
- gr.Markdown(article)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
 
411
- return demo
412
 
413
 
414
- # For Hugging Face Spaces, use the Blocks interface
415
  if __name__ == "__main__":
416
  demo = create_interface()
417
  demo.launch(
 
23
  R2_SECRET_ACCESS_KEY = os.environ.get("R2_SECRET_ACCESS_KEY")
24
 
25
  # Validate that required environment variables are set
26
+ if not all([R2_ASL_VIDEOS_URL, R2_ENDPOINT, R2_ACCESS_KEY_ID,
27
+ R2_SECRET_ACCESS_KEY]):
28
  raise ValueError(
29
  "Missing required R2 environment variables. "
30
  "Please check your .env file."
 
55
  )
56
 
57
  def clean_gloss_token(token):
58
+ """Clean a single gloss token"""
59
+ if not token:
60
+ return None
61
+
62
+ # Remove punctuation and convert to lowercase
63
+ cleaned = re.sub(r'[^\w\s]', '', token).lower().strip()
64
+
65
  # Remove extra whitespace
66
  cleaned = re.sub(r'\s+', ' ', cleaned).strip()
67
+
68
+ return cleaned if cleaned else None
69
 
70
 
71
  def upload_video_to_r2(video_path, bucket_name="asl-videos"):
 
87
  )
88
 
89
  # Replace the endpoint with the domain for uploading
90
+ public_domain = (R2_ENDPOINT.replace('https://', '')
91
+ .split('.')[0])
92
+ video_url = (f"https://{public_domain}.r2.cloudflarestorage.com/"
93
+ f"{bucket_name}/{unique_filename}")
94
 
95
  print(f"Video uploaded to R2: {video_url}")
96
  public_video_url = f"{R2_ASL_VIDEOS_URL}/{unique_filename}"
 
155
  print(f"Error cleaning up file: {e}")
156
 
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  def process_input(input_data):
159
+ """Process input data to extract text for ASL conversion"""
160
+ if isinstance(input_data, str):
161
+ # Direct text input
162
+ return input_data.strip()
163
+ elif hasattr(input_data, 'name'):
164
+ # File input - extract text from document
165
+ try:
166
+ print(f"Processing file: {input_data.name}")
167
+ gloss = asl_converter.convert_document(input_data.name)
168
+ print(f"Converted gloss: {gloss[:100]}...") # Show first 100 chars
169
+ return gloss
170
+ except Exception as e:
171
+ print(f"Error processing file: {e}")
172
+ return None
173
  else:
174
+ print(f"Unsupported input type: {type(input_data)}")
175
+ return None
 
 
176
 
177
 
178
  async def parse_vectorize_and_search_unified(input_data):
 
187
  return {
188
  "status": "error",
189
  "message": "Failed to process input"
190
+ }, None
191
 
192
  print("ASL", gloss)
193
 
 
241
  stitched_video_path = video_files[0]
242
 
243
  # Upload final video to R2 and get public URL
244
+ video_download_url = None
245
  if stitched_video_path:
246
+ video_download_url = upload_video_to_r2(stitched_video_path)
247
+ # Don't clean up the local file yet - let frontend use it first
 
248
 
249
  # Clean up individual video files after stitching
250
  for video_file in video_files:
251
  if video_file != stitched_video_path: # Don't delete the final output
252
  cleanup_temp_video(video_file)
253
 
254
+ # Return simplified results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  return {
256
  "status": "success",
257
  "videos": videos,
258
  "video_count": len(videos),
259
  "gloss": gloss,
260
  "cleaned_tokens": cleaned_tokens,
261
+ "video_download_url": video_download_url
262
+ }, stitched_video_path
263
 
264
 
265
  def parse_vectorize_and_search_unified_sync(input_data):
 
275
  return {
276
  "status": "error",
277
  "message": "Please provide text or upload a document"
278
+ }, None
279
 
280
  # Use the unified processing function
281
  result = parse_vectorize_and_search_unified_sync(input_data)
282
+
283
+ # Get the results
284
+ json_data, local_video_path = result
285
+
286
+ # If we have a local video path, use it directly for Gradio
287
+ if local_video_path and json_data.get("status") == "success":
288
+ # Schedule cleanup of the video file after a delay
289
+ # This gives Gradio time to load and display the video
290
+ import threading
291
+ import time
292
+
293
+ def delayed_cleanup(video_path):
294
+ time.sleep(30) # Wait 30 seconds before cleanup
295
+ cleanup_temp_video(video_path)
296
+
297
+ # Start cleanup thread
298
+ cleanup_thread = threading.Thread(
299
+ target=delayed_cleanup,
300
+ args=(local_video_path,)
301
+ )
302
+ cleanup_thread.daemon = True
303
+ cleanup_thread.start()
304
+
305
+ return json_data, local_video_path
306
+
307
  return result
308
 
309
  except Exception as e:
 
311
  return {
312
  "status": "error",
313
  "message": f"An error occurred: {str(e)}"
314
+ }, None
315
 
316
 
317
  # Create the Gradio interface
318
  def create_interface():
319
  """Create and configure the Gradio interface"""
320
 
321
+ def process_inputs(text, file):
322
+ """Process text or file input and return results"""
323
+ # Determine which input to use
324
+ if text and text.strip():
325
+ # Use text input
326
+ input_data = text.strip()
327
+ elif file is not None:
328
+ # Use file input
329
+ input_data = file
330
+ else:
331
+ # No input provided
332
+ return {
333
+ "status": "error",
334
+ "message": "Please provide either text or upload a file"
335
+ }, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
 
337
+ # Process using the unified function
338
+ return predict_unified(input_data)
339
+
340
+ # Create the interface
341
+ interface = gr.Interface(
342
+ fn=process_inputs,
343
+ inputs=[
344
+ gr.Textbox(
345
+ label="Enter text to convert to ASL",
346
+ placeholder="Type or paste your text here...",
347
+ lines=5
348
+ ),
349
+ gr.File(
350
+ label="Upload Document (pdf, txt, docx, or epub)",
351
+ file_types=[".pdf", ".txt", ".docx", ".epub"]
352
+ )
353
+ ],
354
+ outputs=[
355
+ gr.JSON(label="Results"),
356
+ gr.Video(label="ASL Video")
357
+ ],
358
+ title=title,
359
+ description=description,
360
+ article=article
361
+ )
362
 
363
+ return interface
364
 
365
 
366
+ # For Hugging Face Spaces, use the Interface
367
  if __name__ == "__main__":
368
  demo = create_interface()
369
  demo.launch(