Spaces:

deenasun
/

ai-sl-api

Running

App Files Files Community

deenasun committed on Jun 22

Commit

dadcb61

1 Parent(s): adcbc15

add cloudflare upload and base64 for video output response to gradio

Browse files

Files changed (3) hide show

README.md +80 -0
app.py +209 -38
example_usage.py +186 -0

README.md CHANGED Viewed

@@ -11,3 +11,83 @@ license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# AI-SL API
+Convert text documents to American Sign Language (ASL) videos using AI.
+## Video Output Options
+The Gradio interface provides multiple ways for users to receive and download the generated ASL videos:
+### 1. R2 Cloud Storage (Recommended)
+- Videos are automatically uploaded to Cloudflare R2 storage
+- Returns a public URL from which users can download the video directly
+- Videos persist and can be shared via URL
+- Includes a styled download button in the interface
+### 2. Base64 Encoding (Alternative)
+- Videos are embedded as base64 data directly in the response
+- No external storage required
+- Good for smaller videos or when you want to avoid cloud storage
+- Can be downloaded directly from the interface
+### 3. Programmatic Access
+Users can access the video output programmatically using:
+```python
+from gradio_client import Client
+# Connect to the running interface
+client = Client("http://localhost:7860")
+# Upload a document and get results
+result = client.predict(
+    "path/to/document.pdf",
+    api_name="/predict"
+)
+# The result contains: (json_data, video_output, download_html)
+json_data, video_url, download_html = result
+# Download the video
+import requests
+response = requests.get(video_url)
+with open("asl_video.mp4", "wb") as f:
+    f.write(response.content)
+```
+### 4. Direct Download from Interface
+- The interface includes a styled download button
+- Users can right-click the button and choose "Save As" if the automatic download doesn't start
+- Video files are named `asl_video.mp4` by default
+## Example Usage
+See `example_usage.py` for complete examples of how to:
+- Download videos from URLs
+- Process base64 video data
+- Use the interface programmatically
+- Perform further video processing
+## Requirements
+- Python 3.7+
+- Required packages listed in `requirements.txt`
+- Cloudflare R2 credentials (for cloud storage option)
+- Supabase credentials for video database
+## Setup
+1. Install dependencies: `pip install -r requirements.txt`
+2. Set up the required environment variables in a `.env` file
+3. Run the interface: `python app.py`
+## Video Processing
+Once you have the video file, you can:
+- Upload to YouTube, Google Drive, or other services
+- Analyze with OpenCV for computer vision tasks
+- Convert to different formats
+- Extract frames for further processing
+- Add subtitles or overlays

app.py CHANGED Viewed

@@ -10,6 +10,8 @@ from botocore.config import Config
 from dotenv import load_dotenv
 import requests
 import tempfile
 # Load environment variables from .env file
 load_dotenv()
@@ -30,7 +32,8 @@ article = ("<p style='text-align: center'><a href='https://github.com/deenasun'
 inputs = gr.File(label="Upload Document (pdf, txt, docx, or epub)")
 outputs = [
     gr.JSON(label="Processing Results"),
-    gr.Video(label="ASL Video Output")
 ]
 asl_converter = DocumentToASLConverter()
@@ -57,6 +60,88 @@ def clean_gloss_token(token):
     cleaned = cleaned.lower()
     return cleaned
 async def parse_vectorize_and_search(file):
     print(file)
     gloss = asl_converter.convert_document(file)
@@ -109,61 +194,147 @@ async def parse_vectorize_and_search(file):
         # If only one video, just use it directly
         stitched_video_path = video_files[0]
     # Clean up individual video files after stitching
     for video_file in video_files:
         if video_file != stitched_video_path:  # Don't delete the final output
             cleanup_temp_video(video_file)
     return {
         "status": "success",
         "videos": videos,
         "video_count": len(videos),
         "gloss": gloss,
-        "cleaned_tokens": cleaned_tokens
-    }, stitched_video_path
 # Create a synchronous wrapper for Gradio
 def parse_vectorize_and_search_sync(file):
     return asyncio.run(parse_vectorize_and_search(file))
-def download_video_from_url(video_url):
-    """
-    Download a video from a public R2 URL
-    Returns the local file path where the video is saved
-    """
-    try:
-        # Create a temporary file with .mp4 extension
-        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
-        temp_path = temp_file.name
-        temp_file.close()
-        # Download the video
-        print(f"Downloading video from: {video_url}")
-        response = requests.get(video_url, stream=True)
-        response.raise_for_status()
-        # Save to temporary file
-        with open(temp_path, 'wb') as f:
-            for chunk in response.iter_content(chunk_size=8192):
-                f.write(chunk)
-        print(f"Video downloaded to: {temp_path}")
-        return temp_path
-    except Exception as e:
-        print(f"Error downloading video: {e}")
-        return None
-def cleanup_temp_video(file_path):
     """
-    Clean up temporary video file
     """
-    try:
-        if file_path and os.path.exists(file_path):
-            os.unlink(file_path)
-            print(f"Cleaned up: {file_path}")
-    except Exception as e:
-        print(f"Error cleaning up file: {e}")
 intf = gr.Interface(
     fn=parse_vectorize_and_search_sync,

 from dotenv import load_dotenv
 import requests
 import tempfile
+import uuid
+import base64
 # Load environment variables from .env file
 load_dotenv()
 inputs = gr.File(label="Upload Document (pdf, txt, docx, or epub)")
 outputs = [
     gr.JSON(label="Processing Results"),
+    gr.Video(label="ASL Video Output"),
+    gr.HTML(label="Download Link")
 ]
 asl_converter = DocumentToASLConverter()
     cleaned = cleaned.lower()
     return cleaned
+def upload_video_to_r2(video_path, bucket_name="ai-sl-videos"):
+    """
+    Upload a video file to R2 and return a public URL
+    """
+    try:
+        # Generate a unique filename
+        file_extension = os.path.splitext(video_path)[1]
+        unique_filename = f"{uuid.uuid4()}{file_extension}"
+        # Upload to R2
+        with open(video_path, 'rb') as video_file:
+            s3.upload_fileobj(
+                video_file,
+                bucket_name,
+                unique_filename,
+                ExtraArgs={'ACL': 'public-read'}
+            )
+        # Generate the public URL
+        video_url = f"{R2_ENDPOINT}/{bucket_name}/{unique_filename}"
+        print(f"Video uploaded to R2: {video_url}")
+        return video_url
+    except Exception as e:
+        print(f"Error uploading video to R2: {e}")
+        return None
+def video_to_base64(video_path):
+    """
+    Convert a video file to base64 string for direct download
+    """
+    try:
+        with open(video_path, 'rb') as video_file:
+            video_data = video_file.read()
+            base64_data = base64.b64encode(video_data).decode('utf-8')
+            return f"data:video/mp4;base64,{base64_data}"
+    except Exception as e:
+        print(f"Error converting video to base64: {e}")
+        return None
+def download_video_from_url(video_url):
+    """
+    Download a video from a public R2 URL
+    Returns the local file path where the video is saved
+    """
+    try:
+        # Create a temporary file with .mp4 extension
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
+        temp_path = temp_file.name
+        temp_file.close()
+        # Download the video
+        print(f"Downloading video from: {video_url}")
+        response = requests.get(video_url, stream=True)
+        response.raise_for_status()
+        # Save to temporary file
+        with open(temp_path, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        print(f"Video downloaded to: {temp_path}")
+        return temp_path
+    except Exception as e:
+        print(f"Error downloading video: {e}")
+        return None
+def cleanup_temp_video(file_path):
+    """
+    Clean up temporary video file
+    """
+    try:
+        if file_path and os.path.exists(file_path):
+            os.unlink(file_path)
+            print(f"Cleaned up: {file_path}")
+    except Exception as e:
+        print(f"Error cleaning up file: {e}")
 async def parse_vectorize_and_search(file):
     print(file)
     gloss = asl_converter.convert_document(file)
         # If only one video, just use it directly
         stitched_video_path = video_files[0]
+    # Upload final video to R2 and get public URL
+    final_video_url = None
+    if stitched_video_path:
+        final_video_url = upload_video_to_r2(stitched_video_path)
+        # Clean up the local file after upload
+        cleanup_temp_video(stitched_video_path)
     # Clean up individual video files after stitching
     for video_file in video_files:
         if video_file != stitched_video_path:  # Don't delete the final output
             cleanup_temp_video(video_file)
+    # Create download link HTML
+    download_html = ""
+    if final_video_url:
+        download_html = f"""
+        <div style="text-align: center; padding: 20px;">
+            <h3>Download Your ASL Video</h3>
+            <a href="{final_video_url}" download="asl_video.mp4"
+               style="background-color: #4CAF50; color: white;
+                      padding: 12px 24px; text-decoration: none;
+                      border-radius: 4px; display: inline-block;">
+                Download Video
+            </a>
+            <p style="margin-top: 10px; color: #666;">
+                <small>Right-click and "Save As" if the download doesn't
+                       start automatically</small>
+            </p>
+        </div>
+        """
     return {
         "status": "success",
         "videos": videos,
         "video_count": len(videos),
         "gloss": gloss,
+        "cleaned_tokens": cleaned_tokens,
+        "final_video_url": final_video_url
+    }, final_video_url, download_html
 # Synchronous wrapper for Gradio: drives the async pipeline to completion
 # with asyncio.run and returns whatever the coroutine returns.
 def parse_vectorize_and_search_sync(file):
     return asyncio.run(parse_vectorize_and_search(file))
+async def parse_vectorize_and_search_base64(file):
     """
+    Alternative version that returns video as base64 data instead of uploading to R2
     """
+    print(file)
+    gloss = asl_converter.convert_document(file)
+    print("ASL", gloss)
+    # Split by spaces and clean each token
+    gloss_tokens = gloss.split()
+    cleaned_tokens = []
+    for token in gloss_tokens:
+        cleaned = clean_gloss_token(token)
+        if cleaned:  # Only add non-empty tokens
+            cleaned_tokens.append(cleaned)
+    print("Cleaned tokens:", cleaned_tokens)
+    videos = []
+    video_files = []  # Store local file paths for stitching
+    for g in cleaned_tokens:
+        print(f"Processing {g}")
+        try:
+            result = await vectorizer.vector_query_from_supabase(query=g)
+            print("result", result)
+            if result.get("match", False):
+                video_url = result["video_url"]
+                videos.append(video_url)
+                # Download the video
+                local_path = download_video_from_url(video_url)
+                if local_path:
+                    video_files.append(local_path)
+        except Exception as e:
+            print(f"Error processing {g}: {e}")
+            continue
+    # Create stitched video if we have multiple videos
+    stitched_video_path = None
+    if len(video_files) > 1:
+        try:
+            print(f"Creating stitched video from {len(video_files)} videos...")
+            stitched_video_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
+            create_multi_stitched_video(video_files, stitched_video_path)
+            print(f"Stitched video created: {stitched_video_path}")
+        except Exception as e:
+            print(f"Error creating stitched video: {e}")
+            stitched_video_path = None
+    elif len(video_files) == 1:
+        # If only one video, just use it directly
+        stitched_video_path = video_files[0]
+    # Convert final video to base64
+    final_video_base64 = None
+    if stitched_video_path:
+        final_video_base64 = video_to_base64(stitched_video_path)
+        # Clean up the local file after conversion
+        cleanup_temp_video(stitched_video_path)
+    # Clean up individual video files after stitching
+    for video_file in video_files:
+        if video_file != stitched_video_path:  # Don't delete the final output
+            cleanup_temp_video(video_file)
+    # Create download link HTML for base64
+    download_html = ""
+    if final_video_base64:
+        download_html = f"""
+        <div style="text-align: center; padding: 20px;">
+            <h3>Download Your ASL Video</h3>
+            <a href="{final_video_base64}" download="asl_video.mp4"
+               style="background-color: #4CAF50; color: white;
+                      padding: 12px 24px; text-decoration: none;
+                      border-radius: 4px; display: inline-block;">
+                Download Video
+            </a>
+            <p style="margin-top: 10px; color: #666;">
+                <small>Video is embedded directly - click to download</small>
+            </p>
+        </div>
+        """
+    return {
+        "status": "success",
+        "videos": videos,
+        "video_count": len(videos),
+        "gloss": gloss,
+        "cleaned_tokens": cleaned_tokens,
+        "video_format": "base64"
+    }, final_video_base64, download_html
+def parse_vectorize_and_search_base64_sync(file):
+    return asyncio.run(parse_vectorize_and_search_base64(file))
 intf = gr.Interface(
     fn=parse_vectorize_and_search_sync,

example_usage.py ADDED Viewed

	@@ -0,0 +1,186 @@

+"""
+Example: How to programmatically access video output from the AI-SL API
+This file demonstrates different ways users can receive and process video
+output from the Gradio interface.
+"""
+import requests
+import base64
+from pathlib import Path
+def download_video_from_url(video_url, output_path="downloaded_video.mp4"):
+    """
+    Download a video from a public URL
+    """
+    try:
+        response = requests.get(video_url, stream=True)
+        response.raise_for_status()
+        with open(output_path, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        print(f"Video downloaded to: {output_path}")
+        return output_path
+    except Exception as e:
+        print(f"Error downloading video: {e}")
+        return None
+def save_base64_video(base64_data, output_path="video_from_base64.mp4"):
+    """
+    Save a base64-encoded video to a file
+    """
+    try:
+        # Remove the data URL prefix if present
+        if base64_data.startswith('data:video/mp4;base64,'):
+            base64_data = base64_data.split(',')[1]
+        # Decode and save
+        video_data = base64.b64decode(base64_data)
+        with open(output_path, 'wb') as f:
+            f.write(video_data)
+        print(f"Video saved from base64 to: {output_path}")
+        return output_path
+    except Exception as e:
+        print(f"Error saving base64 video: {e}")
+        return None
+def process_gradio_output(gradio_result):
+    """
+    Process the output from the Gradio interface
+    gradio_result should be a tuple: (json_data, video_output, download_html)
+    """
+    json_data, video_output, download_html = gradio_result
+    print("Processing Results:")
+    print(f"Status: {json_data['status']}")
+    print(f"Video Count: {json_data['video_count']}")
+    print(f"Gloss: {json_data['gloss']}")
+    # Handle video output based on format
+    if json_data.get('video_format') == 'base64':
+        # Video is base64 encoded
+        print("Video format: Base64")
+        video_path = save_base64_video(video_output, "asl_output.mp4")
+    else:
+        # Video is a URL (from R2 upload)
+        print("Video format: URL")
+        video_path = download_video_from_url(video_output, "asl_output.mp4")
+    return video_path
+# Example usage scenarios:
+def example_1_direct_download():
+    """
+    Example 1: Direct download from R2 URL
+    """
+    print("=== Example 1: Direct Download ===")
+    # Simulate getting a video URL from the interface
+    video_url = "https://your-r2-endpoint.com/bucket/video.mp4"
+    # Download the video
+    video_path = download_video_from_url(video_url)
+    if video_path:
+        print(f"Video ready for processing: {video_path}")
+        # Now you can use the video file for further processing
+        # e.g., upload to another service, analyze with OpenCV, etc.
+def example_2_base64_processing():
+    """
+    Example 2: Processing base64 video data
+    """
+    print("=== Example 2: Base64 Processing ===")
+    # Simulate getting base64 data from the interface
+    base64_video = ("data:video/mp4;base64,AAAAIGZ0eXBpc29tAAACAGlzb21pc28yYXZjMW1wNDEAAAAIZnJlZQAA...")  # noqa: E501
+    # Save the video
+    video_path = save_base64_video(base64_video)
+    if video_path:
+        print(f"Video ready for processing: {video_path}")
+def example_3_programmatic_interface():
+    """
+    Example 3: Using the Gradio interface programmatically
+    """
+    print("=== Example 3: Programmatic Interface ===")
+    # If you want to call the Gradio interface programmatically
+    # You can use the gradio_client library
+    try:
+        from gradio_client import Client
+        # Connect to your running Gradio interface
+        client = Client("http://localhost:7860")  # Adjust URL as needed
+        # Upload a document and get results
+        result = client.predict(
+            "path/to/your/document.pdf",  # File path
+            api_name="/predict"  # Adjust based on your interface
+        )
+        # Process the results
+        video_path = process_gradio_output(result)
+        print(f"Processed video: {video_path}")
+    except ImportError:
+        print("gradio_client not installed. Install with: "
+              "pip install gradio_client")
+    except Exception as e:
+        print(f"Error calling Gradio interface: {e}")
+def example_4_video_processing():
+    """
+    Example 4: Further video processing
+    """
+    print("=== Example 4: Video Processing ===")
+    # Once you have the video file, you can process it further
+    video_path = "asl_output.mp4"
+    if Path(video_path).exists():
+        print(f"Processing video: {video_path}")
+        # Example: Get video information
+        try:
+            import cv2
+            cap = cv2.VideoCapture(video_path)
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            duration = frame_count / fps
+            cap.release()
+            print(f"Video info: {duration:.2f} seconds, {fps} FPS, "
+                  f"{frame_count} frames")
+        except ImportError:
+            print("OpenCV not installed. Install with: "
+                  "pip install opencv-python")
+        # Example: Upload to another service
+        # upload_to_youtube(video_path)
+        # upload_to_drive(video_path)
+        # etc.
+if __name__ == "__main__":
+    # Run examples
+    example_1_direct_download()
+    example_2_base64_processing()
+    example_3_programmatic_interface()
+    example_4_video_processing()