Spaces:

JigsawStack
/

speech-to-text

Running

App Files Files Community

vineet124jig committed 13 days ago

Commit

6704495

verified ·

1 Parent(s): 1c7f64e

Upload 2 files

Browse files

Files changed (2) hide show

app.py +93 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import gradio as gr
+import requests
+import json
+import os
+BASE_URL = "https://api.jigsawstack.com/v1"
+headers = {
+    "x-api-key": os.getenv("JIGSAWSTACK_API_KEY")
+}
+def transcribe_audio(input_type, audio_url, file_store_key, language):
+    """Transcribe audio using JigsawStack Speech-to-Text API"""
+    if input_type == "Audio URL" and not audio_url:
+        return "Error: Please provide an audio URL.", ""
+    if input_type == "File Store Key" and not file_store_key:
+        return "Error: Please provide a file store key.", ""
+    try:
+        payload = {}
+        if input_type == "Audio URL":
+            payload["url"] = audio_url.strip()
+        if input_type == "File Store Key":
+            payload["file_store_key"] = file_store_key.strip()
+        if language:
+            payload["language"] = language
+        response = requests.post(
+            f"{BASE_URL}/ai/transcribe",
+            headers=headers,
+            json=payload
+        )
+        response.raise_for_status()
+        result = response.json()
+        if not result.get("success"):
+            error_msg = f"Error: API call failed - {result.get('message', 'Unknown error')}"
+            return error_msg, ""
+        transcribed_text = result.get("text", "")
+        return "Transcription completed successfully!", transcribed_text
+    except requests.exceptions.RequestException as e:
+        return f"Request failed: {str(e)}", ""
+    except Exception as e:
+        return f"An unexpected error occurred: {str(e)}", ""
+with gr.Blocks() as demo:
+    gr.Markdown("""
+        <div style='text-align: center; margin-bottom: 24px;'>
+            <h1 style='font-size:2.2em; margin-bottom: 0.2em;'>Speech-to-Text Transcription</h1>
+            <p style='font-size:1.2em; margin-top: 0;'>Transcribe video and audio files with ease leveraging Whisper large V3 AI model.</p>
+            <p style='font-size:1em; margin-top: 0.5em;'>Supported formats: MP3, WAV, M4A, FLAC, AAC, OGG, WEBM. Max file size: 100MB, Max duration: 4 hours.</p>
+        </div>
+        """)
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("#### Audio Input")
+            input_type = gr.Radio([
+                "Audio URL",
+                "File Store Key"
+            ], value="Audio URL", label="Select Input Type")
+            audio_url = gr.Textbox(
+                label="Audio URL",
+                placeholder="Enter the URL of the audio/video file...",
+                visible=True
+            )
+            file_store_key = gr.Textbox(
+                label="File Store Key",
+                placeholder="Enter the file store key from JigsawStack File Storage...",
+                visible=False
+            )
+            language = gr.Textbox(
+                label="Language (optional)",
+                placeholder="e.g., en, es, fr, de, ja, zh... (leave empty for auto-detect)"
+            )
+            transcribe_btn = gr.Button("Start Transcription", variant="primary")
+        with gr.Column():
+            gr.Markdown("#### Transcription Result")
+            status_message = gr.Textbox(label="Status", interactive=False)
+            transcribed_text = gr.Textbox(
+                label="Transcribed Text",
+                interactive=False,
+                lines=10,
+                max_lines=20
+            )
+    def toggle_inputs(selected):
+        if selected == "Audio URL":
+            return gr.update(visible=True), gr.update(visible=False)
+        else:
+            return gr.update(visible=False), gr.update(visible=True)
+    input_type.change(toggle_inputs, inputs=[input_type], outputs=[audio_url, file_store_key])
+    transcribe_btn.click(
+        transcribe_audio,
+        inputs=[input_type, audio_url, file_store_key, language],
+        outputs=[status_message, transcribed_text]
+    )
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+requests
+Pillow