Spaces:

JigsawStack
/

speech-to-text

Sleeping

App Files Files Community

speech-to-text / app.py

vineet124jig

Upload 2 files

6704495 verified 14 days ago

raw

history blame

3.81 kB

	import gradio as gr
	import requests
	import json
	import os

	# Root of the JigsawStack REST API; endpoint paths are appended to this.
	BASE_URL = "https://api.jigsawstack.com/v1"

	# Request headers carrying the API key, which is read from the environment
	# once at import time. The value is None when JIGSAWSTACK_API_KEY is unset.
	headers = {"x-api-key": os.environ.get("JIGSAWSTACK_API_KEY")}

	def transcribe_audio(input_type, audio_url, file_store_key, language):
	    """Transcribe audio using the JigsawStack Speech-to-Text API.

	    Args:
	        input_type: Either "Audio URL" or "File Store Key"; selects which
	            source field is sent in the request payload.
	        audio_url: URL of the audio/video file, used when ``input_type``
	            is "Audio URL".
	        file_store_key: File store key, used when ``input_type`` is
	            "File Store Key".
	        language: Optional language code. Left out of the payload when
	            empty or whitespace-only.

	    Returns:
	        A ``(status_message, transcribed_text)`` tuple. When validation,
	        the HTTP request, or the API call fails, ``transcribed_text`` is
	        "" and ``status_message`` describes the problem.
	    """
	    # Normalise first so that whitespace-only values are treated as missing
	    # instead of slipping past validation and being sent as empty strings.
	    audio_url = (audio_url or "").strip()
	    file_store_key = (file_store_key or "").strip()
	    language = (language or "").strip()

	    if input_type == "Audio URL":
	        if not audio_url:
	            return "Error: Please provide an audio URL.", ""
	        payload = {"url": audio_url}
	    elif input_type == "File Store Key":
	        if not file_store_key:
	            return "Error: Please provide a file store key.", ""
	        payload = {"file_store_key": file_store_key}
	    else:
	        # Without a recognised input type the payload would carry no audio
	        # source at all, so fail locally rather than post it.
	        return "Error: Unsupported input type.", ""

	    if language:
	        payload["language"] = language

	    try:
	        response = requests.post(
	            f"{BASE_URL}/ai/transcribe",
	            headers=headers,
	            json=payload,
	            # (connect, read) seconds. Without a timeout a stalled
	            # connection would block this handler indefinitely.
	            # NOTE(review): the read limit is a guess sized for long
	            # recordings -- tune against real API latency.
	            timeout=(10, 600),
	        )
	        response.raise_for_status()
	        result = response.json()
	        if not result.get("success"):
	            error_msg = f"Error: API call failed - {result.get('message', 'Unknown error')}"
	            return error_msg, ""
	        return "Transcription completed successfully!", result.get("text", "")
	    except requests.exceptions.RequestException as e:
	        return f"Request failed: {str(e)}", ""
	    except Exception as e:
	        # Last-resort boundary: report the failure through the status
	        # message instead of letting it propagate to the caller.
	        return f"An unexpected error occurred: {str(e)}", ""

	# Assemble the Gradio interface: a header, one column of inputs and one
	# column of results, then the event wiring between them.
	with gr.Blocks() as demo:
	    # Page header (raw HTML passed through the Markdown component).
	    gr.Markdown("""
	<div style='text-align: center; margin-bottom: 24px;'>
	<h1 style='font-size:2.2em; margin-bottom: 0.2em;'>Speech-to-Text Transcription</h1>
	<p style='font-size:1.2em; margin-top: 0;'>Transcribe video and audio files with ease leveraging Whisper large V3 AI model.</p>
	<p style='font-size:1em; margin-top: 0.5em;'>Supported formats: MP3, WAV, M4A, FLAC, AAC, OGG, WEBM. Max file size: 100MB, Max duration: 4 hours.</p>
	</div>
	""")

	    with gr.Row():
	        # First column: audio source selection, optional language, trigger.
	        with gr.Column():
	            gr.Markdown("#### Audio Input")
	            input_type = gr.Radio(
	                choices=["Audio URL", "File Store Key"],
	                value="Audio URL",
	                label="Select Input Type",
	            )
	            # Only one of the two source boxes is visible at a time; the
	            # URL box starts visible to match the radio's default value.
	            audio_url = gr.Textbox(
	                label="Audio URL",
	                placeholder="Enter the URL of the audio/video file...",
	                visible=True,
	            )
	            file_store_key = gr.Textbox(
	                label="File Store Key",
	                placeholder="Enter the file store key from JigsawStack File Storage...",
	                visible=False,
	            )
	            language = gr.Textbox(
	                label="Language (optional)",
	                placeholder="e.g., en, es, fr, de, ja, zh... (leave empty for auto-detect)",
	            )
	            transcribe_btn = gr.Button("Start Transcription", variant="primary")

	        # Second column: read-only status line and transcription output.
	        with gr.Column():
	            gr.Markdown("#### Transcription Result")
	            status_message = gr.Textbox(label="Status", interactive=False)
	            transcribed_text = gr.Textbox(
	                label="Transcribed Text",
	                interactive=False,
	                lines=10,
	                max_lines=20,
	            )

	    def toggle_inputs(choice):
	        """Return visibility updates for (audio_url, file_store_key)."""
	        url_selected = choice == "Audio URL"
	        return gr.update(visible=url_selected), gr.update(visible=not url_selected)

	    # Swap the visible source box whenever the radio selection changes.
	    input_type.change(
	        fn=toggle_inputs,
	        inputs=[input_type],
	        outputs=[audio_url, file_store_key],
	    )

	    # Run the transcription and fill in the status and text boxes.
	    transcribe_btn.click(
	        fn=transcribe_audio,
	        inputs=[input_type, audio_url, file_store_key, language],
	        outputs=[status_message, transcribed_text],
	    )

	demo.launch()