Spaces:

jerrypan7
/

demo-asr

Running

App Files Files Community

demo-asr / app.py

jerrypan7

Update app.py

461d5bc verified 9 months ago

raw

history blame

7.63 kB

	import gradio as gr
	import requests
	from typing import Optional
	import json
	import subprocess
	import os
	import tempfile # Import tempfile
	from pydub import AudioSegment # Import AudioSegment

	# Base URL of the FastAPI transcription backend; run_asr posts to "<API_URL>/asr".
	API_URL = "http://astarwiz.com:9998"
	# RapidAPI key taken from the environment; None when RAPID_API_KEY is unset.
	rapid_key = os.getenv("RAPID_API_KEY")
	# Which input changed most recently: 1 = audio widget, 2 = YouTube URL box.
	# Written by the Gradio change handlers and read by run_asr.
	last_modified = 1
	def fetch_youtube_id(youtube_url: str) -> str:
	    """Extract the video ID from a YouTube URL.

	    Three URL shapes are recognised, checked in this order:
	    a ``v=`` query parameter (``.../watch?v=ID&t=1``), a ``youtu.be/ID``
	    short link, and a ``.../shorts/ID`` link. Anything after the ID
	    (query string, fragment, trailing slash) is discarded.

	    Args:
	        youtube_url: The URL as pasted by the user; surrounding whitespace
	            is ignored.

	    Returns:
	        The bare video ID.

	    Raises:
	        ValueError: If no ID can be found, or the ID contains characters
	            other than letters, digits, ``_`` and ``-``. ``ValueError`` is
	            a subclass of ``Exception``, so existing ``except Exception``
	            callers keep working.
	    """
	    import re  # function-scope import: the file does not import re at top level

	    youtube_url = youtube_url.strip()

	    # Anchor "v=" to a query separator so parameters like "rev=" do not match.
	    found = (
	        re.search(r"[?&]v=([^&#]+)", youtube_url)
	        or re.search(r"youtu\.be/([^/?&#]+)", youtube_url)
	        or re.search(r"shorts/([^/?&#]+)", youtube_url)
	    )

	    # The ID ends up in a file name and in an iframe src elsewhere in this
	    # file, so reject anything that is not a plain ID token.
	    if found is None or not re.fullmatch(r"[A-Za-z0-9_-]+", found.group(1)):
	        raise ValueError("Unsupported URL format")

	    return found.group(1)

	def download_youtube_audio(youtube_url: str, output_dir: Optional[str] = None) -> Optional[str]:
	    """Download a YouTube video's audio and convert it to a 16 kHz MP3.

	    Download links are requested from the youtube86 RapidAPI endpoint. The
	    first entry flagged ``isBundle`` that downloads successfully is
	    converted with pydub and written to ``<output_dir>/<video_id>.mp3``.
	    If that file already exists it is returned without any network access.

	    Args:
	        youtube_url: URL of the video; must be accepted by fetch_youtube_id.
	        output_dir: Directory for the MP3. Defaults to the system temp dir.

	    Returns:
	        Path of the MP3 file, or None when the API key is missing, the link
	        API answers with a non-200 status or an unexpected payload, or no
	        bundled stream could be downloaded.

	    Raises:
	        Exception: Propagated from fetch_youtube_id for unsupported URLs,
	            from requests on network errors/timeouts, and from pydub when
	            the conversion fails.
	    """
	    # (connect, read) timeouts in seconds so a stalled server cannot hang the app.
	    request_timeout = (10, 120)

	    video_id = fetch_youtube_id(youtube_url)

	    if output_dir is None:
	        output_dir = tempfile.gettempdir()

	    output_filename = os.path.join(output_dir, f"{video_id}.mp3")

	    if os.path.exists(output_filename):
	        return output_filename  # Return if the file already exists

	    if not rapid_key:
	        # Without this check requests fails with an obscure invalid-header error.
	        print("Error: RAPID_API_KEY is not set")
	        return None

	    api_url = "https://youtube86.p.rapidapi.com/api/youtube/links"
	    headers = {
	        'Content-Type': 'application/json',
	        'x-rapidapi-host': 'youtube86.p.rapidapi.com',
	        'x-rapidapi-key': rapid_key,
	    }
	    payload = {"url": youtube_url}

	    response = requests.post(api_url, headers=headers, json=payload, timeout=request_timeout)
	    print('Fetched audio links')

	    if response.status_code != 200:
	        print("Error:", response.status_code, response.text)
	        return None  # Return None on failure

	    try:
	        candidates = response.json()[0]['urls']
	    except (ValueError, LookupError, TypeError) as exc:
	        print("Error: unexpected response from link API:", exc)
	        return None

	    for candidate in candidates:
	        if not candidate.get('isBundle'):
	            continue

	        audio_url = candidate.get('url')
	        extension = candidate.get('extension')
	        if not audio_url or not extension:
	            continue

	        audio_response = requests.get(audio_url, timeout=request_timeout)
	        if audio_response.status_code != 200:
	            continue

	        # Use a uniquely named temp file: with a fixed "<video_id>.<extension>"
	        # name an "mp3" download would share its path with the output and the
	        # cleanup below would delete the result.
	        fd, temp_filename = tempfile.mkstemp(suffix=f".{extension}", dir=output_dir)
	        try:
	            with os.fdopen(fd, 'wb') as audio_file:
	                audio_file.write(audio_response.content)

	            # Convert to MP3 and downsample to 16000 Hz
	            audio = AudioSegment.from_file(temp_filename, format=extension)
	            audio = audio.set_frame_rate(16000)
	            audio.export(output_filename, format="mp3", parameters=["-ar", "16000"])
	        except Exception:
	            # Do not leave a partial MP3 behind; it would be served as a
	            # cache hit on the next call.
	            if os.path.exists(output_filename):
	                os.remove(output_filename)
	            raise
	        finally:
	            os.remove(temp_filename)  # Remove the temporary file

	        return output_filename  # Return the final MP3 filename

	    return None  # Return None if no successful download occurs

	def run_asr(audio_file, youtube_url):
	    """Transcribe an audio file or a YouTube video via the ASR backend.

	    The YouTube URL is used only when the module-level ``last_modified``
	    flag is 2 and a URL is present; otherwise ``audio_file`` is used.
	    The audio is posted to ``{API_URL}/asr``.

	    Args:
	        audio_file: Path of the uploaded/recorded audio, or a falsy value.
	        youtube_url: Contents of the YouTube URL textbox.

	    Returns:
	        The transcription text on success, otherwise a human-readable
	        message (prefixed with "Error:" for failures). Exceptions are
	        never propagated to the caller.
	    """
	    temp_file = None
	    with_timestamp = False
	    model_choice = "local_whisper"
	    print("audio_file local_whisper ", audio_file, youtube_url)
	    try:
	        if last_modified == 2 and youtube_url:
	            # It's a YouTube URL
	            audio_file = download_youtube_audio(youtube_url)
	            temp_file = audio_file
	            if not audio_file:
	                # The downloader reports failure with None; say so explicitly
	                # instead of failing later inside open(None).
	                return "Error: could not download audio from the YouTube URL."
	        elif not audio_file:
	            return "Please provide either an audio file or a YouTube URL."

	        # Update model_name based on the user's choice
	        if model_choice == "whisper_v3":
	            model_name = "official-v3"
	        else:
	            model_name = "whisper-large-v2-imda"

	        data = {'language': 'en', 'model_name': model_name, 'with_timestamp': with_timestamp}

	        # The with-block closes the upload handle even if the request fails.
	        with open(audio_file, 'rb') as audio_handle:
	            response = requests.post(
	                f"{API_URL}/asr",
	                data=data,
	                files={'file': audio_handle},
	                timeout=(10, 600),  # (connect, read) seconds; long audio needs a generous read limit
	            )

	        if response.status_code == 200:
	            return response.json().get("text", "")
	        return f"Error: {response.status_code}"
	    except Exception as e:
	        return f"Error: {str(e)}"
	    finally:
	        # Clean up the temporary file if it was a YouTube download
	        if temp_file and os.path.exists(temp_file):
	            os.remove(temp_file)


	# Create the Gradio interface with improved aesthetics
	# Build the Gradio UI: audio/YouTube inputs on the left, transcription on the right.
	with gr.Blocks() as demo:
	    gr.Markdown("Audio Transcription Service")
	    gr.Markdown("Upload an audio file, record your voice, or paste a YouTube URL to get an English transcription.")

	    with gr.Row():
	        with gr.Column(scale=2):
	            audio_input = gr.Audio(sources=['microphone', 'upload'], type='filepath', label="Audio Input")
	            youtube_input = gr.Textbox(label="YouTube URL", placeholder="Or paste a YouTube URL here...")
	            # Hidden until a valid YouTube URL is entered (see update_video_embed).
	            video_player = gr.HTML(visible=False)
	            #timestamp_toggle = gr.Checkbox(label="Include Timestamps", value=False)
	            #model_choice = gr.Radio(["local_whisper", "whisper_v3"], label="Model Selection", value="local_whisper")
	        with gr.Column(scale=3):
	            result = gr.Textbox(
	                label="Transcription Result",
	                placeholder="Your transcription will appear here...",
	                lines=10,
	            )

	    # Disabled until one of the inputs has content.
	    run_button = gr.Button("Transcribe Audio", variant="primary", interactive=False)
	    run_button.click(run_asr, inputs=[audio_input, youtube_input], outputs=[result])

	    # NOTE(review): last_modified is a module-level global, so it is shared by
	    # every browser session using this app; consider per-session state.

	    def update_button_state_audio(audio, youtube_url):
	        """Record that the audio input changed last and refresh the button state."""
	        global last_modified
	        print(" audio: ", audio, " youtube_url: ", youtube_url)
	        last_modified = 1
	        # Enable the button only when some input is present. A None or
	        # whitespace-only URL counts as empty.
	        has_url = bool(youtube_url and youtube_url.strip())
	        return gr.update(interactive=bool(audio) or has_url)

	    def update_button_state_youtube(audio, youtube_url):
	        """Record that the YouTube URL changed last and refresh the button state."""
	        global last_modified
	        print(" youtube: ", audio, " youtube_url: ", youtube_url)
	        last_modified = 2
	        # Enable the button only when some input is present. A None or
	        # whitespace-only URL counts as empty.
	        has_url = bool(youtube_url and youtube_url.strip())
	        return gr.update(interactive=bool(audio) or has_url)

	    audio_input.change(
	        fn=update_button_state_audio,
	        inputs=[audio_input, youtube_input],
	        outputs=run_button,
	    )
	    youtube_input.change(
	        fn=update_button_state_youtube,
	        inputs=[audio_input, youtube_input],
	        outputs=run_button,
	    )

	    def update_video_embed(youtube_url):
	        """Show an embedded player for a valid YouTube URL, otherwise hide it."""
	        from html import escape  # function-scope import: html is not imported at file level

	        if youtube_url:
	            try:
	                # Escape the ID before interpolating it into raw HTML.
	                video_id = escape(fetch_youtube_id(youtube_url), quote=True)
	                embed_html = f'<iframe width="560" height="315" src="https://www.youtube.com/embed/{video_id}" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>'
	                # The component starts hidden, so it must be made visible here.
	                return gr.update(value=embed_html, visible=True)
	            except Exception as e:
	                print(f"Error embedding video: {e}")
	        return gr.update(value="", visible=False)

	    youtube_input.change(
	        fn=update_video_embed,
	        inputs=[youtube_input],
	        outputs=[video_player],
	    )

	    # NOTE(review): steps 2 and 3 below say the other inputs are cleared, but no
	    # handler in this block clears them; confirm the intended behaviour.
	    gr.Markdown("### How to use:")
	    gr.Markdown("1. Upload an audio file or record your voice using the microphone, OR paste a YouTube URL.")
	    gr.Markdown("2. If you paste a YouTube URL, the video will be displayed for your reference, and any previous transcription or audio input will be cleared.")
	    gr.Markdown("3. If you upload or record audio, any previous transcription, YouTube URL, and video will be cleared.")
	    gr.Markdown("4. Click the 'Transcribe Audio' button to start the process.")
	    gr.Markdown("5. Wait for a few seconds, and your transcription will appear in the result box.")

	# Launch the Gradio interface
	demo.launch(server_name='0.0.0.0')