Spaces:

Amarsaish
/

speech-to-text

Sleeping

App Files Files Community

speech-to-text / app.py

Amarsaish

Update app.py

6bd2321 verified 6 months ago

raw

history blame

2.54 kB

	import os
	import streamlit as st
	from pydub import AudioSegment
	from groq import Groq

	# Set ffmpeg path
	# A bundled Windows binary is expected next to the app. Its location is
	# resolved to an absolute path because os.path.dirname("ffmpeg.exe") is an
	# empty string, which would only append an empty entry to PATH.
	ffmpeg_path = r"ffmpeg.exe"
	if os.path.isfile(ffmpeg_path):
	    ffmpeg_path = os.path.abspath(ffmpeg_path)
	    # os.environ.get avoids a KeyError when PATH is not defined at all.
	    os.environ["PATH"] = (
	        os.environ.get("PATH", "") + os.pathsep + os.path.dirname(ffmpeg_path)
	    )
	    AudioSegment.converter = ffmpeg_path
	# When the bundled binary is absent (e.g. on a Linux host) pydub's default
	# converter setting is left untouched, so an ffmpeg installed on the system
	# PATH can still be used instead of a non-existent "ffmpeg.exe".

	# Groq API configuration
	# The API key is read from the GROQ_API_KEY environment variable so that no
	# secret is stored in the source file. Any key that was previously committed
	# to this file should be treated as compromised and revoked.
	groq_api_key = os.environ.get("GROQ_API_KEY")
	if not groq_api_key:
	    # Fail early with a readable message instead of an authentication error
	    # on the first request.
	    st.error("GROQ_API_KEY is not set. Configure it as an environment variable or app secret.")
	    st.stop()
	client = Groq(api_key=groq_api_key)
	model = 'whisper-large-v3'

	# Function to ensure the file is in a suitable format
	def ensure_suitable_format(file_path):
	    """Return a path whose extension is one of the accepted audio formats.

	    If the extension of ``file_path`` (compared case-insensitively) is already
	    in the accepted list, the path is returned unchanged. Otherwise the file
	    is renamed on disk so that it ends in ``.wav`` and the new path is
	    returned. The file contents are NOT transcoded here; only the name
	    changes.

	    Args:
	        file_path: Path to an existing file.

	    Returns:
	        ``file_path`` itself, or the renamed ``<root>.wav`` path.

	    Raises:
	        OSError: If the rename fails (for example, the file does not exist).
	    """
	    allowed_formats = {"flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "opus", "wav", "webm"}
	    # splitext only looks at the final path component, so a bare name such as
	    # "ogg" (no dot at all) is correctly treated as having no extension.
	    root, extension = os.path.splitext(file_path)
	    if extension[1:].lower() in allowed_formats:
	        return file_path

	    new_file_path = f"{root}.wav"
	    # os.replace overwrites an existing target on every platform, whereas
	    # os.rename raises on Windows when the destination already exists.
	    os.replace(file_path, new_file_path)
	    return new_file_path

	# Function to convert audio to WAV
	def convert_audio_to_wav(input_path, output_path):
	    """Load the audio at ``input_path`` with pydub and write it as WAV.

	    Args:
	        input_path: Path of the audio file to read.
	        output_path: Path the WAV data is exported to.

	    Returns:
	        ``output_path``, unchanged, so the call can be used inline.
	    """
	    AudioSegment.from_file(input_path).export(output_path, format="wav")
	    return output_path

	# Function to transcribe audio using Groq
	def audio_to_text(filepath):
	    """Send the audio file at ``filepath`` to Groq and return the text result.

	    The whole file is read into memory and uploaded as a ``(name, bytes)``
	    tuple through the module-level ``client`` using the module-level
	    ``model``.

	    NOTE(review): this calls ``client.audio.translations`` rather than
	    ``client.audio.transcriptions`` - confirm that is the intended endpoint.

	    Args:
	        filepath: Path of the audio file to upload.

	    Returns:
	        The ``text`` attribute of the API response.
	    """
	    with open(filepath, "rb") as audio_file:
	        payload = (filepath, audio_file.read())
	    response = client.audio.translations.create(file=payload, model=model)
	    return response.text

	# Streamlit App UI
	st.title("Audio-to-Text Transcription")
	st.write("Upload an audio file to get the transcribed text.")

	# File upload (the widget only offers files with these extensions)
	uploaded_file = st.file_uploader(
	    "Upload your audio file",
	    type=["flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "opus", "wav", "webm"],
	)

	if uploaded_file:
	    # The file name is supplied by the client. Keep only its last path
	    # component (treating backslashes as separators too) so that a name such
	    # as "../../app.py" cannot be written outside the upload directory.
	    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/")) or "upload"

	    # Save the uploaded file locally
	    os.makedirs("uploaded_audio", exist_ok=True)
	    file_path = os.path.join("uploaded_audio", safe_name)
	    with open(file_path, "wb") as f:
	        f.write(uploaded_file.getbuffer())

	    st.write(f"File uploaded: {uploaded_file.name}")

	    st.write("Processing transcription...")
	    try:
	        # Format check and WAV conversion run inside the try block so that a
	        # decoding or ffmpeg failure is reported in the page instead of
	        # surfacing as an unhandled traceback.
	        suitable_audio_path = ensure_suitable_format(file_path)
	        wav_path = f"{os.path.splitext(suitable_audio_path)[0]}.wav"
	        converted_audio = convert_audio_to_wav(suitable_audio_path, wav_path)

	        # Transcribe audio
	        transcription = audio_to_text(converted_audio)
	    except Exception as e:
	        # Top-level UI boundary: show any failure to the user.
	        st.error(f"Error during transcription: {e}")
	    else:
	        st.success("Transcription complete!")
	        st.text_area("Transcribed Text", transcription, height=200)