Spaces:

fastx
/

Talkify

Sleeping

App Files Files Community

Talkify / app.py

fastx

Create app.py

1000bc9 verified 3 months ago

raw

history blame

2.13 kB

	import whisper
	import streamlit as st
	import tempfile
	import os
	from io import BytesIO
	from pydub import AudioSegment
	from gtts import gTTS # Import Google Text-to-Speech

	# Load Whisper model
	@st.cache_resource
	def _load_model():
	    """Load the Whisper "base" model once and reuse it across script reruns.

	    Streamlit re-executes this script on every widget interaction; without
	    caching, the model would be reloaded each time.
	    """
	    return whisper.load_model("base")


	model = _load_model()

	st.title("🎙️ Speech-to-Text & Back to Speech (Whisper AI + gTTS)")
	st.write("Upload an audio file to transcribe and convert back to speech!")

	# File uploader
	audio_file = st.file_uploader("Upload your audio file", type=["mp3", "wav", "m4a"])

	if audio_file is not None:
	    # Pipeline for one upload: save it to disk, convert to WAV if needed,
	    # transcribe with Whisper, show the text, then speak it back with gTTS.

	    # Lower-case the extension so ".WAV" / ".MP3" uploads are treated the
	    # same as their lower-case forms.
	    suffix = os.path.splitext(audio_file.name)[1].lower()
	    file_extension = suffix.lstrip(".")

	    # getvalue() returns the full upload without moving the stream position,
	    # so the same bytes can be written to disk and played back below.
	    upload_bytes = audio_file.getvalue()

	    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio:
	        temp_audio.write(upload_bytes)
	        temp_audio_path = temp_audio.name

	    # Every temp file created for this upload; all are removed in `finally`
	    # even when conversion or transcription raises.
	    temp_paths = [temp_audio_path]
	    try:
	        # Convert audio to WAV if needed
	        if file_extension != "wav":
	            audio = AudioSegment.from_file(temp_audio_path, format=file_extension)
	            # Swap only the final suffix; a plain str.replace() would also
	            # rewrite matching text in the directory or the random temp name.
	            temp_audio_path_wav = os.path.splitext(temp_audio_path)[0] + ".wav"
	            temp_paths.append(temp_audio_path_wav)
	            audio.export(temp_audio_path_wav, format="wav")
	            temp_audio_path = temp_audio_path_wav

	        # Audio playback: the player receives the original upload, so label
	        # it with the upload's own MIME type rather than always "audio/wav".
	        st.audio(upload_bytes, format=audio_file.type or "audio/wav")

	        with st.spinner("Transcribing..."):
	            result = model.transcribe(temp_audio_path)
	    finally:
	        # Cleanup temp files
	        for path in temp_paths:
	            if os.path.exists(path):
	                os.remove(path)

	    transcription = result["text"]
	    st.success("Transcription Complete!")
	    st.write(transcription)

	    # Word count
	    word_count = len(transcription.split())
	    st.write(f"📝 Word Count: {word_count} words")

	    # Download button for transcription
	    st.download_button(
	        "📥 Download Transcription",
	        transcription.encode("utf-8"),
	        file_name="transcription.txt",
	        mime="text/plain",
	    )

	    # Convert transcribed text back to speech using gTTS
	    if transcription.strip():
	        tts = gTTS(text=transcription, lang="en")
	        # Render into memory instead of a fixed "output_speech.mp3" in the
	        # working directory, which concurrent sessions would overwrite and
	        # which was never deleted.
	        speech_buffer = BytesIO()
	        tts.write_to_fp(speech_buffer)

	        st.success("🔊 Text-to-Speech Conversion Complete!")
	        st.audio(speech_buffer.getvalue(), format="audio/mp3")
	    else:
	        # gTTS refuses empty text, so skip synthesis when nothing was heard.
	        st.warning("No speech was detected, so there is nothing to convert back to audio.")