Talkify / app.py
fastx's picture
Create app.py
1000bc9 verified
raw
history blame
2.13 kB
import whisper
import streamlit as st
import tempfile
import os
from io import BytesIO
from pydub import AudioSegment
from gtts import gTTS # Import Google Text-to-Speech
# Load the Whisper model once per server process.
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so an uncached load would re-read the model weights each time.
@st.cache_resource
def _load_whisper_model(name: str = "base"):
    """Return the Whisper model called *name*, cached across script reruns."""
    return whisper.load_model(name)


model = _load_whisper_model()
# Page header. The leading emoji was stored as mojibake (its UTF-8 bytes
# decoded with a single-byte code page); it is restored to the real character.
st.title("🎙️ Speech-to-Text & Back to Speech (Whisper AI + gTTS)")
st.write("Upload an audio file to transcribe and convert back to speech!")

# File uploader: yields None until the user has picked a file.
audio_file = st.file_uploader(
    "Upload your audio file",
    type=["mp3", "wav", "m4a"],
)
if audio_file is not None:
    # Normalise the extension so "CLIP.MP3" and "clip.mp3" behave the same.
    file_extension = os.path.splitext(audio_file.name)[1].lstrip(".").lower()
    temp_suffix = f".{file_extension}" if file_extension else ""

    # MIME type for the preview player; it must describe the *uploaded* bytes,
    # which are not necessarily WAV.
    preview_mime = {
        "mp3": "audio/mpeg",
        "wav": "audio/wav",
        "m4a": "audio/mp4",
    }.get(file_extension, "audio/wav")

    audio_bytes = audio_file.getvalue()

    # Every temp file created below is recorded here so the `finally` clause
    # can remove it even when decoding or transcription fails.
    temp_paths = []
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=temp_suffix) as temp_audio:
            temp_audio_path = temp_audio.name
            temp_paths.append(temp_audio_path)
            temp_audio.write(audio_bytes)

        # Convert audio to WAV if needed.
        if file_extension != "wav":
            audio = AudioSegment.from_file(
                temp_audio_path,
                format=file_extension or None,
            )
            # Swap only the suffix. A plain str.replace() would also rewrite
            # any earlier occurrence of the extension text inside the
            # directory or the random temp-file name.
            temp_audio_path_wav = os.path.splitext(temp_audio_path)[0] + ".wav"
            temp_paths.append(temp_audio_path_wav)
            # export() hands back the open output file; close it so the
            # handle is not leaked and the file can be deleted afterwards.
            audio.export(temp_audio_path_wav, format="wav").close()
            temp_audio_path = temp_audio_path_wav

        # Audio playback of the original upload.
        st.audio(audio_bytes, format=preview_mime)

        with st.spinner("Transcribing..."):
            result = model.transcribe(temp_audio_path)
        transcription = result["text"]
    finally:
        # Best-effort cleanup: a failed delete must not mask the real error.
        for stale_path in temp_paths:
            try:
                os.remove(stale_path)
            except OSError:
                pass

    st.success("Transcription Complete!")
    st.write(transcription)

    # Word count
    word_count = len(transcription.split())
    st.write(f"📝 Word Count: **{word_count}** words")

    # Download button for transcription
    st.download_button(
        "📥 Download Transcription",
        transcription.encode("utf-8"),
        file_name="transcription.txt",
        mime="text/plain",
    )

    # Convert transcribed text back to speech using gTTS.
    # gTTS refuses empty text, so skip synthesis when nothing was recognised.
    if transcription.strip():
        # Synthesize into memory rather than a fixed "output_speech.mp3" in the
        # working directory, which concurrent sessions would overwrite and
        # which was never cleaned up.
        speech_buffer = BytesIO()
        gTTS(text=transcription, lang="en").write_to_fp(speech_buffer)
        st.success("🔊 Text-to-Speech Conversion Complete!")
        st.audio(speech_buffer.getvalue(), format="audio/mp3")
    else:
        st.warning("No speech was detected, so there is nothing to convert back to audio.")