File size: 2,129 Bytes
1000bc9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import whisper
import streamlit as st
import tempfile
import os
from io import BytesIO
from pydub import AudioSegment
from gtts import gTTS # Import Google Text-to-Speech
# Streamlit app: upload an audio file, transcribe it with Whisper, show the
# text (with word count and a download button), then read the text back with
# gTTS.
#
# NOTE(review): the leading glyphs in several UI strings below look like
# mis-decoded emoji -- confirm the file's encoding before changing them.


@st.cache_resource
def _load_whisper_model():
    """Load the Whisper "base" model once and share it across reruns.

    Streamlit re-executes the whole script on every widget interaction;
    caching avoids reloading the model weights each time.
    """
    return whisper.load_model("base")


def _remove_if_exists(path: str) -> None:
    """Delete *path*; a file that is already gone is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # Best-effort cleanup: nothing left to remove.
        pass


def _write_upload_as_wav(uploaded_file) -> str:
    """Save an uploaded audio file to a temp file as WAV and return its path.

    Non-WAV uploads are decoded with pydub and re-exported as WAV; the
    intermediate temp file is always removed. The caller owns the returned
    path and is responsible for deleting it.
    """
    # Normalise the extension so "Clip.MP3" and "clip.mp3" behave the same.
    extension = os.path.splitext(uploaded_file.name)[1].lstrip(".").lower()
    suffix = f".{extension}" if extension else ""

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio:
        # getvalue() returns the full payload regardless of stream position.
        temp_audio.write(uploaded_file.getvalue())
        source_path = temp_audio.name

    if extension == "wav":
        return source_path

    # Swap only the suffix; a plain str.replace() would also rewrite any
    # occurrence of the extension inside the random temp-file name.
    wav_path = os.path.splitext(source_path)[0] + ".wav"
    try:
        audio = AudioSegment.from_file(source_path, format=extension or None)
        audio.export(wav_path, format="wav")
    except Exception:
        _remove_if_exists(wav_path)  # do not leave a partial export behind
        raise
    finally:
        _remove_if_exists(source_path)
    return wav_path


# Load Whisper model
model = _load_whisper_model()

st.title("ποΈ Speech-to-Text & Back to Speech (Whisper AI + gTTS)")
st.write("Upload an audio file to transcribe and convert back to speech!")

# File uploader
audio_file = st.file_uploader("Upload your audio file", type=["mp3", "wav", "m4a"])

if audio_file is not None:
    # Audio playback, using the upload's real MIME type rather than always
    # claiming WAV.
    st.audio(audio_file, format=audio_file.type or "audio/wav")

    temp_audio_path = _write_upload_as_wav(audio_file)
    try:
        with st.spinner("Transcribing..."):
            result = model.transcribe(temp_audio_path)
    finally:
        # Cleanup temp files even when transcription fails.
        _remove_if_exists(temp_audio_path)

    transcription = result["text"]
    st.success("Transcription Complete!")
    st.write(transcription)

    # Word count
    word_count = len(transcription.split())
    st.write(f"π Word Count: **{word_count}** words")

    # Download button for transcription
    output_txt = BytesIO(transcription.encode())
    st.download_button(
        "π₯ Download Transcription",
        output_txt,
        file_name="transcription.txt",
        mime="text/plain",
    )

    # Convert transcribed text back to speech using gTTS. The audio is kept
    # in memory so concurrent sessions do not overwrite a shared file on disk.
    if transcription.strip():
        speech_buffer = BytesIO()
        gTTS(text=transcription, lang="en").write_to_fp(speech_buffer)
        st.success("π Text-to-Speech Conversion Complete!")
        st.audio(speech_buffer.getvalue(), format="audio/mp3")
    else:
        # gTTS refuses empty text, so skip synthesis for silent recordings.
        st.warning("No speech was detected, so there is no text to convert back to audio.")