|
import contextlib
import os
import tempfile
from io import BytesIO

import streamlit as st
import whisper
from gtts import gTTS
from pydub import AudioSegment
|
|
|
|
|
@st.cache_resource
def _load_whisper_model(name="base"):
    """Load the Whisper model called *name* once and reuse it across reruns.

    Streamlit re-executes this script from the top on every widget
    interaction; without caching, the model weights would be loaded again
    each time.
    """
    return whisper.load_model(name)


# Module-level handle used by the transcription step further down.
model = _load_whisper_model()
|
|
|
# Page header shown above the upload widget.
# NOTE(review): the leading characters of the title look like a mis-encoded
# emoji (mojibake) - confirm the intended symbol and re-save the file as UTF-8.
_page_title = "ποΈ Speech-to-Text & Back to Speech (Whisper AI + gTTS)"
_page_intro = "Upload an audio file to transcribe and convert back to speech!"
st.title(_page_title)
st.write(_page_intro)

# Extensions offered by the file picker.
_accepted_extensions = ["mp3", "wav", "m4a"]
audio_file = st.file_uploader("Upload your audio file", type=_accepted_extensions)
|
|
|
def _remove_quietly(path):
    """Delete *path*, ignoring the case where it no longer exists."""
    with contextlib.suppress(FileNotFoundError):
        os.remove(path)


def _save_upload_as_wav(uploaded_file) -> str:
    """Write the uploaded audio to a temporary WAV file and return its path.

    Non-WAV uploads are converted with pydub. The caller owns the returned
    file and must delete it. Any intermediate file is removed here, even
    when the conversion fails.
    """
    # Lower-case so that "Track.MP3" is handled the same as "track.mp3".
    extension = uploaded_file.name.rsplit(".", 1)[-1].lower()

    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{extension}") as tmp:
        # getvalue() returns the whole upload regardless of the current
        # stream position, unlike read().
        tmp.write(uploaded_file.getvalue())
        source_path = tmp.name

    if extension == "wav":
        return source_path

    # Swap only the suffix. A plain str.replace() would also rewrite the
    # extension text if it happened to occur inside the random temp name
    # or its directory.
    wav_path = os.path.splitext(source_path)[0] + ".wav"
    try:
        segment = AudioSegment.from_file(source_path, format=extension)
        # export() returns the open output file; close it so the handle is
        # not leaked (and so the file can be deleted later on Windows).
        segment.export(wav_path, format="wav").close()
    except Exception:
        _remove_quietly(wav_path)  # do not leave a partial WAV behind
        raise
    finally:
        _remove_quietly(source_path)
    return wav_path


if audio_file is not None:
    temp_audio_path = _save_upload_as_wav(audio_file)
    try:
        # Play back the original upload, labelled with its real MIME type.
        st.audio(audio_file, format=audio_file.type or "audio/wav")

        with st.spinner("Transcribing..."):
            result = model.transcribe(temp_audio_path)
    finally:
        # The WAV copy is only needed for transcription; remove it even if
        # transcription raised.
        _remove_quietly(temp_audio_path)

    transcription = result["text"]
    st.success("Transcription Complete!")
    st.write(transcription)

    # NOTE(review): the symbols at the start of the labels below look like
    # mis-encoded emoji (mojibake) - confirm the intended characters.
    word_count = len(transcription.split())
    st.write(f"π Word Count: **{word_count}** words")

    st.download_button(
        "π₯ Download Transcription",
        transcription.encode("utf-8"),
        file_name="transcription.txt",
        mime="text/plain",
    )

    if transcription.strip():
        # Synthesize into memory instead of a fixed "output_speech.mp3" in
        # the working directory, which concurrent sessions would overwrite
        # and which was never cleaned up.
        speech_buffer = BytesIO()
        gTTS(text=transcription, lang="en").write_to_fp(speech_buffer)

        st.success("π Text-to-Speech Conversion Complete!")
        st.audio(speech_buffer.getvalue(), format="audio/mp3")
    else:
        # gTTS refuses empty text, so skip synthesis when nothing was heard.
        st.warning("No speech was detected, so there is nothing to convert back to audio.")