Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import whisper
|
2 |
+
import streamlit as st
|
3 |
+
import tempfile
|
4 |
+
import os
|
5 |
+
from io import BytesIO
|
6 |
+
from pydub import AudioSegment
|
7 |
+
from gtts import gTTS # Import Google Text-to-Speech
|
8 |
+
|
9 |
+
# Streamlit re-executes this whole script on every widget interaction, so the
# Whisper model is loaded through a cached factory instead of on each rerun.
@st.cache_resource
def _load_whisper_model(name="base"):
    """Load the Whisper model called *name* once per server process."""
    return whisper.load_model(name)


def _remove_quietly(path):
    """Delete *path*, ignoring the case where it is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _write_upload_as_wav(uploaded):
    """Persist an uploaded audio file to disk as WAV and return its path.

    The upload is written to a temporary file; anything that is not already
    WAV is converted with pydub. The caller owns the returned file and is
    responsible for deleting it.
    """
    # Lower-case so that e.g. "clip.WAV" is recognised as WAV.
    extension = os.path.splitext(uploaded.name)[1].lstrip(".").lower()

    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{extension}") as tmp:
        # getvalue() returns the full payload regardless of the read position.
        tmp.write(uploaded.getvalue())
        source_path = tmp.name

    if extension == "wav":
        return source_path

    # Swap only the suffix; a plain str.replace() would also rewrite any
    # occurrence of the extension inside the directory or random file name.
    wav_path = os.path.splitext(source_path)[0] + ".wav"
    try:
        audio = AudioSegment.from_file(source_path, format=extension)
        audio.export(wav_path, format="wav")
    except Exception:
        # Do not leave a partially written WAV behind on failure.
        _remove_quietly(wav_path)
        raise
    finally:
        _remove_quietly(source_path)
    return wav_path


model = _load_whisper_model()

# NOTE(review): the leading characters of several UI strings below look like
# mis-encoded emoji. They are left byte-for-byte as found - confirm against
# the original file before changing them.
st.title("ποΈ Speech-to-Text & Back to Speech (Whisper AI + gTTS)")
st.write("Upload an audio file to transcribe and convert back to speech!")

# File uploader
audio_file = st.file_uploader("Upload your audio file", type=["mp3", "wav", "m4a"])

if audio_file is not None:
    temp_audio_path = _write_upload_as_wav(audio_file)
    try:
        # Play back the original upload with its real MIME type rather than
        # always claiming WAV.
        st.audio(audio_file.getvalue(), format=audio_file.type or "audio/wav")

        with st.spinner("Transcribing..."):
            result = model.transcribe(temp_audio_path)
    finally:
        # Clean up the temp file even when playback or transcription fails.
        _remove_quietly(temp_audio_path)

    transcription = result["text"]
    st.success("Transcription Complete!")
    st.write(transcription)

    # Word count
    word_count = len(transcription.split())
    st.write(f"π Word Count: **{word_count}** words")

    # Download button for transcription
    st.download_button(
        "π₯ Download Transcription",
        transcription.encode("utf-8"),
        file_name="transcription.txt",
        mime="text/plain",
    )

    # Convert transcribed text back to speech using gTTS. The audio is kept in
    # memory so concurrent sessions do not overwrite a shared file on disk.
    if transcription.strip():
        speech_buffer = BytesIO()
        gTTS(text=transcription, lang="en").write_to_fp(speech_buffer)

        st.success("π Text-to-Speech Conversion Complete!")
        st.audio(speech_buffer.getvalue(), format="audio/mp3")
    else:
        # gTTS cannot synthesise empty text, so skip the step.
        st.warning("No speech was detected, so there is nothing to convert back to audio.")
|