fastx committed on
Commit
1000bc9
·
verified ·
1 Parent(s): a31904b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: transcribe an uploaded audio file with Whisper, then read it back with gTTS.

Flow: upload -> (convert to WAV if needed) -> Whisper transcription ->
word count + text download -> gTTS speech synthesis of the transcription.
"""

import whisper
import streamlit as st
import tempfile
import os
from io import BytesIO
from pydub import AudioSegment
from gtts import gTTS  # Import Google Text-to-Speech

# Load Whisper model
# NOTE(review): this runs at module level, so it is re-executed every time
# Streamlit reruns the script; consider caching it if the installed Streamlit
# version provides a resource cache.
model = whisper.load_model("base")

st.title("🎙️ Speech-to-Text & Back to Speech (Whisper AI + gTTS)")
st.write("Upload an audio file to transcribe and convert back to speech!")

# File uploader
audio_file = st.file_uploader("Upload your audio file", type=["mp3", "wav", "m4a"])

if audio_file is not None:
    # Normalise the extension so "CLIP.WAV" is treated the same as "clip.wav".
    file_extension = os.path.splitext(audio_file.name)[1].lstrip(".").lower()
    # getvalue() returns the whole upload regardless of the current read position.
    audio_bytes = audio_file.getvalue()

    # Audio playback of the original upload, labelled with its real MIME type.
    st.audio(audio_bytes, format=audio_file.type or "audio/wav")

    # Every temp file created below is tracked here so it is removed even when
    # conversion or transcription fails.
    temp_paths = []
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_extension}") as temp_audio:
            temp_audio.write(audio_bytes)
            temp_audio_path = temp_audio.name
        temp_paths.append(temp_audio_path)

        # Convert audio to WAV if needed
        if file_extension != "wav":
            audio = AudioSegment.from_file(temp_audio_path, format=file_extension)
            # Swap only the suffix; a plain str.replace() would also rewrite any
            # occurrence of the extension text inside the random temp file name.
            temp_audio_path_wav = os.path.splitext(temp_audio_path)[0] + ".wav"
            temp_paths.append(temp_audio_path_wav)
            audio.export(temp_audio_path_wav, format="wav")
            temp_audio_path = temp_audio_path_wav

        with st.spinner("Transcribing..."):
            result = model.transcribe(temp_audio_path)
            transcription = result["text"]
    finally:
        # Cleanup temp files
        for path in temp_paths:
            if os.path.exists(path):
                os.remove(path)

    st.success("Transcription Complete!")
    st.write(transcription)

    # Word count
    word_count = len(transcription.split())
    st.write(f"📝 Word Count: **{word_count}** words")

    # Download button for transcription
    st.download_button(
        "📥 Download Transcription",
        transcription.encode("utf-8"),
        file_name="transcription.txt",
        mime="text/plain",
    )

    # Convert transcribed text back to speech using gTTS
    if transcription.strip():
        # Synthesize into memory instead of a fixed "output_speech.mp3" so that
        # concurrent sessions cannot overwrite each other's audio and nothing
        # is left behind on disk.
        speech_buffer = BytesIO()
        gTTS(text=transcription, lang="en").write_to_fp(speech_buffer)

        st.success("🔊 Text-to-Speech Conversion Complete!")
        st.audio(speech_buffer.getvalue(), format="audio/mp3")
    else:
        # gTTS refuses empty text, so skip synthesis rather than crash.
        st.warning("No speech was detected, so there is nothing to convert back to audio.")