Spaces:
Sleeping
Sleeping
Delete text_speech_utils.py
Browse files- text_speech_utils.py +0 -71
text_speech_utils.py
DELETED
@@ -1,71 +0,0 @@
|
|
1 |
-
from pydub import AudioSegment
|
2 |
-
from pydub.playback import play
|
3 |
-
import whisper
|
4 |
-
import soundfile as sf
|
5 |
-
from gtts import gTTS
|
6 |
-
import os
|
7 |
-
import tempfile
|
8 |
-
import time
|
9 |
-
|
10 |
-
# Load Whisper model
|
11 |
-
# Loaded once when the module is imported; transcribe_audio() reuses this shared instance.
model = whisper.load_model("base") # You can also try "small", "medium", or "large"
|
12 |
-
|
13 |
-
# Function to record audio using pydub and save it as a .wav file
|
14 |
-
def record_audio(filename, sec=5, sr=44100):
    """Write a placeholder "recording" to ``filename`` as a WAV file.

    No microphone is read. A 440 Hz sine tone is synthesized with pydub and
    exported instead; replace the tone generation with real capture code
    when microphone input is needed.

    Args:
        filename: Destination passed to ``AudioSegment.export``.
        sec: Length of the generated tone, in seconds.
        sr: Sample rate, in Hz, used to generate the tone.
    """
    from pydub.generators import Sine

    print("Recording...")

    # Honor the requested sample rate; previously ``sr`` was accepted but
    # ignored, so the tone was always produced at pydub's default rate.
    tone = Sine(440, sample_rate=sr).to_audio_segment(duration=sec * 1000)

    tone.export(filename, format="wav")
    print(f"Audio saved as {filename}")
|
28 |
-
|
29 |
-
# Function to transcribe audio using Whisper
|
30 |
-
def transcribe_audio(filename):
    """Transcribe an audio file with the module-level Whisper ``model``.

    Args:
        filename: Audio path handed to ``model.transcribe``.

    Returns:
        A dict with the single key ``"text"`` holding the transcription.
    """
    print("Transcribing audio...")
    transcript = model.transcribe(filename)["text"]
    print(f"Transcription: {transcript}")
    return dict(text=transcript)
|
36 |
-
|
37 |
-
# Function to save text as an audio file using gTTS (Google Text-to-Speech)
|
38 |
-
def save_text_as_audio(text, audio_filename, max_retries=3, retry_delay=60):
    """Synthesize ``text`` with gTTS and store the result as a WAV file.

    gTTS produces mp3, so the speech is first written to an intermediate
    mp3 file, converted to WAV with pydub, and the intermediate file is
    removed afterwards (also when the conversion fails).

    Args:
        text: Text to speak.
        audio_filename: Path of the WAV file to write.
        max_retries: How many times to retry after a failed attempt before
            giving up. The original implementation retried forever via
            unbounded recursion.
        retry_delay: Seconds to wait between attempts (e.g. to ride out a
            rate limit).

    Raises:
        Exception: The error from the last attempt, once ``max_retries``
            retries have been exhausted.
    """
    print("Converting text to speech...")

    # Swap only the real extension. ``str.replace('.wav', '.mp3')`` also
    # rewrote '.wav' inside directory names and, for names without '.wav',
    # made the temp path equal to the output path (which was then deleted).
    base, _ext = os.path.splitext(audio_filename)
    mp3_filename = base + ".mp3"
    if mp3_filename == audio_filename:
        mp3_filename = base + ".tmp.mp3"

    attempt = 0
    while True:
        try:
            tts = gTTS(text=text, lang='en', slow=False)

            # Save as mp3 file
            tts.save(mp3_filename)
            print(f"Audio saved as {mp3_filename}")

            # Convert mp3 to wav using pydub
            audio = AudioSegment.from_mp3(mp3_filename)
            audio.export(audio_filename, format="wav")
            print(f"Audio converted and saved as {audio_filename}")
            return
        except Exception as e:
            print(f"Error occurred during text-to-speech conversion: {e}")

            # Give up instead of looping forever on a permanent failure.
            if attempt >= max_retries:
                raise
            attempt += 1

            # In case of error (like hitting the rate limit), wait and retry
            print(f"Waiting for {retry_delay} seconds before retrying...")
            time.sleep(retry_delay)
        finally:
            # Delete the temporary mp3 file on success and on failure alike.
            if os.path.exists(mp3_filename):
                os.remove(mp3_filename)
|
66 |
-
|
67 |
-
# Function to play audio using pydub's playback
|
68 |
-
def play_audio(filename):
    """Load the WAV file at ``filename`` with pydub and play it back."""
    print("Playing audio...")
    play(AudioSegment.from_wav(filename))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|