VoiceChat / text_speech_utils.py
shukdevdatta123's picture
Update text_speech_utils.py
61eb037 verified
raw
history blame
2.45 kB
from pydub import AudioSegment
from pydub.playback import play
import whisper
import soundfile as sf
from gtts import gTTS
import os
import tempfile
import time
# Load the Whisper speech-to-text model once, at import time; transcribe_audio()
# below uses this shared module-level instance for every call.
model = whisper.load_model("base") # You can also try "small", "medium", or "large"
# Function to generate a placeholder "recording" with pydub and save it as a .wav file
def record_audio(filename, sec=5, sr=44100):
    """Write a placeholder recording to ``filename`` as a WAV file.

    No microphone is read: a 440 Hz sine tone is synthesized instead, standing
    in for real capture code.

    Args:
        filename: Destination path of the WAV file.
        sec: Length of the generated tone, in seconds.
        sr: Sample rate of the generated audio, in Hz.
    """
    # Imported locally: the generator is only needed by this placeholder.
    from pydub.generators import Sine

    print("Recording...")

    # NOTE: replace this synthetic tone with actual microphone recording code.
    # `sr` is passed to the generator so the requested sample rate is honoured
    # instead of silently falling back to the generator's default.
    tone = Sine(440, sample_rate=sr).to_audio_segment(duration=sec * 1000)
    tone.export(filename, format="wav")
    print(f"Audio saved as {filename}")
# Function to transcribe an audio file with the module-level Whisper model
def transcribe_audio(filename):
    """Transcribe the audio file at ``filename`` and return the recognized text.

    Args:
        filename: Path of the audio file handed to the Whisper model.

    Returns:
        A dict with a single ``"text"`` key holding the transcription.
    """
    print("Transcribing audio...")
    # Only the "text" entry of the Whisper result is used.
    text = model.transcribe(filename)["text"]
    print(f"Transcription: {text}")
    return dict(text=text)
# Function to save text as an audio file using gTTS (Google Text-to-Speech)
def save_text_as_audio(text, audio_filename, *, max_retries=3, retry_delay=60):
    """Synthesize ``text`` with gTTS and store the result as a WAV file.

    gTTS produces MP3, so the speech is first written to a temporary MP3 file
    and then converted to WAV with pydub. The temporary file is always removed,
    whether or not the conversion succeeds.

    Args:
        text: The text to speak. Must contain at least one non-blank character.
        audio_filename: Destination path of the WAV file.
        max_retries: How many times a failed gTTS request is retried before
            giving up. Negative values are treated as 0.
        retry_delay: Seconds to wait between retries.

    Raises:
        ValueError: If ``text`` is empty or whitespace only.
        gtts.gTTSError: If the text-to-speech request still fails after all
            retries have been used.
    """
    # Reject empty input up front: no amount of retrying can make it succeed.
    if not text or not text.strip():
        raise ValueError("text must be a non-empty string")

    # Imported locally so the block does not depend on a new file-level import.
    from gtts import gTTSError

    print("Converting text to speech...")

    # Use a dedicated temporary file for the intermediate MP3 so it can never
    # collide with (and later delete) the requested output path.
    fd, mp3_filename = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)

    try:
        total_attempts = max(0, max_retries) + 1
        for attempt in range(1, total_attempts + 1):
            try:
                gTTS(text=text, lang='en', slow=False).save(mp3_filename)
                break
            except gTTSError as e:
                # Only service/network failures (e.g. rate limiting) are worth
                # retrying; anything else propagates immediately.
                print(f"Error occurred during text-to-speech conversion: {e}")
                if attempt == total_attempts:
                    raise
                print(f"Waiting for {retry_delay} seconds before retrying...")
                time.sleep(retry_delay)
        print(f"Audio saved as {mp3_filename}")

        # Convert mp3 to wav using pydub
        audio = AudioSegment.from_mp3(mp3_filename)
        audio.export(audio_filename, format="wav")
        print(f"Audio converted and saved as {audio_filename}")
    finally:
        # Delete the temporary mp3 file even when synthesis or conversion failed.
        if os.path.exists(mp3_filename):
            os.remove(mp3_filename)
# Function to play a .wav file through pydub's playback helper
def play_audio(filename):
    """Load the WAV file at ``filename`` and play it.

    Args:
        filename: Path of the WAV file to play.
    """
    print("Playing audio...")
    play(AudioSegment.from_wav(filename))