# VoiceChat / text_speech_utils.py
# Web file-viewer header (kept for provenance):
#   uploader: shukdevdatta123
#   commit: 0cfe2ed (verified) - "Update text_speech_utils.py"
#   size: 1.43 kB
import speech_recognition as sr
from gtts import gTTS
import os
import wave
def record_audio(filename, sec=5, sr=44100):
    """Record one spoken phrase from the default microphone into a WAV file.

    Args:
        filename: Path of the file to write; it is opened in binary write
            mode, so an existing file is overwritten.
        sec: Seconds to wait for speech to begin, and also the maximum
            length of the captured phrase.
        sr: Sample rate in Hz requested from the microphone.

    Raises:
        speech_recognition.WaitTimeoutError: If no speech starts within
            ``sec`` seconds (raised by ``Recognizer.listen``).
    """
    # The ``sr`` parameter shadows the module-level ``speech_recognition as sr``
    # alias, so the library must be reached through a different local name;
    # otherwise ``sr.Recognizer()`` is an attribute lookup on an int.
    import speech_recognition as speech

    recognizer = speech.Recognizer()

    with speech.Microphone(sample_rate=sr) as source:
        print("Recording... Speak now!")
        # Calibrate the energy threshold against background noise first.
        recognizer.adjust_for_ambient_noise(source)
        # ``timeout`` only bounds the wait for speech to start;
        # ``phrase_time_limit`` is what caps the recording length.
        audio = recognizer.listen(source, timeout=sec, phrase_time_limit=sec)

    # Persist the captured audio as WAV bytes.
    with open(filename, "wb") as f:
        f.write(audio.get_wav_data())
def transcribe_audio(filename):
    """Transcribe an audio file using Google's speech recognition.

    Args:
        filename: Path of the audio file to read.

    Returns:
        A dict with a single ``"text"`` key holding either the transcript
        or a human-readable message when recognition fails.
    """
    engine = sr.Recognizer()

    # Load the whole file into memory as one audio clip.
    with sr.AudioFile(filename) as audio_source:
        clip = engine.record(audio_source)

    try:
        message = engine.recognize_google(clip)
    except sr.UnknownValueError:
        # The service could not make out any speech in the clip.
        message = "Sorry, I could not understand the audio."
    except sr.RequestError as err:
        # The recognition request itself failed.
        message = f"Request failed; {err}"

    return {"text": message}
def save_text_as_audio(text, audio_filename):
    """Synthesize ``text`` as English speech with gTTS and save it.

    Args:
        text: The text to speak.
        audio_filename: Path the synthesized audio is saved to.
    """
    # Normal-speed English voice, written straight to the target file.
    gTTS(text=text, lang='en', slow=False).save(audio_filename)
def play_audio(filename):
    """Play an audio file with a player appropriate for the current platform.

    On Windows the file is opened with its associated application (the
    equivalent of the ``start`` shell command). On macOS ``open`` is tried
    first, on other systems ``xdg-open``; ``mpg321`` is the fallback on both.
    Depending on the player, the call may block until playback ends.

    The function is best-effort: if no player is available or launching it
    fails, a message is printed and the function returns normally.

    Args:
        filename: Path of the audio file to play.
    """
    import shutil
    import subprocess
    import sys

    try:
        if sys.platform.startswith("win"):
            # No shell is involved, so paths containing spaces or shell
            # metacharacters are handled safely.
            os.startfile(filename)
            return

        if sys.platform == "darwin":
            candidates = ("open", "mpg321")
        else:
            candidates = ("xdg-open", "mpg321")

        player = next((cmd for cmd in candidates if shutil.which(cmd)), None)
        if player is None:
            print(f"No audio player found to play {filename}")
            return

        # Argument list (not a shell string) avoids quoting/injection issues.
        subprocess.run([player, filename], check=False)
    except OSError as err:
        print(f"Could not play {filename}: {err}")