Spaces:

shukdevdatta123
/

VoiceChat

Running

VoiceChat / text_speech_utils.py

Update text_speech_utils.py

0cfe2ed verified 6 months ago

1.43 kB

	import speech_recognition as sr
	from gtts import gTTS
	import os
	import wave

	def record_audio(filename, sec=5, sr=44100):
	    """Capture one spoken phrase from the default microphone into a WAV file.

	    Args:
	        filename: Path of the WAV file to write.
	        sec: Maximum number of seconds to wait for speech to *start*; it is
	            forwarded as the ``timeout`` of ``Recognizer.listen``.
	        sr: Sample rate in Hz requested from the microphone.

	    Raises:
	        Whatever ``Recognizer.listen`` raises when no speech starts within
	        ``sec`` seconds (see the SpeechRecognition docs), plus any error
	        from opening the microphone or the output file.
	    """
	    # The ``sr`` parameter shadows the module-level ``speech_recognition as sr``
	    # alias, so ``sr.Recognizer()`` used to fail with AttributeError on an int.
	    # Re-import under a private local name instead of renaming the parameter,
	    # which would break keyword callers.
	    import speech_recognition as _speech

	    recognizer = _speech.Recognizer()

	    # Honour the requested sample rate instead of silently ignoring it.
	    with _speech.Microphone(sample_rate=sr) as source:
	        print("Recording... Speak now!")
	        # Calibrate the energy threshold against background noise first.
	        recognizer.adjust_for_ambient_noise(source)
	        audio = recognizer.listen(source, timeout=sec)

	    # Persist the captured phrase as WAV bytes.
	    with open(filename, "wb") as f:
	        f.write(audio.get_wav_data())

	def transcribe_audio(filename):
	    """Transcribe the audio file at *filename* with Google's speech recognition.

	    Returns:
	        A dict with a single ``"text"`` key. It holds the transcript on
	        success, or a human-readable message when the audio could not be
	        understood or the recognition request failed.
	    """
	    engine = sr.Recognizer()

	    # Load the whole file into memory as audio data.
	    with sr.AudioFile(filename) as clip:
	        captured = engine.record(clip)

	    try:
	        # Recognize speech using Google's speech recognition
	        outcome = engine.recognize_google(captured)
	    except sr.UnknownValueError:
	        outcome = "Sorry, I could not understand the audio."
	    except sr.RequestError as err:
	        outcome = f"Request failed; {err}"

	    return {"text": outcome}

	def save_text_as_audio(text, audio_filename):
	    """Synthesize *text* as English speech and write it to *audio_filename*.

	    Uses gTTS at normal (not slow) speaking speed.
	    """
	    gTTS(text=text, lang='en', slow=False).save(audio_filename)

	def play_audio(filename):
	    """Play *filename* with a locally available player, without using a shell.

	    On Windows the file is opened with its associated application (the same
	    effect as the ``start`` command). On macOS ``afplay`` is tried, then
	    ``open``; on other systems ``mpg321`` is tried, then ``xdg-open``.

	    The filename is always passed as a single argument and never interpolated
	    into a shell command line, so spaces or shell metacharacters in the name
	    can neither break playback nor execute arbitrary commands.

	    Playback is best-effort: failures are reported with a printed message
	    rather than raised.

	    Args:
	        filename: Path of the audio file to play.
	    """
	    import shutil
	    import subprocess
	    import sys

	    if sys.platform.startswith("win"):
	        try:
	            os.startfile(filename)
	        except OSError as exc:
	            print(f"Could not play {filename}: {exc}")
	        return

	    if sys.platform == "darwin":
	        candidates = ("afplay", "open")
	    else:
	        candidates = ("mpg321", "xdg-open")

	    # Pick the first player that is actually installed.
	    player = next((path for path in map(shutil.which, candidates) if path), None)
	    if player is None:
	        print(f"Could not play {filename}: no audio player found")
	        return

	    try:
	        # Argument list + no shell: the filename is never parsed as a command.
	        subprocess.run([player, filename], check=False)
	    except OSError as exc:
	        print(f"Could not play {filename}: {exc}")