Spaces:

shukdevdatta123
/

VoiceChat

Running

App Files Files Community

VoiceChat / text_speech_utils.py

shukdevdatta123

Update text_speech_utils.py

d19e200 verified 6 months ago

raw

history blame

1.77 kB

	import sounddevice as sd
	import soundfile as sf
	import speech_recognition as sr
	from gtts import gTTS
	import os

	# Function to record audio using sounddevice and save it as a .wav file
	def record_audio(filename, sec=5, sr=44100):
	    """Record mono 16-bit audio with sounddevice and save it to a file.

	    Args:
	        filename: Destination path passed to ``soundfile.write``.
	        sec: Recording length in seconds; must be positive.
	        sr: Sample rate in Hz; must be positive. Within this function the
	            name shadows the module-level ``speech_recognition`` alias,
	            which is harmless because this body never uses that module.

	    Raises:
	        ValueError: If ``sec`` or ``sr`` is not positive, or if together
	            they describe less than one audio frame.
	    """
	    # Validate up front so bad arguments fail with a clear message before
	    # the audio backend is touched.
	    if sec <= 0:
	        raise ValueError(f"sec must be positive, got {sec!r}")
	    if sr <= 0:
	        raise ValueError(f"sr must be positive, got {sr!r}")

	    frame_count = int(sec * sr)
	    if frame_count < 1:
	        raise ValueError(
	            f"sec={sec!r} at sr={sr!r} yields no audio frames to record"
	        )

	    print("Recording...")
	    # Record the audio from the microphone (mono channel)
	    audio_data = sd.rec(frame_count, samplerate=sr, channels=1, dtype='int16')
	    sd.wait()  # Block until the recording has finished

	    # Save the audio data to a file using soundfile
	    sf.write(filename, audio_data, sr)
	    print(f"Audio saved as {filename}")

	# Function to transcribe audio using Google's speech recognition
	def transcribe_audio(filename):
	    """Transcribe an audio file with Google's speech recognition.

	    Args:
	        filename: Path of the audio file to open with
	            ``speech_recognition.AudioFile``.

	    Returns:
	        A dict with a single ``"text"`` key. On success it holds the
	        transcription; if the audio could not be understood or the
	        request failed, it holds a human-readable message instead.
	    """
	    engine = sr.Recognizer()

	    # Read the audio out of the file via SpeechRecognition
	    with sr.AudioFile(filename) as wav_source:
	        clip = engine.record(wav_source)

	    print("Transcribing audio...")
	    try:
	        # Only the recognition call can raise the handled errors
	        transcript = engine.recognize_google(clip)
	    except sr.UnknownValueError:
	        return {"text": "Sorry, I couldn't understand the audio."}
	    except sr.RequestError as err:
	        return {"text": f"Error in request: {err}"}

	    print(f"Transcription: {transcript}")
	    return {"text": transcript}

	# Function to save text as an audio file using gTTS (Google Text-to-Speech)
	def save_text_as_audio(text, audio_filename):
	    """Convert text to English speech with gTTS and save it to a file.

	    Args:
	        text: The text to synthesize.
	        audio_filename: Destination path passed to ``gTTS.save``.
	    """
	    print("Converting text to speech...")
	    # Build the synthesizer and write its output in a single step
	    gTTS(text=text, lang='en', slow=False).save(audio_filename)
	    print(f"Audio saved as {audio_filename}")

	# Function to play audio using the system's default audio player
	def play_audio(filename):
	    """Play an audio file with a player suited to the current platform.

	    The file name is never passed through a shell, so names containing
	    spaces or shell metacharacters are handled safely. Playback is
	    best-effort: if no player is available or launching it fails, a
	    message is printed and the function returns normally.

	    Args:
	        filename: Path of the audio file to play.
	    """
	    # Local imports keep this block self-contained; only ``os`` is
	    # imported at module level.
	    import shutil
	    import subprocess
	    import sys

	    print("Playing audio...")

	    if sys.platform.startswith("win"):
	        # Same effect as the shell's ``start`` command, without a shell.
	        try:
	            os.startfile(filename)
	        except OSError as exc:
	            print(f"Could not play {filename}: {exc}")
	        return

	    if sys.platform == "darwin":
	        candidates = ("open",)
	    else:
	        candidates = ("xdg-open", "mpg321")

	    for candidate in candidates:
	        player = shutil.which(candidate)
	        if player is None:
	            continue
	        try:
	            # Argument list (no shell): the file name is one literal argument.
	            subprocess.run([player, filename], check=False)
	        except OSError as exc:
	            print(f"Could not play {filename}: {exc}")
	        return

	    print(
	        f"Could not play {filename}: no audio player found "
	        f"(tried {', '.join(candidates)})"
	    )