Spaces:

shukdevdatta123
/

VoiceChat

Sleeping

VoiceChat / text_speech_utils.py

Update text_speech_utils.py

27541b9 verified 6 months ago

1.44 kB

	import sounddevice as sd
	import soundfile as sf
	import whisper
	from gtts import gTTS
	import os

	# Load the Whisper model once, at import time; transcribe_audio() uses
	# this module-level instance.
	model = whisper.load_model("base") # Other model names such as "small", "medium", or "large" can be used instead

	def record_audio(filename, sec=5, sr=44100):
	    """Capture mono microphone audio and write it to ``filename``.

	    Args:
	        filename: Destination path handed to ``soundfile.write``.
	        sec: Length of the recording in seconds (default 5).
	        sr: Sample rate used both for capture and for the saved file
	            (default 44100).
	    """
	    print("Recording...")
	    frame_count = int(sec * sr)
	    # One input channel of 16-bit integer samples.
	    samples = sd.rec(frame_count, samplerate=sr, channels=1, dtype='int16')
	    # Block until the recording has finished before touching the buffer.
	    sd.wait()

	    sf.write(filename, samples, sr)
	    print(f"Audio saved as {filename}")

	def transcribe_audio(filename):
	    """Transcribe an audio file with the module-level Whisper model.

	    Args:
	        filename: Path of the audio file passed to ``model.transcribe``.

	    Returns:
	        A dict with the single key ``"text"`` holding the transcription.
	    """
	    print("Transcribing audio...")
	    transcript = model.transcribe(filename)['text']
	    print(f"Transcription: {transcript}")
	    return {"text": transcript}

	def save_text_as_audio(text, audio_filename):
	    """Convert ``text`` to English speech with gTTS and save the result.

	    Args:
	        text: The text to be spoken.
	        audio_filename: Path the generated audio is saved to.
	    """
	    print("Converting text to speech...")
	    # Build the gTTS request (English, normal speed) and write it straight out.
	    gTTS(text=text, lang='en', slow=False).save(audio_filename)
	    print(f"Audio saved as {audio_filename}")

	def play_audio(filename):
	    """Open an audio file with the system's default player.

	    The file is handed to the platform's "open with default application"
	    mechanism: ``os.startfile`` on Windows, ``open`` on macOS and
	    ``xdg-open`` on other systems. The filename is never interpolated into
	    a shell command line, so spaces and shell metacharacters in the path
	    are treated as part of the name rather than being executed.

	    Playback is best-effort: if the file cannot be opened or no opener is
	    available, a message is printed and the function returns normally.

	    Args:
	        filename: Path of the audio file to play.
	    """
	    # Function-scope imports keep this block self-contained.
	    import subprocess
	    import sys

	    print("Playing audio...")
	    path = str(filename)
	    try:
	        if sys.platform.startswith("win"):
	            # Same effect as the shell's `start <file>`, without a shell.
	            os.startfile(path)
	        else:
	            opener = "open" if sys.platform == "darwin" else "xdg-open"
	            # Argument list (no shell): the path is passed as one argv entry.
	            subprocess.run([opener, path], check=False)
	    except OSError as err:
	        # Missing file, missing opener, or no associated application.
	        print(f"Could not play {path}: {err}")