Spaces:

shukdevdatta123
/

VoiceChat

Running

VoiceChat / text_speech_utils.py

Update text_speech_utils.py

37784fb verified 6 months ago

1.61 kB

	from pydub import AudioSegment
	from pydub.playback import play
	import whisper
	import soundfile as sf
	from gtts import gTTS
	import os
	import tempfile

	# Load the Whisper speech-to-text model once, at import time, so that every
	# transcribe_audio() call reuses the same loaded instance.
	model = whisper.load_model("base") # You can also try "small", "medium", or "large"

	# Function to record audio using pydub and save it as a .wav file
	def record_audio(filename, sec=5, sr=44100):
	    """Write a placeholder "recording" to ``filename`` as a WAV file.

	    No microphone is read here: a 440 Hz sine tone lasting ``sec`` seconds
	    is synthesized instead, standing in for real capture code.

	    Args:
	        filename: Destination passed to pydub's ``export``.
	        sec: Length of the generated tone, in seconds.
	        sr: Sample rate, in Hz, used to synthesize the tone.
	    """
	    # Imported locally because the generator is only needed by this placeholder.
	    from pydub.generators import Sine

	    print("Recording...")
	    # NOTE: replace this synthesis step with actual microphone recording code.
	    # The generator is told about `sr` explicitly; otherwise pydub falls back
	    # to its own default rate and the caller's argument is silently ignored.
	    tone = Sine(440, sample_rate=sr).to_audio_segment(duration=sec * 1000)
	    tone.export(filename, format="wav")
	    print(f"Audio saved as {filename}")

	# Function to transcribe audio using Whisper
	def transcribe_audio(filename):
	    """Run the module-level Whisper model on ``filename``.

	    Prints progress and the recognized text, then returns the text wrapped
	    in a dict under the ``"text"`` key.
	    """
	    print("Transcribing audio...")
	    transcript = model.transcribe(filename)['text']
	    print(f"Transcription: {transcript}")
	    return dict(text=transcript)

	# Function to save text as an audio file using gTTS (Google Text-to-Speech)
	def save_text_as_audio(text, audio_filename):
	    """Synthesize ``text`` as English speech with gTTS and save it.

	    Args:
	        text: The text to speak.
	        audio_filename: Path the synthesized audio is written to.
	    """
	    print("Converting text to speech...")
	    # Normal (not slowed) English speech, written straight to the target path.
	    gTTS(text=text, lang='en', slow=False).save(audio_filename)
	    print(f"Audio saved as {audio_filename}")

	# Function to play audio using pydub's playback
	def play_audio(filename):
	    """Decode the audio file at ``filename`` and play it with pydub.

	    The container format is detected by pydub instead of being forced to
	    WAV: save_text_as_audio() stores gTTS output, which is MP3 data, and a
	    WAV-only loader cannot decode it. Plain ``.wav`` files load as before.

	    NOTE(review): decoding non-WAV input relies on pydub's external
	    converter (ffmpeg/avconv) being installed - confirm for the deployment.
	    """
	    print("Playing audio...")
	    clip = AudioSegment.from_file(filename)
	    play(clip)