Spaces:

shukdevdatta123
/

VoiceChat

Running

App Files Files Community

VoiceChat / text_speech_utils.py

shukdevdatta123

Update text_speech_utils.py

61eb037 verified 6 months ago

raw

history blame

2.45 kB

	from pydub import AudioSegment
	from pydub.playback import play
	import whisper
	import soundfile as sf
	from gtts import gTTS
	import os
	import tempfile
	import time

	# Load the Whisper model once, at import time; transcribe_audio() below
	# reuses this module-level instance for every transcription call.
	model = whisper.load_model("base") # You can also try "small", "medium", or "large"

	# Function that simulates a recording: it writes a placeholder sine tone to a .wav file using pydub (no microphone input yet)
	def record_audio(filename, sec=5, sr=44100):
	    """Write a placeholder "recording" to *filename* as a WAV file.

	    No microphone is read. A 440 Hz sine tone is synthesized instead, so
	    the rest of the pipeline has an audio file to work with. Replace the
	    tone generation with real microphone capture when one is available.

	    Args:
	        filename: Destination path of the WAV file.
	        sec: Length of the generated audio in seconds.
	        sr: Sample rate, in Hz, of the generated audio.

	    Returns:
	        None. The audio is written to *filename*.
	    """
	    from pydub.generators import Sine

	    print("Recording...")

	    # Honor the requested sample rate; pydub's generators otherwise fall
	    # back to their own default regardless of what the caller passed.
	    # to_audio_segment() takes its duration in milliseconds.
	    tone = Sine(440, sample_rate=sr).to_audio_segment(duration=sec * 1000)

	    # export() hands back the open output file; close it explicitly rather
	    # than leaving it to the garbage collector.
	    tone.export(filename, format="wav").close()
	    print(f"Audio saved as {filename}")

	# Function to transcribe audio using Whisper
	def transcribe_audio(filename):
	    """Transcribe the audio file at *filename* with the module-level Whisper model.

	    Args:
	        filename: Path of the audio file handed to ``model.transcribe``.

	    Returns:
	        A dict with a single ``"text"`` key holding the transcription.
	    """
	    print("Transcribing audio...")
	    transcript = model.transcribe(filename)['text']
	    print(f"Transcription: {transcript}")
	    return {"text": transcript}

	# Function to save text as an audio file using gTTS (Google Text-to-Speech)
	def save_text_as_audio(text, audio_filename, max_retries=3, retry_delay=60):
	    """Convert *text* to speech with gTTS and save it as a WAV file.

	    gTTS only produces mp3, so the speech is first written to a temporary
	    mp3 file, converted to WAV with pydub, and the temporary file is then
	    removed whether or not the conversion succeeded.

	    Args:
	        text: The text to speak. Must contain at least one non-blank
	            character.
	        audio_filename: Destination path of the WAV file.
	        max_retries: How many times to retry after a failed attempt
	            (e.g. when the TTS service rate-limits the request).
	        retry_delay: Seconds to wait between attempts.

	    Returns:
	        None. The audio is written to *audio_filename*.

	    Raises:
	        ValueError: If *text* is empty or blank; retrying cannot fix that.
	        Exception: The error of the last attempt, once all retries are
	            exhausted.
	    """
	    if not text or not text.strip():
	        raise ValueError("text must be a non-empty string")

	    print("Converting text to speech...")

	    total_attempts = max(0, max_retries) + 1
	    for attempt in range(1, total_attempts + 1):
	        # Use a dedicated temporary mp3 instead of deriving its name from
	        # audio_filename: if that name had no '.wav' in it, both paths
	        # would be identical and the cleanup below would delete the result.
	        fd, mp3_filename = tempfile.mkstemp(suffix=".mp3")
	        os.close(fd)
	        try:
	            tts = gTTS(text=text, lang='en', slow=False)
	            tts.save(mp3_filename)
	            print(f"Audio saved as {mp3_filename}")

	            # Convert mp3 to wav using pydub; export() returns the open
	            # output file, so close it explicitly.
	            audio = AudioSegment.from_mp3(mp3_filename)
	            audio.export(audio_filename, format="wav").close()
	            print(f"Audio converted and saved as {audio_filename}")
	            return
	        except Exception as e:
	            print(f"Error occurred during text-to-speech conversion: {e}")
	            if attempt == total_attempts:
	                # Out of retries: surface the failure instead of looping
	                # (and sleeping) forever.
	                raise
	            print(f"Waiting for {retry_delay} seconds before retrying...")
	            time.sleep(retry_delay)
	        finally:
	            # Always remove the temporary mp3, on success and on failure.
	            if os.path.exists(mp3_filename):
	                os.remove(mp3_filename)

	# Function to play audio using pydub's playback
	def play_audio(filename):
	    """Load the WAV file at *filename* and play it through pydub's playback.

	    Args:
	        filename: Path of the WAV file to play.
	    """
	    print("Playing audio...")
	    play(AudioSegment.from_wav(filename))