VoiceChat / text_speech_utils.py
shukdevdatta123's picture
Update text_speech_utils.py
37784fb verified
raw
history blame
1.61 kB
from pydub import AudioSegment
from pydub.playback import play
import whisper
import soundfile as sf
from gtts import gTTS
import os
import tempfile
# Load the Whisper speech-to-text model once, at import time, so that every
# call to transcribe_audio() below reuses this single module-level instance.
model = whisper.load_model("base") # You can also try "small", "medium", or "large"
# Function to record audio using pydub and save it as a .wav file
def record_audio(filename, sec=5, sr=44100):
    """Write a placeholder "recording" to ``filename`` as a WAV file.

    No microphone is read. A 440 Hz sine tone is synthesised instead, as a
    stand-in until real capture code is wired in.

    Args:
        filename: Destination path of the WAV file.
        sec: Length of the generated clip, in seconds.
        sr: Sample rate, in Hz, used to render the tone.
    """
    # Imported locally, as in the original, so the generator module is only
    # loaded when a recording is actually requested.
    from pydub.generators import Sine

    print("Recording...")

    # NOTE: replace this synthetic tone with actual microphone recording code.
    # `sr` is forwarded to the generator so the sample-rate argument is
    # honoured rather than silently ignored; pydub durations are in
    # milliseconds, hence the * 1000.
    tone = Sine(440, sample_rate=sr).to_audio_segment(duration=sec * 1000)
    tone.export(filename, format="wav")

    print(f"Audio saved as {filename}")
# Function to transcribe audio using Whisper
def transcribe_audio(filename):
    """Transcribe the audio file at ``filename`` with the shared Whisper model.

    Args:
        filename: Path of the audio file handed to ``model.transcribe``.

    Returns:
        A dict with a single ``"text"`` key holding the transcription.
    """
    print("Transcribing audio...")
    # Only the "text" entry of the transcription result is kept.
    transcript = model.transcribe(filename)["text"]
    print(f"Transcription: {transcript}")
    return dict(text=transcript)
# Function to save text as an audio file using gTTS (Google Text-to-Speech)
def save_text_as_audio(text, audio_filename):
    """Synthesise ``text`` as English speech with gTTS and save it to disk.

    Args:
        text: The text to speak.
        audio_filename: Path the synthesised audio is written to.
            NOTE(review): gTTS is documented to emit MP3 data -- confirm the
            extension callers pass matches.
    """
    print("Converting text to speech...")
    # Normal-speed English voice, written straight to the target path.
    gTTS(text=text, lang="en", slow=False).save(audio_filename)
    print(f"Audio saved as {audio_filename}")
# Function to play audio using pydub's playback
def play_audio(filename):
    """Play the audio file at ``filename`` through pydub's playback helper.

    Args:
        filename: Path of the audio file to play.
    """
    print("Playing audio...")
    # Use the generic loader rather than from_wav(): from_wav() forces WAV
    # decoding, which rejects the files written by save_text_as_audio()
    # (gTTS output is MP3). from_file() without an explicit format lets pydub
    # detect the container, so WAV files keep working and MP3 files now play.
    clip = AudioSegment.from_file(filename)
    play(clip)