import os
import tempfile
import time

import soundfile as sf
import whisper
from gtts import gTTS
from pydub import AudioSegment
from pydub.generators import Sine
from pydub.playback import play

# Load the Whisper model once at import time so every transcription reuses it.
model = whisper.load_model("base")  # You can also try "small", "medium", or "large"


# Function to record audio using pydub and save it as a .wav file
def record_audio(filename, sec=5, sr=44100):
    """Write a placeholder "recording" to ``filename`` as a WAV file.

    No microphone is read: a 440 Hz sine wave stands in for real input.
    Replace the generator call with actual capture code for real use.

    Args:
        filename: Destination path of the WAV file.
        sec: Length of the generated audio, in seconds.
        sr: Sample rate, in Hz, used to generate the tone.
    """
    print("Recording...")
    # NOTE: placeholder signal only; swap in microphone recording code here.
    sine_wave = Sine(440, sample_rate=sr).to_audio_segment(duration=sec * 1000)
    sine_wave.export(filename, format="wav")
    print(f"Audio saved as {filename}")


# Function to transcribe audio using Whisper
def transcribe_audio(filename):
    """Transcribe the audio file at ``filename`` with the module-level model.

    Args:
        filename: Path of the audio file to transcribe.

    Returns:
        A dict with a single key ``"text"`` holding the transcription.
    """
    print("Transcribing audio...")
    result = model.transcribe(filename)
    text = result['text']
    print(f"Transcription: {text}")
    return {"text": text}


def _synthesize_to_wav(text, audio_filename):
    """Run one gTTS synthesis attempt and export the result as WAV."""
    # Use a dedicated temporary file for the intermediate mp3 so it can never
    # collide with ``audio_filename`` (deriving it by string replacement would
    # alias the output whenever the name does not contain ".wav").
    fd, mp3_filename = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # gTTS reopens the path itself; release our handle first.
    try:
        tts = gTTS(text=text, lang='en', slow=False)
        tts.save(mp3_filename)
        print(f"Audio saved as {mp3_filename}")

        # Convert mp3 to wav using pydub
        audio = AudioSegment.from_mp3(mp3_filename)
        audio.export(audio_filename, format="wav")
    finally:
        # Remove the intermediate mp3 whether or not the conversion succeeded.
        if os.path.exists(mp3_filename):
            os.remove(mp3_filename)


# Function to save text as an audio file using gTTS (Google Text-to-Speech)
def save_text_as_audio(text, audio_filename, max_retries=3, retry_delay=60):
    """Convert ``text`` to speech with gTTS and save it to ``audio_filename``.

    The speech is first written to a temporary mp3 and then converted to WAV
    with pydub. Failed attempts (for example a rate limit) are retried a
    bounded number of times.

    Args:
        text: Text to speak. Must contain at least one non-blank character.
        audio_filename: Destination path of the WAV file.
        max_retries: How many times to retry after the first failed attempt.
        retry_delay: Seconds to wait between attempts.

    Raises:
        ValueError: If ``text`` is empty or blank; retrying cannot fix that.
        Exception: The error of the final attempt, once retries are exhausted.
    """
    if not text or not text.strip():
        raise ValueError("Cannot convert empty text to speech")

    print("Converting text to speech...")
    attempts = max(1, max_retries + 1)
    for attempt in range(1, attempts + 1):
        try:
            _synthesize_to_wav(text, audio_filename)
        except Exception as e:
            print(f"Error occurred during text-to-speech conversion: {e}")
            if attempt == attempts:
                # Out of retries: surface the failure instead of looping forever.
                raise
            # In case of a transient error (like hitting the rate limit),
            # wait before trying again.
            print(f"Waiting for {retry_delay} seconds before retrying...")
            time.sleep(retry_delay)
        else:
            print(f"Audio converted and saved as {audio_filename}")
            return
# Playback helper built on pydub's playback module
def play_audio(filename):
    """Load the WAV file at ``filename`` and hand it to pydub's ``play``.

    Args:
        filename: Path of the WAV file to play.
    """
    print("Playing audio...")
    play(AudioSegment.from_wav(filename))