"""Small audio helpers: placeholder recording, Whisper transcription, gTTS speech, playback."""

import os
import tempfile

import soundfile as sf
import whisper
from gtts import gTTS
from pydub import AudioSegment
from pydub.generators import Sine
from pydub.playback import play

# Load Whisper model once at import time so every transcription reuses it.
# You can also try "small", "medium", or "large".
model = whisper.load_model("base")


def record_audio(filename, sec=5, sr=44100):
    """Write a placeholder "recording" to ``filename`` as a WAV file.

    No microphone is read: a 440 Hz sine tone stands in for captured audio.
    Replace the generator with real microphone capture for actual use.

    Args:
        filename: Destination path passed to pydub's ``export``.
        sec: Length of the generated clip in seconds.
        sr: Sample rate in Hz used to generate the tone.
    """
    print("Recording...")

    # pydub durations are in milliseconds. The sample rate is forwarded to
    # the generator so that the `sr` argument is actually honoured.
    tone = Sine(440, sample_rate=sr).to_audio_segment(duration=sec * 1000)
    tone.export(filename, format="wav")

    print(f"Audio saved as {filename}")


def transcribe_audio(filename):
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        filename: Path of the audio file handed to ``model.transcribe``.

    Returns:
        A dict with a single key, ``"text"``, holding the transcription.
    """
    print("Transcribing audio...")

    result = model.transcribe(filename)
    text = result['text']

    print(f"Transcription: {text}")
    return {"text": text}


def save_text_as_audio(text, audio_filename):
    """Convert ``text`` to English speech with gTTS and save it.

    NOTE(review): per the gTTS documentation the saved data is MP3, whatever
    extension ``audio_filename`` carries -- confirm callers expect that.

    Args:
        text: Text to synthesize.
        audio_filename: Destination path passed to ``gTTS.save``.
    """
    print("Converting text to speech...")

    tts = gTTS(text=text, lang='en', slow=False)
    tts.save(audio_filename)

    print(f"Audio saved as {audio_filename}")


def play_audio(filename):
    """Play an audio file through pydub's playback helper.

    Args:
        filename: Path of the audio file to play.
    """
    print("Playing audio...")

    # from_file (rather than from_wav) lets pydub pick the decoder, so WAV
    # files load as before and the output of save_text_as_audio is playable
    # as well.
    audio = AudioSegment.from_file(filename)
    play(audio)