import os
import shutil
import subprocess
import sys

import sounddevice as sd
import soundfile as sf
import whisper
from gtts import gTTS

# Load the Whisper model once at import time so every transcription reuses it.
# You can use other versions like "small", "medium", or "large".
model = whisper.load_model("base")


def record_audio(filename: str, sec: float = 5, sr: int = 44100) -> None:
    """Record from the default microphone and save the result to *filename*.

    Args:
        filename: Path of the audio file to write.
        sec: Recording length in seconds.
        sr: Sample rate in Hz, used for both recording and writing.
    """
    print("Recording...")
    # Mono, 16-bit integer samples; sec * sr is the total number of frames.
    audio_data = sd.rec(int(sec * sr), samplerate=sr, channels=1, dtype='int16')
    sd.wait()  # sd.rec returns immediately; wait until recording is done
    sf.write(filename, audio_data, sr)
    print(f"Audio saved as {filename}")


def transcribe_audio(filename: str) -> dict:
    """Transcribe the audio file *filename* with the module-level Whisper model.

    Args:
        filename: Path of the audio file to transcribe.

    Returns:
        A dict with a single key ``"text"`` holding the transcription.
    """
    print("Transcribing audio...")
    result = model.transcribe(filename)
    text = result['text']
    print(f"Transcription: {text}")
    return {"text": text}


def save_text_as_audio(text: str, audio_filename: str) -> None:
    """Convert *text* to English speech with gTTS and save it to *audio_filename*.

    Args:
        text: The text to speak.
        audio_filename: Path of the audio file to write.
    """
    print("Converting text to speech...")
    tts = gTTS(text=text, lang='en', slow=False)
    tts.save(audio_filename)
    print(f"Audio saved as {audio_filename}")


def play_audio(filename: str) -> None:
    """Play *filename* with an audio player available on the current platform.

    On Windows the file is opened with its associated application. On macOS
    ``open`` is used, and on other platforms ``mpg321`` is tried first and
    ``xdg-open`` second. The filename is never passed through a shell, so
    spaces and shell metacharacters in it are handled safely.

    Args:
        filename: Path of the audio file to play.
    """
    print("Playing audio...")
    if sys.platform.startswith("win"):
        # Equivalent of the shell's `start <file>`, without invoking a shell.
        os.startfile(filename)
        return

    if sys.platform == "darwin":
        candidates = ("open",)
    else:
        candidates = ("mpg321", "xdg-open")

    for player in candidates:
        if shutil.which(player):
            # Argument list (no shell): the filename is passed verbatim.
            subprocess.run([player, filename], check=False)
            return

    # Playback stays best-effort: report the problem instead of raising.
    print(f"No audio player found; tried: {', '.join(candidates)}")