# Spaces: Sleeping (page-scrape residue, not part of the program)
import os
import subprocess
import sys

import sounddevice as sd
import soundfile as sf
import whisper
from gtts import gTTS
# Load the Whisper model once, at module level, and keep it in the global `model`
model = whisper.load_model("base")  # Other model names such as "small", "medium", or "large" can be used instead
# --- Recording: microphone -> .wav file (sounddevice + soundfile) ---
def record_audio(filename, sec=5, sr=44100):
    """Record mono 16-bit audio from the microphone and write it to *filename*.

    Args:
        filename: Destination path passed to ``sf.write``.
        sec: Length of the recording in seconds.
        sr: Sample rate, used both for the capture and for the written file.
    """
    print("Recording...")
    frame_count = int(sec * sr)
    samples = sd.rec(frame_count, samplerate=sr, channels=1, dtype='int16')
    # Block here until the recording has finished before touching the buffer
    sd.wait()
    sf.write(filename, samples, sr)
    print(f"Audio saved as {filename}")
# --- Speech-to-text: audio file -> transcription (Whisper) ---
def transcribe_audio(filename):
    """Transcribe the audio file *filename* with the module-level Whisper ``model``.

    Args:
        filename: Path of the audio file handed to ``model.transcribe``.

    Returns:
        A dict with the single key ``"text"`` holding the transcription.
    """
    print("Transcribing audio...")
    transcript = model.transcribe(filename)['text']
    print(f"Transcription: {transcript}")
    return dict(text=transcript)
# --- Text-to-speech: text -> audio file (gTTS, Google Text-to-Speech) ---
def save_text_as_audio(text, audio_filename):
    """Synthesize *text* as English speech and save it to *audio_filename*.

    Args:
        text: The text to be spoken.
        audio_filename: Destination path passed to ``gTTS.save``.
    """
    print("Converting text to speech...")
    speech_options = {"text": text, "lang": 'en', "slow": False}
    gTTS(**speech_options).save(audio_filename)
    print(f"Audio saved as {audio_filename}")
# --- Playback: open an audio file with the system's default player ---
def play_audio(filename):
    """Open *filename* with the operating system's default application.

    The file name is handed to the launcher as a single argument and never
    passes through a shell, so spaces or shell metacharacters in the name
    cannot break the command or inject additional ones.

    Launchers used per platform:
        * Windows: ``os.startfile``
        * macOS: ``open``
        * anything else: ``xdg-open``

    Playback is best-effort: if the launcher is missing or the file cannot
    be opened, a message is printed and the function returns normally.

    Args:
        filename: Path of the audio file to play.
    """
    print("Playing audio...")
    path = str(filename)
    try:
        if sys.platform.startswith("win"):
            os.startfile(path)
        elif sys.platform == "darwin":
            subprocess.run(["open", path], check=False)
        else:
            subprocess.run(["xdg-open", path], check=False)
    except OSError as err:
        # Covers a missing launcher binary as well as a missing/unopenable file
        print(f"Could not play {path}: {err}")