Spaces:
Sleeping
Sleeping
from pydub import AudioSegment | |
from pydub.playback import play | |
import whisper | |
import soundfile as sf | |
from gtts import gTTS | |
import os | |
import tempfile | |
# Load the Whisper speech-to-text model once, at import time, so that
# transcribe_audio() can reuse it on every call.
# "base" is the checkpoint chosen here; "small", "medium", or "large" can be
# tried instead.
model = whisper.load_model("base")
# Function to record audio using pydub and save it as a .wav file | |
def record_audio(filename, sec=5, sr=44100):
    """Write a placeholder "recording" to ``filename`` as a WAV file.

    No microphone is read. A 440 Hz sine tone is synthesised instead so the
    rest of the pipeline has a file to work with; replace the tone
    generation with real microphone capture for actual use.

    Args:
        filename: Destination passed to pydub's ``export``.
        sec: Length of the generated tone, in seconds.
        sr: Sample rate of the generated tone, in Hz.
    """
    # Imported locally (as the original did): the generator is only used here.
    from pydub.generators import Sine

    print("Recording...")
    # pydub expresses durations in milliseconds. `sr` is forwarded to the
    # generator so the sample-rate argument actually takes effect; it was
    # previously accepted but ignored.
    tone = Sine(440, sample_rate=sr).to_audio_segment(duration=sec * 1000)
    tone.export(filename, format="wav")
    print(f"Audio saved as {filename}")
# Function to transcribe audio using Whisper | |
def transcribe_audio(filename):
    """Transcribe an audio file with the module-level Whisper ``model``.

    Prints a progress message and the recognised text.

    Args:
        filename: Audio file handed to ``model.transcribe``.

    Returns:
        A dict with a single ``"text"`` key holding the transcription.
    """
    print("Transcribing audio...")
    transcript = model.transcribe(filename)["text"]
    print(f"Transcription: {transcript}")
    return dict(text=transcript)
# Function to save text as an audio file using gTTS (Google Text-to-Speech) | |
def save_text_as_audio(text, audio_filename):
    """Synthesise ``text`` as English speech with gTTS and save it to disk.

    Args:
        text: The text to be spoken.
        audio_filename: Path the synthesised audio is saved to.
    """
    print("Converting text to speech...")
    # Build the request and write the result in one step; normal speed.
    gTTS(text=text, lang='en', slow=False).save(audio_filename)
    print(f"Audio saved as {audio_filename}")
# Function to play audio using pydub's playback | |
def play_audio(filename):
    """Play an audio file through pydub's ``play`` helper.

    The file is opened with ``AudioSegment.from_file`` rather than
    ``from_wav`` so that non-WAV input also loads. That includes files
    written by ``save_text_as_audio``, because gTTS produces MP3 data.
    WAV files continue to work as before.

    Args:
        filename: Path of the audio file to play.
    """
    print("Playing audio...")
    # from_file detects the container/codec instead of assuming WAV.
    # NOTE(review): decoding non-WAV formats relies on pydub's ffmpeg/libav
    # backend being installed - confirm for the deployment environment.
    audio = AudioSegment.from_file(filename)
    play(audio)