Spaces:
Sleeping
Sleeping
File size: 1,606 Bytes
37784fb 27541b9 37784fb 896746e 0cfe2ed 37784fb 0cfe2ed 37784fb 27541b9 37784fb 0cfe2ed 37784fb d19e200 37784fb d19e200 896746e 27541b9 896746e 27541b9 896746e d19e200 896746e d19e200 0cfe2ed d19e200 0cfe2ed 37784fb 0cfe2ed d19e200 37784fb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
from pydub import AudioSegment
from pydub.playback import play
import whisper
import soundfile as sf
from gtts import gTTS
import os
import tempfile
# Load the Whisper model once, at module import, and keep it in the
# module-level name `model`; transcribe_audio() below reads this global
# instead of loading a model per call. "base" is the checkpoint selected here.
model = whisper.load_model("base") # You can also try "small", "medium", or "large"
# Function to "record" audio using pydub and save it as a .wav file
def record_audio(filename, sec=5, sr=44100):
    """Write a placeholder recording to ``filename`` in WAV format.

    No microphone is read: a 440 Hz sine tone stands in for captured audio
    until real input code replaces it.

    Args:
        filename: Destination handed to ``AudioSegment.export``.
        sec: Length of the generated tone, in seconds.
        sr: Sample rate of the generated tone, in Hz.

    Raises:
        ValueError: If ``sec`` is negative or ``sr`` is not positive.
    """
    # Kept as a function-scope import, as in the original code.
    from pydub.generators import Sine

    if sec < 0:
        raise ValueError(f"sec must be non-negative, got {sec!r}")
    if sr <= 0:
        raise ValueError(f"sr must be positive, got {sr!r}")

    print("Recording...")
    # NOTE: replace the tone generation below with actual microphone capture.
    # The requested sample rate is forwarded to the generator; previously the
    # `sr` argument was accepted but silently ignored.
    tone = Sine(440, sample_rate=sr).to_audio_segment(duration=sec * 1000)
    tone.export(filename, format="wav")
    print(f"Audio saved as {filename}")
# Function to transcribe audio using Whisper
def transcribe_audio(filename):
    """Transcribe the audio at ``filename`` with the module-level Whisper model.

    Prints a progress message and the transcription, then returns the
    transcription wrapped in a dict under the ``"text"`` key.
    """
    print("Transcribing audio...")
    # Only the "text" entry of the transcription result is used.
    text = model.transcribe(filename)["text"]
    print(f"Transcription: {text}")
    return dict(text=text)
# Function to save text as an audio file using gTTS (Google Text-to-Speech)
def save_text_as_audio(text, audio_filename):
    """Synthesize ``text`` as English speech and save it to ``audio_filename``.

    Prints a progress message before synthesis and a confirmation after the
    file has been saved.
    """
    print("Converting text to speech...")
    # Build the synthesizer and write its output in a single chained call.
    gTTS(text=text, lang="en", slow=False).save(audio_filename)
    print(f"Audio saved as {audio_filename}")
# Function to play audio using pydub's playback
def play_audio(filename):
    """Load the WAV file at ``filename`` and play it through pydub."""
    print("Playing audio...")
    # Load and play in one expression; the file is read with the WAV loader.
    play(AudioSegment.from_wav(filename))
|