Spaces:

shukdevdatta123
/

VoiceChat

Running

File size: 1,606 Bytes

37784fb
 
27541b9
37784fb
896746e
0cfe2ed
37784fb
0cfe2ed
37784fb
 
27541b9
37784fb
0cfe2ed
37784fb
 
 
 
 
d19e200
 
37784fb
 
 
 
 
d19e200
896746e
27541b9
896746e
27541b9
 
 
 
 
896746e
d19e200
896746e
d19e200
 
0cfe2ed
d19e200
0cfe2ed
37784fb
0cfe2ed
d19e200
37784fb

from pydub import AudioSegment
from pydub.playback import play
import whisper
import soundfile as sf
from gtts import gTTS
import os
import tempfile

# Load the Whisper model once at import time. The module-level `model` is the
# object transcribe_audio() calls `.transcribe()` on.
model = whisper.load_model("base")  # You can also try "small", "medium", or "large"

# Function to record audio using pydub and save it as a .wav file
def record_audio(filename, sec=5, sr=44100):
    """Write a placeholder "recording" to ``filename`` as a WAV file.

    No microphone is read here: a 440 Hz sine tone stands in for real input.
    Replace the tone generation with actual capture code for real use.

    Args:
        filename: Destination handed to pydub's ``export``.
        sec: Length of the generated tone, in seconds.
        sr: Sample rate, in Hz, used to synthesise the tone.
    """
    # Imported locally, as in the original, so the generator dependency is
    # only needed when this placeholder is actually used.
    from pydub.generators import Sine

    print("Recording...")

    # Forward `sr` to the generator; previously the parameter was accepted but
    # ignored, so the tone was always produced at the generator's default rate.
    # `to_audio_segment` takes its duration in milliseconds.
    tone = Sine(440, sample_rate=sr).to_audio_segment(duration=sec * 1000)

    tone.export(filename, format="wav")
    print(f"Audio saved as {filename}")

# Function to transcribe audio using Whisper
def transcribe_audio(filename):
    """Run the module-level Whisper ``model`` on ``filename``.

    Prints progress and the recognised text, then returns a dict whose only
    key, ``"text"``, holds the ``'text'`` entry of the model's result.
    """
    print("Transcribing audio...")
    transcript = model.transcribe(filename)['text']
    print(f"Transcription: {transcript}")
    return dict(text=transcript)

# Function to save text as an audio file using gTTS (Google Text-to-Speech)
def save_text_as_audio(text, audio_filename):
    """Synthesise ``text`` with gTTS and write the result to ``audio_filename``.

    The speech is requested in English (``lang='en'``) at normal speed
    (``slow=False``). Progress messages are printed before and after saving.
    """
    print("Converting text to speech...")
    gTTS(text=text, lang='en', slow=False).save(audio_filename)
    print(f"Audio saved as {audio_filename}")

# Function to play audio using pydub's playback
def play_audio(filename):
    """Load ``filename`` with pydub and play it.

    The file is first read as WAV, exactly as before. If that cannot be
    decoded, loading is retried with pydub's format auto-detection so that
    non-WAV files (for example the gTTS output written by
    ``save_text_as_audio``, which is presumably MP3 -- confirm against the
    gTTS docs) can be played as well.

    Args:
        filename: Path of the audio file to play.
    """
    # Local import keeps the file's top-level import block untouched.
    from pydub.exceptions import CouldntDecodeError

    print("Playing audio...")
    try:
        audio = AudioSegment.from_wav(filename)
    except CouldntDecodeError:
        # Not WAV data: let the decoder backend detect the real format.
        audio = AudioSegment.from_file(filename)
    play(audio)