Spaces:
Sleeping
Sleeping
import sounddevice as sd | |
import soundfile as sf | |
import speech_recognition as sr | |
from gtts import gTTS | |
import os | |
# Function to record audio using sounddevice and save it as a .wav file | |
def record_audio(filename, sec=5, sr=44100):
    """Capture single-channel microphone audio and write it to ``filename``.

    Args:
        filename: Destination path passed to ``sf.write``.
        sec: Length of the recording in seconds.
        sr: Sample rate in Hz. Inside this function the name refers to this
            number, not to the module-level ``sr`` alias.
    """
    print("Recording...")

    # The number of frames to capture is duration times sample rate.
    frame_count = int(sec * sr)
    samples = sd.rec(frame_count, samplerate=sr, channels=1, dtype='int16')

    # Block here until the capture has finished before touching the buffer.
    sd.wait()

    # Persist the captured samples at the same sample rate they were taken at.
    sf.write(filename, samples, sr)
    print(f"Audio saved as {filename}")
# Function to transcribe audio using Google's speech recognition | |
def transcribe_audio(filename):
    """Transcribe an audio file with Google's speech recognition.

    Args:
        filename: Path of the audio file opened through ``sr.AudioFile``.

    Returns:
        A dict with a single ``"text"`` key. On success it holds the
        transcription; when the audio cannot be understood or the request
        fails it holds a human-readable message instead (no exception is
        raised for those two cases).
    """
    engine = sr.Recognizer()

    # Load the audio from the file into an object the recognizer can consume.
    with sr.AudioFile(filename) as wav_source:
        captured = engine.record(wav_source)

    print("Transcribing audio...")
    try:
        transcript = engine.recognize_google(captured)
    except sr.UnknownValueError:
        return {"text": "Sorry, I couldn't understand the audio."}
    except sr.RequestError as err:
        return {"text": f"Error in request: {err}"}

    print(f"Transcription: {transcript}")
    return {"text": transcript}
# Function to save text as an audio file using gTTS (Google Text-to-Speech) | |
def save_text_as_audio(text, audio_filename):
    """Synthesize ``text`` as English speech with gTTS and save it.

    Args:
        text: The text to be spoken.
        audio_filename: Path the synthesized audio is saved to.
    """
    print("Converting text to speech...")

    # Normal-speed English voice; build the synthesizer and save in one step.
    gTTS(text=text, lang='en', slow=False).save(audio_filename)

    print(f"Audio saved as {audio_filename}")
# Function to play audio using the system's default audio player | |
def play_audio(filename):
    """Play an audio file with a player available on the current platform.

    The filename is never passed through a shell, so paths containing spaces
    or shell metacharacters are handled literally instead of being split or
    executed as commands.

    On Windows the file is opened with its associated application (the same
    effect as the ``start`` command). Elsewhere the platform opener
    (``open`` on macOS, ``xdg-open`` on other systems) is tried first and
    ``mpg321`` is used as a fallback.

    Args:
        filename: Path of the audio file to play.

    Returns:
        None. Failures are reported on stderr rather than raised, matching
        the best-effort behaviour of the previous ``os.system`` call.
    """
    # Function-scope imports keep this block self-contained.
    import shutil
    import subprocess
    import sys

    print("Playing audio...")
    target = str(filename)

    if sys.platform.startswith("win"):
        try:
            os.startfile(target)
        except OSError as err:
            print(f"Could not play {target}: {err}", file=sys.stderr)
        return

    opener = "open" if sys.platform == "darwin" else "xdg-open"
    for tool in (opener, "mpg321"):
        executable = shutil.which(tool)
        if executable is None:
            continue
        try:
            # List-form argv: the filename is a single argument, no shell.
            finished = subprocess.run([executable, target], check=False)
        except OSError:
            continue
        if finished.returncode == 0:
            return

    print(f"Could not play {target}: no working audio player found",
          file=sys.stderr)