Spaces:
Sleeping
Sleeping
import speech_recognition as sr | |
import os | |
from pydub import AudioSegment | |
import tempfile | |
class VoiceHandler:
    """Turn recorded speech into text using ``speech_recognition``.

    The handler owns a single ``sr.Recognizer`` configured with a fixed
    energy threshold (dynamic adjustment is switched off) and exposes two
    operations: transcribing an audio file and probing microphone access.
    """

    def __init__(self):
        """Create the recognizer and apply the fixed listening settings."""
        self.recognizer = sr.Recognizer()
        self.recognizer.energy_threshold = 20000
        self.recognizer.dynamic_energy_threshold = False
        self.recognizer.pause_threshold = 0.8

    def process_audio_file(self, audio_path: str, language: str) -> str:
        """Process audio file and convert to text.

        Args:
            audio_path: Path of the audio file to transcribe. Anything that
                does not end in ``.wav`` (case-insensitive) is first
                converted to a temporary WAV file with pydub.
            language: ``"Arabic"`` transcribes as ``ar-SA``;
                ``"Mixed (Arabic/English)"`` tries ``ar-SA`` and falls back
                to ``en-US`` when the audio is not understood; any other
                value transcribes as ``en-US``.

        Returns:
            The recognized text on success. On failure no exception is
            raised; a human-readable message starting with ``"Error:"`` is
            returned instead.
        """
        # Fail fast with a clear message instead of letting the decoder
        # raise a less helpful error further down.
        if not audio_path or not os.path.isfile(audio_path):
            return f"Error: Audio file not found: {audio_path}"

        # Remembered so the ``finally`` clause knows whether there is a
        # temporary file to delete.
        temp_wav_path = None
        try:
            wav_path = audio_path
            # Convert audio to wav format if needed
            if not audio_path.lower().endswith('.wav'):
                segment = AudioSegment.from_file(audio_path)
                # Only the unique name is needed; close the handle right
                # away so the descriptor is not leaked and the exporter can
                # reopen the path on every platform.
                with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
                    temp_wav_path = temp_wav.name
                segment.export(temp_wav_path, format='wav')
                wav_path = temp_wav_path

            with sr.AudioFile(wav_path) as source:
                audio_data = self.recognizer.record(source)

            return self._recognize(audio_data, language)
        except sr.RequestError as e:
            return f"Error: Could not request results from speech service: {str(e)}"
        except sr.UnknownValueError:
            return "Error: Could not understand audio. Please speak clearly and try again."
        except Exception as e:
            # Boundary handler: callers expect a string, never an exception.
            return f"Error: {str(e)}"
        finally:
            # Clean up temporary file if it was created
            if temp_wav_path is not None:
                try:
                    os.unlink(temp_wav_path)
                except OSError:
                    # Best-effort cleanup: a failed delete must not replace
                    # the transcript or error message being returned.
                    pass

    def _recognize(self, audio_data, language: str) -> str:
        """Transcribe *audio_data* with the locale(s) selected by *language*.

        Raises whatever ``recognize_google`` raises (the caller handles
        ``sr.RequestError`` and ``sr.UnknownValueError``).
        """
        if language == "Arabic":
            return self.recognizer.recognize_google(audio_data, language="ar-SA")
        if language == "Mixed (Arabic/English)":
            try:
                return self.recognizer.recognize_google(audio_data, language="ar-SA")
            except sr.UnknownValueError:
                # Arabic pass produced nothing intelligible; retry as English.
                return self.recognizer.recognize_google(audio_data, language="en-US")
        # English (also the fallback for any unrecognised language label)
        return self.recognizer.recognize_google(audio_data, language="en-US")

    def check_microphone_access(self) -> bool:
        """Check if microphone is accessible.

        Returns:
            ``True`` when a microphone could be opened and briefly sampled;
            ``False`` when doing so raised ``OSError``, ``AttributeError``
            or ``sr.RequestError``.
        """
        try:
            with sr.Microphone() as source:
                # NOTE(review): adjust_for_ambient_noise presumably updates
                # recognizer.energy_threshold, which would override the
                # fixed value set in __init__ - confirm this is intended.
                self.recognizer.adjust_for_ambient_noise(source, duration=0.1)
            return True
        except (OSError, AttributeError, sr.RequestError):
            return False