import speech_recognition as sr
import streamlit as st
from typing import Optional, Tuple
import platform
import sys

# User-facing help texts. Kept at module level so the same wording is reused
# wherever the "microphone missing" condition is reported.
_MIC_NOT_INITIALIZED_MESSAGE = (
    "Microphone not initialized properly. Please ensure:\n"
    "1. You have a working microphone connected\n"
    "2. Your browser supports audio input\n"
    "3. You're using a modern browser (Chrome, Firefox, Edge)"
)

_TROUBLESHOOTING_MARKDOWN = (
    "### 🎤 Troubleshooting Steps:\n"
    "\n"
    "1. Check browser compatibility (Chrome/Firefox/Edge recommended)\n"
    "2. Verify microphone connection\n"
    "3. Check system sound settings\n"
    "4. Try a different microphone if available\n"
    "5. Restart browser if needed\n"
)


class VoiceHandler:
    """Capture speech from a microphone and transcribe it to text.

    Wraps a ``speech_recognition.Recognizer`` and reports progress, warnings
    and errors through Streamlit status elements (``st.info``, ``st.warning``,
    ``st.error``).

    Attributes:
        recognizer: The configured ``sr.Recognizer`` instance.
        permission_granted: ``True`` once a microphone access check has
            succeeded; the check is then skipped on later calls.
        mic: The ``sr.Microphone`` to record from. Only set when
            initialization succeeded; otherwise the attribute is absent.
    """

    def __init__(self):
        """Configure the recognizer and try to set up the microphone."""
        self.recognizer = sr.Recognizer()
        # Starting energy level; with dynamic_energy_threshold enabled the
        # recognizer keeps adjusting it while listening.
        self.recognizer.energy_threshold = 4000
        self.recognizer.dynamic_energy_threshold = True
        self.recognizer.pause_threshold = 0.8
        self.permission_granted = False
        self._init_microphone()

    def _init_microphone(self) -> None:
        """Create ``self.mic`` for the default input device.

        If no microphone is listed, or the audio backend raises, an error is
        shown with ``st.error`` and ``self.mic`` is left unset. The public
        methods detect that state and report it instead of raising.
        """
        try:
            if not sr.Microphone.list_microphone_names():
                st.error(
                    "No microphones detected. "
                    "Please connect a microphone and refresh the page."
                )
                return

            # device_index=None asks for the default microphone.
            self.mic = sr.Microphone(device_index=None)
        except Exception as e:
            # Best effort: construction must not crash the Streamlit page.
            st.error(f"Error initializing microphone: {e}")

    def check_microphone_access(self) -> Tuple[bool, str]:
        """Probe the microphone by opening it briefly.

        Opens the device and samples ambient noise for 0.1 seconds.

        Returns:
            ``(True, message)`` if the device could be opened and read,
            otherwise ``(False, message)`` where ``message`` explains the
            failure. This method does not raise.
        """
        mic = getattr(self, "mic", None)
        if mic is None:
            # Explicit check instead of relying on an AttributeError from
            # ``self.mic`` being absent.
            return False, _MIC_NOT_INITIALIZED_MESSAGE

        try:
            with mic as source:
                # Short duration: this is only an accessibility test.
                self.recognizer.adjust_for_ambient_noise(source, duration=0.1)
            return True, "Microphone access granted"
        except AttributeError:
            # Kept so that any AttributeError raised while opening or closing
            # the device maps to the same message as before.
            return False, _MIC_NOT_INITIALIZED_MESSAGE
        except OSError as e:
            return False, (
                f"Could not access microphone. Error: {e}\n"
                "\n"
                "Please check:\n"
                "1. Microphone is properly connected\n"
                "2. No other application is using the microphone\n"
                "3. Browser has permission to access microphone"
            )
        except sr.RequestError as e:
            return False, f"Speech recognition service error: {e}"
        except Exception as e:
            return False, f"Unexpected error: {e}"

    def request_permissions(self) -> Tuple[bool, str]:
        """Verify microphone access and remember a successful result.

        This delegates to ``check_microphone_access``; on success
        ``permission_granted`` is set so later calls to ``listen_for_voice``
        skip the check.

        Returns:
            ``(True, message)`` when access works, otherwise
            ``(False, message)`` with remediation steps and the underlying
            error text.
        """
        success, message = self.check_microphone_access()
        if success:
            self.permission_granted = True
            return True, "Microphone access granted successfully"

        return False, (
            "Microphone access denied. Please:\n"
            "1. Click the lock/camera icon in your browser's address bar\n"
            "2. Select 'Allow' for microphone access\n"
            "3. Refresh the page\n"
            "4. If using Chrome, verify settings at "
            "chrome://settings/content/microphone\n"
            "\n"
            f"Error details: {message}"
        )

    def listen_for_voice(self, language: str = "mixed") -> str:
        """Record one phrase from the microphone and return its transcript.

        Args:
            language: ``"ar-SA"`` for Arabic, ``"en-US"`` for English, or
                ``"mixed"`` to try Arabic first and fall back to English.
                Any other value is treated as English.

        Returns:
            The recognized text on success. On failure, a human-readable
            error message is returned instead (and also shown through
            Streamlit); this method does not raise.
        """
        mic = getattr(self, "mic", None)
        if mic is None:
            return "Error: Microphone not properly initialized"

        if not self.permission_granted:
            success, message = self.request_permissions()
            if not success:
                st.error(message)
                st.markdown(_TROUBLESHOOTING_MARKDOWN)
                return message

        try:
            with mic as source:
                st.info("Adjusting for ambient noise... Please wait.")
                self.recognizer.adjust_for_ambient_noise(source, duration=1)

                st.info("🎤 Listening... Speak now!")
                audio = self.recognizer.listen(
                    source, timeout=5, phrase_time_limit=10
                )

            # Recognition is a network call; run it only after the ``with``
            # block has closed the device so the microphone is not held open
            # while waiting for the service.
            st.info("Processing speech...")
            return self._process_audio(audio, language)
        except sr.RequestError as e:
            error_msg = f"Could not request results from speech service: {e}"
            st.error(error_msg)
            return error_msg
        except sr.UnknownValueError:
            error_msg = "Could not understand audio. Please speak clearly and try again."
            st.warning(error_msg)
            return error_msg
        except sr.WaitTimeoutError:
            error_msg = "Listening timed out. Please try again."
            st.warning(error_msg)
            return error_msg
        except Exception as e:
            error_msg = f"Error: {e}"
            st.error(error_msg)
            return error_msg

    def _process_audio(self, audio, language: str) -> str:
        """Transcribe recorded audio with the Google recognizer.

        Args:
            audio: The audio object returned by ``Recognizer.listen``.
            language: ``"ar-SA"``, ``"en-US"`` or ``"mixed"``; see
                ``listen_for_voice``.

        Returns:
            The recognized text.

        Raises:
            sr.UnknownValueError: If the speech could not be understood (in
                ``"mixed"`` mode, only after the English retry also failed).
            sr.RequestError: If the recognition service could not be reached.
        """
        if language not in ("ar-SA", "mixed"):
            return self.recognizer.recognize_google(audio, language="en-US")

        try:
            return self.recognizer.recognize_google(audio, language="ar-SA")
        except sr.UnknownValueError:
            if language != "mixed":
                raise
            # Mixed mode: Arabic produced nothing, retry as English.
            return self.recognizer.recognize_google(audio, language="en-US")