File size: 5,640 Bytes
df9db76
830a5b7
 
081fb8f
 
df9db76
 
 
 
07730d1
 
 
 
081fb8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d93bc11
830a5b7
 
d93bc11
081fb8f
 
795e681
830a5b7
081fb8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
830a5b7
081fb8f
830a5b7
 
795e681
830a5b7
795e681
830a5b7
 
 
081fb8f
 
 
 
 
 
 
 
 
 
 
df9db76
795e681
df9db76
 
 
 
 
 
 
081fb8f
 
 
830a5b7
 
 
 
 
081fb8f
 
 
 
 
 
830a5b7
 
795e681
df9db76
081fb8f
830a5b7
df9db76
830a5b7
 
df9db76
 
830a5b7
795e681
df9db76
 
830a5b7
 
 
df9db76
830a5b7
 
 
df9db76
830a5b7
 
 
df9db76
830a5b7
 
 
795e681
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import speech_recognition as sr
import streamlit as st
from typing import Optional, Tuple
import platform
import sys

class VoiceHandler:
    """Capture speech from the default microphone and transcribe it to text.

    The handler owns a ``speech_recognition.Recognizer`` and, when at least
    one input device is listed, a ``speech_recognition.Microphone`` stored in
    ``self.mic``. Progress and error messages are shown through Streamlit
    (``st.info`` / ``st.warning`` / ``st.error``); the public methods also
    return those messages so callers can display or log them themselves.

    Attributes:
        recognizer: The configured ``sr.Recognizer`` instance.
        mic: The default ``sr.Microphone``. The attribute is only created
            when microphone initialisation succeeds, so it may be absent.
        permission_granted: ``True`` once a microphone access check has
            succeeded; it is never reset afterwards.
    """

    def __init__(
        self,
        *,
        energy_threshold: float = 4000,
        dynamic_energy_threshold: bool = True,
        pause_threshold: float = 0.8,
    ):
        """Create the recognizer and try to open the default microphone.

        Args:
            energy_threshold: Initial value assigned to
                ``recognizer.energy_threshold``.
            dynamic_energy_threshold: Value assigned to
                ``recognizer.dynamic_energy_threshold``.
            pause_threshold: Value assigned to ``recognizer.pause_threshold``.

        The defaults reproduce the values that used to be hard-coded here.
        """
        self.recognizer = sr.Recognizer()
        self.recognizer.energy_threshold = energy_threshold
        self.recognizer.dynamic_energy_threshold = dynamic_energy_threshold
        self.recognizer.pause_threshold = pause_threshold
        self.permission_granted = False
        self._init_microphone()

    def _init_microphone(self):
        """Bind ``self.mic`` to the default input device, if one is listed.

        When no device is listed, or when the library raises, an error is
        shown with ``st.error`` and ``self.mic`` is left unset. The other
        methods treat a missing (or ``None``) ``mic`` as "not initialised".
        """
        try:
            if not sr.Microphone.list_microphone_names():
                st.error("No microphones detected. Please connect a microphone and refresh the page.")
                return

            # device_index=None selects the default microphone.
            self.mic = sr.Microphone(device_index=None)
        except Exception as e:
            # Best effort: construction of the handler must not fail just
            # because audio hardware or its backend is unavailable.
            st.error(f"Error initializing microphone: {e}")

    def check_microphone_access(self) -> Tuple[bool, str]:
        """Probe the microphone with a short ambient-noise calibration.

        Returns:
            ``(True, message)`` when the device could be opened and the
            0.1 second calibration completed, otherwise ``(False, message)``
            where ``message`` describes the failure. No exception escapes.
        """
        not_initialized = """
                Microphone not initialized properly. Please ensure:
                1. You have a working microphone connected
                2. Your browser supports audio input
                3. You're using a modern browser (Chrome, Firefox, Edge)
                """

        # Explicit guard instead of relying on an AttributeError from
        # `with self.mic`: also covers a `mic` attribute that is None.
        mic = getattr(self, "mic", None)
        if mic is None:
            return False, not_initialized

        try:
            with mic as source:
                # Short duration: this is only an access test.
                self.recognizer.adjust_for_ambient_noise(source, duration=0.1)
                return True, "Microphone access granted"
        except AttributeError:
            # Kept for backward compatibility: an AttributeError raised while
            # using the device is reported with the same message as before.
            return False, not_initialized
        except OSError as e:
            return False, f"""
                Could not access microphone. Error: {str(e)}
                Please check:
                1. Microphone is properly connected
                2. No other application is using the microphone
                3. Browser has permission to access microphone
                """
        except sr.RequestError as e:
            return False, f"Speech recognition service error: {str(e)}"
        except Exception as e:
            return False, f"Unexpected error: {str(e)}"

    def request_permissions(self) -> Tuple[bool, str]:
        """Run the access check and record a successful result.

        This does not trigger a browser prompt itself; it calls
        ``check_microphone_access`` and, on success, sets
        ``self.permission_granted`` to ``True``.

        Returns:
            ``(True, message)`` on success, otherwise ``(False, message)``
            with user-facing instructions followed by the underlying error.
        """
        success, message = self.check_microphone_access()
        if success:
            self.permission_granted = True
            return True, "Microphone access granted successfully"

        return False, f"""
            Microphone access denied. Please:
            1. Click the lock/camera icon in your browser's address bar
            2. Select 'Allow' for microphone access
            3. Refresh the page
            4. If using Chrome, verify settings at chrome://settings/content/microphone
            
            Error details: {message}
            """

    def listen_for_voice(
        self,
        language: str = "mixed",
        *,
        timeout: Optional[float] = 5,
        phrase_time_limit: Optional[float] = 10,
        ambient_duration: float = 1,
    ) -> str:
        """
        Listen for voice input in specified language.
        language can be:
        - "ar-SA" for Arabic
        - "en-US" for English
        - "mixed" for both Arabic and English
        Any other value is transcribed as English ("en-US").

        Args:
            language: Language selector described above.
            timeout: Passed to ``recognizer.listen`` as ``timeout``.
            phrase_time_limit: Passed to ``recognizer.listen`` as
                ``phrase_time_limit``.
            ambient_duration: Duration passed to
                ``recognizer.adjust_for_ambient_noise`` before listening.

        Returns:
            The recognised text on success. On any failure the error message
            is returned instead (and also shown through Streamlit); this
            method does not raise for recognition or device errors.
        """
        mic = getattr(self, "mic", None)
        if mic is None:
            return "Error: Microphone not properly initialized"

        if not self.permission_granted:
            success, message = self.request_permissions()
            if not success:
                st.error(message)
                st.markdown("""
                ### 🎀 Troubleshooting Steps:
                1. Check browser compatibility (Chrome/Firefox/Edge recommended)
                2. Verify microphone connection
                3. Check system sound settings
                4. Try a different microphone if available
                5. Restart browser if needed
                """)
                return message

        try:
            with mic as source:
                st.info("Adjusting for ambient noise... Please wait.")
                self.recognizer.adjust_for_ambient_noise(source, duration=ambient_duration)

                st.info("🎀 Listening... Speak now!")
                audio = self.recognizer.listen(
                    source,
                    timeout=timeout,
                    phrase_time_limit=phrase_time_limit,
                )

                st.info("Processing speech...")
                return self._process_audio(audio, language)

        except sr.RequestError as e:
            error_msg = f"Could not request results from speech service: {str(e)}"
            st.error(error_msg)
            return error_msg
        except sr.UnknownValueError:
            error_msg = "Could not understand audio. Please speak clearly and try again."
            st.warning(error_msg)
            return error_msg
        except sr.WaitTimeoutError:
            error_msg = "Listening timed out. Please try again."
            st.warning(error_msg)
            return error_msg
        except Exception as e:
            # Last-resort boundary: the UI gets a message instead of a crash.
            error_msg = f"Error: {str(e)}"
            st.error(error_msg)
            return error_msg

    def _process_audio(self, audio, language: str) -> str:
        """Transcribe ``audio`` with ``recognizer.recognize_google``.

        "ar-SA" is recognised as Arabic; "mixed" tries Arabic first and
        retries as English only when the Arabic attempt raises
        ``sr.UnknownValueError``; every other value is recognised as English.

        Raises:
            Whatever ``recognize_google`` raises on the final attempt
            (the caller, ``listen_for_voice``, handles these).
        """
        if language not in ("ar-SA", "mixed"):
            return self.recognizer.recognize_google(audio, language="en-US")

        try:
            return self.recognizer.recognize_google(audio, language="ar-SA")
        except sr.UnknownValueError:
            if language != "mixed":
                raise
            return self.recognizer.recognize_google(audio, language="en-US")