Spaces:
Sleeping
Sleeping
shamimjony1000
committed on
Commit
•
795e681
1
Parent(s):
65f6621
Update voice_handler.py
Browse files- voice_handler.py +31 -24
voice_handler.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import speech_recognition as sr
|
2 |
import platform
|
|
|
3 |
|
4 |
class VoiceHandler:
|
5 |
def __init__(self):
|
@@ -9,16 +10,26 @@ class VoiceHandler:
|
|
9 |
self.recognizer.pause_threshold = 0.8
|
10 |
self.permission_granted = False
|
11 |
|
12 |
-
def
|
13 |
-
"""
|
14 |
try:
|
15 |
with sr.Microphone() as source:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
self.permission_granted = True
|
17 |
return True
|
18 |
-
|
|
|
19 |
return False
|
20 |
|
21 |
-
def listen_for_voice(self, language="mixed"):
|
22 |
"""
|
23 |
Listen for voice input in specified language.
|
24 |
language can be:
|
@@ -26,10 +37,9 @@ class VoiceHandler:
|
|
26 |
- "en-US" for English
|
27 |
- "mixed" for both Arabic and English
|
28 |
"""
|
29 |
-
if not self.permission_granted:
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
try:
|
34 |
with sr.Microphone() as source:
|
35 |
print("Adjusting for ambient noise...")
|
@@ -38,21 +48,7 @@ class VoiceHandler:
|
|
38 |
audio = self.recognizer.listen(source, timeout=5, phrase_time_limit=10)
|
39 |
print("Processing speech...")
|
40 |
|
41 |
-
|
42 |
-
if language in ["ar-SA", "mixed"]:
|
43 |
-
try:
|
44 |
-
text = self.recognizer.recognize_google(audio, language="ar-SA")
|
45 |
-
return text
|
46 |
-
except sr.UnknownValueError:
|
47 |
-
if language == "mixed":
|
48 |
-
# If Arabic fails and mixed is specified, try English
|
49 |
-
text = self.recognizer.recognize_google(audio, language="en-US")
|
50 |
-
return text
|
51 |
-
raise
|
52 |
-
else:
|
53 |
-
# English only
|
54 |
-
text = self.recognizer.recognize_google(audio, language="en-US")
|
55 |
-
return text
|
56 |
|
57 |
except sr.RequestError as e:
|
58 |
return f"Could not request results from speech service: {str(e)}"
|
@@ -61,4 +57,15 @@ class VoiceHandler:
|
|
61 |
except sr.WaitTimeoutError:
|
62 |
return "Listening timed out. Please try again."
|
63 |
except Exception as e:
|
64 |
-
return f"Error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import speech_recognition as sr
|
2 |
import platform
|
3 |
+
from typing import Optional
|
4 |
|
5 |
class VoiceHandler:
|
6 |
def __init__(self):
|
|
|
10 |
self.recognizer.pause_threshold = 0.8
|
11 |
self.permission_granted = False
|
12 |
|
13 |
+
def check_microphone_access(self) -> bool:
    """Probe whether a microphone can be opened and read.

    Opens ``sr.Microphone`` and runs a 0.1 second ambient-noise
    adjustment on it as a quick usability check.

    Returns:
        True when the probe completes; False when it raises ``OSError``,
        ``AttributeError`` or ``sr.RequestError``. Any other exception
        propagates to the caller.
    """
    try:
        with sr.Microphone() as mic:
            self.recognizer.adjust_for_ambient_noise(mic, duration=0.1)
    except (OSError, AttributeError, sr.RequestError):
        # These are treated as "no usable microphone", not as errors.
        accessible = False
    else:
        accessible = True
    return accessible
|
21 |
+
|
22 |
+
def request_permissions(self) -> bool:
    """Re-check microphone access and record the outcome.

    Delegates to ``check_microphone_access`` and stores the result in
    ``self.permission_granted``. The flag is updated on failure as well
    as on success, so a previously granted state does not linger after
    the microphone stops being accessible.

    Returns:
        True when the microphone check succeeds, False otherwise
        (including when the check raises unexpectedly).
    """
    try:
        granted = bool(self.check_microphone_access())
    except Exception:
        # Deliberate best-effort: an unexpected failure while probing is
        # reported to the caller as "not granted" instead of crashing.
        granted = False
    # Always write the flag so it can never stay stale at True.
    self.permission_granted = granted
    return granted
|
31 |
|
32 |
+
def listen_for_voice(self, language: str = "mixed") -> str:
|
33 |
"""
|
34 |
Listen for voice input in specified language.
|
35 |
language can be:
|
|
|
37 |
- "en-US" for English
|
38 |
- "mixed" for both Arabic and English
|
39 |
"""
|
40 |
+
if not self.permission_granted and not self.request_permissions():
|
41 |
+
return "Error: Please grant microphone permissions to use voice input."
|
42 |
+
|
|
|
43 |
try:
|
44 |
with sr.Microphone() as source:
|
45 |
print("Adjusting for ambient noise...")
|
|
|
48 |
audio = self.recognizer.listen(source, timeout=5, phrase_time_limit=10)
|
49 |
print("Processing speech...")
|
50 |
|
51 |
+
return self._process_audio(audio, language)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
except sr.RequestError as e:
|
54 |
return f"Could not request results from speech service: {str(e)}"
|
|
|
57 |
except sr.WaitTimeoutError:
|
58 |
return "Listening timed out. Please try again."
|
59 |
except Exception as e:
|
60 |
+
return f"Error: {str(e)}"
|
61 |
+
|
62 |
+
def _process_audio(self, audio, language: str) -> str:
    """Transcribe ``audio`` with Google speech recognition.

    ``language`` selects the recognition strategy:
    - "ar-SA": Arabic only; an unintelligible result re-raises
      ``sr.UnknownValueError``.
    - "mixed": Arabic first, then English if Arabic raised
      ``sr.UnknownValueError``.
    - anything else: English ("en-US") only.

    Returns the text produced by ``recognize_google``. Exceptions from
    the recognizer, other than the handled Arabic miss in mixed mode,
    propagate to the caller.
    """
    recognize = self.recognizer.recognize_google

    # English-only path: no Arabic attempt at all.
    if language not in ("ar-SA", "mixed"):
        return recognize(audio, language="en-US")

    try:
        return recognize(audio, language="ar-SA")
    except sr.UnknownValueError:
        if language != "mixed":
            raise
        # Mixed mode: Arabic was not understood, so retry as English.
        return recognize(audio, language="en-US")
|