tai / voice_handler.py
shamimjony1000's picture
Update voice_handler.py
081fb8f verified
raw
history blame
5.64 kB
import speech_recognition as sr
import streamlit as st
from typing import Optional, Tuple
import platform
import sys
class VoiceHandler:
    """Record speech from the default microphone and transcribe it via Google.

    The handler wraps a ``speech_recognition.Recognizer`` and reports progress
    and failures to the user through Streamlit (``st.info`` / ``st.warning`` /
    ``st.error``).

    Attributes:
        recognizer: The ``sr.Recognizer`` used for capture and transcription.
        permission_granted: ``True`` once a microphone access check succeeded.
        mic: The ``sr.Microphone`` to record from.  The attribute is only set
            when microphone initialisation succeeded; it is absent otherwise.
    """

    # Returned when no microphone object is available on this handler.
    _MIC_NOT_INITIALIZED_HELP = (
        "\n"
        "Microphone not initialized properly. Please ensure:\n"
        "1. You have a working microphone connected\n"
        "2. Your browser supports audio input\n"
        "3. You're using a modern browser (Chrome, Firefox, Edge)\n"
    )

    # Shown below the error when the permission request fails.
    _TROUBLESHOOTING_MARKDOWN = (
        "\n"
        "### 🎤 Troubleshooting Steps:\n"
        "1. Check browser compatibility (Chrome/Firefox/Edge recommended)\n"
        "2. Verify microphone connection\n"
        "3. Check system sound settings\n"
        "4. Try a different microphone if available\n"
        "5. Restart browser if needed\n"
    )

    def __init__(self):
        """Create the recognizer, tune its thresholds and look for a microphone."""
        self.recognizer = sr.Recognizer()
        self.recognizer.energy_threshold = 4000
        self.recognizer.dynamic_energy_threshold = True
        self.recognizer.pause_threshold = 0.8
        self.permission_granted = False
        self._init_microphone()

    def _init_microphone(self):
        """Bind ``self.mic`` to the default microphone, if any device exists.

        On failure an error is shown via ``st.error`` and ``self.mic`` is left
        unset; the public methods detect that and return an error message.
        """
        try:
            if not sr.Microphone.list_microphone_names():
                st.error("No microphones detected. Please connect a microphone and refresh the page.")
                return
            # device_index=None selects the system default input device.
            self.mic = sr.Microphone(device_index=None)
        except Exception as exc:
            # Deliberately broad: any failure here must not abort construction.
            st.error(f"Error initializing microphone: {exc}")

    @staticmethod
    def _report(show, message: str) -> str:
        """Display ``message`` with the Streamlit callable ``show`` and return it."""
        show(message)
        return message

    def check_microphone_access(self) -> Tuple[bool, str]:
        """Try to open the microphone briefly.

        Returns:
            ``(True, message)`` when the microphone could be opened and sampled,
            otherwise ``(False, message)`` where ``message`` explains the
            failure.  This method never raises.
        """
        mic = getattr(self, "mic", None)
        if mic is None:
            # Explicit guard instead of catching AttributeError, so unrelated
            # attribute errors raised while recording are not misreported as
            # "microphone not initialized".
            return False, self._MIC_NOT_INITIALIZED_HELP

        try:
            with mic as source:
                # Short sample: this is only an accessibility probe.
                self.recognizer.adjust_for_ambient_noise(source, duration=0.1)
        except OSError as exc:
            return False, (
                "\n"
                f"Could not access microphone. Error: {exc}\n"
                "Please check:\n"
                "1. Microphone is properly connected\n"
                "2. No other application is using the microphone\n"
                "3. Browser has permission to access microphone\n"
            )
        except sr.RequestError as exc:
            return False, f"Speech recognition service error: {exc}"
        except Exception as exc:
            return False, f"Unexpected error: {exc}"
        return True, "Microphone access granted"

    def request_permissions(self) -> Tuple[bool, str]:
        """Check microphone access and remember a successful result.

        Returns:
            ``(True, message)`` and sets ``permission_granted`` when the access
            check passed; otherwise ``(False, message)`` with instructions for
            the user followed by the underlying error details.
        """
        success, message = self.check_microphone_access()
        if success:
            self.permission_granted = True
            return True, "Microphone access granted successfully"
        return False, (
            "\n"
            "Microphone access denied. Please:\n"
            "1. Click the lock/camera icon in your browser's address bar\n"
            "2. Select 'Allow' for microphone access\n"
            "3. Refresh the page\n"
            "4. If using Chrome, verify settings at chrome://settings/content/microphone\n"
            f"Error details: {message}\n"
        )

    def listen_for_voice(self, language: str = "mixed") -> str:
        """Record one phrase from the microphone and return its transcription.

        Args:
            language: ``"ar-SA"`` for Arabic, ``"en-US"`` for English, or
                ``"mixed"`` to try Arabic first and fall back to English.
                Any other value is transcribed as ``"en-US"``.

        Returns:
            The recognized text.  On failure the error message is returned
            instead (and, except for the uninitialized-microphone case, also
            displayed through Streamlit), so a returned string is not
            guaranteed to be a transcript.  This method never raises.
        """
        mic = getattr(self, "mic", None)
        if mic is None:
            return "Error: Microphone not properly initialized"

        if not self.permission_granted:
            success, message = self.request_permissions()
            if not success:
                st.error(message)
                st.markdown(self._TROUBLESHOOTING_MARKDOWN)
                return message

        try:
            with mic as source:
                st.info("Adjusting for ambient noise... Please wait.")
                self.recognizer.adjust_for_ambient_noise(source, duration=1)
                st.info("🎤 Listening... Speak now!")
                audio = self.recognizer.listen(source, timeout=5, phrase_time_limit=10)
            # The captured audio is self-contained, so the microphone is
            # released before the (network-bound) recognition request.
            st.info("Processing speech...")
            return self._process_audio(audio, language)
        except sr.RequestError as exc:
            return self._report(
                st.error, f"Could not request results from speech service: {exc}"
            )
        except sr.UnknownValueError:
            return self._report(
                st.warning,
                "Could not understand audio. Please speak clearly and try again.",
            )
        except sr.WaitTimeoutError:
            return self._report(st.warning, "Listening timed out. Please try again.")
        except Exception as exc:
            return self._report(st.error, f"Error: {exc}")

    def _process_audio(self, audio, language: str) -> str:
        """Transcribe ``audio`` with Google speech recognition.

        Args:
            audio: Audio captured by the recognizer.
            language: ``"ar-SA"``, ``"mixed"`` or anything else (treated as
                ``"en-US"``).

        Returns:
            The transcription returned by ``recognize_google``.

        Raises:
            sr.UnknownValueError: Arabic-only recognition failed, or the
                English attempt failed.  Other recognizer errors propagate
                unchanged.
        """
        if language not in ("ar-SA", "mixed"):
            return self.recognizer.recognize_google(audio, language="en-US")

        try:
            return self.recognizer.recognize_google(audio, language="ar-SA")
        except sr.UnknownValueError:
            if language != "mixed":
                raise
            # "mixed": Arabic gave nothing intelligible, retry as English.
            return self.recognizer.recognize_google(audio, language="en-US")