Spaces:

shamimjony1000
/

tai

Sleeping

App Files Files Community

tai / voice_handler.py

shamimjony1000

Update voice_handler.py

081fb8f verified 8 months ago

raw

history blame

5.64 kB

	import speech_recognition as sr
	import streamlit as st
	from typing import Optional, Tuple
	import platform
	import sys

	class VoiceHandler:
	    """Record speech from a microphone and transcribe it for a Streamlit UI.

	    Status and error messages are reported through ``streamlit`` calls
	    (``st.info`` / ``st.warning`` / ``st.error``), and failures are also
	    returned to the caller as plain message strings.

	    Attributes:
	        recognizer: the ``sr.Recognizer`` used for noise calibration,
	            listening and recognition.
	        mic: the ``sr.Microphone`` to record from, or ``None`` when no
	            microphone could be set up.
	        permission_granted: ``True`` once a microphone access check has
	            succeeded.
	    """

	    def __init__(self):
	        """Configure the recognizer and try to set up the default microphone."""
	        self.recognizer = sr.Recognizer()
	        # Recognizer tuning values; see the speech_recognition documentation
	        # for their exact meaning.
	        self.recognizer.energy_threshold = 4000
	        self.recognizer.dynamic_energy_threshold = True
	        self.recognizer.pause_threshold = 0.8
	        self.permission_granted = False
	        self._init_microphone()

	    def _init_microphone(self):
	        """Create the default microphone, reporting problems via ``st.error``.

	        ``self.mic`` is always defined afterwards: it holds the microphone on
	        success and ``None`` when no device is listed or setup raised.
	        """
	        # Define the attribute up front so every other method can test it
	        # explicitly instead of probing with hasattr()/AttributeError.
	        self.mic = None
	        try:
	            # List available microphones
	            if not sr.Microphone.list_microphone_names():
	                st.error("No microphones detected. Please connect a microphone and refresh the page.")
	                return

	            # Use default microphone
	            self.mic = sr.Microphone(device_index=None)
	        except Exception as e:
	            # Boundary handler: whatever went wrong is shown to the user and
	            # the handler is left without a microphone.
	            st.error(f"Error initializing microphone: {str(e)}")

	    def check_microphone_access(self) -> Tuple[bool, str]:
	        """Check whether the microphone can be opened.

	        Opens the microphone briefly and runs a short ambient-noise
	        calibration as the access test.

	        Returns:
	            ``(True, message)`` when the test succeeds, otherwise
	            ``(False, message)`` with a description of the problem. An
	            ``AttributeError`` raised while the device is open is reported as
	            an unexpected error, not as an uninitialised microphone.
	        """
	        mic = getattr(self, "mic", None)
	        if mic is None:
	            # NOTE: the message lines below stay flush with the margin so the
	            # runtime text is unchanged.
	            return False, """
	Microphone not initialized properly. Please ensure:
	1. You have a working microphone connected
	2. Your browser supports audio input
	3. You're using a modern browser (Chrome, Firefox, Edge)
	"""

	        try:
	            with mic as source:
	                # Shorter duration for initial test
	                self.recognizer.adjust_for_ambient_noise(source, duration=0.1)
	                return True, "Microphone access granted"
	        except OSError as e:
	            return False, f"""
	Could not access microphone. Error: {str(e)}
	Please check:
	1. Microphone is properly connected
	2. No other application is using the microphone
	3. Browser has permission to access microphone
	"""
	        except sr.RequestError as e:
	            return False, f"Speech recognition service error: {str(e)}"
	        except Exception as e:
	            return False, f"Unexpected error: {str(e)}"

	    def request_permissions(self) -> Tuple[bool, str]:
	        """Run the microphone access check and record a successful result.

	        Returns:
	            ``(True, message)`` and sets ``permission_granted`` when the check
	            passes; otherwise ``(False, message)`` with instructions for the
	            user followed by the underlying error details.
	        """
	        success, message = self.check_microphone_access()
	        if success:
	            self.permission_granted = True
	            return True, "Microphone access granted successfully"

	        return False, f"""
	Microphone access denied. Please:
	1. Click the lock/camera icon in your browser's address bar
	2. Select 'Allow' for microphone access
	3. Refresh the page
	4. If using Chrome, verify settings at chrome://settings/content/microphone

	Error details: {message}
	"""

	    def listen_for_voice(self, language: str = "mixed") -> str:
	        """
	        Listen for voice input in specified language.
	        language can be:
	        - "ar-SA" for Arabic
	        - "en-US" for English
	        - "mixed" for both Arabic and English

	        Returns the recognized text. On failure no exception is raised; the
	        error message is shown in the UI and returned in place of the text.
	        """
	        mic = getattr(self, "mic", None)
	        if mic is None:
	            return "Error: Microphone not properly initialized"

	        if not self.permission_granted:
	            success, message = self.request_permissions()
	            if not success:
	                st.error(message)
	                st.markdown("""
	### 🎤 Troubleshooting Steps:
	1. Check browser compatibility (Chrome/Firefox/Edge recommended)
	2. Verify microphone connection
	3. Check system sound settings
	4. Try a different microphone if available
	5. Restart browser if needed
	""")
	                return message

	        try:
	            with mic as source:
	                st.info("Adjusting for ambient noise... Please wait.")
	                self.recognizer.adjust_for_ambient_noise(source, duration=1)

	                st.info("🎤 Listening... Speak now!")
	                audio = self.recognizer.listen(source, timeout=5, phrase_time_limit=10)

	            # The recording is complete, so the microphone is released before
	            # recognition runs instead of being held open while it finishes.
	            st.info("Processing speech...")
	            return self._process_audio(audio, language)

	        except sr.RequestError as e:
	            error_msg = f"Could not request results from speech service: {str(e)}"
	            st.error(error_msg)
	            return error_msg
	        except sr.UnknownValueError:
	            error_msg = "Could not understand audio. Please speak clearly and try again."
	            st.warning(error_msg)
	            return error_msg
	        except sr.WaitTimeoutError:
	            error_msg = "Listening timed out. Please try again."
	            st.warning(error_msg)
	            return error_msg
	        except Exception as e:
	            error_msg = f"Error: {str(e)}"
	            st.error(error_msg)
	            return error_msg

	    def _process_audio(self, audio, language: str) -> str:
	        """Convert recorded audio to text with ``recognizer.recognize_google``.

	        Args:
	            audio: the recording returned by ``recognizer.listen``.
	            language: ``"ar-SA"`` recognizes Arabic only; ``"mixed"`` tries
	                Arabic first and falls back to English when Arabic is not
	                understood; any other value is recognized as ``"en-US"``.

	        Returns:
	            The recognized text.

	        Raises:
	            sr.UnknownValueError: when the speech cannot be understood (for
	                ``"mixed"``, only after the English fallback also fails).
	            Any other error raised by ``recognize_google`` propagates.
	        """
	        if language not in ("ar-SA", "mixed"):
	            return self.recognizer.recognize_google(audio, language="en-US")

	        try:
	            return self.recognizer.recognize_google(audio, language="ar-SA")
	        except sr.UnknownValueError:
	            # Only "mixed" is allowed to fall back to English.
	            if language != "mixed":
	                raise
	            return self.recognizer.recognize_google(audio, language="en-US")