Spaces:

shamimjony1000
/

tt

Sleeping

App Files Files Community

shamimjony1000 committed on Nov 15, 2024

Commit

c82da72

verified ·

1 Parent(s): afd03fb

Upload 8 files

Browse files

Files changed (8) hide show

app.py +184 -0
database.py +108 -0
gemini_processor.py +92 -0
memory_handler.py +223 -0
packages.txt +1 -0
requests.db +0 -0
requirements.txt +8 -0
voice_handler.py +54 -0

app.py ADDED Viewed

	@@ -0,0 +1,184 @@

+import gradio as gr
+import pandas as pd
+from database import Database
+from voice_handler import VoiceHandler
+from gemini_processor import GeminiProcessor
+from memory_handler import MemoryHandler
+from gtts import gTTS
+import io
+import os
+from dotenv import load_dotenv
+# Load variables from a local .env file (if any) before the components below are built.
+load_dotenv()
+# Initialize components
+# One shared instance of each helper is created at import time; the handler
+# functions defined further down in this module all use these same objects.
+db = Database()
+voice_handler = VoiceHandler()
+gemini_processor = GeminiProcessor()
+memory_handler = MemoryHandler()
+def validate_request(project_number, project_name, amount, reason):
+    """Check that every request field has a truthy value.
+
+    Returns:
+        ``(True, "")`` when all four fields are filled in, otherwise
+        ``(False, message)`` where the message names each missing field in
+        the order project number, project name, amount, reason.
+    """
+    labelled_values = (
+        ("project number", project_number),
+        ("project name", project_name),
+        ("amount", amount),
+        ("reason", reason),
+    )
+    # Falsy values (None, "", 0) all count as "not provided".
+    absent = [label for label, value in labelled_values if not value]
+    if absent:
+        return False, f"Please provide: {', '.join(absent)}"
+    return True, ""
+def process_text_input(text, language):
+    """Extract request details from typed text and return values for the form.
+
+    Args:
+        text: The text typed by the user.
+        language: The selected language; accepted for a uniform handler
+            signature but not used by this function.
+
+    Returns:
+        A 5-tuple ``(status message, project number, project name, amount,
+        reason)``. On failure the four field slots are ``None``.
+    """
+    blank_fields = (None, None, None, None)
+    if not text:
+        return ("Please enter some text first.", *blank_fields)
+    details = gemini_processor.extract_request_details(text, memory_handler.get_context())
+    if not details:
+        return ("Could not extract request details. Please try again.", *blank_fields)
+    memory_handler.add_interaction(text, details)
+    # Fill the form from everything remembered so far, not only this input.
+    captured = memory_handler.get_partial_info()
+    status = f"Text processed! {memory_handler.get_prompt_for_missing_info()}"
+    return (
+        status,
+        captured.get('project_number', ''),
+        captured.get('project_name', ''),
+        captured.get('amount', 0),
+        captured.get('reason', ''),
+    )
+def process_voice_input(audio_path, language):
+    """Transcribe a recording, extract request details and return form values.
+
+    Args:
+        audio_path: Path of the recorded audio file.
+        language: The selected language, forwarded to the voice handler.
+
+    Returns:
+        A 5-tuple ``(status message, project number, project name, amount,
+        reason)``. On failure the four field slots are ``None``.
+    """
+    blank_fields = (None, None, None, None)
+    if not audio_path:
+        return ("No audio detected.", *blank_fields)
+    transcript = voice_handler.process_audio_file(audio_path, language)
+    # The voice handler reports failures as strings that begin with "Error:".
+    if transcript.startswith("Error:"):
+        return (transcript, *blank_fields)
+    details = gemini_processor.extract_request_details(transcript, memory_handler.get_context())
+    if not details:
+        return ("Could not extract request details. Please try again.", *blank_fields)
+    memory_handler.add_interaction(transcript, details)
+    # Fill the form from everything remembered so far, not only this input.
+    captured = memory_handler.get_partial_info()
+    status = f"Voice processed! You said: {transcript}\n\n{memory_handler.get_prompt_for_missing_info()}"
+    return (
+        status,
+        captured.get('project_number', ''),
+        captured.get('project_name', ''),
+        captured.get('amount', 0),
+        captured.get('reason', ''),
+    )
+def submit_request(project_number, project_name, amount, reason):
+    """Validate the form values and store them as a new request.
+
+    Returns:
+        ``(message, table)``. ``table`` is the refreshed requests table on
+        success and ``None`` when validation or saving fails.
+    """
+    ok, problem = validate_request(project_number, project_name, amount, reason)
+    if not ok:
+        return problem, None
+    try:
+        db.add_request(project_number, project_name, float(amount), reason)
+        # The request is complete, so the conversation memory starts afresh.
+        memory_handler.clear_memory()
+        outcome = ("Request successfully added!", get_requests_df())
+    except Exception as err:
+        outcome = (f"Error saving request: {str(err)}", None)
+    return outcome
+def get_requests_df():
+    """Return all stored requests as a ``{"headers": ..., "data": ...}`` dict.
+
+    Only the timestamp, project number, project name, amount and reason
+    columns are included, in that order. Any error is printed and an empty
+    table (headers only) is returned instead.
+    """
+    column_order = ['timestamp', 'project_number', 'project_name', 'amount', 'reason']
+    try:
+        rows = db.get_all_requests()
+        if not rows:
+            return {"headers": column_order, "data": []}
+        # Select and order the displayed columns, then flatten to lists of lists.
+        frame = pd.DataFrame(rows)[column_order]
+        return {"headers": frame.columns.tolist(), "data": frame.values.tolist()}
+    except Exception as err:
+        print(f"Error getting requests: {str(err)}")
+        return {"headers": column_order, "data": []}
+def create_ui():
+    """Build the Gradio Blocks interface and wire up its event handlers.
+
+    The "Input" tab offers voice and text entry plus an editable request
+    form; the "Existing Requests" tab lists the stored requests.
+
+    Returns:
+        The ``gr.Blocks`` app. The caller is responsible for launching it.
+    """
+    with gr.Blocks(title="AI Agent Money Request System") as app:
+        gr.Markdown("# AI Agent Money Request System")
+        with gr.Tab("Input"):
+            language = gr.Dropdown(
+                choices=["English", "Arabic", "Mixed (Arabic/English)"],
+                value="English",
+                label="Select Language"
+            )
+            with gr.Tab("Voice Input"):
+                audio_input = gr.Audio(
+                    label="Voice Input",
+                    type="filepath",
+                    sources=["microphone"]
+                )
+                voice_process_btn = gr.Button("Process Voice")
+            with gr.Tab("Text Input"):
+                text_input = gr.Textbox(
+                    lines=3,
+                    placeholder="Enter your request here...",
+                    label="Text Input"
+                )
+                text_process_btn = gr.Button("Process Text")
+            process_output = gr.Textbox(label="Processing Result")
+            with gr.Group():
+                project_number = gr.Textbox(label="Project Number")
+                project_name = gr.Textbox(label="Project Name")
+                amount = gr.Number(label="Amount (in riyals)")
+                reason = gr.Textbox(label="Reason for Request")
+                submit_btn = gr.Button("Submit Request")
+            result_text = gr.Textbox(label="Submission Result")
+        with gr.Tab("Existing Requests"):
+            requests_table = gr.DataFrame(
+                headers=["Timestamp", "Project Number", "Project Name", "Amount", "Reason"],
+                label="Existing Requests"
+            )
+            refresh_btn = gr.Button("Refresh")
+        # Event handlers
+        # Both processing buttons fill the same status box and form fields.
+        text_process_btn.click(
+            process_text_input,
+            inputs=[text_input, language],
+            outputs=[process_output, project_number, project_name, amount, reason]
+        )
+        voice_process_btn.click(
+            process_voice_input,
+            inputs=[audio_input, language],
+            outputs=[process_output, project_number, project_name, amount, reason]
+        )
+        submit_btn.click(
+            submit_request,
+            inputs=[project_number, project_name, amount, reason],
+            outputs=[result_text, requests_table]
+        )
+        refresh_btn.click(
+            get_requests_df,
+            outputs=[requests_table]
+        )
+        # Fill the requests table through a load event instead of assigning
+        # ``requests_table.value`` after construction: the table is then
+        # refreshed on every page load rather than frozen at build time, and
+        # the data goes through the same output path as the other handlers.
+        app.load(
+            get_requests_df,
+            outputs=[requests_table]
+        )
+    return app
+# Script entry point: build the interface and launch it when run directly.
+if __name__ == "__main__":
+    app = create_ui()
+    app.launch()

database.py ADDED Viewed

	@@ -0,0 +1,108 @@

+import sqlite3
+from datetime import datetime
+import os
+import time
+from contextlib import contextmanager
+class Database:
+    """SQLite-backed store for money requests.
+
+    Every operation opens its own short-lived connection and is retried when
+    SQLite raises ``sqlite3.OperationalError``.
+    """
+    # Columns the ``requests`` table must have; used to detect an outdated schema.
+    REQUIRED_COLUMNS = ('id', 'timestamp', 'project_number', 'project_name',
+                        'amount', 'reason', 'original_text')
+    def __init__(self, db_name="requests.db"):
+        """Remember the database file name and make sure the schema exists.
+
+        Args:
+            db_name: Path of the SQLite database file.
+        """
+        self.db_name = db_name
+        self.max_retries = 3
+        self.retry_delay = 1  # seconds slept between attempts
+        self.initialize_database()
+    @contextmanager
+    def get_connection(self):
+        """Create a new connection for each operation and always close it."""
+        conn = sqlite3.connect(self.db_name)
+        try:
+            yield conn
+        finally:
+            conn.close()
+    def _retry(self, operation, description):
+        """Run ``operation`` and retry it on ``sqlite3.OperationalError``.
+
+        Args:
+            operation: Zero-argument callable performing the database work.
+            description: Verb phrase used in the final error message.
+
+        Returns:
+            Whatever ``operation`` returns.
+
+        Raises:
+            Exception: When the last attempt also fails; the original
+                ``OperationalError`` is chained as the cause.
+        """
+        for attempt in range(self.max_retries):
+            try:
+                return operation()
+            except sqlite3.OperationalError as e:
+                if attempt < self.max_retries - 1:
+                    time.sleep(self.retry_delay)
+                    continue
+                raise Exception(
+                    f"Could not {description} after {self.max_retries} attempts: {str(e)}"
+                ) from e
+    def initialize_database(self):
+        """Initialize the database and create tables"""
+        self._retry(self._ensure_schema, "initialize database")
+    def _ensure_schema(self):
+        """Create the ``requests`` table, or rebuild it when columns are missing."""
+        with self.get_connection() as conn:
+            conn.execute('PRAGMA encoding="UTF-8"')
+            cursor = conn.cursor()
+            # Check if table exists
+            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='requests'")
+            if not cursor.fetchone():
+                self.create_table(conn)
+                return
+            # Verify columns (the column name is the second field of each row)
+            cursor.execute('PRAGMA table_info(requests)')
+            existing = [col[1] for col in cursor.fetchall()]
+            if all(col in existing for col in self.REQUIRED_COLUMNS):
+                return
+            # Outdated schema: set the old table aside and build a fresh one.
+            cursor.execute('ALTER TABLE requests RENAME TO requests_old')
+            self.create_table(conn)
+            # Copy only the columns the old table really has. A fixed column
+            # list would fail here when one of them is absent, and the retry
+            # would then find a valid but empty table and leave the data
+            # behind in ``requests_old``. ``id`` is not copied; rows get new ids.
+            shared = [col for col in self.REQUIRED_COLUMNS
+                      if col != 'id' and col in existing]
+            if shared:
+                # Names come from the REQUIRED_COLUMNS constant, never from input.
+                column_list = ', '.join(shared)
+                cursor.execute(
+                    f'INSERT INTO requests ({column_list}) '
+                    f'SELECT {column_list} FROM requests_old'
+                )
+            cursor.execute('DROP TABLE requests_old')
+            conn.commit()
+    def create_table(self, conn):
+        """Create the requests table on ``conn`` if it does not exist and commit."""
+        cursor = conn.cursor()
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS requests (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                timestamp DATETIME,
+                project_number TEXT,
+                project_name TEXT,
+                amount REAL,
+                reason TEXT,
+                original_text TEXT
+            )
+        ''')
+        conn.commit()
+    def add_request(self, project_number, project_name, amount, reason, original_text=""):
+        """Add a new request to the database, stamped with the current local time.
+
+        Raises:
+            Exception: When the insert still fails after all retries.
+        """
+        def insert():
+            # Bind the timestamp as explicit ISO text ("YYYY-MM-DD HH:MM:SS.ffffff").
+            # This is the format sqlite3's implicit datetime adapter produced;
+            # that adapter is deprecated since Python 3.12.
+            timestamp = datetime.now().isoformat(sep=" ")
+            with self.get_connection() as conn:
+                conn.execute('''
+                    INSERT INTO requests (timestamp, project_number, project_name, amount, reason, original_text)
+                    VALUES (?, ?, ?, ?, ?, ?)
+                ''', (timestamp, project_number, project_name, amount, reason, original_text))
+                conn.commit()
+        self._retry(insert, "add request")
+    def get_all_requests(self):
+        """Get all requests from the database, newest first.
+
+        Returns:
+            A list of dicts, one per row, keyed by column name.
+
+        Raises:
+            Exception: When the query still fails after all retries.
+        """
+        def fetch():
+            with self.get_connection() as conn:
+                cursor = conn.cursor()
+                cursor.execute('SELECT * FROM requests ORDER BY timestamp DESC')
+                columns = [description[0] for description in cursor.description]
+                return [dict(zip(columns, row)) for row in cursor.fetchall()]
+        return self._retry(fetch, "fetch requests")

gemini_processor.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import google.generativeai as genai
+import os
+from dotenv import load_dotenv
+import json
+import re
+load_dotenv()
+class GeminiProcessor:
+    """Extract structured money-request details from free text using Gemini.
+
+    Input containing Arabic characters is first translated to English by the
+    model, then the request fields are extracted as JSON.
+    """
+    # Compiled once; matches any character in the Unicode range U+0600-U+06FF.
+    _ARABIC_PATTERN = re.compile('[\u0600-\u06FF]')
+    def __init__(self):
+        """Configure the Gemini client.
+
+        Raises:
+            ValueError: When the ``GOOGLE_API_KEY`` environment variable is
+                missing or empty.
+        """
+        # SECURITY: the key must come from the environment (this module calls
+        # load_dotenv() at import time), never from source code. A key that
+        # has been committed to the repository should be revoked.
+        api_key = os.getenv("GOOGLE_API_KEY")
+        if not api_key:
+            raise ValueError("GOOGLE_API_KEY not found in environment variables")
+        genai.configure(api_key=api_key)
+        self.model = genai.GenerativeModel('gemini-pro')
+        self.config = genai.GenerationConfig(temperature=0)
+    def is_arabic(self, text):
+        """Return True when ``text`` contains at least one Arabic-range character."""
+        return bool(self._ARABIC_PATTERN.search(text))
+    @staticmethod
+    def _strip_code_fence(raw):
+        """Return ``raw`` without a surrounding Markdown code fence.
+
+        A reply wrapped in a ``` or ```json fence is not valid JSON until the
+        fence is removed. Text without a fence is only stripped of whitespace.
+        """
+        cleaned = raw.strip()
+        fenced = re.match(r'^```(?:json)?\s*(.*?)\s*```$', cleaned, re.DOTALL | re.IGNORECASE)
+        return fenced.group(1) if fenced else cleaned
+    @staticmethod
+    def _to_amount(value):
+        """Convert the extracted amount to ``float``.
+
+        ``None`` and empty strings become ``0.0`` (the prompt's "missing"
+        value); thousands separators in strings are ignored.
+
+        Raises:
+            ValueError: When the value is not numeric.
+        """
+        if value is None:
+            return 0.0
+        if isinstance(value, str):
+            value = value.replace(',', '').strip()
+            if not value:
+                return 0.0
+        return float(value)
+    def translate_arabic_to_english(self, text):
+        """Ask the model to translate the Arabic parts of ``text`` to English.
+
+        Returns the translated text, or ``text`` unchanged when the model
+        call fails (the error is printed).
+        """
+        prompt = f"""
+        Translate the following Arabic text to English. If the text is mixed (Arabic and English),
+        translate only the Arabic parts and keep the English parts as is.
+        Keep numbers in their original format.
+        Text to translate: {text}
+        """
+        try:
+            response = self.model.generate_content(prompt)
+            return response.text.strip()
+        except Exception as e:
+            print(f"Translation error: {e}")
+            return text
+    def extract_request_details(self, text, context=""):
+        """Extract request fields from ``text`` combined with prior ``context``.
+
+        Args:
+            text: The latest user input.
+            context: Earlier conversation context, prepended to ``text``.
+
+        Returns:
+            A dict with ``project_number``, ``project_name``, ``amount``
+            (float), ``reason``, ``missing_fields`` and ``original_text``,
+            plus ``translated_text`` when a translation was performed.
+            ``None`` when the reply cannot be parsed or lacks required keys.
+        """
+        full_text = f"{context} {text}".strip()
+        is_arabic_input = self.is_arabic(full_text)
+        # Translate if Arabic text is detected
+        if is_arabic_input:
+            processing_text = self.translate_arabic_to_english(full_text)
+        else:
+            processing_text = full_text
+        prompt = f"""
+        Extract the following information from this text and previous context.
+        The input has been translated from Arabic if it contained Arabic text.
+        If any information is missing, leave it empty.
+        Format the response exactly as a JSON object with these keys:
+        {{
+            "project_number": "extracted number or empty string",
+            "project_name": "extracted name or empty string",
+            "amount": extracted number or 0,
+            "reason": "extracted reason or empty string",
+            "missing_fields": ["list of missing required fields"],
+            "original_text": "the original input text"
+        }}
+        ##No preamble## Response in VALID JSON ONLY##
+        Text to analyze: {processing_text}
+        """
+        try:
+            response = self.model.generate_content(prompt,generation_config=self.config)
+            print("RESPONSE: ",response)
+            # Remove a Markdown fence, if any, before parsing the JSON.
+            result = json.loads(self._strip_code_fence(response.text))
+            required_keys = ['project_number', 'project_name', 'amount', 'reason', 'missing_fields']
+            if not all(key in result for key in required_keys):
+                raise ValueError("Missing required keys in response")
+            # An empty amount means "not given yet"; it must not discard the
+            # other fields that were extracted successfully.
+            result['amount'] = self._to_amount(result.get('amount', 0))
+            result['original_text'] = full_text  # Keep the original Arabic text
+            # Add translation if it was performed
+            if is_arabic_input:
+                result['translated_text'] = processing_text
+            return result
+        except json.JSONDecodeError as e:
+            print(f"JSON parsing error: {e}")
+            return None
+        except Exception as e:
+            print(f"Error processing request: {e}")
+            return None

memory_handler.py ADDED Viewed

	@@ -0,0 +1,223 @@

+from gtts import gTTS
+import io
+import gradio as gr
+from datetime import datetime, timedelta
+import json
+class MemoryHandler:
+    """Short-term memory for a multi-turn money-request conversation.
+
+    Keeps the most recent inputs, the request fields gathered so far
+    (``partial_info``) and a confidence score per field.
+    """
+    # Fields that must all be known before a request is complete.
+    REQUIRED_FIELDS = ('project_number', 'project_name', 'amount', 'reason')
+    # Confidence assumed when the input carries no ``<field>_confidence`` key.
+    DEFAULT_CONFIDENCE = 0.5
+    def __init__(self):
+        self.conversation_history = []
+        self.max_history = 5  # Keep last 5 interactions
+        self.context_timeout = timedelta(minutes=2)  # Context expires after 2 minutes
+        self.last_interaction_time = None
+        # Creates ``partial_info`` and ``confidence_scores`` in their empty state.
+        self.clear_partial_info()
+    def add_interaction(self, text: str, extracted_info: dict = None) -> None:
+        """
+        Add a new interaction to the conversation history and update partial information
+        Args:
+            text: The text from the voice/text input
+            extracted_info: Dictionary containing extracted request details
+        """
+        current_time = datetime.now()
+        # Drop the gathered fields when the previous interaction is too old.
+        if self.last_interaction_time and \
+           (current_time - self.last_interaction_time) > self.context_timeout:
+            self.clear_partial_info()
+        # Update conversation history
+        if text:
+            self.conversation_history.append({
+                'text': text,
+                'timestamp': current_time.isoformat(),
+                'extracted_info': extracted_info
+            })
+            # Keep only the newest ``max_history`` entries.
+            if len(self.conversation_history) > self.max_history:
+                self.conversation_history.pop(0)
+        # Update partial information if provided
+        if extracted_info:
+            self._update_partial_info(extracted_info, current_time)
+        self.last_interaction_time = current_time
+    def _store_if_trusted(self, key: str, value, confidence: float) -> None:
+        """Store ``value`` when the slot is empty or the new confidence is not lower.
+
+        ``>=`` rather than ``>``: inputs without an explicit confidence all
+        default to the same score, so a strict comparison would never let a
+        later correction replace the first value captured.
+        """
+        if (self.partial_info[key] is None or
+                confidence >= self.confidence_scores.get(key, 0)):
+            self.partial_info[key] = value
+            self.confidence_scores[key] = confidence
+    def _update_partial_info(self, extracted_info: dict, current_time: datetime) -> None:
+        """
+        Update partial information with confidence scoring
+        """
+        # Only the request fields are scored; the timestamp is set below.
+        for key in self.REQUIRED_FIELDS:
+            new_value = extracted_info.get(key)
+            # Falsy values ("" or 0) mean the field was not extracted.
+            if new_value:
+                confidence = extracted_info.get(f'{key}_confidence', self.DEFAULT_CONFIDENCE)
+                self._store_if_trusted(key, new_value, confidence)
+        self.partial_info['timestamp'] = current_time
+    def get_context(self) -> str:
+        """
+        Get the current conversation context including partial information
+        """
+        # One "[HH:MM:SS] text" entry per remembered input, oldest first.
+        context_parts = []
+        for entry in self.conversation_history:
+            timestamp = datetime.fromisoformat(entry['timestamp']).strftime('%H:%M:%S')
+            context_parts.append(f"[{timestamp}] {entry['text']}")
+        context = " ".join(context_parts)
+        # Add partial information to context if available
+        partial_context = []
+        for key, value in self.partial_info.items():
+            if value and key != 'timestamp':
+                confidence = self.confidence_scores.get(key, 0)
+                partial_context.append(f"{key}: {value} (confidence: {confidence:.2f})")
+        if partial_context:
+            context += "\nPartial information: " + ", ".join(partial_context)
+        return context
+    def get_partial_info(self) -> dict:
+        """Get current partial information with confidence scores.
+
+        Fields that are still ``None`` and the timestamp are left out. The
+        scores are returned as a copy so callers cannot alter internal state.
+        """
+        info = {k: v for k, v in self.partial_info.items()
+               if k != 'timestamp' and v is not None}
+        info['confidence_scores'] = dict(self.confidence_scores)
+        return info
+    def merge_partial_info(self, new_info: dict) -> None:
+        """
+        Merge new information with existing partial info based on confidence scores
+        """
+        for key in self.partial_info:
+            if key in new_info and new_info[key] is not None:
+                new_confidence = new_info.get(f'{key}_confidence', self.DEFAULT_CONFIDENCE)
+                self._store_if_trusted(key, new_info[key], new_confidence)
+    def clear_partial_info(self) -> None:
+        """Clear partial information and confidence scores"""
+        self.partial_info = {
+            'project_number': None,
+            'project_name': None,
+            'amount': None,
+            'reason': None,
+            'timestamp': None
+        }
+        self.confidence_scores = {
+            'project_number': 0.0,
+            'project_name': 0.0,
+            'amount': 0.0,
+            'reason': 0.0
+        }
+    def clear_memory(self) -> str:
+        """Clear all conversation history and partial information.
+
+        Returns:
+            The confirmation message ``"Memory cleared!"``.
+        """
+        self.conversation_history = []
+        self.clear_partial_info()
+        self.last_interaction_time = None
+        return "Memory cleared!"
+    def get_missing_fields(self) -> list:
+        """Get list of missing required fields with confidence thresholds"""
+        confidence_threshold = 0.5  # Minimum confidence required
+        return [
+            field for field in self.REQUIRED_FIELDS
+            if (self.partial_info.get(field) is None or
+                self.confidence_scores.get(field, 0) < confidence_threshold)
+        ]
+    def text_to_speech(self, text: str) -> tuple[str, str]:
+        """Convert text to speech and return ``(audio_path, error)``.
+
+        Exactly one element is ``None``: the error on success, the path on
+        failure. The audio is always written to ``temp_audio.mp3``.
+        """
+        try:
+            tts = gTTS(text=text, lang='en')
+            audio_path = "temp_audio.mp3"
+            tts.save(audio_path)
+            return audio_path, None
+        except Exception as e:
+            return None, f"Error generating audio: {str(e)}"
+    def create_confirmation_audio(self, project_number: str, project_name: str,
+                                amount: float, reason: str) -> tuple[str, str]:
+        """Create confirmation message audio with confidence information"""
+        confidence_info = "\n".join([
+            f"{field}: {self.confidence_scores.get(field, 0):.2f} confidence"
+            for field in ['project_number', 'project_name', 'amount', 'reason']
+        ])
+        confirmation_text = (
+            f"You are going to add request money for project ID: {project_number}, "
+            f"Project name: {project_name}, request amount: {amount}, "
+            f"reason: {reason}.\n\nConfidence scores:\n{confidence_info}\n"
+            f"Are you sure you want to proceed?"
+        )
+        return self.text_to_speech(confirmation_text)
+    def get_prompt_for_missing_info(self) -> str:
+        """Generate a prompt for missing information with confidence scores"""
+        missing = self.get_missing_fields()
+        if not missing:
+            return "All required information has been provided with sufficient confidence."
+        current_info = self.get_partial_info()
+        prompt = "Current information:\n"
+        # Show current information with confidence scores
+        for key, value in current_info.items():
+            if key != 'confidence_scores' and value is not None:
+                confidence = self.confidence_scores.get(key, 0)
+                prompt += f"- {key}: {value} (confidence: {confidence:.2f})\n"
+        prompt += "\nPlease provide or clarify the following information:\n"
+        for field in missing:
+            current_confidence = self.confidence_scores.get(field, 0)
+            if current_confidence > 0:
+                prompt += f"- {field} (current confidence: {current_confidence:.2f}, needs improvement)\n"
+            else:
+                prompt += f"- {field} (missing)\n"
+        return prompt
+    def to_json(self) -> str:
+        """Serialize the memory state to JSON.
+
+        ``partial_info['timestamp']`` holds a ``datetime`` after any update,
+        which ``json.dumps`` cannot encode, so it is written as ISO text.
+        """
+        partial = dict(self.partial_info)
+        stamp = partial.get('timestamp')
+        if isinstance(stamp, datetime):
+            partial['timestamp'] = stamp.isoformat()
+        return json.dumps({
+            'conversation_history': self.conversation_history,
+            'partial_info': partial,
+            'confidence_scores': self.confidence_scores,
+            'last_interaction_time': self.last_interaction_time.isoformat() if self.last_interaction_time else None
+        })
+    def from_json(self, json_str: str) -> None:
+        """Restore memory state from JSON produced by ``to_json``."""
+        data = json.loads(json_str)
+        self.conversation_history = data['conversation_history']
+        partial = dict(data['partial_info'])
+        # Turn the ISO text written by ``to_json`` back into a ``datetime``.
+        stamp = partial.get('timestamp')
+        if isinstance(stamp, str):
+            partial['timestamp'] = datetime.fromisoformat(stamp)
+        self.partial_info = partial
+        self.confidence_scores = data['confidence_scores']
+        self.last_interaction_time = (datetime.fromisoformat(data['last_interaction_time'])
+                                    if data['last_interaction_time'] else None)

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ portaudio19-dev

requests.db ADDED Viewed

Binary file (12.3 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+gradio>=4.0.0
+pandas
+SpeechRecognition
+google-generativeai
+python-dotenv
+gTTS
+playsound
+pydub

voice_handler.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import speech_recognition as sr
+import os
+from pydub import AudioSegment
+import tempfile
+class VoiceHandler:
+    """Convert recorded audio files to text with Google speech recognition."""
+    def __init__(self):
+        self.recognizer = sr.Recognizer()
+        # Fixed energy threshold; dynamic adjustment is switched off.
+        self.recognizer.energy_threshold = 20000
+        self.recognizer.dynamic_energy_threshold = False
+        self.recognizer.pause_threshold = 0.8
+    def process_audio_file(self, audio_path: str, language: str) -> str:
+        """Process audio file and convert to text.
+
+        Args:
+            audio_path: Path of the audio file; anything that is not a
+                ``.wav`` file is converted to a temporary wav file first.
+            language: ``"Arabic"``, ``"Mixed (Arabic/English)"`` or anything
+                else for English.
+
+        Returns:
+            The recognized text, or a message starting with ``"Error:"`` when
+            conversion or recognition fails.
+        """
+        converted_path = None  # set only when a temporary wav file was created
+        try:
+            # Convert audio to wav format if needed (extension check ignores case)
+            if not audio_path.lower().endswith('.wav'):
+                segment = AudioSegment.from_file(audio_path)
+                # Only the unique name is needed, so the handle is closed at
+                # once. An open handle leaks a descriptor and, on Windows,
+                # blocks the export and the later unlink.
+                with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
+                    converted_path = temp_wav.name
+                segment.export(converted_path, format='wav')
+                audio_path = converted_path
+            with sr.AudioFile(audio_path) as source:
+                audio = self.recognizer.record(source)
+                if language == "Arabic":
+                    return self.recognizer.recognize_google(audio, language="ar-SA")
+                elif language == "Mixed (Arabic/English)":
+                    # Try Arabic first and fall back to English when nothing is understood.
+                    try:
+                        return self.recognizer.recognize_google(audio, language="ar-SA")
+                    except sr.UnknownValueError:
+                        return self.recognizer.recognize_google(audio, language="en-US")
+                else:  # English
+                    return self.recognizer.recognize_google(audio, language="en-US")
+        except sr.RequestError as e:
+            return f"Error: Could not request results from speech service: {str(e)}"
+        except sr.UnknownValueError:
+            return "Error: Could not understand audio. Please speak clearly and try again."
+        except Exception as e:
+            return f"Error: {str(e)}"
+        finally:
+            # Clean up temporary file if it was created
+            if converted_path is not None:
+                os.unlink(converted_path)
+    def check_microphone_access(self) -> bool:
+        """Check if microphone is accessible"""
+        try:
+            with sr.Microphone() as source:
+                self.recognizer.adjust_for_ambient_noise(source, duration=0.1)
+                return True
+        except (OSError, AttributeError, sr.RequestError):
+            return False