Spaces:
Sleeping
Sleeping
shamimjony1000
committed on
Upload 8 files
Browse files- app.py +184 -0
- database.py +108 -0
- gemini_processor.py +92 -0
- memory_handler.py +223 -0
- packages.txt +1 -0
- requests.db +0 -0
- requirements.txt +8 -0
- voice_handler.py +54 -0
app.py
ADDED
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
from database import Database
|
4 |
+
from voice_handler import VoiceHandler
|
5 |
+
from gemini_processor import GeminiProcessor
|
6 |
+
from memory_handler import MemoryHandler
|
7 |
+
from gtts import gTTS
|
8 |
+
import io
|
9 |
+
import os
|
10 |
+
from dotenv import load_dotenv
|
11 |
+
|
12 |
+
load_dotenv()
|
13 |
+
|
14 |
+
# Initialize components
# These module-level instances are created once at import time and are the
# objects every handler function below reads through its global name.
db = Database()  # constructor calls initialize_database() on "requests.db"
voice_handler = VoiceHandler()  # wraps a speech_recognition Recognizer
gemini_processor = GeminiProcessor()  # configures the Gemini client on construction
memory_handler = MemoryHandler()  # keeps conversation history and partially captured fields
|
19 |
+
|
20 |
+
def validate_request(project_number, project_name, amount, reason):
    """Check that every request field holds a truthy value.

    Returns:
        ``(True, "")`` when all four fields are present, otherwise
        ``(False, message)`` where ``message`` names the missing fields in
        form order. A falsy value (empty string, ``None``, ``0``) counts as
        missing, so an amount of zero is rejected.
    """
    labelled_values = (
        ("project number", project_number),
        ("project name", project_name),
        ("amount", amount),
        ("reason", reason),
    )
    absent = [label for label, value in labelled_values if not value]
    if absent:
        return False, f"Please provide: {', '.join(absent)}"
    return True, ""
|
29 |
+
|
30 |
+
def process_text_input(text, language):
    """Extract request fields from typed text and report what is still missing.

    Args:
        text: Free-form request text entered by the user.
        language: Value of the language dropdown; accepted so the signature
            matches the voice handler, but not read in this function.

    Returns:
        A 5-tuple ``(status, project_number, project_name, amount, reason)``
        for the form widgets. On failure the four field slots are ``None``.
    """
    no_fields = (None, None, None, None)

    if not text:
        return ("Please enter some text first.", *no_fields)

    extracted = gemini_processor.extract_request_details(
        text, memory_handler.get_context()
    )
    if not extracted:
        return ("Could not extract request details. Please try again.", *no_fields)

    memory_handler.add_interaction(text, extracted)

    # Pull everything captured so far, including values from earlier turns.
    captured = memory_handler.get_partial_info()
    status = f"Text processed! {memory_handler.get_prompt_for_missing_info()}"

    field_defaults = (
        ('project_number', ''),
        ('project_name', ''),
        ('amount', 0),
        ('reason', ''),
    )
    return (status, *(captured.get(key, default) for key, default in field_defaults))
|
52 |
+
|
53 |
+
def process_voice_input(audio_path, language):
    """Transcribe a recording, extract request fields and report what is missing.

    Args:
        audio_path: Path of the recorded audio file, or a falsy value when
            nothing was recorded.
        language: Value of the language dropdown, forwarded to the transcriber.

    Returns:
        A 5-tuple ``(status, project_number, project_name, amount, reason)``
        for the form widgets. On failure the four field slots are ``None``.
    """
    no_fields = (None, None, None, None)

    if not audio_path:
        return ("No audio detected.", *no_fields)

    transcript = voice_handler.process_audio_file(audio_path, language)
    # The transcriber reports failures as text beginning with "Error:".
    if transcript.startswith("Error:"):
        return (transcript, *no_fields)

    extracted = gemini_processor.extract_request_details(
        transcript, memory_handler.get_context()
    )
    if not extracted:
        return ("Could not extract request details. Please try again.", *no_fields)

    memory_handler.add_interaction(transcript, extracted)

    # Pull everything captured so far, including values from earlier turns.
    captured = memory_handler.get_partial_info()
    status = (
        f"Voice processed! You said: {transcript}\n\n"
        f"{memory_handler.get_prompt_for_missing_info()}"
    )

    field_defaults = (
        ('project_number', ''),
        ('project_name', ''),
        ('amount', 0),
        ('reason', ''),
    )
    return (status, *(captured.get(key, default) for key, default in field_defaults))
|
79 |
+
|
80 |
+
def submit_request(project_number, project_name, amount, reason):
    """Validate the form values and store them as a new request.

    Returns:
        ``(message, table)``. ``table`` is the refreshed request listing on
        success and ``None`` when validation or saving fails.
    """
    ok, problem = validate_request(project_number, project_name, amount, reason)
    if not ok:
        return problem, None

    try:
        numeric_amount = float(amount)
        db.add_request(project_number, project_name, numeric_amount, reason)
        # A stored request ends the conversation, so drop the captured context.
        memory_handler.clear_memory()
        refreshed = get_requests_df()
        return "Request successfully added!", refreshed
    except Exception as e:
        return f"Error saving request: {str(e)}", None
|
91 |
+
|
92 |
+
def get_requests_df():
    """Return all stored requests in the headers/data dict given to the table.

    Returns:
        ``{"headers": [...], "data": [[...], ...]}`` with one inner list per
        request. Any error is printed and an empty listing is returned instead.
    """
    column_names = ['timestamp', 'project_number', 'project_name', 'amount', 'reason']

    try:
        rows = db.get_all_requests()
        if not rows:
            return {"headers": list(column_names), "data": []}

        # Keep only the displayed columns, in display order.
        frame = pd.DataFrame(rows)[column_names]
        # Convert DataFrame to list of lists format required by Gradio
        return {"headers": frame.columns.tolist(), "data": frame.values.tolist()}
    except Exception as e:
        print(f"Error getting requests: {str(e)}")
        return {"headers": list(column_names), "data": []}
|
107 |
+
|
108 |
+
def create_ui():
    """Build the Gradio interface for capturing and listing money requests.

    The "Input" tab holds the language selector, voice and text capture, the
    editable request form and the submit button. The "Existing Requests" tab
    shows the stored requests.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    # NOTE(review): the nesting below was reconstructed from a listing whose
    # indentation had been stripped - confirm it matches the intended layout.
    with gr.Blocks(title="AI Agent Money Request System") as app:
        gr.Markdown("# AI Agent Money Request System")

        with gr.Tab("Input"):
            language = gr.Dropdown(
                choices=["English", "Arabic", "Mixed (Arabic/English)"],
                value="English",
                label="Select Language"
            )

            with gr.Tab("Voice Input"):
                audio_input = gr.Audio(
                    label="Voice Input",
                    type="filepath",
                    sources=["microphone"]
                )
                voice_process_btn = gr.Button("Process Voice")

            with gr.Tab("Text Input"):
                text_input = gr.Textbox(
                    lines=3,
                    placeholder="Enter your request here...",
                    label="Text Input"
                )
                text_process_btn = gr.Button("Process Text")

            process_output = gr.Textbox(label="Processing Result")

            with gr.Group():
                project_number = gr.Textbox(label="Project Number")
                project_name = gr.Textbox(label="Project Name")
                amount = gr.Number(label="Amount (in riyals)")
                reason = gr.Textbox(label="Reason for Request")
                submit_btn = gr.Button("Submit Request")

            result_text = gr.Textbox(label="Submission Result")

        with gr.Tab("Existing Requests"):
            requests_table = gr.DataFrame(
                headers=["Timestamp", "Project Number", "Project Name", "Amount", "Reason"],
                label="Existing Requests"
            )
            refresh_btn = gr.Button("Refresh")

        # Event handlers
        # Both capture paths write to the same status box and form fields.
        form_outputs = [process_output, project_number, project_name, amount, reason]

        text_process_btn.click(
            process_text_input,
            inputs=[text_input, language],
            outputs=form_outputs
        )

        voice_process_btn.click(
            process_voice_input,
            inputs=[audio_input, language],
            outputs=form_outputs
        )

        submit_btn.click(
            submit_request,
            inputs=[project_number, project_name, amount, reason],
            outputs=[result_text, requests_table]
        )

        refresh_btn.click(
            get_requests_df,
            outputs=[requests_table]
        )

        # Fill the table each time the page loads. Assigning to
        # requests_table.value here would capture a single snapshot when the
        # UI is built, so later visitors would see stale rows.
        app.load(
            get_requests_df,
            outputs=[requests_table]
        )

    return app
|
181 |
+
|
182 |
+
if __name__ == "__main__":
    # Build the interface and start the Gradio server when run as a script.
    app = create_ui()
    app.launch()
|
database.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sqlite3
|
2 |
+
from datetime import datetime
|
3 |
+
import os
|
4 |
+
import time
|
5 |
+
from contextlib import contextmanager
|
6 |
+
|
7 |
+
class Database:
    """SQLite-backed storage for money requests.

    Every operation opens its own short-lived connection. An operation that
    fails with ``sqlite3.OperationalError`` is retried up to ``max_retries``
    times, sleeping ``retry_delay`` seconds between attempts.
    """

    # Columns the ``requests`` table must have; tables missing any are migrated.
    REQUIRED_COLUMNS = (
        'id', 'timestamp', 'project_number', 'project_name',
        'amount', 'reason', 'original_text',
    )

    def __init__(self, db_name="requests.db"):
        """Remember the database path and make sure the schema exists.

        Args:
            db_name: Path of the SQLite database file.

        Raises:
            Exception: If the schema cannot be initialised after all retries.
        """
        self.db_name = db_name
        self.max_retries = 3
        self.retry_delay = 1
        self.initialize_database()

    @contextmanager
    def get_connection(self):
        """Create a new connection for each operation and always close it"""
        conn = sqlite3.connect(self.db_name)
        try:
            yield conn
        finally:
            conn.close()

    def _run_with_retries(self, operation, failure_prefix):
        """Run ``operation(conn)`` on a fresh connection, retrying on OperationalError.

        Args:
            operation: Callable taking an open connection; its result is returned.
            failure_prefix: Leading text of the error raised when retries run out.

        Raises:
            Exception: Chained to the last ``sqlite3.OperationalError`` once
                every attempt has failed.
        """
        for attempt in range(self.max_retries):
            try:
                with self.get_connection() as conn:
                    return operation(conn)
            except sqlite3.OperationalError as e:
                if attempt < self.max_retries - 1:
                    time.sleep(self.retry_delay)
                    continue
                raise Exception(
                    f"{failure_prefix} after {self.max_retries} attempts: {str(e)}"
                ) from e

    def initialize_database(self):
        """Initialize the database and create or migrate the requests table"""
        self._run_with_retries(self._ensure_schema, "Could not initialize database")

    def _ensure_schema(self, conn):
        """Create the requests table, or migrate it when columns are missing"""
        conn.execute('PRAGMA encoding="UTF-8"')
        cursor = conn.cursor()

        # Check if table exists
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='requests'")
        if not cursor.fetchone():
            self.create_table(conn)
        else:
            # Verify columns (index 1 of each table_info row is the column name)
            cursor.execute('PRAGMA table_info(requests)')
            existing_columns = [col[1] for col in cursor.fetchall()]
            if not all(col in existing_columns for col in self.REQUIRED_COLUMNS):
                self._migrate_table(conn, existing_columns)
        conn.commit()

    def _migrate_table(self, conn, existing_columns):
        """Rebuild the requests table with the current schema, keeping old rows"""
        # Copy only columns that exist in both schemas so a table missing more
        # than one column can still be migrated. Row ids are regenerated.
        # These names come from the fixed REQUIRED_COLUMNS tuple, so placing
        # them in the SQL text is safe.
        carried = [
            col for col in self.REQUIRED_COLUMNS
            if col != 'id' and col in existing_columns
        ]
        cursor = conn.cursor()
        # Backup existing data
        cursor.execute('ALTER TABLE requests RENAME TO requests_old')
        self.create_table(conn)
        if carried:
            column_list = ', '.join(carried)
            # Copy data from old table
            cursor.execute(
                f'INSERT INTO requests ({column_list}) '
                f'SELECT {column_list} FROM requests_old'
            )
        cursor.execute('DROP TABLE requests_old')

    def create_table(self, conn):
        """Create the requests table"""
        cursor = conn.cursor()
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS requests (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp DATETIME,
                project_number TEXT,
                project_name TEXT,
                amount REAL,
                reason TEXT,
                original_text TEXT
            )
        ''')
        conn.commit()

    def add_request(self, project_number, project_name, amount, reason, original_text=""):
        """Add a new request to the database, stamped with the current local time.

        Raises:
            Exception: If the insert still fails after all retries.
        """
        # Store the timestamp as text ourselves: handing sqlite3 a datetime
        # relies on the default adapter that is deprecated since Python 3.12.
        # isoformat(" ") produces the same text that adapter wrote.
        timestamp = datetime.now().isoformat(" ")
        row = (timestamp, project_number, project_name, amount, reason, original_text)

        def insert(conn):
            conn.cursor().execute('''
                INSERT INTO requests (timestamp, project_number, project_name, amount, reason, original_text)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', row)
            conn.commit()

        self._run_with_retries(insert, "Could not add request")

    def get_all_requests(self):
        """Get all requests from the database, newest first.

        Returns:
            A list of dicts, one per row, keyed by column name.

        Raises:
            Exception: If the query still fails after all retries.
        """
        def fetch(conn):
            cursor = conn.cursor()
            cursor.execute('SELECT * FROM requests ORDER BY timestamp DESC')
            columns = [description[0] for description in cursor.description]
            return [dict(zip(columns, row)) for row in cursor.fetchall()]

        return self._run_with_retries(fetch, "Could not fetch requests")
|
gemini_processor.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import google.generativeai as genai
|
2 |
+
import os
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import json
|
5 |
+
import re
|
6 |
+
|
7 |
+
load_dotenv()
|
8 |
+
|
9 |
+
class GeminiProcessor:
    """Extract structured money-request fields from free text using Gemini.

    Text containing Arabic characters is first translated to English by the
    model, then the fields are extracted from the English text.
    """

    # Any character in the Unicode range U+0600-U+06FF; compiled once.
    _ARABIC_PATTERN = re.compile('[\u0600-\u06FF]')
    # A reply wrapped in a Markdown code fence, optionally tagged "json".
    _JSON_FENCE_PATTERN = re.compile(r'^```(?:json)?\s*(.*?)\s*```$', re.DOTALL | re.IGNORECASE)

    def __init__(self):
        """Configure the Gemini client from the ``GOOGLE_API_KEY`` variable.

        Raises:
            ValueError: If ``GOOGLE_API_KEY`` is not set.
        """
        # SECURITY: the key is read from the environment (or a .env file
        # loaded by load_dotenv) and is never stored in source. The key that
        # used to be embedded here has been published and must be revoked.
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY not found in environment variables")
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-pro')
        self.config = genai.GenerationConfig(temperature=0)

    def is_arabic(self, text):
        """Return True when ``text`` contains at least one Arabic-range character."""
        return bool(self._ARABIC_PATTERN.search(text))

    def translate_arabic_to_english(self, text):
        """Ask the model to translate the Arabic parts of ``text`` to English.

        Returns:
            The stripped model reply, or ``text`` unchanged if the call fails.
        """
        prompt = f"""
        Translate the following Arabic text to English. If the text is mixed (Arabic and English),
        translate only the Arabic parts and keep the English parts as is.
        Keep numbers in their original format.

        Text to translate: {text}
        """
        try:
            response = self.model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            # Best effort: fall back to the untranslated text.
            print(f"Translation error: {e}")
            return text

    @classmethod
    def _parse_json_response(cls, raw_text):
        """Parse the model reply as JSON, tolerating a Markdown code fence."""
        cleaned = raw_text.strip()
        fenced = cls._JSON_FENCE_PATTERN.match(cleaned)
        if fenced:
            cleaned = fenced.group(1)
        return json.loads(cleaned)

    def extract_request_details(self, text, context=""):
        """Extract project number, name, amount and reason from ``text``.

        Args:
            text: The latest user input.
            context: Earlier conversation text, prepended to ``text``.

        Returns:
            A dict with ``project_number``, ``project_name``, ``amount``
            (float), ``reason``, ``missing_fields`` and ``original_text``,
            plus ``translated_text`` when a translation was done. Returns
            ``None`` when the model call fails or the reply is unusable.
        """
        full_text = f"{context} {text}".strip()
        is_arabic_input = self.is_arabic(full_text)

        # Translate if Arabic text is detected
        if is_arabic_input:
            processing_text = self.translate_arabic_to_english(full_text)
        else:
            processing_text = full_text

        prompt = f"""
        Extract the following information from this text and previous context.
        The input has been translated from Arabic if it contained Arabic text.

        If any information is missing, leave it empty.
        Format the response exactly as a JSON object with these keys:
        {{
            "project_number": "extracted number or empty string",
            "project_name": "extracted name or empty string",
            "amount": extracted number or 0,
            "reason": "extracted reason or empty string",
            "missing_fields": ["list of missing required fields"],
            "original_text": "the original input text"
        }}

        ##No preamble## Response in VALID JSON ONLY##

        Text to analyze: {processing_text}
        """

        try:
            response = self.model.generate_content(prompt, generation_config=self.config)
            result = self._parse_json_response(response.text)

            if not isinstance(result, dict):
                raise ValueError("Response is not a JSON object")

            required_keys = ['project_number', 'project_name', 'amount', 'reason', 'missing_fields']
            if not all(key in result for key in required_keys):
                raise ValueError("Missing required keys in response")

            # An empty or null amount means "not provided"; treat it as 0
            # rather than discarding the other extracted fields.
            result['amount'] = float(result.get('amount') or 0)
            result['original_text'] = full_text  # Keep the original Arabic text

            # Add translation if it was performed
            if is_arabic_input:
                result['translated_text'] = processing_text

            return result
        except json.JSONDecodeError as e:
            print(f"JSON parsing error: {e}")
            return None
        except Exception as e:
            # The client library's exception types are not visible here, so
            # every failure is reported and mapped to None.
            print(f"Error processing request: {e}")
            return None
|
memory_handler.py
ADDED
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from gtts import gTTS
|
2 |
+
import io
|
3 |
+
import gradio as gr
|
4 |
+
from datetime import datetime, timedelta
|
5 |
+
import json
|
6 |
+
|
7 |
+
class MemoryHandler:
    """Short-term memory used to assemble one money request over several turns.

    Keeps the last few user inputs plus the best value seen so far for each
    request field, together with a per-field confidence score.
    """

    def __init__(self):
        self.conversation_history = []
        self.max_history = 5  # Keep last 5 interactions
        self.context_timeout = timedelta(minutes=2)  # Context expires after 2 minutes
        self.last_interaction_time = None
        # Sets self.partial_info and self.confidence_scores to empty state.
        self.clear_partial_info()

    def add_interaction(self, text: str, extracted_info: dict = None) -> None:
        """
        Add a new interaction to the conversation history and update partial information

        Args:
            text: The text from the voice/text input
            extracted_info: Dictionary containing extracted request details
        """
        current_time = datetime.now()

        # Check if we should clear context due to timeout
        if self.last_interaction_time and \
                (current_time - self.last_interaction_time) > self.context_timeout:
            self.clear_partial_info()

        # Update conversation history
        if text:
            # Add timestamp to conversation history
            self.conversation_history.append({
                'text': text,
                'timestamp': current_time.isoformat(),
                'extracted_info': extracted_info
            })
            if len(self.conversation_history) > self.max_history:
                self.conversation_history.pop(0)

        # Update partial information if provided
        if extracted_info:
            self._update_partial_info(extracted_info, current_time)

        self.last_interaction_time = current_time

    def _update_partial_info(self, extracted_info: dict, current_time: datetime) -> None:
        """
        Update partial information with confidence scoring

        A truthy new value replaces the stored one when nothing is stored yet
        or when its ``<field>_confidence`` (default 0.5) beats the stored score.
        """
        for key in self.partial_info:
            if key in extracted_info and extracted_info[key]:
                new_value = extracted_info[key]
                new_confidence = extracted_info.get(f'{key}_confidence', 0.5)

                # Update if empty or higher confidence
                if (self.partial_info[key] is None or
                        new_confidence > self.confidence_scores.get(key, 0)):
                    self.partial_info[key] = new_value
                    self.confidence_scores[key] = new_confidence

        self.partial_info['timestamp'] = current_time

    def get_context(self) -> str:
        """
        Get the current conversation context including partial information
        """
        # Add conversation history with timestamps
        context_parts = []
        for entry in self.conversation_history:
            timestamp = datetime.fromisoformat(entry['timestamp']).strftime('%H:%M:%S')
            context_parts.append(f"[{timestamp}] {entry['text']}")

        context = " ".join(context_parts)

        # Add partial information to context if available
        partial_context = []
        for key, value in self.partial_info.items():
            if value and key != 'timestamp':
                confidence = self.confidence_scores.get(key, 0)
                partial_context.append(f"{key}: {value} (confidence: {confidence:.2f})")

        if partial_context:
            context += "\nPartial information: " + ", ".join(partial_context)

        return context

    def get_partial_info(self) -> dict:
        """Get current partial information with confidence scores"""
        info = {k: v for k, v in self.partial_info.items()
                if k != 'timestamp' and v is not None}
        info['confidence_scores'] = self.confidence_scores
        return info

    def merge_partial_info(self, new_info: dict) -> None:
        """
        Merge new information with existing partial info based on confidence scores
        """
        for key in self.partial_info:
            if key in new_info and new_info[key] is not None:
                new_confidence = new_info.get(f'{key}_confidence', 0.5)
                if (self.partial_info[key] is None or
                        new_confidence > self.confidence_scores.get(key, 0)):
                    self.partial_info[key] = new_info[key]
                    self.confidence_scores[key] = new_confidence

    def clear_partial_info(self) -> None:
        """Clear partial information and confidence scores"""
        self.partial_info = {
            'project_number': None,
            'project_name': None,
            'amount': None,
            'reason': None,
            'timestamp': None
        }
        self.confidence_scores = {
            'project_number': 0.0,
            'project_name': 0.0,
            'amount': 0.0,
            'reason': 0.0
        }

    def clear_memory(self) -> str:
        """Clear all conversation history and partial information

        Returns:
            The fixed confirmation text ``"Memory cleared!"``.
        """
        self.conversation_history = []
        self.clear_partial_info()
        self.last_interaction_time = None
        return "Memory cleared!"

    def get_missing_fields(self) -> list:
        """Get list of missing required fields with confidence thresholds"""
        missing = []
        confidence_threshold = 0.5  # Minimum confidence required

        for field in ['project_number', 'project_name', 'amount', 'reason']:
            if (self.partial_info.get(field) is None or
                    self.confidence_scores.get(field, 0) < confidence_threshold):
                missing.append(field)
        return missing

    def text_to_speech(self, text: str) -> "tuple[str | None, str | None]":
        """Convert text to speech and return audio path

        Returns:
            ``(audio_path, None)`` on success, ``(None, error_message)`` on failure.
        """
        try:
            tts = gTTS(text=text, lang='en')
            audio_path = "temp_audio.mp3"
            tts.save(audio_path)
            return audio_path, None
        except Exception as e:
            return None, f"Error generating audio: {str(e)}"

    def create_confirmation_audio(self, project_number: str, project_name: str,
                                  amount: float, reason: str) -> "tuple[str | None, str | None]":
        """Create confirmation message audio with confidence information"""
        confidence_info = "\n".join([
            f"{field}: {self.confidence_scores.get(field, 0):.2f} confidence"
            for field in ['project_number', 'project_name', 'amount', 'reason']
        ])

        confirmation_text = (
            f"You are going to add request money for project ID: {project_number}, "
            f"Project name: {project_name}, request amount: {amount}, "
            f"reason: {reason}.\n\nConfidence scores:\n{confidence_info}\n"
            f"Are you sure you want to proceed?"
        )
        return self.text_to_speech(confirmation_text)

    def get_prompt_for_missing_info(self) -> str:
        """Generate a prompt for missing information with confidence scores"""
        missing = self.get_missing_fields()
        if not missing:
            return "All required information has been provided with sufficient confidence."

        current_info = self.get_partial_info()
        prompt = "Current information:\n"

        # Show current information with confidence scores
        for key, value in current_info.items():
            if key != 'confidence_scores' and value is not None:
                confidence = self.confidence_scores.get(key, 0)
                prompt += f"- {key}: {value} (confidence: {confidence:.2f})\n"

        prompt += "\nPlease provide or clarify the following information:\n"
        for field in missing:
            current_confidence = self.confidence_scores.get(field, 0)
            if current_confidence > 0:
                prompt += f"- {field} (current confidence: {current_confidence:.2f}, needs improvement)\n"
            else:
                prompt += f"- {field} (missing)\n"

        return prompt

    def to_json(self) -> str:
        """Serialize the memory state to JSON"""
        # partial_info['timestamp'] holds a datetime after any update, which
        # json.dumps cannot encode; store it as ISO text like the other times.
        partial = dict(self.partial_info)
        stamp = partial.get('timestamp')
        if isinstance(stamp, datetime):
            partial['timestamp'] = stamp.isoformat()

        return json.dumps({
            'conversation_history': self.conversation_history,
            'partial_info': partial,
            'confidence_scores': self.confidence_scores,
            'last_interaction_time': self.last_interaction_time.isoformat() if self.last_interaction_time else None
        })

    def from_json(self, json_str: str) -> None:
        """Restore memory state from JSON"""
        data = json.loads(json_str)

        # Turn the ISO text written by to_json back into a datetime.
        partial = dict(data['partial_info'])
        stamp = partial.get('timestamp')
        if isinstance(stamp, str):
            partial['timestamp'] = datetime.fromisoformat(stamp)

        self.conversation_history = data['conversation_history']
        self.partial_info = partial
        self.confidence_scores = data['confidence_scores']
        self.last_interaction_time = (datetime.fromisoformat(data['last_interaction_time'])
                                      if data['last_interaction_time'] else None)
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
portaudio19-dev
|
requests.db
ADDED
Binary file (12.3 kB). View file
|
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio>=4.0.0
|
2 |
+
pandas
|
3 |
+
SpeechRecognition
|
4 |
+
google-generativeai
|
5 |
+
python-dotenv
|
6 |
+
gTTS
|
7 |
+
playsound
|
8 |
+
pydub
|
voice_handler.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import speech_recognition as sr
|
2 |
+
import os
|
3 |
+
from pydub import AudioSegment
|
4 |
+
import tempfile
|
5 |
+
|
6 |
+
class VoiceHandler:
    """Turn recorded audio files into text via Google speech recognition."""

    def __init__(self):
        self.recognizer = sr.Recognizer()
        self.recognizer.energy_threshold = 20000
        self.recognizer.dynamic_energy_threshold = False
        self.recognizer.pause_threshold = 0.8

    def process_audio_file(self, audio_path: str, language: str) -> str:
        """Process audio file and convert to text

        Args:
            audio_path: Path of the recording. Anything not ending in ".wav"
                (any letter case) is first converted to a temporary WAV file.
            language: "Arabic", "Mixed (Arabic/English)" or anything else for
                English. Mixed mode tries Arabic first and falls back to
                English when nothing is recognised.

        Returns:
            The recognised text, or a message starting with "Error:" when
            recognition fails for any reason.
        """
        converted_path = None
        try:
            # Convert audio to wav format if needed
            if not audio_path.lower().endswith('.wav'):
                segment = AudioSegment.from_file(audio_path)
                # Reserve a unique file name, then close our handle before
                # exporting so no descriptor is leaked and the export can
                # reopen the file.
                with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
                    converted_path = temp_wav.name
                segment.export(converted_path, format='wav')
                audio_path = converted_path

            with sr.AudioFile(audio_path) as source:
                audio = self.recognizer.record(source)

            if language == "Arabic":
                return self.recognizer.recognize_google(audio, language="ar-SA")
            elif language == "Mixed (Arabic/English)":
                try:
                    return self.recognizer.recognize_google(audio, language="ar-SA")
                except sr.UnknownValueError:
                    return self.recognizer.recognize_google(audio, language="en-US")
            else:  # English
                return self.recognizer.recognize_google(audio, language="en-US")

        except sr.RequestError as e:
            return f"Error: Could not request results from speech service: {str(e)}"
        except sr.UnknownValueError:
            return "Error: Could not understand audio. Please speak clearly and try again."
        except Exception as e:
            return f"Error: {str(e)}"
        finally:
            # Clean up temporary file if it was created
            if converted_path is not None:
                try:
                    os.unlink(converted_path)
                except OSError:
                    # Best-effort cleanup: a leftover temp file must not
                    # replace the result being returned.
                    pass

    def check_microphone_access(self) -> bool:
        """Check if microphone is accessible"""
        try:
            with sr.Microphone() as source:
                self.recognizer.adjust_for_ambient_noise(source, duration=0.1)
            return True
        except (OSError, AttributeError, sr.RequestError):
            return False
|