Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,10 @@ import PyPDF2
|
|
9 |
log_file_path = "/tmp/support_bot_log.txt"
|
10 |
logging.basicConfig(filename=log_file_path, level=logging.INFO, format='%(asctime)s - %(message)s')
|
11 |
|
|
|
|
|
|
|
|
|
12 |
class SupportBotAgent:
|
13 |
def __init__(self, document_path):
|
14 |
# Load a pre-trained question-answering model
|
@@ -18,8 +22,14 @@ class SupportBotAgent:
|
|
18 |
# Load the document text and split it into sections (by paragraphs)
|
19 |
self.document_text = self.load_document(document_path)
|
20 |
self.sections = self.document_text.split('\n\n')
|
|
|
|
|
|
|
|
|
|
|
21 |
self.section_embeddings = self.embedder.encode(self.sections, convert_to_tensor=True)
|
22 |
logging.info(f"Loaded document: {document_path}")
|
|
|
23 |
|
24 |
def load_document(self, path):
|
25 |
"""Loads and extracts text from a TXT or PDF file."""
|
@@ -39,8 +49,10 @@ class SupportBotAgent:
|
|
39 |
else:
|
40 |
file_type = "Unsupported Format"
|
41 |
logging.error(f"Unsupported file format: {path}")
|
|
|
42 |
raise ValueError("Unsupported file format. Please provide a TXT or PDF file.")
|
43 |
logging.info(f"Loaded {file_type}: {path}")
|
|
|
44 |
return text
|
45 |
|
46 |
def find_relevant_section(self, query):
|
@@ -53,22 +65,26 @@ class SupportBotAgent:
|
|
53 |
best_idx = similarities.argmax().item()
|
54 |
best_section = self.sections[best_idx]
|
55 |
similarity_score = similarities[best_idx].item()
|
56 |
-
SIMILARITY_THRESHOLD = 0.4
|
57 |
|
58 |
if similarity_score >= SIMILARITY_THRESHOLD:
|
59 |
-
logging.info(f"Found relevant section using embeddings for query: {query}")
|
|
|
60 |
return best_section
|
61 |
|
62 |
-
logging.info(f"Low similarity ({similarity_score}). Falling back to keyword search.")
|
|
|
63 |
query_words = {word for word in query.lower().split() if word not in stopwords}
|
64 |
for section in self.sections:
|
65 |
section_words = {word for word in section.lower().split() if word not in stopwords}
|
66 |
common_words = query_words.intersection(section_words)
|
67 |
if len(common_words) >= 2:
|
68 |
logging.info(f"Keyword match for query: {query} with common words: {common_words}")
|
|
|
69 |
return section
|
70 |
|
71 |
logging.info("No good keyword match found. Returning default response.")
|
|
|
72 |
return "I don’t have enough information to answer that."
|
73 |
|
74 |
def answer_query(self, query):
|
@@ -79,6 +95,7 @@ class SupportBotAgent:
|
|
79 |
result = self.qa_model(question=query, context=context, max_answer_len=50)
|
80 |
answer = result["answer"]
|
81 |
logging.info(f"Answer for query '{query}': {answer}")
|
|
|
82 |
return answer
|
83 |
|
84 |
def adjust_response(self, query, response, feedback):
|
@@ -91,6 +108,7 @@ class SupportBotAgent:
|
|
91 |
else:
|
92 |
adjusted_response = response
|
93 |
logging.info(f"Adjusted answer for query '{query}': {adjusted_response}")
|
|
|
94 |
return adjusted_response
|
95 |
|
96 |
# --- Gradio Functions and App Workflow ---
|
@@ -98,8 +116,10 @@ class SupportBotAgent:
|
|
98 |
def process_file(file, state):
|
99 |
"""Handles file upload and initializes the SupportBotAgent."""
|
100 |
logging.info("Received file upload request")
|
|
|
101 |
if file is None:
|
102 |
logging.info("No file uploaded")
|
|
|
103 |
return [("Bot", "Please upload a TXT or PDF file.")], state
|
104 |
|
105 |
# Save the uploaded file to /tmp. Handle both file objects and NamedString.
|
@@ -112,10 +132,14 @@ def process_file(file, state):
|
|
112 |
if isinstance(content, str):
|
113 |
content = content.encode("utf-8")
|
114 |
f.write(content)
|
|
|
|
|
115 |
|
116 |
try:
|
117 |
state["agent"] = SupportBotAgent(temp_path)
|
118 |
except Exception as e:
|
|
|
|
|
119 |
return [("Bot", f"Error processing file: {str(e)}")], state
|
120 |
|
121 |
state["chat_history"] = [("Bot", "File loaded successfully. Enter your query (or type 'exit' to end):")]
|
@@ -136,6 +160,7 @@ def process_input(user_input, state):
|
|
136 |
if user_input.lower() == "exit":
|
137 |
state["chat_history"].append(("Bot", "Session ended. You may now download the log file."))
|
138 |
state["mode"] = "ended"
|
|
|
139 |
return state["chat_history"], state
|
140 |
|
141 |
if state["mode"] == "query":
|
@@ -157,6 +182,7 @@ def process_input(user_input, state):
|
|
157 |
state["last_answer"] = new_answer
|
158 |
state["feedback_count"] += 1
|
159 |
state["chat_history"].append(("Bot", f"Updated Answer: {new_answer}\nPlease provide feedback (good, too vague, not helpful):"))
|
|
|
160 |
return state["chat_history"], state
|
161 |
|
162 |
# --- Gradio UI Setup ---
|
|
|
9 |
log_file_path = "/tmp/support_bot_log.txt"
|
10 |
logging.basicConfig(filename=log_file_path, level=logging.INFO, format='%(asctime)s - %(message)s')
|
11 |
|
12 |
+
def flush_logs(logger=None):
    """Flush every handler attached to a logger.

    Called after log statements so that records written through the
    handlers are pushed out before the log file is read or downloaded.

    Args:
        logger: The ``logging.Logger`` whose handlers should be flushed.
            When omitted (or ``None``), the root logger is used, which is
            the logger that ``logging.basicConfig`` configures.

    Returns:
        None.
    """
    target = logging.getLogger() if logger is None else logger
    # Walk a snapshot of the handler list so that a handler being added or
    # removed while we flush cannot disturb the iteration.
    for handler in tuple(target.handlers):
        handler.flush()
|
15 |
+
|
16 |
class SupportBotAgent:
|
17 |
def __init__(self, document_path):
|
18 |
# Load a pre-trained question-answering model
|
|
|
22 |
# Load the document text and split it into sections (by paragraphs)
|
23 |
self.document_text = self.load_document(document_path)
|
24 |
self.sections = self.document_text.split('\n\n')
|
25 |
+
flush_logs()
|
26 |
+
# Log document length for debugging
|
27 |
+
logging.info(f"Document length: {len(self.document_text)} characters")
|
28 |
+
flush_logs()
|
29 |
+
# Create embeddings for all sections
|
30 |
self.section_embeddings = self.embedder.encode(self.sections, convert_to_tensor=True)
|
31 |
logging.info(f"Loaded document: {document_path}")
|
32 |
+
flush_logs()
|
33 |
|
34 |
def load_document(self, path):
|
35 |
"""Loads and extracts text from a TXT or PDF file."""
|
|
|
49 |
else:
|
50 |
file_type = "Unsupported Format"
|
51 |
logging.error(f"Unsupported file format: {path}")
|
52 |
+
flush_logs()
|
53 |
raise ValueError("Unsupported file format. Please provide a TXT or PDF file.")
|
54 |
logging.info(f"Loaded {file_type}: {path}")
|
55 |
+
flush_logs()
|
56 |
return text
|
57 |
|
58 |
def find_relevant_section(self, query):
|
|
|
65 |
best_idx = similarities.argmax().item()
|
66 |
best_section = self.sections[best_idx]
|
67 |
similarity_score = similarities[best_idx].item()
|
68 |
+
SIMILARITY_THRESHOLD = 0.4 # Adjust if needed
|
69 |
|
70 |
if similarity_score >= SIMILARITY_THRESHOLD:
|
71 |
+
logging.info(f"Found relevant section using embeddings for query: {query} (score: {similarity_score})")
|
72 |
+
flush_logs()
|
73 |
return best_section
|
74 |
|
75 |
+
logging.info(f"Low similarity ({similarity_score}) for query: {query}. Falling back to keyword search.")
|
76 |
+
flush_logs()
|
77 |
query_words = {word for word in query.lower().split() if word not in stopwords}
|
78 |
for section in self.sections:
|
79 |
section_words = {word for word in section.lower().split() if word not in stopwords}
|
80 |
common_words = query_words.intersection(section_words)
|
81 |
if len(common_words) >= 2:
|
82 |
logging.info(f"Keyword match for query: {query} with common words: {common_words}")
|
83 |
+
flush_logs()
|
84 |
return section
|
85 |
|
86 |
logging.info("No good keyword match found. Returning default response.")
|
87 |
+
flush_logs()
|
88 |
return "I don’t have enough information to answer that."
|
89 |
|
90 |
def answer_query(self, query):
|
|
|
95 |
result = self.qa_model(question=query, context=context, max_answer_len=50)
|
96 |
answer = result["answer"]
|
97 |
logging.info(f"Answer for query '{query}': {answer}")
|
98 |
+
flush_logs()
|
99 |
return answer
|
100 |
|
101 |
def adjust_response(self, query, response, feedback):
|
|
|
108 |
else:
|
109 |
adjusted_response = response
|
110 |
logging.info(f"Adjusted answer for query '{query}': {adjusted_response}")
|
111 |
+
flush_logs()
|
112 |
return adjusted_response
|
113 |
|
114 |
# --- Gradio Functions and App Workflow ---
|
|
|
116 |
def process_file(file, state):
|
117 |
"""Handles file upload and initializes the SupportBotAgent."""
|
118 |
logging.info("Received file upload request")
|
119 |
+
flush_logs()
|
120 |
if file is None:
|
121 |
logging.info("No file uploaded")
|
122 |
+
flush_logs()
|
123 |
return [("Bot", "Please upload a TXT or PDF file.")], state
|
124 |
|
125 |
# Save the uploaded file to /tmp. Handle both file objects and NamedString.
|
|
|
132 |
if isinstance(content, str):
|
133 |
content = content.encode("utf-8")
|
134 |
f.write(content)
|
135 |
+
logging.info(f"Saved uploaded file to {temp_path} (size: {os.path.getsize(temp_path)} bytes)")
|
136 |
+
flush_logs()
|
137 |
|
138 |
try:
|
139 |
state["agent"] = SupportBotAgent(temp_path)
|
140 |
except Exception as e:
|
141 |
+
logging.error(f"Error processing file: {str(e)}")
|
142 |
+
flush_logs()
|
143 |
return [("Bot", f"Error processing file: {str(e)}")], state
|
144 |
|
145 |
state["chat_history"] = [("Bot", "File loaded successfully. Enter your query (or type 'exit' to end):")]
|
|
|
160 |
if user_input.lower() == "exit":
|
161 |
state["chat_history"].append(("Bot", "Session ended. You may now download the log file."))
|
162 |
state["mode"] = "ended"
|
163 |
+
flush_logs()
|
164 |
return state["chat_history"], state
|
165 |
|
166 |
if state["mode"] == "query":
|
|
|
182 |
state["last_answer"] = new_answer
|
183 |
state["feedback_count"] += 1
|
184 |
state["chat_history"].append(("Bot", f"Updated Answer: {new_answer}\nPlease provide feedback (good, too vague, not helpful):"))
|
185 |
+
flush_logs()
|
186 |
return state["chat_history"], state
|
187 |
|
188 |
# --- Gradio UI Setup ---
|