Shriharsh committed on
Commit
bccbeba
·
verified ·
1 Parent(s): 2a28b9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -3
app.py CHANGED
@@ -9,6 +9,10 @@ import PyPDF2
9
  log_file_path = "/tmp/support_bot_log.txt"
10
  logging.basicConfig(filename=log_file_path, level=logging.INFO, format='%(asctime)s - %(message)s')
11
 
 
 
 
 
12
  class SupportBotAgent:
13
  def __init__(self, document_path):
14
  # Load a pre-trained question-answering model
@@ -18,8 +22,14 @@ class SupportBotAgent:
18
  # Load the document text and split it into sections (by paragraphs)
19
  self.document_text = self.load_document(document_path)
20
  self.sections = self.document_text.split('\n\n')
 
 
 
 
 
21
  self.section_embeddings = self.embedder.encode(self.sections, convert_to_tensor=True)
22
  logging.info(f"Loaded document: {document_path}")
 
23
 
24
  def load_document(self, path):
25
  """Loads and extracts text from a TXT or PDF file."""
@@ -39,8 +49,10 @@ class SupportBotAgent:
39
  else:
40
  file_type = "Unsupported Format"
41
  logging.error(f"Unsupported file format: {path}")
 
42
  raise ValueError("Unsupported file format. Please provide a TXT or PDF file.")
43
  logging.info(f"Loaded {file_type}: {path}")
 
44
  return text
45
 
46
  def find_relevant_section(self, query):
@@ -53,22 +65,26 @@ class SupportBotAgent:
53
  best_idx = similarities.argmax().item()
54
  best_section = self.sections[best_idx]
55
  similarity_score = similarities[best_idx].item()
56
- SIMILARITY_THRESHOLD = 0.4
57
 
58
  if similarity_score >= SIMILARITY_THRESHOLD:
59
- logging.info(f"Found relevant section using embeddings for query: {query}")
 
60
  return best_section
61
 
62
- logging.info(f"Low similarity ({similarity_score}). Falling back to keyword search.")
 
63
  query_words = {word for word in query.lower().split() if word not in stopwords}
64
  for section in self.sections:
65
  section_words = {word for word in section.lower().split() if word not in stopwords}
66
  common_words = query_words.intersection(section_words)
67
  if len(common_words) >= 2:
68
  logging.info(f"Keyword match for query: {query} with common words: {common_words}")
 
69
  return section
70
 
71
  logging.info("No good keyword match found. Returning default response.")
 
72
  return "I don’t have enough information to answer that."
73
 
74
  def answer_query(self, query):
@@ -79,6 +95,7 @@ class SupportBotAgent:
79
  result = self.qa_model(question=query, context=context, max_answer_len=50)
80
  answer = result["answer"]
81
  logging.info(f"Answer for query '{query}': {answer}")
 
82
  return answer
83
 
84
  def adjust_response(self, query, response, feedback):
@@ -91,6 +108,7 @@ class SupportBotAgent:
91
  else:
92
  adjusted_response = response
93
  logging.info(f"Adjusted answer for query '{query}': {adjusted_response}")
 
94
  return adjusted_response
95
 
96
  # --- Gradio Functions and App Workflow ---
@@ -98,8 +116,10 @@ class SupportBotAgent:
98
  def process_file(file, state):
99
  """Handles file upload and initializes the SupportBotAgent."""
100
  logging.info("Received file upload request")
 
101
  if file is None:
102
  logging.info("No file uploaded")
 
103
  return [("Bot", "Please upload a TXT or PDF file.")], state
104
 
105
  # Save the uploaded file to /tmp. Handle both file objects and NamedString.
@@ -112,10 +132,14 @@ def process_file(file, state):
112
  if isinstance(content, str):
113
  content = content.encode("utf-8")
114
  f.write(content)
 
 
115
 
116
  try:
117
  state["agent"] = SupportBotAgent(temp_path)
118
  except Exception as e:
 
 
119
  return [("Bot", f"Error processing file: {str(e)}")], state
120
 
121
  state["chat_history"] = [("Bot", "File loaded successfully. Enter your query (or type 'exit' to end):")]
@@ -136,6 +160,7 @@ def process_input(user_input, state):
136
  if user_input.lower() == "exit":
137
  state["chat_history"].append(("Bot", "Session ended. You may now download the log file."))
138
  state["mode"] = "ended"
 
139
  return state["chat_history"], state
140
 
141
  if state["mode"] == "query":
@@ -157,6 +182,7 @@ def process_input(user_input, state):
157
  state["last_answer"] = new_answer
158
  state["feedback_count"] += 1
159
  state["chat_history"].append(("Bot", f"Updated Answer: {new_answer}\nPlease provide feedback (good, too vague, not helpful):"))
 
160
  return state["chat_history"], state
161
 
162
  # --- Gradio UI Setup ---
 
9
  log_file_path = "/tmp/support_bot_log.txt"
10
  logging.basicConfig(filename=log_file_path, level=logging.INFO, format='%(asctime)s - %(message)s')
11
 
12
+ def flush_logs():
13
+ for handler in logging.getLogger().handlers:
14
+ handler.flush()
15
+
16
  class SupportBotAgent:
17
  def __init__(self, document_path):
18
  # Load a pre-trained question-answering model
 
22
  # Load the document text and split it into sections (by paragraphs)
23
  self.document_text = self.load_document(document_path)
24
  self.sections = self.document_text.split('\n\n')
25
+ flush_logs()
26
+ # Log document length for debugging
27
+ logging.info(f"Document length: {len(self.document_text)} characters")
28
+ flush_logs()
29
+ # Create embeddings for all sections
30
  self.section_embeddings = self.embedder.encode(self.sections, convert_to_tensor=True)
31
  logging.info(f"Loaded document: {document_path}")
32
+ flush_logs()
33
 
34
  def load_document(self, path):
35
  """Loads and extracts text from a TXT or PDF file."""
 
49
  else:
50
  file_type = "Unsupported Format"
51
  logging.error(f"Unsupported file format: {path}")
52
+ flush_logs()
53
  raise ValueError("Unsupported file format. Please provide a TXT or PDF file.")
54
  logging.info(f"Loaded {file_type}: {path}")
55
+ flush_logs()
56
  return text
57
 
58
  def find_relevant_section(self, query):
 
65
  best_idx = similarities.argmax().item()
66
  best_section = self.sections[best_idx]
67
  similarity_score = similarities[best_idx].item()
68
+ SIMILARITY_THRESHOLD = 0.4 # Adjust if needed
69
 
70
  if similarity_score >= SIMILARITY_THRESHOLD:
71
+ logging.info(f"Found relevant section using embeddings for query: {query} (score: {similarity_score})")
72
+ flush_logs()
73
  return best_section
74
 
75
+ logging.info(f"Low similarity ({similarity_score}) for query: {query}. Falling back to keyword search.")
76
+ flush_logs()
77
  query_words = {word for word in query.lower().split() if word not in stopwords}
78
  for section in self.sections:
79
  section_words = {word for word in section.lower().split() if word not in stopwords}
80
  common_words = query_words.intersection(section_words)
81
  if len(common_words) >= 2:
82
  logging.info(f"Keyword match for query: {query} with common words: {common_words}")
83
+ flush_logs()
84
  return section
85
 
86
  logging.info("No good keyword match found. Returning default response.")
87
+ flush_logs()
88
  return "I don’t have enough information to answer that."
89
 
90
  def answer_query(self, query):
 
95
  result = self.qa_model(question=query, context=context, max_answer_len=50)
96
  answer = result["answer"]
97
  logging.info(f"Answer for query '{query}': {answer}")
98
+ flush_logs()
99
  return answer
100
 
101
  def adjust_response(self, query, response, feedback):
 
108
  else:
109
  adjusted_response = response
110
  logging.info(f"Adjusted answer for query '{query}': {adjusted_response}")
111
+ flush_logs()
112
  return adjusted_response
113
 
114
  # --- Gradio Functions and App Workflow ---
 
116
  def process_file(file, state):
117
  """Handles file upload and initializes the SupportBotAgent."""
118
  logging.info("Received file upload request")
119
+ flush_logs()
120
  if file is None:
121
  logging.info("No file uploaded")
122
+ flush_logs()
123
  return [("Bot", "Please upload a TXT or PDF file.")], state
124
 
125
  # Save the uploaded file to /tmp. Handle both file objects and NamedString.
 
132
  if isinstance(content, str):
133
  content = content.encode("utf-8")
134
  f.write(content)
135
+ logging.info(f"Saved uploaded file to {temp_path} (size: {os.path.getsize(temp_path)} bytes)")
136
+ flush_logs()
137
 
138
  try:
139
  state["agent"] = SupportBotAgent(temp_path)
140
  except Exception as e:
141
+ logging.error(f"Error processing file: {str(e)}")
142
+ flush_logs()
143
  return [("Bot", f"Error processing file: {str(e)}")], state
144
 
145
  state["chat_history"] = [("Bot", "File loaded successfully. Enter your query (or type 'exit' to end):")]
 
160
  if user_input.lower() == "exit":
161
  state["chat_history"].append(("Bot", "Session ended. You may now download the log file."))
162
  state["mode"] = "ended"
163
+ flush_logs()
164
  return state["chat_history"], state
165
 
166
  if state["mode"] == "query":
 
182
  state["last_answer"] = new_answer
183
  state["feedback_count"] += 1
184
  state["chat_history"].append(("Bot", f"Updated Answer: {new_answer}\nPlease provide feedback (good, too vague, not helpful):"))
185
+ flush_logs()
186
  return state["chat_history"], state
187
 
188
  # --- Gradio UI Setup ---