Update app.py
app.py
CHANGED
@@ -40,10 +40,10 @@ class TransformersLLM(LLM):
 max_new_tokens = kwargs.pop('max_new_tokens', 256)
 temperature = kwargs.pop('temperature', 0.7)
 do_sample = kwargs.pop('do_sample', True)
-
+
 # Call super with model_name explicitly
 super().__init__(model_name=model_name, **kwargs)
-
+
 # Set our custom attributes
 self.pipeline = pipeline_obj
 self.max_new_tokens = max_new_tokens
@@ -59,7 +59,7 @@ class TransformersLLM(LLM):
 user_question = prompt.split("Question:")[-1].replace("Helpful Answer:", "").strip()
 else:
 user_question = prompt
-
+
 # Create a focused prompt for the model
 if "qwen" in self.model_name.lower():
 system_prompt = "You are a helpful assistant that analyzes CSV data and answers questions accurately and concisely."
@@ -71,7 +71,7 @@ class TransformersLLM(LLM):
 formatted_prompt = f"Question: {user_question}\n\nBased on the provided CSV data, please provide a clear and informative answer:\n\nAnswer:"

 print(f"Generating response for: {user_question[:100]}...")
-
+
 # Generate response
 with torch.no_grad():
 response = self.pipeline(
@@ -82,10 +82,10 @@ class TransformersLLM(LLM):
 pad_token_id=self.pipeline.tokenizer.eos_token_id,
 return_full_text=False
 )
-
+
 if response and len(response) > 0:
 generated_text = response[0]['generated_text'].strip()
-
+
 # Clean up the response
 if "assistant" in generated_text:
 generated_text = generated_text.split("assistant")[-1].strip()
@@ -93,18 +93,18 @@ class TransformersLLM(LLM):
 generated_text = generated_text.split("<|im_end|>")[0].strip()
 if "<|eot_id|>" in generated_text:
 generated_text = generated_text.split("<|eot_id|>")[0].strip()
-
+
 # Remove any remaining special tokens
 for token in ["<|im_start|>", "<|im_end|>", "<|eot_id|>", "<|begin_of_text|>", "<|end_of_text|>"]:
 generated_text = generated_text.replace(token, "")
-
+
 generated_text = generated_text.strip()
-
+
 if len(generated_text) > 10:
 return generated_text
-
+
 return "I apologize, but I couldn't generate a meaningful response. Please try rephrasing your question."
-
+
 except Exception as e:
 print(f"Error in LLM generation: {e}")
 return f"I encountered an error while processing your question: {str(e)}. Please try again."
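For context on the hunks above: `TransformersLLM` appears to be a custom LangChain `LLM` subclass wrapping a Transformers text-generation pipeline, but the full class body is not part of this diff. The following is only a minimal sketch of what such a wrapper typically looks like; the class name and any field not visible in the diff (`pipeline`, `model_name`, `max_new_tokens`, `temperature`, `do_sample` are visible) are assumptions.

```python
# Minimal sketch of a custom LangChain LLM wrapper around a transformers pipeline.
# Import path assumes the classic langchain package used elsewhere in this app.
from typing import Any, List, Optional
from langchain.llms.base import LLM

class TransformersLLMSketch(LLM):
    model_name: str = ""
    max_new_tokens: int = 256
    temperature: float = 0.7
    do_sample: bool = True
    pipeline: Any = None  # the transformers text-generation pipeline

    @property
    def _llm_type(self) -> str:
        return "custom_transformers"

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        # Generate text and return only the newly generated part.
        out = self.pipeline(
            prompt,
            max_new_tokens=self.max_new_tokens,
            do_sample=self.do_sample,
            temperature=self.temperature,
            return_full_text=False,
        )
        return out[0]["generated_text"].strip()
```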
@@ -115,8 +115,8 @@ class TransformersLLM(LLM):

 # Available models
 AVAILABLE_MODELS = {
-"
-"
+"Llama-3.2-1B-Instruct": "meta-llama/Llama-3.2-1B-Instruct",
+"Qwen2.5-0.5B-Instruct": "Qwen/Qwen2.5-0.5B-Instruct"
 }

 CHUNK_SIZES = {
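This hunk swaps the model registry to much smaller instruct checkpoints (1B and 0.5B), which are easier to fit in a constrained Space. As a quick standalone sanity check, either id can be exercised with the plain `transformers` pipeline API; this snippet is illustrative and not part of the app (the Llama checkpoint is gated and needs an accepted license on the Hub):

```python
# Standalone check that one of the new, smaller checkpoints loads and generates.
from transformers import pipeline

pipe = pipeline("text-generation", model="Qwen/Qwen2.5-0.5B-Instruct")
out = pipe("Summarize what a CSV file is in one sentence.",
           max_new_tokens=64, do_sample=False)
print(out[0]["generated_text"])
```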
@@ -128,10 +128,10 @@ CHUNK_SIZES = {
 def load_model(model_choice: str, progress=gr.Progress()):
 """Load the selected model with proper memory management"""
 global current_model, current_tokenizer, current_pipeline
-
+
 try:
 model_id = AVAILABLE_MODELS[model_choice]
-
+
 with model_lock:
 # Clear existing model if different
 if current_model is not None:
@@ -144,22 +144,22 @@ def load_model(model_choice: str, progress=gr.Progress()):
 else:
 # Same model already loaded
 return current_pipeline, f"✅ Model {model_choice} already loaded!"
-
+
 progress(0.2, desc=f"Loading tokenizer for {model_choice}...")
 print(f"Loading tokenizer for {model_id}...")
-
+
 tokenizer = AutoTokenizer.from_pretrained(
-model_id,
+model_id,
 trust_remote_code=True
 )
-
+
 # Set pad token if not exists
 if tokenizer.pad_token is None:
 tokenizer.pad_token = tokenizer.eos_token
-
+
 progress(0.5, desc=f"Loading model {model_choice}...")
 print(f"Loading model {model_id}...")
-
+
 # Load model with appropriate settings for Colab
 # model = AutoModelForCausalLM.from_pretrained(
 # model_id,
@@ -190,7 +190,7 @@ def load_model(model_choice: str, progress=gr.Progress()):
 )
 progress(0.8, desc="Creating pipeline...")
 print("Creating text generation pipeline...")
-
+
 # Create pipeline
 # pipe = pipeline(
 # "text-generation",
@@ -210,11 +210,11 @@ def load_model(model_choice: str, progress=gr.Progress()):
 current_model = model
 current_tokenizer = tokenizer
 current_pipeline = pipe
-
+
 progress(1.0, desc="Model loaded successfully!")
-
+
 return pipe, f"✅ Model {model_choice} loaded successfully!"
-
+
 except Exception as e:
 print(f"Error loading model: {e}")
 traceback.print_exc()
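The actual construction of `pipe` is elided in this diff (the visible `pipeline(` lines are commented out), so the exact arguments the Space uses are not recoverable here. A typical way to build a text-generation pipeline from the already loaded model and tokenizer looks roughly like the sketch below; every argument value is an assumption.

```python
# Hypothetical reconstruction of the pipeline setup; argument values are guesses.
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model=model,          # the AutoModelForCausalLM loaded above
    tokenizer=tokenizer,  # the AutoTokenizer loaded above
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    pad_token_id=tokenizer.eos_token_id,
)
```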
@@ -250,7 +250,7 @@ def csv_to_documents(file_path: str) -> List[Document]:
 # Read CSV file with multiple encoding attempts
 df = None
 encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']
-
+
 for encoding in encodings:
 try:
 df = pd.read_csv(file_path, encoding=encoding)
@@ -258,7 +258,7 @@ def csv_to_documents(file_path: str) -> List[Document]:
 break
 except UnicodeDecodeError:
 continue
-
+
 if df is None:
 print(f"Could not read {file_path} with any encoding")
 return []
@@ -269,12 +269,12 @@ def csv_to_documents(file_path: str) -> List[Document]:

 # Clean the dataframe
 df = df.dropna(how='all')  # Remove completely empty rows
-
+
 # Get basic info about the CSV
 filename = os.path.basename(file_path)
 total_rows = len(df)
 columns = list(df.columns)
-
+
 print(f"Processing {filename}: {total_rows} rows, {len(columns)} columns")
 print(f"Columns: {columns}")

@@ -282,7 +282,7 @@ def csv_to_documents(file_path: str) -> List[Document]:

 # Create a summary document first
 summary_content = f"Dataset: {filename}\nTotal rows: {total_rows}\nColumns: {', '.join(columns)}\n"
-
+
 # Add column statistics if numeric columns exist
 numeric_cols = df.select_dtypes(include=['number']).columns
 if len(numeric_cols) > 0:
@@ -314,7 +314,7 @@ def csv_to_documents(file_path: str) -> List[Document]:
 try:
 # Create a more structured text representation
 row_text_parts = []
-
+
 # Add row identifier
 row_text_parts.append(f"Row {idx + 1} of {filename}")

@@ -323,7 +323,7 @@ def csv_to_documents(file_path: str) -> List[Document]:
 value = row[col]
 if pd.isna(value):
 continue  # Skip NaN values
-
+
 # Clean and format the value
 if isinstance(value, (int, float)):
 formatted_value = f"{value:,.2f}" if isinstance(value, float) else f"{value:,}"
@@ -331,7 +331,7 @@ def csv_to_documents(file_path: str) -> List[Document]:
 formatted_value = str(value).replace('\n', ' ').replace('\r', ' ').strip()
 if len(formatted_value) > 100:
 formatted_value = formatted_value[:100] + "..."
-
+
 row_text_parts.append(f"{col}: {formatted_value}")

 # Combine all parts
@@ -395,18 +395,18 @@ def load_doc(list_file_path: List[str], splitting_strategy: str, chunk_size: str

 # Apply text splitting with adjusted parameters
 text_splitter = get_text_splitter(
-splitting_strategy,
-chunk_size_value,
+splitting_strategy,
+chunk_size_value,
 max(50, chunk_size_value // 10)  # Dynamic overlap
 )
 doc_splits = text_splitter.split_documents(all_documents)

 print(f"Total document chunks after splitting: {len(doc_splits)}")
-
+
 # Print some sample chunks for debugging
 for i, split in enumerate(doc_splits[:3]):
 print(f"Sample chunk {i+1}: {split.page_content[:100]}...")
-
+
 return doc_splits

 except Exception as e:
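The splitter is called with a dynamic overlap of `max(50, chunk_size_value // 10)`, i.e. 10% of the chunk size with a floor of 50 characters: a 1000-character chunk gets a 100-character overlap, a 300-character chunk gets 50. `get_text_splitter` itself is not shown in this diff; a minimal sketch of one plausible implementation, assuming LangChain's `RecursiveCharacterTextSplitter`:

```python
# Illustrative splitter using the same dynamic-overlap rule as the call site above;
# the app's real get_text_splitter strategies are not visible in this diff.
from langchain.text_splitter import RecursiveCharacterTextSplitter

chunk_size_value = 1000
splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size_value,
    chunk_overlap=max(50, chunk_size_value // 10),  # 100 for a 1000-char chunk
    separators=["\n\n", "\n", " ", ""],
)
```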
@@ -421,7 +421,7 @@ def create_db(splits, db_choice: str = "faiss"):
 raise ValueError("No document splits provided")

 print(f"Creating {db_choice} database with {len(splits)} documents")
-
+
 # Use a reliable embedding model with better parameters
 embeddings = HuggingFaceEmbeddings(
 model_name="sentence-transformers/all-MiniLM-L6-v2",
@@ -434,29 +434,29 @@ def create_db(splits, db_choice: str = "faiss"):
 'batch_size': 16
 }
 )
-
+
 print("Testing embeddings with sample text...")
 test_embedding = embeddings.embed_query("test query")
 print(f"Embedding dimension: {len(test_embedding)}")
-
+
 db_creators = {
 "faiss": lambda: FAISS.from_documents(splits, embeddings),
 "chroma": lambda: Chroma.from_documents(
-splits,
+splits,
 embeddings,
 persist_directory=None  # In-memory
 )
 }
-
+
 db = db_creators[db_choice]()
 print(f"Successfully created {db_choice} database")
-
+
 # Test the database with a simple query
 test_results = db.similarity_search("test", k=1)
 print(f"Database test successful, found {len(test_results)} results")
-
+
 return db
-
+
 except Exception as e:
 print(f"Error creating database: {e}")
 traceback.print_exc()
@@ -513,15 +513,15 @@ def initialize_llmchain(model_choice, temperature, max_tokens, top_k, vector_db,
 return None, "❌ Please create vector database first."

 progress(0.2, desc="Loading model...")
-
+
 # Load the selected model
 pipeline_obj, model_status = load_model(model_choice, progress)
-
+
 if pipeline_obj is None:
 return None, model_status

 progress(0.7, desc="Creating LLM instance...")
-
+
 # Create our custom LLM wrapper
 llm = TransformersLLM(
 model_name=AVAILABLE_MODELS[model_choice],
@@ -530,9 +530,9 @@ def initialize_llmchain(model_choice, temperature, max_tokens, top_k, vector_db,
 temperature=max(0.1, min(1.0, temperature)),
 do_sample=temperature > 0.1
 )
-
+
 progress(0.8, desc="Setting up retriever...")
-
+
 # Create retriever with optimized parameters
 retriever = vector_db.as_retriever(
 search_type="similarity",
@@ -541,7 +541,7 @@ def initialize_llmchain(model_choice, temperature, max_tokens, top_k, vector_db,
 "fetch_k": min(max(3, top_k * 2), 20)  # Fetch more, then filter
 }
 )
-
+
 # Test the retriever
 try:
 test_docs = retriever.get_relevant_documents("test query")
@@ -551,7 +551,7 @@ def initialize_llmchain(model_choice, temperature, max_tokens, top_k, vector_db,
 return None, f"❌ Database retriever failed: {str(e)}"

 progress(0.9, desc="Creating QA chain...")
-
+
 # Create QA chain with error handling
 qa_chain = RetrievalQA.from_chain_type(
 llm=llm,
@@ -568,7 +568,7 @@ def initialize_llmchain(model_choice, temperature, max_tokens, top_k, vector_db,
 f"🌡️ Temperature: {temperature}\n"
 f"📝 Max tokens: {max_tokens}\n"
 f"🔍 Retriever K: {top_k}")
-
+
 return qa_chain, success_msg

 except Exception as e:
@@ -601,14 +601,14 @@ def conversation(qa_chain, message, history):
 print(f"\n{'='*50}")
 print(f"Processing question: {message}")
 print(f"{'='*50}")
-
+
 # Enhance the query for better CSV data understanding
 enhanced_query = f"""Based on the CSV data provided, please answer this question: {message.strip()}

 Please provide a clear, informative answer that directly addresses the question. If you're analyzing data, include specific values, trends, or patterns you observe."""
-
+
 print(f"Enhanced query: {enhanced_query}")
-
+
 # Call the QA chain with timeout handling
 start_time = time.time()
 try:
@@ -628,7 +628,7 @@ Please provide a clear, informative answer that directly addresses the question.
 fallback_response += "Please try rephrasing your question or try again."
 else:
 fallback_response = "I couldn't find relevant information in your CSV data for this question. Please try a different question or check if your data contains the information you're looking for."
-
+
 return (
 qa_chain,
 gr.update(value=""),
@@ -643,18 +643,18 @@ Please provide a clear, informative answer that directly addresses the question.
 except Exception as fallback_error:
 print(f"Fallback also failed: {fallback_error}")
 error_response = f"I encountered an error processing your question: {str(qa_error)}. Please try:\n\n1. Using a simpler question\n2. Waiting a moment and trying again\n3. Reloading the model"
-
+
 return (
 qa_chain,
 gr.update(value=""),
 history + [(message, error_response)],
 f"Error: {str(qa_error)}", "Error processing", "", "No source", "", "No source"
 )
-
+
 # Extract and process the response
 print(f"Raw response type: {type(response)}")
 print(f"Response keys: {response.keys() if isinstance(response, dict) else 'Not a dict'}")
-
+
 if isinstance(response, dict):
 # Get the answer
 response_answer = response.get("result") or response.get("answer") or str(response)
@@ -672,15 +672,15 @@ Please provide a clear, informative answer that directly addresses the question.
 response_answer = response_answer.replace("Based on the following context, please provide a helpful and accurate answer", "").strip()
 response_answer = response_answer.replace("Helpful Answer:", "").strip()
 response_answer = response_answer.replace("Answer:", "").strip()
-
+
 # Remove repeated prompts
 if enhanced_query[:50] in response_answer:
 response_answer = response_answer.replace(enhanced_query, "").strip()
-
+
 # Ensure we have a meaningful response
 if len(response_answer.strip()) < 10:
 response_answer = "I was able to process your question, but the response was too brief. Please try rephrasing your question or providing more context."
-
+
 if not response_answer or response_answer.strip() == "":
 response_answer = "I apologize, but I couldn't generate a meaningful response to your question. Please try rephrasing your question or ensure your CSV data contains relevant information."

@@ -703,14 +703,14 @@ Please provide a clear, informative answer that directly addresses the question.
 content = content[:300] + "..."

 source_contents.append(content)
-
+
 if doc_type == "csv_summary":
 source_info.append(f"Summary of {source_file}")
 else:
 source_info.append(f"File: {source_file} | Row: {row_info}")
-
+
 print(f"Source {i+1}: {source_info[-1][:50]}...")
-
+
 except Exception as e:
 print(f"Error processing source {i}: {e}")
 source_contents.append(f"Error processing source: {str(e)}")
@@ -724,11 +724,14 @@ Please provide a clear, informative answer that directly addresses the question.
 print(f"Final response length: {len(response_answer)} characters")
 print(f"Response preview: {response_answer[:100]}...")
 print(f"Sources: {[info for info in source_info if info != 'No additional sources']}")
-
+
+new_history = history.copy()
+new_history.append({"role": "user", "content": message})
+new_history.append({"role": "assistant", "content": response_answer})
 return (
 qa_chain,
 gr.update(value=""),
-
+new_history,
 source_contents[0],
 source_info[0],
 source_contents[1],
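This is the main functional change in `conversation`: it now builds `new_history` as a list of role/content dicts and returns that in place of whatever value the removed line passed at that position (the removed line's content is not shown in the diff). That dict shape matches Gradio's "messages" chat format; a minimal sketch, assuming a `gr.Chatbot(type="messages")` component receives it:

```python
# Sketch of the messages-style history that new_history now produces.
# Assumes the chatbot component in the UI is created with type="messages".
import gradio as gr

history = [
    {"role": "user", "content": "Which column has the highest average?"},
    {"role": "assistant", "content": "The 'revenue' column, at roughly 4,200."},
]
chatbot = gr.Chatbot(type="messages", value=history)
```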
@@ -742,9 +745,9 @@ Please provide a clear, informative answer that directly addresses the question.
 print(f"Error: {str(e)}")
 print(f"Error type: {type(e).__name__}")
 traceback.print_exc()
-
+
 error_msg = f"❌ I encountered an error while processing your question:\n\n{str(e)}\n\nPlease try:\n1. Using a simpler question\n2. Waiting a moment and trying again\n3. Reloading the model\n4. Recreating the database"
-
+
 return (
 qa_chain,
 gr.update(value=""),
@@ -905,7 +908,7 @@ def demo():
 **Available Models:**
 - **Qwen2.5-7B-Instruct**: Advanced Chinese-English bilingual model, excellent for analysis
 - **Llama-3.1-8B-Instruct**: Meta's powerful instruction-following model
-
+
 **Note**: Models are loaded locally with 4-bit quantization for memory efficiency. First load may take several minutes.
 """)

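The note above mentions 4-bit quantization, but the corresponding `from_pretrained` call is commented out elsewhere in the file, so the exact configuration is not visible in this diff. A common way to do this with `transformers` and `bitsandbytes` is sketched below; all values are assumptions, not the Space's actual settings.

```python
# Hypothetical 4-bit loading setup; requires the bitsandbytes package and a GPU.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-0.5B-Instruct",  # one of the ids in AVAILABLE_MODELS
    quantization_config=bnb_config,
    device_map="auto",
)
```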
@@ -994,7 +997,7 @@ def demo():
 )

 demo.queue().launch(
-debug=True,
+debug=True,
 share=False,
 show_error=True
 )