Spaces:

gmustafa413
/

UE_ChatBot

Sleeping

App Files Files Community

gmustafa413 commited on Mar 27

Commit

cf5ee13

verified ·

1 Parent(s): 58bc589

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -27

app.py CHANGED Viewed

@@ -10,18 +10,19 @@ import warnings
 warnings.filterwarnings("ignore")
 # Configuration - PUT YOUR API KEY HERE
-GEMINI_API_KEY = "AIzaSyASrFvE3gFPigihza0JTuALzZmBx0Kc3d0"  # ⚠️ REPLACE WITH YOUR KEY
 MODEL_NAME = "all-MiniLM-L6-v2"
 GENAI_MODEL = "gemini-pro"
 DATASET_NAME = "midrees2806/7K_Dataset"
 CHUNK_SIZE = 500
 TOP_K = 3
-# Initialize Gemini
 genai.configure(
     api_key=GEMINI_API_KEY,
     client_options={
-        'api_endpoint': "https://generativelanguage.googleapis.com/v1beta"
     }
 )
@@ -32,71 +33,93 @@ class GeminiRAGSystem:
         self.dataset_loaded = False
         self.loading_error = None
-        # Initialize embedding model
         try:
             self.embedding_model = SentenceTransformer(MODEL_NAME)
         except Exception as e:
-            raise RuntimeError(f"Failed to initialize embedding model: {str(e)}")
-        # Load dataset
         self.load_dataset()
     def load_dataset(self):
-        """Load dataset synchronously"""
         try:
             dataset = load_dataset(
                 DATASET_NAME,
                 split='train',
                 download_mode="force_redownload"
             )
             if 'text' in dataset.features:
-                self.chunks = dataset['text'][:1000]  # Use first 1000 entries
             elif 'context' in dataset.features:
                 self.chunks = dataset['context'][:1000]
             else:
                 raise ValueError("Dataset must have 'text' or 'context' field")
-            # Create embeddings
             embeddings = self.embedding_model.encode(
                 self.chunks,
                 show_progress_bar=False,
                 convert_to_numpy=True
             )
             self.index = faiss.IndexFlatL2(embeddings.shape[1])
             self.index.add(embeddings.astype('float32'))
             self.dataset_loaded = True
         except Exception as e:
-            self.loading_error = str(e)
-            print(f"Dataset loading failed: {str(e)}")
     def get_relevant_context(self, query: str) -> str:
-        """Retrieve most relevant chunks"""
         if not self.index:
             return ""
         try:
             query_embed = self.embedding_model.encode(
                 [query],
                 convert_to_numpy=True
             ).astype('float32')
-            _, indices = self.index.search(query_embed, k=TOP_K)
-            return "\n\n".join([self.chunks[i] for i in indices[0] if i < len(self.chunks)])
         except Exception as e:
             print(f"Search error: {str(e)}")
             return ""
     def generate_response(self, query: str) -> str:
-        """Generate response with error handling"""
         if not self.dataset_loaded:
-            if self.loading_error:
-                return f"⚠️ Dataset loading failed: {self.loading_error}"
-            return "⚠️ System initializing..."
         context = self.get_relevant_context(query)
         if not context:
             return "No relevant context found"
         prompt = f"""Answer based on this context:
@@ -105,35 +128,48 @@ class GeminiRAGSystem:
         Question: {query}
         Answer concisely:"""
         try:
             model = genai.GenerativeModel(GENAI_MODEL)
             response = model.generate_content(
                 prompt,
                 generation_config=genai.types.GenerationConfig(
-                    temperature=0.3
                 )
             )
             if response.candidates and response.candidates[0].content.parts:
-                return response.candidates[0].content.parts[0].text
             return "⚠️ No response from API"
         except Exception as e:
-            return f"⚠️ API Error: {str(e)}"
-# Initialize system
 try:
     rag_system = GeminiRAGSystem()
     init_status = "✅ System ready" if rag_system.dataset_loaded else f"⚠️ Initializing... {rag_system.loading_error or ''}"
 except Exception as e:
     init_status = f"❌ Initialization failed: {str(e)}"
     rag_system = None
-# Create interface
 with gr.Blocks(title="Document Chatbot") as app:
     gr.Markdown("# Document Chatbot with Gemini")
     with gr.Row():
-        chatbot = gr.Chatbot(height=500)
     with gr.Row():
         query = gr.Textbox(label="Your question", placeholder="Ask about the documents...")
@@ -142,15 +178,20 @@ with gr.Blocks(title="Document Chatbot") as app:
         submit_btn = gr.Button("Submit", variant="primary")
         clear_btn = gr.Button("Clear", variant="secondary")
-    status = gr.Textbox(label="Status", value=init_status)
     def respond(message, chat_history):
         if not rag_system:
-            return chat_history + [(message, "System initialization failed")]
         response = rag_system.generate_response(message)
         return chat_history + [(message, response)]
     def clear_chat():
         return []
     submit_btn.click(respond, [query, chatbot], [chatbot])
@@ -158,4 +199,5 @@ with gr.Blocks(title="Document Chatbot") as app:
     clear_btn.click(clear_chat, outputs=chatbot)
 if __name__ == "__main__":
-    app.launch()

 warnings.filterwarnings("ignore")
 # Configuration - PUT YOUR API KEY HERE
+GEMINI_API_KEY = "AIzaSyYourActualApiKeyHere"  # ⚠️ REPLACE WITH YOUR KEY
 MODEL_NAME = "all-MiniLM-L6-v2"
 GENAI_MODEL = "gemini-pro"
 DATASET_NAME = "midrees2806/7K_Dataset"
 CHUNK_SIZE = 500
 TOP_K = 3
+# Initialize Gemini with enhanced configuration
 genai.configure(
     api_key=GEMINI_API_KEY,
+    transport='rest',  # Force REST API
     client_options={
+        'api_endpoint': "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent"
     }
 )
         self.dataset_loaded = False
         self.loading_error = None
+        print("Initializing embedding model...")
         try:
             self.embedding_model = SentenceTransformer(MODEL_NAME)
+            print("Embedding model initialized successfully")
         except Exception as e:
+            error_msg = f"Failed to initialize embedding model: {str(e)}"
+            print(error_msg)
+            raise RuntimeError(error_msg)
+        print("Loading dataset...")
         self.load_dataset()
     def load_dataset(self):
+        """Load dataset with detailed error handling"""
         try:
+            print(f"Downloading dataset: {DATASET_NAME}")
             dataset = load_dataset(
                 DATASET_NAME,
                 split='train',
                 download_mode="force_redownload"
             )
+            print("Dataset downloaded successfully")
             if 'text' in dataset.features:
+                self.chunks = dataset['text'][:1000]
+                print(f"Loaded {len(self.chunks)} text chunks")
             elif 'context' in dataset.features:
                 self.chunks = dataset['context'][:1000]
+                print(f"Loaded {len(self.chunks)} context chunks")
             else:
                 raise ValueError("Dataset must have 'text' or 'context' field")
+            print("Creating embeddings...")
             embeddings = self.embedding_model.encode(
                 self.chunks,
                 show_progress_bar=False,
                 convert_to_numpy=True
             )
+            print(f"Created embeddings with shape {embeddings.shape}")
             self.index = faiss.IndexFlatL2(embeddings.shape[1])
             self.index.add(embeddings.astype('float32'))
+            print("FAISS index created successfully")
             self.dataset_loaded = True
+            print("Dataset loading complete")
         except Exception as e:
+            error_msg = f"Dataset loading failed: {str(e)}"
+            print(error_msg)
+            self.loading_error = error_msg
     def get_relevant_context(self, query: str) -> str:
+        """Retrieve context with debugging"""
         if not self.index:
+            print("No index available for search")
             return ""
         try:
+            print(f"Processing query: {query}")
             query_embed = self.embedding_model.encode(
                 [query],
                 convert_to_numpy=True
             ).astype('float32')
+            print("Query embedded successfully")
+            distances, indices = self.index.search(query_embed, k=TOP_K)
+            print(f"Search results - distances: {distances}, indices: {indices}")
+            context = "\n\n".join([self.chunks[i] for i in indices[0] if i < len(self.chunks)])
+            print(f"Context length: {len(context)} characters")
+            return context
         except Exception as e:
             print(f"Search error: {str(e)}")
             return ""
     def generate_response(self, query: str) -> str:
+        """Generate response with detailed error handling"""
         if not self.dataset_loaded:
+            msg = f"⚠️ Dataset loading failed: {self.loading_error}" if self.loading_error else "⚠️ System initializing..."
+            print(msg)
+            return msg
+        print(f"\n{'='*40}\nNew Query: {query}\n{'='*40}")
         context = self.get_relevant_context(query)
         if not context:
+            print("No relevant context found")
             return "No relevant context found"
         prompt = f"""Answer based on this context:
         Question: {query}
         Answer concisely:"""
+        print(f"\nPrompt sent to Gemini:\n{prompt}\n")
         try:
             model = genai.GenerativeModel(GENAI_MODEL)
             response = model.generate_content(
                 prompt,
                 generation_config=genai.types.GenerationConfig(
+                    temperature=0.3,
+                    max_output_tokens=1000
                 )
             )
+            print(f"Raw API response: {response}")
             if response.candidates and response.candidates[0].content.parts:
+                answer = response.candidates[0].content.parts[0].text
+                print(f"Answer: {answer}")
+                return answer
+            print("⚠️ Empty response from API")
             return "⚠️ No response from API"
         except Exception as e:
+            error_msg = f"⚠️ API Error: {str(e)}"
+            print(error_msg)
+            return error_msg
+# Initialize system with verbose logging
+print("Initializing RAG system...")
 try:
     rag_system = GeminiRAGSystem()
     init_status = "✅ System ready" if rag_system.dataset_loaded else f"⚠️ Initializing... {rag_system.loading_error or ''}"
+    print(init_status)
 except Exception as e:
     init_status = f"❌ Initialization failed: {str(e)}"
+    print(init_status)
     rag_system = None
+# Create interface with enhanced debugging
 with gr.Blocks(title="Document Chatbot") as app:
     gr.Markdown("# Document Chatbot with Gemini")
     with gr.Row():
+        chatbot = gr.Chatbot(height=500, label="Chat History")
     with gr.Row():
         query = gr.Textbox(label="Your question", placeholder="Ask about the documents...")
         submit_btn = gr.Button("Submit", variant="primary")
         clear_btn = gr.Button("Clear", variant="secondary")
+    status = gr.Textbox(label="System Status", value=init_status, interactive=False)
     def respond(message, chat_history):
+        print(f"\n{'='*40}\nUser Query: {message}\n{'='*40}")
         if not rag_system:
+            error_msg = "System initialization failed"
+            print(error_msg)
+            return chat_history + [(message, error_msg)]
         response = rag_system.generate_response(message)
         return chat_history + [(message, response)]
     def clear_chat():
+        print("Chat cleared")
         return []
     submit_btn.click(respond, [query, chatbot], [chatbot])
     clear_btn.click(clear_chat, outputs=chatbot)
 if __name__ == "__main__":
+    print("Launching Gradio interface...")
+    app.launch(debug=True)