gmustafa413 committed on
Commit
cf5ee13
·
verified ·
1 Parent(s): 58bc589

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -27
app.py CHANGED
@@ -10,18 +10,19 @@ import warnings
10
  warnings.filterwarnings("ignore")
11
 
12
  # Configuration - PUT YOUR API KEY HERE
13
- GEMINI_API_KEY = "[REDACTED — leaked API key removed from transcript; rotate this credential]" # ⚠️ REPLACE WITH YOUR KEY
14
  MODEL_NAME = "all-MiniLM-L6-v2"
15
  GENAI_MODEL = "gemini-pro"
16
  DATASET_NAME = "midrees2806/7K_Dataset"
17
  CHUNK_SIZE = 500
18
  TOP_K = 3
19
 
20
- # Initialize Gemini
21
  genai.configure(
22
  api_key=GEMINI_API_KEY,
 
23
  client_options={
24
- 'api_endpoint': "https://generativelanguage.googleapis.com/v1beta"
25
  }
26
  )
27
 
@@ -32,71 +33,93 @@ class GeminiRAGSystem:
32
  self.dataset_loaded = False
33
  self.loading_error = None
34
 
35
- # Initialize embedding model
36
  try:
37
  self.embedding_model = SentenceTransformer(MODEL_NAME)
 
38
  except Exception as e:
39
- raise RuntimeError(f"Failed to initialize embedding model: {str(e)}")
 
 
40
 
41
- # Load dataset
42
  self.load_dataset()
43
 
44
  def load_dataset(self):
45
- """Load dataset synchronously"""
46
  try:
 
47
  dataset = load_dataset(
48
  DATASET_NAME,
49
  split='train',
50
  download_mode="force_redownload"
51
  )
 
52
 
53
  if 'text' in dataset.features:
54
- self.chunks = dataset['text'][:1000] # Use first 1000 entries
 
55
  elif 'context' in dataset.features:
56
  self.chunks = dataset['context'][:1000]
 
57
  else:
58
  raise ValueError("Dataset must have 'text' or 'context' field")
59
 
60
- # Create embeddings
61
  embeddings = self.embedding_model.encode(
62
  self.chunks,
63
  show_progress_bar=False,
64
  convert_to_numpy=True
65
  )
 
 
66
  self.index = faiss.IndexFlatL2(embeddings.shape[1])
67
  self.index.add(embeddings.astype('float32'))
 
68
 
69
  self.dataset_loaded = True
 
70
  except Exception as e:
71
- self.loading_error = str(e)
72
- print(f"Dataset loading failed: {str(e)}")
 
73
 
74
  def get_relevant_context(self, query: str) -> str:
75
- """Retrieve most relevant chunks"""
76
  if not self.index:
 
77
  return ""
78
 
79
  try:
 
80
  query_embed = self.embedding_model.encode(
81
  [query],
82
  convert_to_numpy=True
83
  ).astype('float32')
 
 
 
 
84
 
85
- _, indices = self.index.search(query_embed, k=TOP_K)
86
- return "\n\n".join([self.chunks[i] for i in indices[0] if i < len(self.chunks)])
 
87
  except Exception as e:
88
  print(f"Search error: {str(e)}")
89
  return ""
90
 
91
  def generate_response(self, query: str) -> str:
92
- """Generate response with error handling"""
93
  if not self.dataset_loaded:
94
- if self.loading_error:
95
- return f"⚠️ Dataset loading failed: {self.loading_error}"
96
- return "⚠️ System initializing..."
 
 
97
 
98
  context = self.get_relevant_context(query)
99
  if not context:
 
100
  return "No relevant context found"
101
 
102
  prompt = f"""Answer based on this context:
@@ -105,35 +128,48 @@ class GeminiRAGSystem:
105
  Question: {query}
106
  Answer concisely:"""
107
 
 
 
108
  try:
109
  model = genai.GenerativeModel(GENAI_MODEL)
110
  response = model.generate_content(
111
  prompt,
112
  generation_config=genai.types.GenerationConfig(
113
- temperature=0.3
 
114
  )
115
  )
116
 
 
 
117
  if response.candidates and response.candidates[0].content.parts:
118
- return response.candidates[0].content.parts[0].text
 
 
 
119
  return "⚠️ No response from API"
120
  except Exception as e:
121
- return f"⚠️ API Error: {str(e)}"
 
 
122
 
123
- # Initialize system
 
124
  try:
125
  rag_system = GeminiRAGSystem()
126
  init_status = "✅ System ready" if rag_system.dataset_loaded else f"⚠️ Initializing... {rag_system.loading_error or ''}"
 
127
  except Exception as e:
128
  init_status = f"❌ Initialization failed: {str(e)}"
 
129
  rag_system = None
130
 
131
- # Create interface
132
  with gr.Blocks(title="Document Chatbot") as app:
133
  gr.Markdown("# Document Chatbot with Gemini")
134
 
135
  with gr.Row():
136
- chatbot = gr.Chatbot(height=500)
137
 
138
  with gr.Row():
139
  query = gr.Textbox(label="Your question", placeholder="Ask about the documents...")
@@ -142,15 +178,20 @@ with gr.Blocks(title="Document Chatbot") as app:
142
  submit_btn = gr.Button("Submit", variant="primary")
143
  clear_btn = gr.Button("Clear", variant="secondary")
144
 
145
- status = gr.Textbox(label="Status", value=init_status)
146
 
147
  def respond(message, chat_history):
 
148
  if not rag_system:
149
- return chat_history + [(message, "System initialization failed")]
 
 
 
150
  response = rag_system.generate_response(message)
151
  return chat_history + [(message, response)]
152
 
153
  def clear_chat():
 
154
  return []
155
 
156
  submit_btn.click(respond, [query, chatbot], [chatbot])
@@ -158,4 +199,5 @@ with gr.Blocks(title="Document Chatbot") as app:
158
  clear_btn.click(clear_chat, outputs=chatbot)
159
 
160
  if __name__ == "__main__":
161
- app.launch()
 
 
10
  warnings.filterwarnings("ignore")
11
 
12
  # Configuration - PUT YOUR API KEY HERE
13
+ GEMINI_API_KEY = "AIzaSyYourActualApiKeyHere" # ⚠️ REPLACE WITH YOUR KEY
14
  MODEL_NAME = "all-MiniLM-L6-v2"
15
  GENAI_MODEL = "gemini-pro"
16
  DATASET_NAME = "midrees2806/7K_Dataset"
17
  CHUNK_SIZE = 500
18
  TOP_K = 3
19
 
20
+ # Initialize Gemini with enhanced configuration
21
  genai.configure(
22
  api_key=GEMINI_API_KEY,
23
+ transport='rest', # Force REST API
24
  client_options={
25
+ 'api_endpoint': "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent"
26
  }
27
  )
28
 
 
33
  self.dataset_loaded = False
34
  self.loading_error = None
35
 
36
+ print("Initializing embedding model...")
37
  try:
38
  self.embedding_model = SentenceTransformer(MODEL_NAME)
39
+ print("Embedding model initialized successfully")
40
  except Exception as e:
41
+ error_msg = f"Failed to initialize embedding model: {str(e)}"
42
+ print(error_msg)
43
+ raise RuntimeError(error_msg)
44
 
45
+ print("Loading dataset...")
46
  self.load_dataset()
47
 
48
  def load_dataset(self):
49
+ """Load dataset with detailed error handling"""
50
  try:
51
+ print(f"Downloading dataset: {DATASET_NAME}")
52
  dataset = load_dataset(
53
  DATASET_NAME,
54
  split='train',
55
  download_mode="force_redownload"
56
  )
57
+ print("Dataset downloaded successfully")
58
 
59
  if 'text' in dataset.features:
60
+ self.chunks = dataset['text'][:1000]
61
+ print(f"Loaded {len(self.chunks)} text chunks")
62
  elif 'context' in dataset.features:
63
  self.chunks = dataset['context'][:1000]
64
+ print(f"Loaded {len(self.chunks)} context chunks")
65
  else:
66
  raise ValueError("Dataset must have 'text' or 'context' field")
67
 
68
+ print("Creating embeddings...")
69
  embeddings = self.embedding_model.encode(
70
  self.chunks,
71
  show_progress_bar=False,
72
  convert_to_numpy=True
73
  )
74
+ print(f"Created embeddings with shape {embeddings.shape}")
75
+
76
  self.index = faiss.IndexFlatL2(embeddings.shape[1])
77
  self.index.add(embeddings.astype('float32'))
78
+ print("FAISS index created successfully")
79
 
80
  self.dataset_loaded = True
81
+ print("Dataset loading complete")
82
  except Exception as e:
83
+ error_msg = f"Dataset loading failed: {str(e)}"
84
+ print(error_msg)
85
+ self.loading_error = error_msg
86
 
87
  def get_relevant_context(self, query: str) -> str:
88
+ """Retrieve context with debugging"""
89
  if not self.index:
90
+ print("No index available for search")
91
  return ""
92
 
93
  try:
94
+ print(f"Processing query: {query}")
95
  query_embed = self.embedding_model.encode(
96
  [query],
97
  convert_to_numpy=True
98
  ).astype('float32')
99
+ print("Query embedded successfully")
100
+
101
+ distances, indices = self.index.search(query_embed, k=TOP_K)
102
+ print(f"Search results - distances: {distances}, indices: {indices}")
103
 
104
+ context = "\n\n".join([self.chunks[i] for i in indices[0] if i < len(self.chunks)])
105
+ print(f"Context length: {len(context)} characters")
106
+ return context
107
  except Exception as e:
108
  print(f"Search error: {str(e)}")
109
  return ""
110
 
111
  def generate_response(self, query: str) -> str:
112
+ """Generate response with detailed error handling"""
113
  if not self.dataset_loaded:
114
+ msg = f"⚠️ Dataset loading failed: {self.loading_error}" if self.loading_error else "⚠️ System initializing..."
115
+ print(msg)
116
+ return msg
117
+
118
+ print(f"\n{'='*40}\nNew Query: {query}\n{'='*40}")
119
 
120
  context = self.get_relevant_context(query)
121
  if not context:
122
+ print("No relevant context found")
123
  return "No relevant context found"
124
 
125
  prompt = f"""Answer based on this context:
 
128
  Question: {query}
129
  Answer concisely:"""
130
 
131
+ print(f"\nPrompt sent to Gemini:\n{prompt}\n")
132
+
133
  try:
134
  model = genai.GenerativeModel(GENAI_MODEL)
135
  response = model.generate_content(
136
  prompt,
137
  generation_config=genai.types.GenerationConfig(
138
+ temperature=0.3,
139
+ max_output_tokens=1000
140
  )
141
  )
142
 
143
+ print(f"Raw API response: {response}")
144
+
145
  if response.candidates and response.candidates[0].content.parts:
146
+ answer = response.candidates[0].content.parts[0].text
147
+ print(f"Answer: {answer}")
148
+ return answer
149
+ print("⚠️ Empty response from API")
150
  return "⚠️ No response from API"
151
  except Exception as e:
152
+ error_msg = f"⚠️ API Error: {str(e)}"
153
+ print(error_msg)
154
+ return error_msg
155
 
156
+ # Initialize system with verbose logging
157
+ print("Initializing RAG system...")
158
  try:
159
  rag_system = GeminiRAGSystem()
160
  init_status = "✅ System ready" if rag_system.dataset_loaded else f"⚠️ Initializing... {rag_system.loading_error or ''}"
161
+ print(init_status)
162
  except Exception as e:
163
  init_status = f"❌ Initialization failed: {str(e)}"
164
+ print(init_status)
165
  rag_system = None
166
 
167
+ # Create interface with enhanced debugging
168
  with gr.Blocks(title="Document Chatbot") as app:
169
  gr.Markdown("# Document Chatbot with Gemini")
170
 
171
  with gr.Row():
172
+ chatbot = gr.Chatbot(height=500, label="Chat History")
173
 
174
  with gr.Row():
175
  query = gr.Textbox(label="Your question", placeholder="Ask about the documents...")
 
178
  submit_btn = gr.Button("Submit", variant="primary")
179
  clear_btn = gr.Button("Clear", variant="secondary")
180
 
181
+ status = gr.Textbox(label="System Status", value=init_status, interactive=False)
182
 
183
  def respond(message, chat_history):
184
+ print(f"\n{'='*40}\nUser Query: {message}\n{'='*40}")
185
  if not rag_system:
186
+ error_msg = "System initialization failed"
187
+ print(error_msg)
188
+ return chat_history + [(message, error_msg)]
189
+
190
  response = rag_system.generate_response(message)
191
  return chat_history + [(message, response)]
192
 
193
  def clear_chat():
194
+ print("Chat cleared")
195
  return []
196
 
197
  submit_btn.click(respond, [query, chatbot], [chatbot])
 
199
  clear_btn.click(clear_chat, outputs=chatbot)
200
 
201
  if __name__ == "__main__":
202
+ print("Launching Gradio interface...")
203
+ app.launch(debug=True)