mgbam committed on
Commit
eeb0aa2
·
verified ·
1 Parent(s): c9ef26c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -4
app.py CHANGED
@@ -61,7 +61,7 @@ Respond with:
61
  Format: Markdown with LaTeX mathematical notation where applicable
62
  """
63
 
64
- # Validation
65
  if not ResearchConfig.DEEPSEEK_API_KEY:
66
  st.error("""**Research Portal Configuration Required**
67
  1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
@@ -87,6 +87,8 @@ class QuantumDocumentManager:
87
  separators=["\n\n", "\n", "|||"]
88
  )
89
  docs = splitter.create_documents(documents)
 
 
90
  return Chroma.from_documents(
91
  documents=docs,
92
  embedding=self.embeddings,
@@ -134,8 +136,11 @@ class ResearchRetriever:
134
 
135
  def retrieve(self, query: str, domain: str) -> List[Any]:
136
  try:
137
- return self.retrievers[domain].invoke(query)
 
 
138
  except KeyError:
 
139
  return []
140
 
141
  retriever = ResearchRetriever()
@@ -150,7 +155,7 @@ class CognitiveProcessor:
150
 
151
  def process_query(self, prompt: str) -> Dict:
152
  futures = []
153
- for _ in range(3): # Triple redundancy
154
  futures.append(self.executor.submit(
155
  self._execute_api_request,
156
  prompt
@@ -197,6 +202,7 @@ class CognitiveProcessor:
197
  valid = [r for r in results if "error" not in r]
198
  if not valid:
199
  return {"error": "All API requests failed"}
 
200
  return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
201
 
202
  # ------------------------------
@@ -209,6 +215,7 @@ class ResearchWorkflow:
209
  self._build_workflow()
210
 
211
  def _build_workflow(self):
 
212
  self.workflow.add_node("ingest", self.ingest_query)
213
  self.workflow.add_node("retrieve", self.retrieve_documents)
214
  self.workflow.add_node("analyze", self.analyze_content)
@@ -231,6 +238,7 @@ class ResearchWorkflow:
231
  def ingest_query(self, state: AgentState) -> Dict:
232
  try:
233
  query = state["messages"][-1].content
 
234
  return {
235
  "messages": [AIMessage(content="Query ingested successfully")],
236
  "context": {"raw_query": query},
@@ -243,6 +251,8 @@ class ResearchWorkflow:
243
  try:
244
  query = state["context"]["raw_query"]
245
  docs = retriever.retrieve(query, "research")
 
 
246
  return {
247
  "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
248
  "context": {
@@ -255,7 +265,13 @@ class ResearchWorkflow:
255
 
256
  def analyze_content(self, state: AgentState) -> Dict:
257
  try:
258
- docs = "\n\n".join([d.page_content for d in state["context"]["documents"]])
 
 
 
 
 
 
259
  prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
260
  response = self.processor.process_query(prompt)
261
 
@@ -304,9 +320,11 @@ Improve:
304
 
305
  def _quality_check(self, state: AgentState) -> str:
306
  content = state["messages"][-1].content
 
307
  return "valid" if "VALID" in content else "invalid"
308
 
309
  def _error_state(self, message: str) -> Dict:
 
310
  return {
311
  "messages": [AIMessage(content=f"❌ {message}")],
312
  "context": {"error": True},
 
61
  Format: Markdown with LaTeX mathematical notation where applicable
62
  """
63
 
64
+ # Validate API key configuration
65
  if not ResearchConfig.DEEPSEEK_API_KEY:
66
  st.error("""**Research Portal Configuration Required**
67
  1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
 
87
  separators=["\n\n", "\n", "|||"]
88
  )
89
  docs = splitter.create_documents(documents)
90
+ # Log how many chunks were created
91
+ st.write(f"Created {len(docs)} chunks for collection '{collection_name}'")
92
  return Chroma.from_documents(
93
  documents=docs,
94
  embedding=self.embeddings,
 
136
 
137
  def retrieve(self, query: str, domain: str) -> List[Any]:
138
  try:
139
+ results = self.retrievers[domain].invoke(query)
140
+ st.write(f"[DEBUG] Retrieved {len(results)} documents for query: '{query}' in domain '{domain}'")
141
+ return results
142
  except KeyError:
143
+ st.error(f"[ERROR] Retrieval domain '{domain}' not found.")
144
  return []
145
 
146
  retriever = ResearchRetriever()
 
155
 
156
  def process_query(self, prompt: str) -> Dict:
157
  futures = []
158
+ for _ in range(3): # Triple redundancy for robustness
159
  futures.append(self.executor.submit(
160
  self._execute_api_request,
161
  prompt
 
202
  valid = [r for r in results if "error" not in r]
203
  if not valid:
204
  return {"error": "All API requests failed"}
205
+ # Choose the result with the longest content
206
  return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
207
 
208
  # ------------------------------
 
215
  self._build_workflow()
216
 
217
  def _build_workflow(self):
218
+ # Register nodes in the state graph
219
  self.workflow.add_node("ingest", self.ingest_query)
220
  self.workflow.add_node("retrieve", self.retrieve_documents)
221
  self.workflow.add_node("analyze", self.analyze_content)
 
238
  def ingest_query(self, state: AgentState) -> Dict:
239
  try:
240
  query = state["messages"][-1].content
241
+ st.write(f"[DEBUG] Ingesting query: {query}")
242
  return {
243
  "messages": [AIMessage(content="Query ingested successfully")],
244
  "context": {"raw_query": query},
 
251
  try:
252
  query = state["context"]["raw_query"]
253
  docs = retriever.retrieve(query, "research")
254
+ # Log the retrieval result for debugging
255
+ st.write(f"[DEBUG] Retrieved {len(docs)} documents from retrieval node.")
256
  return {
257
  "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
258
  "context": {
 
265
 
266
  def analyze_content(self, state: AgentState) -> Dict:
267
  try:
268
+ # Ensure documents are present before proceeding
269
+ if "documents" not in state["context"] or not state["context"]["documents"]:
270
+ return self._error_state("No documents retrieved; please check your query or retrieval process.")
271
+
272
+ # Concatenate all document content for analysis
273
+ docs = "\n\n".join([d.page_content for d in state["context"]["documents"] if hasattr(d, "page_content")])
274
+ st.write(f"[DEBUG] Analyzing content from {len(state['context']['documents'])} documents.")
275
  prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
276
  response = self.processor.process_query(prompt)
277
 
 
320
 
321
  def _quality_check(self, state: AgentState) -> str:
322
  content = state["messages"][-1].content
323
+ # Check for the keyword "VALID" in the output; if missing, trigger refinement
324
  return "valid" if "VALID" in content else "invalid"
325
 
326
  def _error_state(self, message: str) -> Dict:
327
+ st.write(f"[ERROR] {message}")
328
  return {
329
  "messages": [AIMessage(content=f"❌ {message}")],
330
  "context": {"error": True},