mgbam committed
Commit d4c248d · verified
1 Parent(s): 56fa1a5

Update app.py

Files changed (1): app.py +105 -169
app.py CHANGED
@@ -1,6 +1,3 @@
-# ------------------------------
-# UniversalResearch AI with LADDER (OpenAI Integration)
-# ------------------------------
 import logging
 import os
 import re
@@ -11,12 +8,11 @@ import sys
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import List, Dict, Any, Optional, Sequence
-
 import chromadb
 import requests
 import streamlit as st

-# LangChain & LangGraph imports
+# LangChain and LangGraph imports
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
@@ -27,7 +23,7 @@ from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict, Annotated
 from langchain.tools.retriever import create_retriever_tool

-# Increase Python's recursion limit at the start (if needed)
+# Increase Python's recursion limit at the very start (if needed)
 sys.setrecursionlimit(10000)

 # ------------------------------
@@ -43,12 +39,6 @@ logger = logging.getLogger(__name__)
 # State Schema Definition
 # ------------------------------
 class AgentState(TypedDict):
-    """
-    Stores the messages and context for each step in the workflow.
-    'messages': conversation so far
-    'context': domain-specific data (docs, counters)
-    'metadata': any additional info (timestamps, status)
-    """
     messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
     context: Dict[str, Any]
     metadata: Dict[str, Any]
@@ -57,56 +47,46 @@ class AgentState(TypedDict):
 # Configuration
 # ------------------------------
 class ResearchConfig:
-    """
-    Universal config for the advanced AI system with Tufa Labs' LADDER approach,
-    using OpenAI for both embeddings and completions.
-
-    Make sure to set OPENAI_API_KEY in your environment or HF Space secrets.
-    """
-    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")  # Must match your HF secret name
+    DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
     CHROMA_PATH = "chroma_db"
     CHUNK_SIZE = 512
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
-
-    # Example map for featured documents
     DOCUMENT_MAP = {
-        "Sample Research Document 1": "Topic A Overview",
-        "Sample Research Document 2": "Topic B Analysis",
-        "Sample Research Document 3": "Topic C Innovations"
+        "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
+            "CV-Transformer Hybrid Architecture",
+        "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing":
+            "Transformer Architecture Analysis",
+        "Latest Trends in Machine Learning Methods Using Quantum Computing":
+            "Quantum ML Frontiers"
     }
-
-    # Analysis template referencing LADDER's approach
     ANALYSIS_TEMPLATE = (
-        "Analyze the following research documents with scientific rigor:\n{context}\n\n"
-        "Use Tufa Labs’ LADDER method to:\n"
-        "1. Break down complex problems into subproblems.\n"
-        "2. Iteratively refine the solution.\n"
-        "3. Provide analysis including:\n"
-        "   a. Key Contributions\n"
-        "   b. Novel Methodologies\n"
-        "   c. Empirical Results (with metrics)\n"
-        "   d. Potential Applications\n"
-        "   e. Limitations & Future Directions\n\n"
-        "Format your response in Markdown with LaTeX where applicable."
+        "Analyze these technical documents with scientific rigor:\n{context}\n\n"
+        "Respond with:\n"
+        "1. Key Technical Contributions (bullet points)\n"
+        "2. Novel Methodologies\n"
+        "3. Empirical Results (with metrics)\n"
+        "4. Potential Applications\n"
+        "5. Limitations & Future Directions\n\n"
+        "Format: Markdown with LaTeX mathematical notation where applicable"
     )

-# Early check for missing API key
-if not ResearchConfig.OPENAI_API_KEY:
+if not ResearchConfig.DEEPSEEK_API_KEY:
     st.error(
-        """**OpenAI API Key Not Found**
-        Please set `OPENAI_API_KEY` in your Space secrets and rebuild the Space."""
+        """**Research Portal Configuration Required**
+        1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
+        2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
+        3. Rebuild deployment"""
     )
     st.stop()

 # ------------------------------
-# Universal Document Processing
+# Quantum Document Processing
 # ------------------------------
-class UniversalDocumentManager:
+class QuantumDocumentManager:
     """
-    Manages creation of document collections for any research domain,
-    using OpenAI embeddings for semantic search.
+    Manages creation of Chroma collections from raw document texts.
     """
     def __init__(self) -> None:
         try:
@@ -115,8 +95,6 @@ class UniversalDocumentManager:
         except Exception as e:
             logger.error(f"Error initializing PersistentClient: {e}")
             self.client = chromadb.Client()  # Fallback to in-memory client
-
-        # Configure embeddings from openai
         self.embeddings = OpenAIEmbeddings(
             model="text-embedding-3-large",
             dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
@@ -124,7 +102,7 @@ class UniversalDocumentManager:

     def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
         """
-        Splits documents into chunks and stores them in a Chroma collection.
+        Splits documents into chunks and stores them as a Chroma collection.
         """
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
@@ -133,7 +111,7 @@ class UniversalDocumentManager:
         )
         try:
             docs = splitter.create_documents(documents)
-            logger.info(f"Created {len(docs)} doc chunks for collection '{collection_name}'.")
+            logger.info(f"Created {len(docs)} document chunks for collection '{collection_name}'.")
         except Exception as e:
             logger.error(f"Error splitting documents: {e}")
             raise e
@@ -148,22 +126,22 @@ class UniversalDocumentManager:

     def _document_id(self, content: str) -> str:
         """
-        Generates a unique ID using SHA256 + timestamp.
+        Generates a unique document ID using SHA256 and the current timestamp.
         """
         return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"

-# Example collections (replace with your own)
-udm = UniversalDocumentManager()
-research_docs = udm.create_collection([
-    "Research Report: Novel AI Techniques in Renewable Energy",
-    "Academic Paper: Advances in Quantum Computing for Data Analysis",
-    "Survey: Emerging Trends in Biomedical Research"
+# Initialize document collections
+qdm = QuantumDocumentManager()
+research_docs = qdm.create_collection([
+    "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
+    "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
+    "Latest Trends in Machine Learning Methods Using Quantum Computing"
 ], "research")

-development_docs = udm.create_collection([
-    "Project Update: New Algorithms in Software Engineering",
-    "Development Report: Innovations in User Interface Design",
-    "Case Study: Agile Methodologies in Large-Scale Software Projects"
+development_docs = qdm.create_collection([
+    "Project A: UI Design Completed, API Integration in Progress",
+    "Project B: Testing New Feature X, Bug Fixes Needed",
+    "Product Y: In the Performance Optimization Stage Before Release"
 ], "development")

 # ------------------------------
@@ -171,8 +149,7 @@ development_docs = udm.create_collection([
 # ------------------------------
 class ResearchRetriever:
     """
-    Provides retrieval methods for multiple domains (e.g., research, development).
-    Uses MMR or similarity-based retrieval from Chroma.
+    Provides retrieval methods for different domains.
     """
     def __init__(self) -> None:
         try:
@@ -191,8 +168,7 @@ class ResearchRetriever:

     def retrieve(self, query: str, domain: str) -> List[Any]:
         """
-        Retrieves documents for a given query and domain.
-        Defaults to 'research' if domain is unrecognized.
+        Retrieves documents based on the query and domain.
         """
         try:
             if domain == "research":
@@ -200,8 +176,8 @@ class ResearchRetriever:
             elif domain == "development":
                 return self.development_retriever.invoke(query)
             else:
-                logger.warning(f"Domain '{domain}' not recognized. Defaulting to 'research'.")
-                return self.research_retriever.invoke(query)
+                logger.warning(f"Domain '{domain}' not recognized.")
+                return []
         except Exception as e:
             logger.error(f"Retrieval error for domain '{domain}': {e}")
             return []
@@ -213,8 +189,8 @@ retriever = ResearchRetriever()
 # ------------------------------
 class CognitiveProcessor:
     """
-    Executes requests to the OpenAI Chat Completions endpoint in parallel,
-    then consolidates the results using a consensus mechanism (picks the longest).
+    Executes API requests to the DeepSeek backend using triple redundancy
+    and consolidates results via a consensus mechanism.
     """
     def __init__(self) -> None:
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
@@ -222,10 +198,10 @@ class CognitiveProcessor:

     def process_query(self, prompt: str) -> Dict:
         """
-        Sends multiple parallel requests (triple redundancy) to OpenAI's ChatCompletion.
+        Processes a query by sending multiple API requests in parallel.
         """
         futures = []
-        for _ in range(3):
+        for _ in range(3):  # Triple redundancy for reliability
             futures.append(self.executor.submit(self._execute_api_request, prompt))

         results = []
@@ -240,63 +216,53 @@ class CognitiveProcessor:

     def _execute_api_request(self, prompt: str) -> Dict:
         """
-        Executes a single request to OpenAI's ChatCompletion endpoint.
+        Executes a single API request to the DeepSeek endpoint.
         """
-        # Use your OPENAI_API_KEY
         headers = {
-            "Authorization": f"Bearer {ResearchConfig.OPENAI_API_KEY}",
-            "Content-Type": "application/json"
+            "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
+            "Content-Type": "application/json",
+            "X-Research-Session": self.session_id
         }
         payload = {
-            "model": "gpt-3.5-turbo",  # or "gpt-4", depending on your account
-            "messages": [
-                {
-                    "role": "user",
-                    "content": prompt
-                }
-            ],
+            "model": "deepseek-chat",
+            "messages": [{
+                "role": "user",
+                "content": f"Respond as Senior AI Researcher:\n{prompt}"
+            }],
             "temperature": 0.7,
             "max_tokens": 1500,
             "top_p": 0.9
         }
         try:
             response = requests.post(
-                "https://api.openai.com/v1/chat/completions",
+                "https://api.deepseek.com/v1/chat/completions",
                 headers=headers,
                 json=payload,
                 timeout=45
             )
             response.raise_for_status()
-            logger.info("OpenAI ChatCompletion request successful.")
+            logger.info("DeepSeek API request successful.")
             return response.json()
         except requests.exceptions.RequestException as e:
-            logger.error(f"OpenAI request failed: {e}")
+            logger.error(f"DeepSeek API request failed: {e}")
             return {"error": str(e)}

     def _consensus_check(self, results: List[Dict]) -> Dict:
         """
-        Chooses the 'best' response by comparing content lengths, discarding errors.
+        Consolidates multiple API responses, selecting the one with the most content.
         """
-        valid = [r for r in results if "error" not in r]
-        if not valid:
+        valid_results = [r for r in results if "error" not in r]
+        if not valid_results:
             logger.error("All API requests failed.")
             return {"error": "All API requests failed"}
-        return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
+        return max(valid_results, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))

 # ------------------------------
-# Research Workflow Engine (Tufa Labs' LADDER)
+# Research Workflow Engine
 # ------------------------------
 class ResearchWorkflow:
     """
-    Defines a multi-step workflow using LangGraph with Tufa Labs’ LADDER approach:
-    1. Ingest Query
-    2. Retrieve Documents
-    3. Analyze Content
-    4. Validate Output
-    5. Refine (Recursive Self-Learning)
-
-    The refine step uses iterative subproblem breakdown,
-    potentially combined with test-time reinforcement.
+    Defines the multi-step research workflow using a state graph.
     """
     def __init__(self) -> None:
         self.processor = CognitiveProcessor()
@@ -305,14 +271,13 @@ class ResearchWorkflow:
         self.app = self.workflow.compile()

     def _build_workflow(self) -> None:
-        # Node definitions
+        # Define nodes
         self.workflow.add_node("ingest", self.ingest_query)
         self.workflow.add_node("retrieve", self.retrieve_documents)
         self.workflow.add_node("analyze", self.analyze_content)
         self.workflow.add_node("validate", self.validate_output)
         self.workflow.add_node("refine", self.refine_results)
-
-        # Graph edges
+        # Set entry point and edges
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.workflow.add_edge("retrieve", "analyze")
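The conditional wiring that closes the validate/refine loop falls in this hunk's elided lines (old 319-325). A plausible sketch of what that wiring typically looks like with LangGraph's `add_conditional_edges`, using the node names above and the `_quality_check` router shown later in the diff; the actual call in the commit may differ:

```python
# Hypothetical reconstruction, not part of the diff: _quality_check routes
# "valid" to END (graph terminates) and "invalid" back into another refine
# pass, which then re-enters validation. END comes from langgraph.graph.
self.workflow.add_conditional_edges(
    "validate",
    self._quality_check,
    {"valid": END, "invalid": "refine"},
)
self.workflow.add_edge("refine", "validate")
```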
@@ -326,10 +291,11 @@
 
     def ingest_query(self, state: AgentState) -> Dict:
         """
-        Ingest the user query and initialize the refine counter for LADDER recursion.
+        Ingests the research query and initializes the refinement counter.
         """
         try:
             query = state["messages"][-1].content
+            # Initialize context with raw query and refinement counter
             new_context = {"raw_query": query, "refine_count": 0}
             logger.info("Query ingested.")
             return {
@@ -338,11 +304,11 @@
                 "metadata": {"timestamp": datetime.now().isoformat()}
             }
         except Exception as e:
-            return self._error_state(f"Ingestion Error: {e}")
+            return self._error_state(f"Ingestion Error: {str(e)}")

     def retrieve_documents(self, state: AgentState) -> Dict:
         """
-        Retrieves relevant documents from the specified domain (default: research).
+        Retrieves research documents based on the query.
         """
         try:
             query = state["context"]["raw_query"]
@@ -350,19 +316,14 @@
             logger.info(f"Retrieved {len(docs)} documents for query.")
             return {
                 "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
-                "context": {
-                    "documents": docs,
-                    "retrieval_time": time.time(),
-                    "refine_count": state["context"].get("refine_count", 0)
-                }
+                "context": {"documents": docs, "retrieval_time": time.time(), "refine_count": state["context"].get("refine_count", 0)}
             }
         except Exception as e:
-            return self._error_state(f"Retrieval Error: {e}")
+            return self._error_state(f"Retrieval Error: {str(e)}")

     def analyze_content(self, state: AgentState) -> Dict:
         """
-        Uses the LADDER approach to break down and analyze documents,
-        returning a structured research analysis.
+        Analyzes the retrieved documents using the DeepSeek API.
         """
         try:
             docs = state["context"].get("documents", [])
@@ -371,82 +332,66 @@
             response = self.processor.process_query(prompt)
             if "error" in response:
                 return self._error_state(response["error"])
-            logger.info("Analysis completed.")
+            logger.info("Content analysis completed.")
             return {
-                "messages": [
-                    AIMessage(content=response.get('choices', [{}])[0].get('message', {}).get('content', ''))
-                ],
-                "context": {
-                    "analysis": response,
-                    "refine_count": state["context"].get("refine_count", 0)
-                }
+                "messages": [AIMessage(content=response.get('choices', [{}])[0].get('message', {}).get('content', ''))],
+                "context": {"analysis": response, "refine_count": state["context"].get("refine_count", 0)}
             }
         except Exception as e:
-            return self._error_state(f"Analysis Error: {e}")
+            return self._error_state(f"Analysis Error: {str(e)}")

     def validate_output(self, state: AgentState) -> Dict:
         """
-        Validates the analysis. If invalid, the system can refine
-        using Tufa Labs’ LADDER approach.
+        Validates the technical analysis report.
         """
         analysis = state["messages"][-1].content
         validation_prompt = (
-            f"Validate this analysis:\n{analysis}\n\n"
+            f"Validate research analysis:\n{analysis}\n\n"
             "Check for:\n1. Technical accuracy\n2. Citation support\n3. Logical consistency\n4. Methodological soundness\n\n"
-            "Respond with 'VALID' or 'INVALID'."
+            "Respond with 'VALID' or 'INVALID'"
         )
         response = self.processor.process_query(validation_prompt)
-        logger.info("Validation completed.")
+        logger.info("Output validation completed.")
         return {
-            "messages": [
-                AIMessage(
-                    content=analysis + f"\n\nValidation: {response.get('choices', [{}])[0].get('message', {}).get('content', '')}"
-                )
-            ]
+            "messages": [AIMessage(content=analysis + f"\n\nValidation: {response.get('choices', [{}])[0].get('message', {}).get('content', '')}")]
         }

     def refine_results(self, state: AgentState) -> Dict:
         """
-        LADDER refinement: break down subproblems, re-solve them
-        with no external data, potentially using TTRL for dynamic updates.
+        Refines the analysis report if validation fails.
+        Increments the refinement counter to limit infinite loops.
         """
         current_count = state["context"].get("refine_count", 0)
         state["context"]["refine_count"] = current_count + 1
-        logger.info(f"LADDER refinement iteration: {state['context']['refine_count']}")
-
+        logger.info(f"Refinement iteration: {state['context']['refine_count']}")
         refinement_prompt = (
-            "Refine this analysis with LADDER’s self-improvement approach:\n"
-            f"{state['messages'][-1].content}\n\n"
-            "Break down complex points further, re-solve them, and enhance:\n"
-            "- Technical precision\n- Empirical grounding\n- Theoretical coherence"
+            f"Refine this analysis:\n{state['messages'][-1].content}\n\n"
+            "Improve:\n1. Technical precision\n2. Empirical grounding\n3. Theoretical coherence"
         )
         response = self.processor.process_query(refinement_prompt)
         logger.info("Refinement completed.")
         return {
-            "messages": [
-                AIMessage(
-                    content=response.get('choices', [{}])[0].get('message', {}).get('content', '')
-                )
-            ],
+            "messages": [AIMessage(content=response.get('choices', [{}])[0].get('message', {}).get('content', ''))],
             "context": state["context"]
         }

     def _quality_check(self, state: AgentState) -> str:
         """
-        Checks if the analysis is valid. If the refine_count >= 3,
-        forcibly accept to avoid infinite loops.
+        Checks whether the analysis report is valid.
+        Forces a valid state if the refinement count exceeds a threshold.
         """
         refine_count = state["context"].get("refine_count", 0)
         if refine_count >= 3:
-            logger.warning("Refinement limit reached. Forcing valid outcome.")
+            logger.warning("Refinement limit reached. Forcing valid outcome to prevent infinite recursion.")
             return "valid"
-
         content = state["messages"][-1].content
-        return "valid" if "VALID" in content else "invalid"
+        quality = "valid" if "VALID" in content else "invalid"
+        logger.info(f"Quality check returned: {quality}")
+        return quality

     def _error_state(self, message: str) -> Dict:
         """
-        Returns an error state if any node fails.
+        Returns a standardized error state.
         """
         logger.error(message)
         return {
@@ -456,12 +401,11 @@
         }

 # ------------------------------
-# Streamlit UI
+# Research Interface (Streamlit UI)
 # ------------------------------
 class ResearchInterface:
     """
-    Provides a Streamlit-based interface for the UniversalResearch AI
-    with Tufa Labs' LADDER approach, using OpenAI for both embeddings & completions.
+    Provides the Streamlit-based interface for executing the research workflow.
     """
     def __init__(self) -> None:
         self.workflow = ResearchWorkflow()
@@ -469,7 +413,7 @@ class ResearchInterface:

     def _initialize_interface(self) -> None:
         st.set_page_config(
-            page_title="UniversalResearch AI (OpenAI + LADDER)",
+            page_title="NeuroResearch AI",
             layout="wide",
             initial_sidebar_state="expanded"
         )
@@ -523,8 +467,8 @@ class ResearchInterface:

     def _build_sidebar(self) -> None:
         with st.sidebar:
-            st.title("🔍 Research Database (LADDER)")
-            st.subheader("Featured Research Topics")
+            st.title("🔍 Research Database")
+            st.subheader("Technical Papers")
             for title, short in ResearchConfig.DOCUMENT_MAP.items():
                 with st.expander(short):
                     st.markdown(f"```\n{title}\n```")
@@ -533,22 +477,19 @@
             st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)

     def _build_main_interface(self) -> None:
-        st.title("🧠 UniversalResearch AI")
-        st.write(
-            "This system uses OpenAI for embeddings & completions"
-        )
+        st.title("🧠 NeuroResearch AI")
         query = st.text_area(
             "Research Query:",
             height=200,
-            placeholder="Enter a research question (e.g., advanced math, code tasks, etc.)..."
+            placeholder="Enter technical research question..."
         )
         if st.button("Execute Analysis", type="primary"):
             self._execute_analysis(query)

     def _execute_analysis(self, query: str) -> None:
         try:
-            with st.spinner("Initializing LADDER-based Analysis..."):
-                # The recursion_limit ensures multiple refine iterations are possible
+            with st.spinner("Initializing Quantum Analysis..."):
+                # Pass a recursion limit configuration into the graph invocation
                 results = self.workflow.app.stream({
                     "messages": [HumanMessage(content=query)],
                     "context": {},
@@ -565,14 +506,10 @@
 Potential issues:
 - Complex query structure
 - Document correlation failure
-- Rate limits or invalid API key
 - Temporal processing constraints"""
             )

     def _render_event(self, event: Dict) -> None:
-        """
-        Renders each event in the Streamlit UI, from ingestion to validation/refinement.
-        """
         if 'ingest' in event:
             with st.container():
                 st.success("✅ Query Ingested")
@@ -587,7 +524,7 @@ Potential issues:
         elif 'analyze' in event:
             with st.container():
                 content = event['analyze']['messages'][0].content
-                with st.expander("Research Analysis Report", expanded=True):
+                with st.expander("Technical Analysis Report", expanded=True):
                     st.markdown(content)
         elif 'validate' in event:
             with st.container():
@@ -595,7 +532,6 @@ Potential issues:
                 if "VALID" in content:
                     st.success("✅ Validation Passed")
                     with st.expander("View Validated Analysis", expanded=True):
-                        # Hide "Validation: ..." from final output
                         st.markdown(content.split("Validation:")[0])
                 else:
                     st.warning("⚠️ Validation Issues Detected")
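For orientation: the compiled graph is consumed via `app.stream(...)` inside `_execute_analysis`, with a recursion-limit config mentioned in the new comment but falling in lines elided from this diff. A minimal sketch of driving the same workflow outside Streamlit, assuming LangGraph's standard per-invocation config; the query string and the limit value are illustrative only:

```python
# Hypothetical driver for the compiled LangGraph app; not part of the commit.
from langchain_core.messages import HumanMessage

wf = ResearchWorkflow()
state = {
    "messages": [HumanMessage(content="Survey recent quantum ML methods")],  # example query
    "context": {},
    "metadata": {},
}
# "recursion_limit" bounds total graph steps, capping validate -> refine loops
# in addition to the refine_count >= 3 cutoff inside _quality_check.
for event in wf.app.stream(state, {"recursion_limit": 100}):
    # Each streamed event maps a node name to its state update.
    for node, update in event.items():
        msgs = update.get("messages", [])
        if msgs:
            print(f"[{node}] {msgs[-1].content[:120]}")
```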