mgbam committed · Commit f7afb44 · verified · 1 Parent(s): 4839711

Update app.py

Files changed (1)
  1. app.py +68 -59
app.py CHANGED
@@ -1,5 +1,5 @@
 # ------------------------------
-# Enhanced NeuroResearch AI System with Refinement Counter and Increased Recursion Limit
+# UniversalResearch AI System with Refinement Counter and Increased Recursion Limit
 # ------------------------------
 import logging
 import os
@@ -11,6 +11,7 @@ import sys
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import List, Dict, Any, Optional, Sequence
+
 import chromadb
 import requests
 import streamlit as st
@@ -50,46 +51,49 @@ class AgentState(TypedDict):
 # Configuration
 # ------------------------------
 class ResearchConfig:
+    """
+    Generic configuration for the UniversalResearch AI System.
+    This configuration is designed to be applicable to any research domain.
+    """
     DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
     CHROMA_PATH = "chroma_db"
     CHUNK_SIZE = 512
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
+    # An optional map can be used to list pre-loaded or featured research topics.
     DOCUMENT_MAP = {
-        "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
-            "CV-Transformer Hybrid Architecture",
-        "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing":
-            "Transformer Architecture Analysis",
-        "Latest Trends in Machine Learning Methods Using Quantum Computing":
-            "Quantum ML Frontiers"
+        "Sample Research Document 1": "Topic A Overview",
+        "Sample Research Document 2": "Topic B Analysis",
+        "Sample Research Document 3": "Topic C Innovations"
     }
     ANALYSIS_TEMPLATE = (
-        "Analyze these technical documents with scientific rigor:\n{context}\n\n"
-        "Respond with:\n"
-        "1. Key Technical Contributions (bullet points)\n"
+        "Analyze the following research documents with scientific rigor:\n{context}\n\n"
+        "Provide your analysis with the following structure:\n"
+        "1. Key Contributions (bullet points)\n"
         "2. Novel Methodologies\n"
         "3. Empirical Results (with metrics)\n"
         "4. Potential Applications\n"
         "5. Limitations & Future Directions\n\n"
-        "Format: Markdown with LaTeX mathematical notation where applicable"
+        "Format your response in Markdown with LaTeX mathematical notation where applicable."
     )
 
 if not ResearchConfig.DEEPSEEK_API_KEY:
     st.error(
         """**Research Portal Configuration Required**
-1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
-2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
-3. Rebuild deployment"""
+1. Obtain your DeepSeek API key from [platform.deepseek.com](https://platform.deepseek.com/)
+2. Set the secret: `DEEPSEEK_API_KEY` in your deployment settings
+3. Rebuild your deployment."""
     )
     st.stop()
 
 # ------------------------------
-# Quantum Document Processing
+# Universal Document Processing
 # ------------------------------
-class QuantumDocumentManager:
+class UniversalDocumentManager:
     """
-    Manages creation of Chroma collections from raw document texts.
+    Manages the creation of document collections for any research domain.
+    Documents are split into manageable chunks and embedded using OpenAI embeddings.
     """
     def __init__(self) -> None:
         try:
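`ANALYSIS_TEMPLATE` is consumed later by `analyze_content`, which fills the `{context}` placeholder with the text of the retrieved documents. A minimal sketch of that step, assuming plain strings (in the app the retrieved items are LangChain documents, so their `page_content` would be joined):

```python
# Hypothetical illustration of how the template is filled before dispatch.
docs_text = ["First retrieved chunk...", "Second retrieved chunk..."]
prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(
    context="\n\n".join(docs_text)  # separator is an assumption
)
# `prompt` then goes through CognitiveProcessor.process_query below.
```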
@@ -98,6 +102,7 @@ class QuantumDocumentManager:
         except Exception as e:
             logger.error(f"Error initializing PersistentClient: {e}")
             self.client = chromadb.Client()  # Fallback to in-memory client
+
         self.embeddings = OpenAIEmbeddings(
             model="text-embedding-3-large",
             dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
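The hunk above keeps the existing fallback: try to open an on-disk Chroma store, and drop to an ephemeral in-memory client if that fails. The same pattern as a self-contained sketch:

```python
import logging

import chromadb

logger = logging.getLogger(__name__)

def make_chroma_client(path: str = "chroma_db"):
    """Prefer a persistent on-disk store; fall back to in-memory."""
    try:
        return chromadb.PersistentClient(path=path)
    except Exception as e:
        logger.error(f"Error initializing PersistentClient: {e}")
        return chromadb.Client()  # ephemeral: data is lost on restart
```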
@@ -105,7 +110,7 @@
 
     def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
         """
-        Splits documents into chunks and stores them as a Chroma collection.
+        Splits documents into chunks and stores them in a Chroma collection.
         """
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
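The diff elides the rest of `create_collection`'s body. A plausible reconstruction under the LangChain APIs the hunk implies, written as a free function over an explicit client and embeddings rather than a method; the import paths vary across LangChain versions, and everything past the splitter is an assumption:

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

def create_collection(client, embeddings, documents, collection_name):
    # Chunk the raw texts using the sizes from ResearchConfig.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=512,    # ResearchConfig.CHUNK_SIZE
        chunk_overlap=64,  # ResearchConfig.CHUNK_OVERLAP
    )
    docs = splitter.create_documents(documents)
    # Embed the chunks and store them in the named Chroma collection.
    return Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        client=client,
        collection_name=collection_name,
    )
```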
@@ -129,22 +134,23 @@ class QuantumDocumentManager:
 
     def _document_id(self, content: str) -> str:
         """
-        Generates a unique document ID using SHA256 and the current timestamp.
+        Generates a unique document ID using a SHA256 hash combined with the current timestamp.
         """
         return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
 
-# Initialize document collections
-qdm = QuantumDocumentManager()
-research_docs = qdm.create_collection([
-    "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
-    "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
-    "Latest Trends in Machine Learning Methods Using Quantum Computing"
+# Initialize document collections for multiple research domains
+udm = UniversalDocumentManager()
+# Example collections – these can be updated with any research domain documents.
+research_docs = udm.create_collection([
+    "Research Report: Novel AI Techniques in Renewable Energy",
+    "Academic Paper: Advances in Quantum Computing for Data Analysis",
+    "Survey: Emerging Trends in Biomedical Research"
 ], "research")
 
-development_docs = qdm.create_collection([
-    "Project A: UI Design Completed, API Integration in Progress",
-    "Project B: Testing New Feature X, Bug Fixes Needed",
-    "Product Y: In the Performance Optimization Stage Before Release"
+development_docs = udm.create_collection([
+    "Project Update: New Algorithms in Software Engineering",
+    "Development Report: Innovations in User Interface Design",
+    "Case Study: Agile Methodologies in Large-Scale Software Projects"
 ], "development")
 
 # ------------------------------
@@ -152,7 +158,8 @@ development_docs = qdm.create_collection([
 # ------------------------------
 class ResearchRetriever:
     """
-    Provides retrieval methods for different domains.
+    Provides retrieval methods for research documents.
+    This class supports multiple domains, such as academic research and development.
     """
     def __init__(self) -> None:
         try:
@@ -171,7 +178,7 @@ class ResearchRetriever:
 
     def retrieve(self, query: str, domain: str) -> List[Any]:
         """
-        Retrieves documents based on the query and domain.
+        Retrieves documents for a given query and domain.
         """
         try:
             if domain == "research":
@@ -179,8 +186,8 @@
             elif domain == "development":
                 return self.development_retriever.invoke(query)
             else:
-                logger.warning(f"Domain '{domain}' not recognized.")
-                return []
+                logger.warning(f"Domain '{domain}' not recognized. Defaulting to research.")
+                return self.research_retriever.invoke(query)
         except Exception as e:
             logger.error(f"Retrieval error for domain '{domain}': {e}")
             return []
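The constructor that builds `research_retriever` and `development_retriever` falls outside the hunk; presumably each wraps one of the collections created above. A hedged sketch of that initialization (the `k` value is an assumption):

```python
# Hypothetical ResearchRetriever.__init__ body; `k` is an assumption.
self.research_retriever = research_docs.as_retriever(
    search_kwargs={"k": 4}
)
self.development_retriever = development_docs.as_retriever(
    search_kwargs={"k": 4}
)
# Each retriever's .invoke(query) then returns a List[Document].
```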
@@ -192,8 +199,8 @@ retriever = ResearchRetriever()
 # ------------------------------
 class CognitiveProcessor:
     """
-    Executes API requests to the DeepSeek backend using triple redundancy
-    and consolidates results via a consensus mechanism.
+    Executes API requests to the DeepSeek backend using redundant parallel requests.
+    The responses are consolidated via a consensus mechanism.
     """
     def __init__(self) -> None:
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
@@ -204,7 +211,7 @@ class CognitiveProcessor:
         Processes a query by sending multiple API requests in parallel.
         """
         futures = []
-        for _ in range(3):  # Triple redundancy for reliability
+        for _ in range(3):  # Triple redundancy for improved reliability
             futures.append(self.executor.submit(self._execute_api_request, prompt))
 
         results = []
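The result-collection loop continues past the end of this hunk. For reference, the standard fan-out/fan-in shape with `as_completed` (both names are already imported at the top of app.py) looks roughly like this; the timeout is an assumption:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def fan_out(executor: ThreadPoolExecutor, fn, prompt: str, n: int = 3) -> list:
    """Submit the same request n times and collect results as they finish."""
    futures = [executor.submit(fn, prompt) for _ in range(n)]
    results = []
    for future in as_completed(futures, timeout=60):  # timeout assumed
        try:
            results.append(future.result())
        except Exception as e:
            results.append({"error": str(e)})  # keep failures for the consensus step
    return results
```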
@@ -230,7 +237,7 @@ class CognitiveProcessor:
             "model": "deepseek-chat",
             "messages": [{
                 "role": "user",
-                "content": f"Respond as Senior AI Researcher:\n{prompt}"
+                "content": f"Respond as a Senior Researcher:\n{prompt}"
             }],
             "temperature": 0.7,
             "max_tokens": 1500,
@@ -252,7 +259,7 @@
 
     def _consensus_check(self, results: List[Dict]) -> Dict:
         """
-        Consolidates multiple API responses, selecting the one with the most content.
+        Consolidates multiple API responses by selecting the one with the most content.
         """
         valid_results = [r for r in results if "error" not in r]
         if not valid_results:
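Per the docstring, "consensus" here is a longest-response heuristic rather than majority voting. A sketch consistent with that description, assuming OpenAI-style response dictionaries:

```python
from typing import Dict, List

def consensus_check(results: List[Dict]) -> Dict:
    """Pick the successful response with the longest message content."""
    valid_results = [r for r in results if "error" not in r]
    if not valid_results:
        return {"error": "All API requests failed"}
    return max(
        valid_results,
        key=lambda r: len(r["choices"][0]["message"]["content"]),  # shape assumed
    )
```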
@@ -265,7 +272,8 @@
 # ------------------------------
 class ResearchWorkflow:
     """
-    Defines the multi-step research workflow using a state graph.
+    Defines a multi-step research workflow using a state graph.
+    This workflow is designed to be domain-agnostic, working for any research area.
     """
     def __init__(self) -> None:
         self.processor = CognitiveProcessor()
@@ -274,13 +282,13 @@ class ResearchWorkflow:
         self.app = self.workflow.compile()
 
     def _build_workflow(self) -> None:
-        # Define nodes
+        # Define workflow nodes
         self.workflow.add_node("ingest", self.ingest_query)
         self.workflow.add_node("retrieve", self.retrieve_documents)
         self.workflow.add_node("analyze", self.analyze_content)
         self.workflow.add_node("validate", self.validate_output)
         self.workflow.add_node("refine", self.refine_results)
-        # Set entry point and edges
+        # Set entry point and define transitions
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.workflow.add_edge("retrieve", "analyze")
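The edges listed here are only the linear part of the graph; the validate→refine loop implied by `_quality_check` is wired outside the hunk. In LangGraph that branch is typically declared with a conditional edge, sketched here as an assumption:

```python
from langgraph.graph import END

# Assumed wiring for the validation loop (not visible in this hunk):
self.workflow.add_conditional_edges(
    "validate",
    self._quality_check,  # returns "valid" or "invalid"
    {"valid": END, "invalid": "refine"},
)
self.workflow.add_edge("refine", "validate")  # loop until valid or capped
```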
@@ -298,7 +306,6 @@ class ResearchWorkflow:
         """
         try:
             query = state["messages"][-1].content
-            # Initialize context with raw query and refinement counter
             new_context = {"raw_query": query, "refine_count": 0}
             logger.info("Query ingested.")
             return {
@@ -311,7 +318,7 @@
 
     def retrieve_documents(self, state: AgentState) -> Dict:
         """
-        Retrieves research documents based on the query.
+        Retrieves research documents for the given query.
         """
         try:
             query = state["context"]["raw_query"]
@@ -326,7 +333,7 @@
 
     def analyze_content(self, state: AgentState) -> Dict:
         """
-        Analyzes the retrieved documents using the DeepSeek API.
+        Analyzes the retrieved research documents using the DeepSeek API.
         """
         try:
             docs = state["context"].get("documents", [])
@@ -345,13 +352,13 @@ class ResearchWorkflow:
 
     def validate_output(self, state: AgentState) -> Dict:
         """
-        Validates the technical analysis report.
+        Validates the analysis report for technical accuracy and consistency.
         """
         analysis = state["messages"][-1].content
         validation_prompt = (
-            f"Validate research analysis:\n{analysis}\n\n"
-            "Check for:\n1. Technical accuracy\n2. Citation support\n3. Logical consistency\n4. Methodological soundness\n\n"
-            "Respond with 'VALID' or 'INVALID'"
+            f"Validate the following research analysis:\n{analysis}\n\n"
+            "Check for:\n1. Technical accuracy\n2. Adequate citation support\n3. Logical consistency\n4. Methodological soundness\n\n"
+            "Respond with 'VALID' or 'INVALID'."
         )
         response = self.processor.process_query(validation_prompt)
         logger.info("Output validation completed.")
@@ -362,14 +369,14 @@
 
     def refine_results(self, state: AgentState) -> Dict:
         """
         Refines the analysis report if validation fails.
-        Increments the refinement counter to limit infinite loops.
+        Increments the refinement counter to avoid infinite loops.
         """
         current_count = state["context"].get("refine_count", 0)
         state["context"]["refine_count"] = current_count + 1
         logger.info(f"Refinement iteration: {state['context']['refine_count']}")
         refinement_prompt = (
             f"Refine this analysis:\n{state['messages'][-1].content}\n\n"
-            "Improve:\n1. Technical precision\n2. Empirical grounding\n3. Theoretical coherence"
+            "Improve by enhancing technical precision, empirical grounding, and theoretical coherence."
         )
         response = self.processor.process_query(refinement_prompt)
         logger.info("Refinement completed.")
@@ -381,7 +388,7 @@
     def _quality_check(self, state: AgentState) -> str:
         """
         Checks whether the analysis report is valid.
-        Forces a valid state if the refinement count exceeds a threshold.
+        Forces a valid state if the refinement counter exceeds a preset threshold.
         """
         refine_count = state["context"].get("refine_count", 0)
         if refine_count >= 3:
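Combining this docstring with the guard on the next line, the whole of `_quality_check` plausibly reads as below; the `'VALID'` substring test mirrors the response format demanded by the validation prompt:

```python
def _quality_check(self, state: AgentState) -> str:
    # Hypothetical full body; only the counter guard is visible in the diff.
    if state["context"].get("refine_count", 0) >= 3:
        logger.warning("Refinement limit reached; forcing valid state.")
        return "valid"
    content = state["messages"][-1].content
    return "valid" if "VALID" in content else "invalid"
```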
@@ -408,7 +415,8 @@
 # ------------------------------
 class ResearchInterface:
     """
-    Provides the Streamlit-based interface for executing the research workflow.
+    Provides a Streamlit-based interface for executing the UniversalResearch AI workflow.
+    The interface is domain-agnostic, making it suitable for research in any field.
     """
     def __init__(self) -> None:
         self.workflow = ResearchWorkflow()
@@ -416,7 +424,7 @@ class ResearchInterface:
 
     def _initialize_interface(self) -> None:
         st.set_page_config(
-            page_title="NeuroResearch AI",
+            page_title="UniversalResearch AI",
             layout="wide",
             initial_sidebar_state="expanded"
         )
@@ -471,7 +479,8 @@
     def _build_sidebar(self) -> None:
         with st.sidebar:
             st.title("🔍 Research Database")
-            st.subheader("Technical Papers")
+            st.subheader("Featured Research Topics")
+            # Display featured research topics from the DOCUMENT_MAP.
             for title, short in ResearchConfig.DOCUMENT_MAP.items():
                 with st.expander(short):
                     st.markdown(f"```\n{title}\n```")
@@ -480,19 +489,19 @@
             st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
 
     def _build_main_interface(self) -> None:
-        st.title("🧠 NeuroResearch AI")
+        st.title("🧠 UniversalResearch AI")
         query = st.text_area(
             "Research Query:",
             height=200,
-            placeholder="Enter technical research question..."
+            placeholder="Enter a research question or topic from any domain..."
         )
         if st.button("Execute Analysis", type="primary"):
             self._execute_analysis(query)
 
     def _execute_analysis(self, query: str) -> None:
         try:
-            with st.spinner("Initializing Quantum Analysis..."):
-                # Pass a recursion limit configuration into the graph invocation
+            with st.spinner("Initializing Universal Analysis..."):
+                # Invoke the workflow with an increased recursion limit configuration.
                 results = self.workflow.app.stream({
                     "messages": [HumanMessage(content=query)],
                     "context": {},
@@ -527,7 +536,7 @@ Potential issues:
             elif 'analyze' in event:
                 with st.container():
                     content = event['analyze']['messages'][0].content
-                    with st.expander("Technical Analysis Report", expanded=True):
+                    with st.expander("Research Analysis Report", expanded=True):
                         st.markdown(content)
             elif 'validate' in event:
                 with st.container():