mgbam committed on
Commit
2c0f60e
·
verified ·
1 Parent(s): 99fb1d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -84
app.py CHANGED
@@ -1,10 +1,6 @@
1
- # ---------------------------------------------
2
- # Imports & Initial Configuration
3
- # ---------------------------------------------
4
- import streamlit as st
5
- # IMPORTANT: Must be the first Streamlit command
6
- st.set_page_config(page_title="NeuroResearch AI", layout="wide", initial_sidebar_state="expanded")
7
-
8
  from langchain_openai import OpenAIEmbeddings
9
  from langchain_community.vectorstores import Chroma
10
  from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
@@ -15,24 +11,28 @@ from langgraph.graph.message import add_messages
15
  from typing_extensions import TypedDict, Annotated
16
  from typing import Sequence, Dict, List, Optional, Any
17
  import chromadb
 
18
  import os
 
19
  import requests
20
  import hashlib
 
21
  import time
 
22
  from concurrent.futures import ThreadPoolExecutor, as_completed
23
  from datetime import datetime
24
 
25
- # ---------------------------------------------
26
  # State Schema Definition
27
- # ---------------------------------------------
28
  class AgentState(TypedDict):
29
  messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
30
  context: Dict[str, Any]
31
  metadata: Dict[str, Any]
32
 
33
- # ---------------------------------------------
34
  # Configuration
35
- # ---------------------------------------------
36
  class ResearchConfig:
37
  DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
38
  CHROMA_PATH = "chroma_db"
@@ -61,17 +61,17 @@ Respond with:
61
  Format: Markdown with LaTeX mathematical notation where applicable
62
  """
63
 
64
- # Validate API key configuration
65
  if not ResearchConfig.DEEPSEEK_API_KEY:
66
  st.error("""**Research Portal Configuration Required**
67
- 1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
68
- 2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
69
- 3. Rebuild deployment""")
70
  st.stop()
71
 
72
- # ---------------------------------------------
73
  # Quantum Document Processing
74
- # ---------------------------------------------
75
  class QuantumDocumentManager:
76
  def __init__(self):
77
  self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
@@ -87,7 +87,6 @@ class QuantumDocumentManager:
87
  separators=["\n\n", "\n", "|||"]
88
  )
89
  docs = splitter.create_documents(documents)
90
- # Removed debug line that displayed chunk creation count
91
  return Chroma.from_documents(
92
  documents=docs,
93
  embedding=self.embeddings,
@@ -97,7 +96,6 @@ class QuantumDocumentManager:
97
  )
98
 
99
  def _document_id(self, content: str) -> str:
100
- """Create a unique ID for each document chunk."""
101
  return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
102
 
103
  # Initialize document collections
@@ -114,9 +112,9 @@ development_docs = qdm.create_collection([
114
  "Product Y: In the Performance Optimization Stage Before Release"
115
  ], "development")
116
 
117
- # ---------------------------------------------
118
  # Advanced Retrieval System
119
- # ---------------------------------------------
120
  class ResearchRetriever:
121
  def __init__(self):
122
  self.retrievers = {
@@ -135,29 +133,28 @@ class ResearchRetriever:
135
  }
136
 
137
  def retrieve(self, query: str, domain: str) -> List[Any]:
138
- """Retrieve documents from the specified domain using the appropriate retriever."""
139
  try:
140
- results = self.retrievers[domain].invoke(query)
141
- return results
142
  except KeyError:
143
- st.error(f"[ERROR] Retrieval domain '{domain}' not found.")
144
  return []
145
 
146
  retriever = ResearchRetriever()
147
 
148
- # ---------------------------------------------
149
  # Cognitive Processing Unit
150
- # ---------------------------------------------
151
  class CognitiveProcessor:
152
  def __init__(self):
153
  self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
154
  self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]
155
 
156
  def process_query(self, prompt: str) -> Dict:
157
- """Send the prompt to the DeepSeek API using triple redundancy for robustness."""
158
  futures = []
159
- for _ in range(3):
160
- futures.append(self.executor.submit(self._execute_api_request, prompt))
 
 
 
161
 
162
  results = []
163
  for future in as_completed(futures):
@@ -169,7 +166,6 @@ class CognitiveProcessor:
169
  return self._consensus_check(results)
170
 
171
  def _execute_api_request(self, prompt: str) -> Dict:
172
- """Make a single request to the DeepSeek API."""
173
  headers = {
174
  "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
175
  "Content-Type": "application/json",
@@ -198,15 +194,14 @@ class CognitiveProcessor:
198
  return {"error": str(e)}
199
 
200
  def _consensus_check(self, results: List[Dict]) -> Dict:
201
- """Pick the best result by comparing content length among successful responses."""
202
  valid = [r for r in results if "error" not in r]
203
  if not valid:
204
  return {"error": "All API requests failed"}
205
  return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
206
 
207
- # ---------------------------------------------
208
  # Research Workflow Engine
209
- # ---------------------------------------------
210
  class ResearchWorkflow:
211
  def __init__(self):
212
  self.processor = CognitiveProcessor()
@@ -214,14 +209,12 @@ class ResearchWorkflow:
214
  self._build_workflow()
215
 
216
  def _build_workflow(self):
217
- # Register nodes in the state graph
218
  self.workflow.add_node("ingest", self.ingest_query)
219
  self.workflow.add_node("retrieve", self.retrieve_documents)
220
  self.workflow.add_node("analyze", self.analyze_content)
221
  self.workflow.add_node("validate", self.validate_output)
222
  self.workflow.add_node("refine", self.refine_results)
223
 
224
- # Define workflow transitions
225
  self.workflow.set_entry_point("ingest")
226
  self.workflow.add_edge("ingest", "retrieve")
227
  self.workflow.add_edge("retrieve", "analyze")
@@ -236,7 +229,6 @@ class ResearchWorkflow:
236
  self.app = self.workflow.compile()
237
 
238
  def ingest_query(self, state: AgentState) -> Dict:
239
- """Extract the user query and store it in the state."""
240
  try:
241
  query = state["messages"][-1].content
242
  return {
@@ -248,7 +240,6 @@ class ResearchWorkflow:
248
  return self._error_state(f"Ingestion Error: {str(e)}")
249
 
250
  def retrieve_documents(self, state: AgentState) -> Dict:
251
- """Retrieve relevant documents from the 'research' domain."""
252
  try:
253
  query = state["context"]["raw_query"]
254
  docs = retriever.retrieve(query, "research")
@@ -263,15 +254,8 @@ class ResearchWorkflow:
263
  return self._error_state(f"Retrieval Error: {str(e)}")
264
 
265
  def analyze_content(self, state: AgentState) -> Dict:
266
- """Concatenate document contents and analyze them using the CognitiveProcessor."""
267
  try:
268
- if "documents" not in state["context"] or not state["context"]["documents"]:
269
- return self._error_state("No documents retrieved; please check your query or retrieval process.")
270
-
271
- docs = "\n\n".join([
272
- d.page_content for d in state["context"]["documents"]
273
- if hasattr(d, "page_content") and d.page_content
274
- ])
275
  prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
276
  response = self.processor.process_query(prompt)
277
 
@@ -286,18 +270,17 @@ class ResearchWorkflow:
286
  return self._error_state(f"Analysis Error: {str(e)}")
287
 
288
  def validate_output(self, state: AgentState) -> Dict:
289
- """Validate the technical correctness of the analysis output."""
290
  analysis = state["messages"][-1].content
291
  validation_prompt = f"""Validate research analysis:
292
- {analysis}
293
-
294
- Check for:
295
- 1. Technical accuracy
296
- 2. Citation support
297
- 3. Logical consistency
298
- 4. Methodological soundness
299
-
300
- Respond with 'VALID' or 'INVALID'"""
301
 
302
  response = self.processor.process_query(validation_prompt)
303
  return {
@@ -305,14 +288,13 @@ Respond with 'VALID' or 'INVALID'"""
305
  }
306
 
307
  def refine_results(self, state: AgentState) -> Dict:
308
- """Refine the analysis based on the validation feedback."""
309
  refinement_prompt = f"""Refine this analysis:
310
- {state["messages"][-1].content}
311
-
312
- Improve:
313
- 1. Technical precision
314
- 2. Empirical grounding
315
- 3. Theoretical coherence"""
316
 
317
  response = self.processor.process_query(refinement_prompt)
318
  return {
@@ -321,32 +303,35 @@ Improve:
321
  }
322
 
323
  def _quality_check(self, state: AgentState) -> str:
324
- """Check if the validation step indicates a 'VALID' or 'INVALID' output."""
325
  content = state["messages"][-1].content
326
  return "valid" if "VALID" in content else "invalid"
327
 
328
  def _error_state(self, message: str) -> Dict:
329
- """Return an error message and mark the state as erroneous."""
330
- st.error(f"[ERROR] {message}")
331
  return {
332
  "messages": [AIMessage(content=f"❌ {message}")],
333
  "context": {"error": True},
334
  "metadata": {"status": "error"}
335
  }
336
 
337
- # ---------------------------------------------
338
  # Research Interface
339
- # ---------------------------------------------
340
  class ResearchInterface:
341
  def __init__(self):
342
  self.workflow = ResearchWorkflow()
343
- # We've already set the page config at the top.
 
 
 
 
 
 
 
344
  self._inject_styles()
345
  self._build_sidebar()
346
  self._build_main_interface()
347
 
348
  def _inject_styles(self):
349
- """Inject custom CSS for a sleek interface."""
350
  st.markdown("""
351
  <style>
352
  :root {
@@ -393,7 +378,6 @@ class ResearchInterface:
393
  """, unsafe_allow_html=True)
394
 
395
  def _build_sidebar(self):
396
- """Construct the left sidebar with document info and metrics."""
397
  with st.sidebar:
398
  st.title("🔍 Research Database")
399
  st.subheader("Technical Papers")
@@ -406,37 +390,37 @@ class ResearchInterface:
406
  st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
407
 
408
  def _build_main_interface(self):
409
- """Construct the main interface for query input and result display."""
410
  st.title("🧠 NeuroResearch AI")
411
  query = st.text_area("Research Query:", height=200,
412
- placeholder="Enter technical research question...")
413
 
414
  if st.button("Execute Analysis", type="primary"):
415
  self._execute_analysis(query)
416
 
417
  def _execute_analysis(self, query: str):
418
- """Execute the entire research workflow and render the results."""
419
  try:
420
  with st.spinner("Initializing Quantum Analysis..."):
421
  results = self.workflow.app.stream(
422
  {"messages": [HumanMessage(content=query)], "context": {}, "metadata": {}}
423
  )
 
424
  for event in results:
425
  self._render_event(event)
 
426
  st.success("✅ Analysis Completed Successfully")
427
  except Exception as e:
428
  st.error(f"""**Analysis Failed**
429
- {str(e)}
430
- Potential issues:
431
- - Complex query structure
432
- - Document correlation failure
433
- - Temporal processing constraints""")
434
 
435
  def _render_event(self, event: Dict):
436
- """Render each node's output in the UI as it streams through the workflow."""
437
  if 'ingest' in event:
438
  with st.container():
439
  st.success("✅ Query Ingested")
 
440
  elif 'retrieve' in event:
441
  with st.container():
442
  docs = event['retrieve']['context']['documents']
@@ -445,11 +429,13 @@ Potential issues:
445
  for i, doc in enumerate(docs, 1):
446
  st.markdown(f"**Document {i}**")
447
  st.code(doc.page_content, language='text')
 
448
  elif 'analyze' in event:
449
  with st.container():
450
  content = event['analyze']['messages'][0].content
451
  with st.expander("Technical Analysis Report", expanded=True):
452
  st.markdown(content)
 
453
  elif 'validate' in event:
454
  with st.container():
455
  content = event['validate']['messages'][0].content
@@ -462,8 +448,5 @@ Potential issues:
462
  with st.expander("View Validation Details", expanded=True):
463
  st.markdown(content)
464
 
465
- # ---------------------------------------------
466
- # Main Execution
467
- # ---------------------------------------------
468
  if __name__ == "__main__":
469
- ResearchInterface()
 
1
+ # ------------------------------
2
+ # Imports & Dependencies
3
+ # ------------------------------
 
 
 
 
4
  from langchain_openai import OpenAIEmbeddings
5
  from langchain_community.vectorstores import Chroma
6
  from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
 
11
  from typing_extensions import TypedDict, Annotated
12
  from typing import Sequence, Dict, List, Optional, Any
13
  import chromadb
14
+ import re
15
  import os
16
+ import streamlit as st
17
  import requests
18
  import hashlib
19
+ import json
20
  import time
21
+ from langchain.tools.retriever import create_retriever_tool
22
  from concurrent.futures import ThreadPoolExecutor, as_completed
23
  from datetime import datetime
24
 
25
+ # ------------------------------
26
  # State Schema Definition
27
+ # ------------------------------
28
  class AgentState(TypedDict):
29
  messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
30
  context: Dict[str, Any]
31
  metadata: Dict[str, Any]
32
 
33
+ # ------------------------------
34
  # Configuration
35
+ # ------------------------------
36
  class ResearchConfig:
37
  DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
38
  CHROMA_PATH = "chroma_db"
 
61
  Format: Markdown with LaTeX mathematical notation where applicable
62
  """
63
 
64
+ # Validation
65
  if not ResearchConfig.DEEPSEEK_API_KEY:
66
  st.error("""**Research Portal Configuration Required**
67
+ 1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
68
+ 2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
69
+ 3. Rebuild deployment""")
70
  st.stop()
71
 
72
+ # ------------------------------
73
  # Quantum Document Processing
74
+ # ------------------------------
75
  class QuantumDocumentManager:
76
  def __init__(self):
77
  self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
 
87
  separators=["\n\n", "\n", "|||"]
88
  )
89
  docs = splitter.create_documents(documents)
 
90
  return Chroma.from_documents(
91
  documents=docs,
92
  embedding=self.embeddings,
 
96
  )
97
 
98
  def _document_id(self, content: str) -> str:
 
99
  return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
100
 
101
  # Initialize document collections
 
112
  "Product Y: In the Performance Optimization Stage Before Release"
113
  ], "development")
114
 
115
+ # ------------------------------
116
  # Advanced Retrieval System
117
+ # ------------------------------
118
  class ResearchRetriever:
119
  def __init__(self):
120
  self.retrievers = {
 
133
  }
134
 
135
  def retrieve(self, query: str, domain: str) -> List[Any]:
 
136
  try:
137
+ return self.retrievers[domain].invoke(query)
 
138
  except KeyError:
 
139
  return []
140
 
141
  retriever = ResearchRetriever()
142
 
143
+ # ------------------------------
144
  # Cognitive Processing Unit
145
+ # ------------------------------
146
  class CognitiveProcessor:
147
  def __init__(self):
148
  self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
149
  self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]
150
 
151
  def process_query(self, prompt: str) -> Dict:
 
152
  futures = []
153
+ for _ in range(3): # Triple redundancy
154
+ futures.append(self.executor.submit(
155
+ self._execute_api_request,
156
+ prompt
157
+ ))
158
 
159
  results = []
160
  for future in as_completed(futures):
 
166
  return self._consensus_check(results)
167
 
168
  def _execute_api_request(self, prompt: str) -> Dict:
 
169
  headers = {
170
  "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
171
  "Content-Type": "application/json",
 
194
  return {"error": str(e)}
195
 
196
  def _consensus_check(self, results: List[Dict]) -> Dict:
 
197
  valid = [r for r in results if "error" not in r]
198
  if not valid:
199
  return {"error": "All API requests failed"}
200
  return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
201
 
202
+ # ------------------------------
203
  # Research Workflow Engine
204
+ # ------------------------------
205
  class ResearchWorkflow:
206
  def __init__(self):
207
  self.processor = CognitiveProcessor()
 
209
  self._build_workflow()
210
 
211
  def _build_workflow(self):
 
212
  self.workflow.add_node("ingest", self.ingest_query)
213
  self.workflow.add_node("retrieve", self.retrieve_documents)
214
  self.workflow.add_node("analyze", self.analyze_content)
215
  self.workflow.add_node("validate", self.validate_output)
216
  self.workflow.add_node("refine", self.refine_results)
217
 
 
218
  self.workflow.set_entry_point("ingest")
219
  self.workflow.add_edge("ingest", "retrieve")
220
  self.workflow.add_edge("retrieve", "analyze")
 
229
  self.app = self.workflow.compile()
230
 
231
  def ingest_query(self, state: AgentState) -> Dict:
 
232
  try:
233
  query = state["messages"][-1].content
234
  return {
 
240
  return self._error_state(f"Ingestion Error: {str(e)}")
241
 
242
  def retrieve_documents(self, state: AgentState) -> Dict:
 
243
  try:
244
  query = state["context"]["raw_query"]
245
  docs = retriever.retrieve(query, "research")
 
254
  return self._error_state(f"Retrieval Error: {str(e)}")
255
 
256
  def analyze_content(self, state: AgentState) -> Dict:
 
257
  try:
258
+ docs = "\n\n".join([d.page_content for d in state["context"]["documents"]])
 
 
 
 
 
 
259
  prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
260
  response = self.processor.process_query(prompt)
261
 
 
270
  return self._error_state(f"Analysis Error: {str(e)}")
271
 
272
  def validate_output(self, state: AgentState) -> Dict:
 
273
  analysis = state["messages"][-1].content
274
  validation_prompt = f"""Validate research analysis:
275
+ {analysis}
276
+
277
+ Check for:
278
+ 1. Technical accuracy
279
+ 2. Citation support
280
+ 3. Logical consistency
281
+ 4. Methodological soundness
282
+
283
+ Respond with 'VALID' or 'INVALID'"""
284
 
285
  response = self.processor.process_query(validation_prompt)
286
  return {
 
288
  }
289
 
290
  def refine_results(self, state: AgentState) -> Dict:
 
291
  refinement_prompt = f"""Refine this analysis:
292
+ {state["messages"][-1].content}
293
+
294
+ Improve:
295
+ 1. Technical precision
296
+ 2. Empirical grounding
297
+ 3. Theoretical coherence"""
298
 
299
  response = self.processor.process_query(refinement_prompt)
300
  return {
 
303
  }
304
 
305
  def _quality_check(self, state: AgentState) -> str:
 
306
  content = state["messages"][-1].content
307
  return "valid" if "VALID" in content else "invalid"
308
 
309
  def _error_state(self, message: str) -> Dict:
 
 
310
  return {
311
  "messages": [AIMessage(content=f"❌ {message}")],
312
  "context": {"error": True},
313
  "metadata": {"status": "error"}
314
  }
315
 
316
+ # ------------------------------
317
  # Research Interface
318
+ # ------------------------------
319
  class ResearchInterface:
320
  def __init__(self):
321
  self.workflow = ResearchWorkflow()
322
+ self._initialize_interface()
323
+
324
+ def _initialize_interface(self):
325
+ st.set_page_config(
326
+ page_title="NeuroResearch AI",
327
+ layout="wide",
328
+ initial_sidebar_state="expanded"
329
+ )
330
  self._inject_styles()
331
  self._build_sidebar()
332
  self._build_main_interface()
333
 
334
  def _inject_styles(self):
 
335
  st.markdown("""
336
  <style>
337
  :root {
 
378
  """, unsafe_allow_html=True)
379
 
380
  def _build_sidebar(self):
 
381
  with st.sidebar:
382
  st.title("🔍 Research Database")
383
  st.subheader("Technical Papers")
 
390
  st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
391
 
392
  def _build_main_interface(self):
 
393
  st.title("🧠 NeuroResearch AI")
394
  query = st.text_area("Research Query:", height=200,
395
+ placeholder="Enter technical research question...")
396
 
397
  if st.button("Execute Analysis", type="primary"):
398
  self._execute_analysis(query)
399
 
400
  def _execute_analysis(self, query: str):
 
401
  try:
402
  with st.spinner("Initializing Quantum Analysis..."):
403
  results = self.workflow.app.stream(
404
  {"messages": [HumanMessage(content=query)], "context": {}, "metadata": {}}
405
  )
406
+
407
  for event in results:
408
  self._render_event(event)
409
+
410
  st.success("✅ Analysis Completed Successfully")
411
  except Exception as e:
412
  st.error(f"""**Analysis Failed**
413
+ {str(e)}
414
+ Potential issues:
415
+ - Complex query structure
416
+ - Document correlation failure
417
+ - Temporal processing constraints""")
418
 
419
  def _render_event(self, event: Dict):
 
420
  if 'ingest' in event:
421
  with st.container():
422
  st.success("✅ Query Ingested")
423
+
424
  elif 'retrieve' in event:
425
  with st.container():
426
  docs = event['retrieve']['context']['documents']
 
429
  for i, doc in enumerate(docs, 1):
430
  st.markdown(f"**Document {i}**")
431
  st.code(doc.page_content, language='text')
432
+
433
  elif 'analyze' in event:
434
  with st.container():
435
  content = event['analyze']['messages'][0].content
436
  with st.expander("Technical Analysis Report", expanded=True):
437
  st.markdown(content)
438
+
439
  elif 'validate' in event:
440
  with st.container():
441
  content = event['validate']['messages'][0].content
 
448
  with st.expander("View Validation Details", expanded=True):
449
  st.markdown(content)
450
 
 
 
 
451
  if __name__ == "__main__":
452
+ ResearchInterface()