mgbam committed on
Commit dfecac2 · verified · 1 Parent(s): a3fcfdd

Update app.py

Files changed (1)
  1. app.py +233 -218
app.py CHANGED
@@ -3,33 +3,30 @@
 # ------------------------------
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
-from langchain_core.messages import HumanMessage, AIMessage
+from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_core.documents import Document
 from langgraph.graph import END, StateGraph
-from langgraph.graph.message import add_messages  # <-- FIXED IMPORT
+from langgraph.prebuilt import ToolNode
+from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict, Annotated
 from typing import Sequence, Dict, List, Optional, Any
 import chromadb
-import numpy as np
+import re
 import os
 import streamlit as st
 import requests
 import hashlib
-import re
+import json
 import time
+from langchain.tools.retriever import create_retriever_tool
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
-from sklearn.metrics.pairwise import cosine_similarity
 
 # ------------------------------
-# State Schema Definition (Fixed)
+# State Schema Definition
 # ------------------------------
 class AgentState(TypedDict):
-    messages: Annotated[
-        Sequence[AIMessage | HumanMessage],
-        add_messages  # <-- NOW PROPERLY DEFINED
-    ]
+    messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
     context: Dict[str, Any]
    metadata: Dict[str, Any]
 
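Note: the new one-line `messages` field relies on LangGraph's `add_messages` reducer, which appends each node's returned messages to the running history instead of overwriting it. A minimal sketch of that behaviour, assuming a recent `langgraph` (not part of this commit):

```python
from typing import Sequence
from typing_extensions import Annotated, TypedDict
from langchain_core.messages import AIMessage, HumanMessage
from langgraph.graph import END, StateGraph
from langgraph.graph.message import add_messages

class State(TypedDict):
    # add_messages appends each node's returned messages to the history
    messages: Annotated[Sequence[AIMessage | HumanMessage], add_messages]

def step_a(state: State) -> dict:
    return {"messages": [AIMessage(content="step A done")]}

def step_b(state: State) -> dict:
    return {"messages": [AIMessage(content="step B done")]}

graph = StateGraph(State)
graph.add_node("a", step_a)
graph.add_node("b", step_b)
graph.set_entry_point("a")
graph.add_edge("a", "b")
graph.add_edge("b", END)

app = graph.compile()
result = app.invoke({"messages": [HumanMessage(content="hi")]})
assert len(result["messages"]) == 3  # input + one message per node
```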
@@ -43,62 +40,37 @@ class ResearchConfig:
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
-
     DOCUMENT_MAP = {
-        "CV-Transformer Model": {
-            "title": "Hybrid CV-Transformer Architecture",
-            "content": """
-            Combines CNN feature extraction with transformer attention mechanisms.
-            Key equation: $f(x) = \text{Softmax}(\frac{QK^T}{\sqrt{d_k}})V$
-            Achieves 98.2% accuracy on ImageNet-1k with 42ms inference speed
-            """
-        },
-        "Quantum ML": {
-            "title": "Quantum Machine Learning",
-            "content": """
-            Quantum-enhanced optimization techniques for ML models.
-            $\theta_{t+1} = \theta_t - \eta \nabla_\theta \mathcal{L}(\theta_t)$
-            100x speedup on optimization tasks with 58% energy reduction
-            """
-        }
+        "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
+            "CV-Transformer Hybrid Architecture",
+        "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing":
+            "Transformer Architecture Analysis",
+        "Latest Trends in Machine Learning Methods Using Quantum Computing":
+            "Quantum ML Frontiers"
     }
-
-    ANALYSIS_TEMPLATE = """Analyze these technical documents:
+    ANALYSIS_TEMPLATE = """Analyze these technical documents with scientific rigor:
     {context}
 
-    Respond in MARKDOWN with:
-    1. **Key Innovations** (mathematical formulations)
-    2. **Methodologies** (algorithms & architectures)
-    3. **Empirical Results** (comparative metrics)
-    4. **Applications** (industry use cases)
-    5. **Limitations** (theoretical boundaries)
+    Respond with:
+    1. Key Technical Contributions (bullet points)
+    2. Novel Methodologies
+    3. Empirical Results (with metrics)
+    4. Potential Applications
+    5. Limitations & Future Directions
 
-    Include LaTeX equations where applicable."""
-
-# Check for Chroma migration
-if os.path.exists(ResearchConfig.CHROMA_PATH):
-    st.warning("""
-    **ChromDB Migration Required**
-    Existing Chroma database detected. Run these commands:
-
-    ```bash
-    pip install chroma-migrate
-    chroma-migrate
-    ```
-
-    Then restart the application.
-    """)
-    st.stop()
+    Format: Markdown with LaTeX mathematical notation where applicable
+    """
 
+# Validation
 if not ResearchConfig.DEEPSEEK_API_KEY:
-    st.error("""**Configuration Required**
-    1. Get DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
-    2. Set secret: `DEEPSEEK_API_KEY`
+    st.error("""**Research Portal Configuration Required**
+    1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
+    2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
     3. Rebuild deployment""")
     st.stop()
 
 # ------------------------------
-# ChromaDB Document Manager
+# Quantum Document Processing
 # ------------------------------
 class QuantumDocumentManager:
     def __init__(self):
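Note: `ANALYSIS_TEMPLATE` is an ordinary `str.format` template with a single `{context}` slot. A minimal sketch of how it is filled before the prompt reaches the chat API (the template is abbreviated and the chunk text invented for illustration):

```python
# Illustrative only: joining retrieved chunks into the {context} slot.
template = "Analyze these technical documents with scientific rigor:\n{context}"
chunks = [
    "Self-attention lets transformers model long-range dependencies.",
    "Quantum kernels map classical features into a Hilbert space.",
]
prompt = template.format(context="\n\n".join(chunks))
print(prompt)
```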
@@ -107,82 +79,97 @@ class QuantumDocumentManager:
             model="text-embedding-3-large",
             dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
         )
-
-    def create_collection(self, document_map: Dict[str, Dict[str, str]], collection_name: str) -> Chroma:
+
+    def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
             chunk_overlap=ResearchConfig.CHUNK_OVERLAP,
             separators=["\n\n", "\n", "|||"]
         )
-
-        docs = []
-        for key, data in document_map.items():
-            chunks = splitter.split_text(data["content"])
-            for chunk in chunks:
-                docs.append(Document(
-                    page_content=chunk,
-                    metadata={
-                        "title": data["title"],
-                        "source": collection_name,
-                        "hash": hashlib.sha256(chunk.encode()).hexdigest()[:16]
-                    }
-                ))
-
+        docs = splitter.create_documents(documents)
         return Chroma.from_documents(
             documents=docs,
             embedding=self.embeddings,
-            collection_name=collection_name,
             client=self.client,
+            collection_name=collection_name,
             ids=[self._document_id(doc.page_content) for doc in docs]
         )
 
     def _document_id(self, content: str) -> str:
         return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
 
-# Initialize document system
+# Initialize document collections
 qdm = QuantumDocumentManager()
-research_docs = qdm.create_collection(ResearchConfig.DOCUMENT_MAP, "research_papers")
-
+research_docs = qdm.create_collection([
+    "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
+    "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
+    "Latest Trends in Machine Learning Methods Using Quantum Computing"
+], "research")
+
+development_docs = qdm.create_collection([
+    "Project A: UI Design Completed, API Integration in Progress",
+    "Project B: Testing New Feature X, Bug Fixes Needed",
+    "Product Y: In the Performance Optimization Stage Before Release"
+], "development")
 
 # ------------------------------
-# Retrieval System
+# Advanced Retrieval System
 # ------------------------------
 class ResearchRetriever:
     def __init__(self):
-        self.retriever = research_docs.as_retriever(
-            search_type="mmr",
-            search_kwargs={
-                'k': 4,
-                'fetch_k': 20,
-                'lambda_mult': 0.85
-            }
-        )
+        self.retrievers = {
+            "research": research_docs.as_retriever(
+                search_type="mmr",
+                search_kwargs={
+                    'k': 4,
+                    'fetch_k': 20,
+                    'lambda_mult': 0.85
+                }
+            ),
+            "development": development_docs.as_retriever(
+                search_type="similarity",
+                search_kwargs={'k': 3}
+            )
+        }
 
-    def retrieve(self, query: str) -> List[Document]:
+    def retrieve(self, query: str, domain: str) -> List[Any]:
         try:
-            docs = self.retriever.invoke(query)
-            if len(docs) < 1:
-                raise ValueError("No relevant documents found")
-            return docs
-        except Exception as e:
-            st.error(f"Retrieval Error: {str(e)}")
+            return self.retrievers[domain].invoke(query)
+        except KeyError:
             return []
 
+retriever = ResearchRetriever()
+
 # ------------------------------
-# Analysis Processor
+# Cognitive Processing Unit
 # ------------------------------
 class CognitiveProcessor:
     def __init__(self):
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
+        self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]
 
     def process_query(self, prompt: str) -> Dict:
-        futures = [self.executor.submit(self._api_request, prompt) for _ in range(3)]
-        return self._best_result([f.result() for f in as_completed(futures)])
-
-    def _api_request(self, prompt: str) -> Dict:
+        futures = []
+        for _ in range(3):  # Triple redundancy
+            futures.append(self.executor.submit(
+                self._execute_api_request,
+                prompt
+            ))
+
+        results = []
+        for future in as_completed(futures):
+            try:
+                results.append(future.result())
+            except Exception as e:
+                st.error(f"Processing Error: {str(e)}")
+
+        return self._consensus_check(results)
+
+    def _execute_api_request(self, prompt: str) -> Dict:
         headers = {
             "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
+            "X-Research-Session": self.session_id
         }
 
         try:
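Note: the research collection keeps MMR (maximal marginal relevance) search. A sketch of what those knobs mean, on a throwaway in-memory collection (requires an OpenAI key; the sample texts are invented):

```python
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

store = Chroma.from_texts(
    ["transformer attention", "CNN feature maps", "quantum kernel methods"],
    embedding=OpenAIEmbeddings(model="text-embedding-3-large"),
)
retriever = store.as_retriever(
    search_type="mmr",
    # fetch_k candidates are ranked by similarity, then k winners are picked
    # to balance relevance against diversity; lambda_mult near 1.0 favours
    # relevance, near 0.0 favours diversity.
    search_kwargs={"k": 2, "fetch_k": 10, "lambda_mult": 0.85},
)
docs = retriever.invoke("attention mechanisms")
```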
@@ -203,35 +190,30 @@ class CognitiveProcessor:
             )
             response.raise_for_status()
             return response.json()
-        except Exception as e:
+        except requests.exceptions.RequestException as e:
             return {"error": str(e)}
 
-    def _best_result(self, results: List[Dict]) -> Dict:
+    def _consensus_check(self, results: List[Dict]) -> Dict:
         valid = [r for r in results if "error" not in r]
         if not valid:
             return {"error": "All API requests failed"}
-
-        # Select response with most technical content
-        contents = [r.get('choices', [{}])[0].get('message', {}).get('content', '') for r in valid]
-        tech_scores = [len(re.findall(r"\$.*?\$", c)) for c in contents]
-        return valid[np.argmax(tech_scores)]
+        return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
 
 # ------------------------------
-# Workflow Engine
+# Research Workflow Engine
 # ------------------------------
 class ResearchWorkflow:
     def __init__(self):
-        self.retriever = ResearchRetriever()
         self.processor = CognitiveProcessor()
         self.workflow = StateGraph(AgentState)
         self._build_workflow()
 
     def _build_workflow(self):
-        self.workflow.add_node("ingest", self.ingest)
-        self.workflow.add_node("retrieve", self.retrieve)
-        self.workflow.add_node("analyze", self.analyze)
-        self.workflow.add_node("validate", self.validate)
-        self.workflow.add_node("refine", self.refine)
+        self.workflow.add_node("ingest", self.ingest_query)
+        self.workflow.add_node("retrieve", self.retrieve_documents)
+        self.workflow.add_node("analyze", self.analyze_content)
+        self.workflow.add_node("validate", self.validate_output)
+        self.workflow.add_node("refine", self.refine_results)
 
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
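Note: `process_query` now fires the same prompt three times and `_consensus_check` keeps the longest successful answer. The pattern in isolation, with the HTTP call stubbed out:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, List

def call_api(prompt: str) -> Dict:
    # Stand-in for the real DeepSeek POST; may raise or return {"error": ...}.
    return {"content": f"answer to: {prompt}"}

def pick_winner(results: List[Dict]) -> Dict:
    valid = [r for r in results if "error" not in r]
    if not valid:
        return {"error": "All API requests failed"}
    # Crude consensus: keep the longest successful response.
    return max(valid, key=lambda r: len(r.get("content", "")))

with ThreadPoolExecutor(max_workers=5) as pool:
    futures = [pool.submit(call_api, "explain MMR") for _ in range(3)]
    results = [f.result() for f in as_completed(futures)]

best = pick_winner(results)
```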
@@ -246,93 +228,83 @@ class ResearchWorkflow:
 
         self.app = self.workflow.compile()
 
-    def ingest(self, state: AgentState) -> Dict:
+    def ingest_query(self, state: AgentState) -> Dict:
         try:
             query = state["messages"][-1].content
             return {
-                "messages": [AIMessage(content="Query ingested")],
-                "context": {"query": query},
+                "messages": [AIMessage(content="Query ingested successfully")],
+                "context": {"raw_query": query},
                 "metadata": {"timestamp": datetime.now().isoformat()}
             }
         except Exception as e:
             return self._error_state(f"Ingestion Error: {str(e)}")
 
-    def retrieve(self, state: AgentState) -> Dict:
+    def retrieve_documents(self, state: AgentState) -> Dict:
         try:
-            docs = self.retriever.retrieve(state["context"]["query"])
+            query = state["context"]["raw_query"]
+            docs = retriever.retrieve(query, "research")
             return {
-                "messages": [AIMessage(content=f"Found {len(docs)} relevant papers")],
-                "context": {"docs": docs}
+                "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
+                "context": {
+                    "documents": docs,
+                    "retrieval_time": time.time()
+                }
             }
         except Exception as e:
             return self._error_state(f"Retrieval Error: {str(e)}")
 
-    def analyze(self, state: AgentState) -> Dict:
+    def analyze_content(self, state: AgentState) -> Dict:
         try:
-            context = "\n\n".join([
-                f"### {doc.metadata['title']}\n{doc.page_content}"
-                for doc in state["context"]["docs"]
-            ])
-            prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=context)
+            docs = "\n\n".join([d.page_content for d in state["context"]["documents"]])
+            prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
             response = self.processor.process_query(prompt)
 
             if "error" in response:
-                raise RuntimeError(response["error"])
-
-            content = response['choices'][0]['message']['content']
-            self._validate_analysis(content)
+                return self._error_state(response["error"])
 
-            return {"messages": [AIMessage(content=content)]}
+            return {
+                "messages": [AIMessage(content=response['choices'][0]['message']['content'])],
+                "context": {"analysis": response}
+            }
         except Exception as e:
             return self._error_state(f"Analysis Error: {str(e)}")
 
-    def validate(self, state: AgentState) -> Dict:
-        validation_prompt = f"""Validate this technical analysis:
-        {state["messages"][-1].content}
-
-        Check for:
-        1. Mathematical accuracy
-        2. Technical depth
-        3. Logical consistency
-
-        Respond with 'VALID' or 'INVALID'"""
+    def validate_output(self, state: AgentState) -> Dict:
+        analysis = state["messages"][-1].content
+        validation_prompt = f"""Validate research analysis:
+        {analysis}
+
+        Check for:
+        1. Technical accuracy
+        2. Citation support
+        3. Logical consistency
+        4. Methodological soundness
+
+        Respond with 'VALID' or 'INVALID'"""
 
         response = self.processor.process_query(validation_prompt)
-        valid = "VALID" in response.get('choices', [{}])[0].get('message', {}).get('content', '')
         return {
-            "messages": [AIMessage(content=f"{state['messages'][-1].content}\n\nValidation: {'✅ Valid' if valid else '❌ Invalid'}")],
-            "context": {"valid": valid}
+            "messages": [AIMessage(content=analysis + f"\n\nValidation: {response.get('choices', [{}])[0].get('message', {}).get('content', '')}")]
         }
 
-    def refine(self, state: AgentState) -> Dict:
-        refinement_prompt = f"""Improve this analysis:
-        {state["messages"][-1].content}
-
-        Focus on:
-        1. Mathematical precision
-        2. Technical terminology
-        3. Empirical references"""
+    def refine_results(self, state: AgentState) -> Dict:
+        refinement_prompt = f"""Refine this analysis:
+        {state["messages"][-1].content}
+
+        Improve:
+        1. Technical precision
+        2. Empirical grounding
+        3. Theoretical coherence"""
 
         response = self.processor.process_query(refinement_prompt)
-        return {"messages": [AIMessage(content=response['choices'][0]['message']['content'])]}
+        return {
+            "messages": [AIMessage(content=response.get('choices', [{}])[0].get('message', {}).get('content', ''))],
+            "context": state["context"]
+        }
 
     def _quality_check(self, state: AgentState) -> str:
-        return "valid" if state.get("context", {}).get("valid", False) else "invalid"
-
-    def _validate_analysis(self, content: str):
-        required_sections = [
-            "Key Innovations",
-            "Methodologies",
-            "Empirical Results",
-            "Applications",
-            "Limitations"
-        ]
-        missing = [s for s in required_sections if f"## {s}" not in content]
-        if missing:
-            raise ValueError(f"Missing sections: {', '.join(missing)}")
-
-        if not re.search(r"\$.*?\$", content):
-            raise ValueError("Analysis lacks mathematical notation")
+        content = state["messages"][-1].content
+        return "valid" if "VALID" in content else "invalid"
 
     def _error_state(self, message: str) -> Dict:
         return {
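Note: `_quality_check` returns "valid" or "invalid", which implies a conditional edge after the validate node. The `add_conditional_edges` call itself is outside the visible hunks, so the wiring below (inside `_build_workflow`) is an assumption, not part of the commit:

```python
# Assumed wiring (not shown in this diff): route on _quality_check's label.
self.workflow.add_conditional_edges(
    "validate",
    self._quality_check,                 # returns "valid" or "invalid"
    {"valid": END, "invalid": "refine"},
)
self.workflow.add_edge("refine", "validate")  # re-validate after refining
```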
@@ -342,22 +314,22 @@
         }
 
 # ------------------------------
-# Streamlit Interface
+# Research Interface
 # ------------------------------
 class ResearchInterface:
     def __init__(self):
         self.workflow = ResearchWorkflow()
-        self._initialize()
+        self._initialize_interface()
 
-    def _initialize(self):
+    def _initialize_interface(self):
         st.set_page_config(
-            page_title="AI Research Assistant",
+            page_title="NeuroResearch AI",
             layout="wide",
             initial_sidebar_state="expanded"
         )
         self._inject_styles()
         self._build_sidebar()
-        self._build_main()
+        self._build_main_interface()
 
     def _inject_styles(self):
         st.markdown("""
@@ -366,72 +338,115 @@ class ResearchInterface:
             --primary: #2ecc71;
             --secondary: #3498db;
             --background: #0a0a0a;
+            --text: #ecf0f1;
         }
+
         .stApp {
             background: var(--background);
-            color: white;
+            color: var(--text);
+            font-family: 'Roboto', sans-serif;
         }
+
         .stTextArea textarea {
             background: #1a1a1a !important;
-            border: 2px solid var(--secondary) !important;
+            color: var(--text) !important;
+            border: 2px solid var(--secondary);
+            border-radius: 8px;
+            padding: 1rem;
         }
-        code {
-            color: var(--primary);
-            background: #002200;
-            padding: 2px 4px;
+
+        .stButton>button {
+            background: linear-gradient(135deg, var(--primary), var(--secondary));
+            border: none;
+            border-radius: 8px;
+            padding: 1rem 2rem;
+            transition: all 0.3s;
+        }
+
+        .stButton>button:hover {
+            transform: translateY(-2px);
+            box-shadow: 0 4px 12px rgba(46, 204, 113, 0.3);
+        }
+
+        .stExpander {
+            background: #1a1a1a;
+            border: 1px solid #2a2a2a;
+            border-radius: 8px;
+            margin: 1rem 0;
         }
         </style>
         """, unsafe_allow_html=True)
 
     def _build_sidebar(self):
         with st.sidebar:
-            st.title("🔬 Research Corpus")
-            for key, data in ResearchConfig.DOCUMENT_MAP.items():
-                with st.expander(data["title"]):
-                    st.markdown(f"```latex\n{data['content']}\n```")
-            st.metric("Vector DB Size", len(research_docs.get()['ids']))
-
-    def _build_main(self):
-        st.title("🧠 AI Research Analyst")
-        query = st.text_area("Research Query:", height=150,
-                             placeholder="Enter technical question...")
+            st.title("🔍 Research Database")
+            st.subheader("Technical Papers")
+            for title, short in ResearchConfig.DOCUMENT_MAP.items():
+                with st.expander(short):
+                    st.markdown(f"```\n{title}\n```")
+
+            st.subheader("Analysis Metrics")
+            st.metric("Vector Collections", 2)
+            st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
+
+    def _build_main_interface(self):
+        st.title("🧠 NeuroResearch AI")
+        query = st.text_area("Research Query:", height=200,
+                             placeholder="Enter technical research question...")
 
-        if st.button("Analyze", type="primary"):
+        if st.button("Execute Analysis", type="primary"):
             self._execute_analysis(query)
 
     def _execute_analysis(self, query: str):
         try:
-            with st.spinner("Analyzing research corpus..."):
-                result = self.workflow.app.invoke(
-                    {"messages": [HumanMessage(content=query)]}
+            with st.spinner("Initializing Quantum Analysis..."):
+                results = self.workflow.app.stream(
+                    {"messages": [HumanMessage(content=query)], "context": {}, "metadata": {}}
                 )
 
-            if result.get("context", {}).get("error"):
-                self._show_error(result["context"]["error"])
-            else:
-                self._display_result(result)
+            for event in results:
+                self._render_event(event)
+
+            st.success("✅ Analysis Completed Successfully")
         except Exception as e:
-            self._show_error(str(e))
-
-    def _display_result(self, result):
-        with st.expander("Technical Report", expanded=True):
-            st.markdown(result["messages"][-1].content)
-
-        with st.expander("Source Excerpts", expanded=False):
-            for doc in result["context"].get("docs", []):
-                st.markdown(f"**{doc.metadata['title']}**")
-                st.code(doc.page_content, language='latex')
-
-    def _show_error(self, message):
-        st.error(f"""
-        ⚠️ Analysis Failed
-        {message}
-
-        Mitigation Steps:
-        1. Simplify query complexity
-        2. Check document connections
-        3. Verify technical terms
-        """)
+            st.error(f"""**Analysis Failed**
+            {str(e)}
+            Potential issues:
+            - Complex query structure
+            - Document correlation failure
+            - Temporal processing constraints""")
+
+    def _render_event(self, event: Dict):
+        if 'ingest' in event:
+            with st.container():
+                st.success("✅ Query Ingested")
+
+        elif 'retrieve' in event:
+            with st.container():
+                docs = event['retrieve']['context']['documents']
+                st.info(f"📚 Retrieved {len(docs)} documents")
+                with st.expander("View Retrieved Documents", expanded=False):
+                    for i, doc in enumerate(docs, 1):
+                        st.markdown(f"**Document {i}**")
+                        st.code(doc.page_content, language='text')
+
+        elif 'analyze' in event:
+            with st.container():
+                content = event['analyze']['messages'][0].content
+                with st.expander("Technical Analysis Report", expanded=True):
+                    st.markdown(content)
+
+        elif 'validate' in event:
+            with st.container():
+                content = event['validate']['messages'][0].content
+                if "VALID" in content:
+                    st.success("✅ Validation Passed")
+                    with st.expander("View Validated Analysis", expanded=True):
+                        st.markdown(content.split("Validation:")[0])
+                else:
+                    st.warning("⚠️ Validation Issues Detected")
+                    with st.expander("View Validation Details", expanded=True):
+                        st.markdown(content)
 
 if __name__ == "__main__":
     ResearchInterface()
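Note: the interface now consumes `app.stream(...)` instead of `invoke`. Each yielded event is a dict keyed by the node that just finished, which is exactly what `_render_event` dispatches on. A minimal consumer, assuming the compiled `app` above:

```python
initial = {
    "messages": [HumanMessage(content="Compare CNN and transformer accuracy")],
    "context": {},
    "metadata": {},
}
# Each event looks like {"retrieve": {...partial state...}}, one per node run.
for event in app.stream(initial):
    for node_name, update in event.items():
        print(node_name, [m.content for m in update.get("messages", [])])
```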
 