mgbam committed
Commit 53e86a9 · verified · 1 Parent(s): 2c0f60e

Update app.py

Files changed (1)
app.py +296 -294
app.py CHANGED
@@ -3,35 +3,25 @@
 # ------------------------------
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
-from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langgraph.graph import END, StateGraph
-from langgraph.prebuilt import ToolNode
-from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict, Annotated
 from typing import Sequence, Dict, List, Optional, Any
 import chromadb
-import re
 import os
 import streamlit as st
 import requests
 import hashlib
 import json
 import time
-from langchain.tools.retriever import create_retriever_tool
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
 
 # ------------------------------
-# State Schema Definition
-# ------------------------------
-class AgentState(TypedDict):
-    messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
-    context: Dict[str, Any]
-    metadata: Dict[str, Any]
-
-# ------------------------------
-# Configuration
 # ------------------------------
 class ResearchConfig:
     DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
@@ -40,136 +30,171 @@ class ResearchConfig:
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
-    DOCUMENT_MAP = {
-        "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
-            "CV-Transformer Hybrid Architecture",
-        "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing":
-            "Transformer Architecture Analysis",
-        "Latest Trends in Machine Learning Methods Using Quantum Computing":
-            "Quantum ML Frontiers"
-    }
-    ANALYSIS_TEMPLATE = """Analyze these technical documents with scientific rigor:
 {context}
 
 Respond with:
-1. Key Technical Contributions (bullet points)
-2. Novel Methodologies
-3. Empirical Results (with metrics)
-4. Potential Applications
-5. Limitations & Future Directions
-
-Format: Markdown with LaTeX mathematical notation where applicable
-"""
-
-# Validation
-if not ResearchConfig.DEEPSEEK_API_KEY:
-    st.error("""**Research Portal Configuration Required**
-    1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
-    2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
-    3. Rebuild deployment""")
-    st.stop()
 
 # ------------------------------
-# Quantum Document Processing
 # ------------------------------
-class QuantumDocumentManager:
     def __init__(self):
         self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
         self.embeddings = OpenAIEmbeddings(
             model="text-embedding-3-large",
             dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
         )
 
-    def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
             chunk_overlap=ResearchConfig.CHUNK_OVERLAP,
-            separators=["\n\n", "\n", "|||"]
         )
-        docs = splitter.create_documents(documents)
-        return Chroma.from_documents(
-            documents=docs,
-            embedding=self.embeddings,
-            client=self.client,
-            collection_name=collection_name,
-            ids=[self._document_id(doc.page_content) for doc in docs]
-        )
-
-    def _document_id(self, content: str) -> str:
-        return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
-
-# Initialize document collections
-qdm = QuantumDocumentManager()
-research_docs = qdm.create_collection([
-    "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
-    "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
-    "Latest Trends in Machine Learning Methods Using Quantum Computing"
-], "research")
-
-development_docs = qdm.create_collection([
-    "Project A: UI Design Completed, API Integration in Progress",
-    "Project B: Testing New Feature X, Bug Fixes Needed",
-    "Product Y: In the Performance Optimization Stage Before Release"
-], "development")
 
 # ------------------------------
-# Advanced Retrieval System
 # ------------------------------
 class ResearchRetriever:
     def __init__(self):
-        self.retrievers = {
-            "research": research_docs.as_retriever(
-                search_type="mmr",
-                search_kwargs={
-                    'k': 4,
-                    'fetch_k': 20,
-                    'lambda_mult': 0.85
-                }
-            ),
-            "development": development_docs.as_retriever(
-                search_type="similarity",
-                search_kwargs={'k': 3}
-            )
-        }
 
-    def retrieve(self, query: str, domain: str) -> List[Any]:
         try:
-            return self.retrievers[domain].invoke(query)
-        except KeyError:
             return []
 
-retriever = ResearchRetriever()
-
 # ------------------------------
-# Cognitive Processing Unit
 # ------------------------------
-class CognitiveProcessor:
     def __init__(self):
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
-        self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]
-
-    def process_query(self, prompt: str) -> Dict:
-        futures = []
-        for _ in range(3):  # Triple redundancy
-            futures.append(self.executor.submit(
-                self._execute_api_request,
-                prompt
-            ))
-
-        results = []
-        for future in as_completed(futures):
-            try:
-                results.append(future.result())
-            except Exception as e:
-                st.error(f"Processing Error: {str(e)}")
-
-        return self._consensus_check(results)
-
-    def _execute_api_request(self, prompt: str) -> Dict:
         headers = {
             "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
-            "Content-Type": "application/json",
-            "X-Research-Session": self.session_id
         }
 
         try:
@@ -178,200 +203,173 @@ class CognitiveProcessor:
                 headers=headers,
                 json={
                     "model": "deepseek-chat",
-                    "messages": [{
-                        "role": "user",
-                        "content": f"Respond as Senior AI Researcher:\n{prompt}"
-                    }],
                     "temperature": 0.7,
-                    "max_tokens": 1500,
-                    "top_p": 0.9
                 },
-                timeout=45
             )
             response.raise_for_status()
             return response.json()
-        except requests.exceptions.RequestException as e:
-            return {"error": str(e)}
-
-    def _consensus_check(self, results: List[Dict]) -> Dict:
         valid = [r for r in results if "error" not in r]
         if not valid:
-            return {"error": "All API requests failed"}
-        return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
 
 # ------------------------------
-# Research Workflow Engine
 # ------------------------------
 class ResearchWorkflow:
     def __init__(self):
-        self.processor = CognitiveProcessor()
-        self.workflow = StateGraph(AgentState)
-        self._build_workflow()
 
-    def _build_workflow(self):
-        self.workflow.add_node("ingest", self.ingest_query)
-        self.workflow.add_node("retrieve", self.retrieve_documents)
-        self.workflow.add_node("analyze", self.analyze_content)
-        self.workflow.add_node("validate", self.validate_output)
-        self.workflow.add_node("refine", self.refine_results)
 
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.workflow.add_edge("retrieve", "analyze")
         self.workflow.add_conditional_edges(
             "analyze",
-            self._quality_check,
             {"valid": "validate", "invalid": "refine"}
         )
         self.workflow.add_edge("validate", END)
         self.workflow.add_edge("refine", "retrieve")
 
-        self.app = self.workflow.compile()
-
-    def ingest_query(self, state: AgentState) -> Dict:
         try:
-            query = state["messages"][-1].content
             return {
-                "messages": [AIMessage(content="Query ingested successfully")],
-                "context": {"raw_query": query},
-                "metadata": {"timestamp": datetime.now().isoformat()}
             }
         except Exception as e:
-            return self._error_state(f"Ingestion Error: {str(e)}")
 
-    def retrieve_documents(self, state: AgentState) -> Dict:
         try:
-            query = state["context"]["raw_query"]
-            docs = retriever.retrieve(query, "research")
             return {
                 "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
                 "context": {
                     "documents": docs,
                     "retrieval_time": time.time()
-                }
             }
         except Exception as e:
-            return self._error_state(f"Retrieval Error: {str(e)}")
 
-    def analyze_content(self, state: AgentState) -> Dict:
         try:
-            docs = "\n\n".join([d.page_content for d in state["context"]["documents"]])
-            prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
-            response = self.processor.process_query(prompt)
 
-            if "error" in response:
-                return self._error_state(response["error"])
 
             return {
-                "messages": [AIMessage(content=response['choices'][0]['message']['content'])],
-                "context": {"analysis": response}
             }
         except Exception as e:
-            return self._error_state(f"Analysis Error: {str(e)}")
 
-    def validate_output(self, state: AgentState) -> Dict:
-        analysis = state["messages"][-1].content
-        validation_prompt = f"""Validate research analysis:
-        {analysis}
-
-        Check for:
-        1. Technical accuracy
-        2. Citation support
-        3. Logical consistency
-        4. Methodological soundness
-
-        Respond with 'VALID' or 'INVALID'"""
-
-        response = self.processor.process_query(validation_prompt)
-        return {
-            "messages": [AIMessage(content=analysis + f"\n\nValidation: {response.get('choices', [{}])[0].get('message', {}).get('content', '')}")]
-        }
 
-    def refine_results(self, state: AgentState) -> Dict:
-        refinement_prompt = f"""Refine this analysis:
-        {state["messages"][-1].content}
-
-        Improve:
-        1. Technical precision
-        2. Empirical grounding
-        3. Theoretical coherence"""
-
-        response = self.processor.process_query(refinement_prompt)
-        return {
-            "messages": [AIMessage(content=response.get('choices', [{}])[0].get('message', {}).get('content', ''))],
-            "context": state["context"]
-        }
 
-    def _quality_check(self, state: AgentState) -> str:
-        content = state["messages"][-1].content
-        return "valid" if "VALID" in content else "invalid"
 
-    def _error_state(self, message: str) -> Dict:
         return {
-            "messages": [AIMessage(content=f"❌ {message}")],
-            "context": {"error": True},
-            "metadata": {"status": "error"}
         }
 
 # ------------------------------
-# Research Interface
 # ------------------------------
 class ResearchInterface:
     def __init__(self):
-        self.workflow = ResearchWorkflow()
-        self._initialize_interface()
 
-    def _initialize_interface(self):
         st.set_page_config(
-            page_title="NeuroResearch AI",
             layout="wide",
             initial_sidebar_state="expanded"
        )
-        self._inject_styles()
         self._build_sidebar()
-        self._build_main_interface()
 
-    def _inject_styles(self):
         st.markdown("""
         <style>
-        :root {
-            --primary: #2ecc71;
-            --secondary: #3498db;
-            --background: #0a0a0a;
-            --text: #ecf0f1;
-        }
-
         .stApp {
-            background: var(--background);
-            color: var(--text);
-            font-family: 'Roboto', sans-serif;
         }
-
         .stTextArea textarea {
-            background: #1a1a1a !important;
-            color: var(--text) !important;
-            border: 2px solid var(--secondary);
-            border-radius: 8px;
-            padding: 1rem;
         }
-
         .stButton>button {
-            background: linear-gradient(135deg, var(--primary), var(--secondary));
-            border: none;
-            border-radius: 8px;
-            padding: 1rem 2rem;
-            transition: all 0.3s;
-        }
-
-        .stButton>button:hover {
-            transform: translateY(-2px);
-            box-shadow: 0 4px 12px rgba(46, 204, 113, 0.3);
         }
-
-        .stExpander {
-            background: #1a1a1a;
-            border: 1px solid #2a2a2a;
-            border-radius: 8px;
             margin: 1rem 0;
         }
         </style>
@@ -379,74 +377,78 @@ class ResearchInterface:
 
     def _build_sidebar(self):
         with st.sidebar:
-            st.title("🔍 Research Database")
-            st.subheader("Technical Papers")
-            for title, short in ResearchConfig.DOCUMENT_MAP.items():
-                with st.expander(short):
-                    st.markdown(f"```\n{title}\n```")
-
-            st.subheader("Analysis Metrics")
-            st.metric("Vector Collections", 2)
-            st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
-
-    def _build_main_interface(self):
-        st.title("🧠 NeuroResearch AI")
-        query = st.text_area("Research Query:", height=200,
-                             placeholder="Enter technical research question...")
 
-        if st.button("Execute Analysis", type="primary"):
-            self._execute_analysis(query)
 
-    def _execute_analysis(self, query: str):
         try:
-            with st.spinner("Initializing Quantum Analysis..."):
-                results = self.workflow.app.stream(
-                    {"messages": [HumanMessage(content=query)], "context": {}, "metadata": {}}
-                )
 
-                for event in results:
-                    self._render_event(event)
 
-                st.success("✅ Analysis Completed Successfully")
         except Exception as e:
             st.error(f"""**Analysis Failed**
             {str(e)}
-            Potential issues:
-            - Complex query structure
-            - Document correlation failure
-            - Temporal processing constraints""")
-
-    def _render_event(self, event: Dict):
-        if 'ingest' in event:
-            with st.container():
-                st.success("✅ Query Ingested")
 
-        elif 'retrieve' in event:
-            with st.container():
-                docs = event['retrieve']['context']['documents']
-                st.info(f"📚 Retrieved {len(docs)} documents")
-                with st.expander("View Retrieved Documents", expanded=False):
-                    for i, doc in enumerate(docs, 1):
-                        st.markdown(f"**Document {i}**")
-                        st.code(doc.page_content, language='text')
-
-        elif 'analyze' in event:
-            with st.container():
-                content = event['analyze']['messages'][0].content
-                with st.expander("Technical Analysis Report", expanded=True):
-                    st.markdown(content)
-
-        elif 'validate' in event:
-            with st.container():
-                content = event['validate']['messages'][0].content
-                if "VALID" in content:
-                    st.success("✅ Validation Passed")
-                    with st.expander("View Validated Analysis", expanded=True):
-                        st.markdown(content.split("Validation:")[0])
                 else:
-                    st.warning("⚠️ Validation Issues Detected")
-                    with st.expander("View Validation Details", expanded=True):
-                        st.markdown(content)
 
 if __name__ == "__main__":
     ResearchInterface()
 
 # ------------------------------
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
+from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langgraph.graph import END, StateGraph
+from langgraph.graph.message import add_messages  # required by ResearchState below
 from typing_extensions import TypedDict, Annotated
 from typing import Sequence, Dict, List, Optional, Any
 import chromadb
 import os
 import streamlit as st
 import requests
 import hashlib
 import json
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
+from pydantic import BaseModel, ValidationError
+import traceback
 
 # ------------------------------
+# Configuration & Constants
 # ------------------------------
 class ResearchConfig:
     DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
 
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
+    ANALYSIS_TEMPLATE = """**Technical Analysis Request**
 {context}
 
 Respond with:
+1. Key Technical Innovations (markdown table)
+2. Methodological Breakdown (bullet points)
+3. Quantitative Results (LaTeX equations)
+4. Critical Evaluation
+5. Research Impact Assessment
+
+Include proper academic citations where applicable."""
 
 # ------------------------------
+# Document Schema & Content
 # ------------------------------
+DOCUMENT_CONTENT = {
+    "CV-Transformer Hybrid": {
+        "content": """## Hybrid Architecture for Computer Vision
+**Authors**: DeepVision Research Team
+**Abstract**: Novel combination of convolutional layers with transformer attention mechanisms.
+
+### Key Innovations:
+- Cross-attention feature fusion
+- Adaptive spatial pooling
+- Multi-scale gradient propagation
+
+$$\\mathcal{L}_{total} = \\alpha\\mathcal{L}_{CE} + \\beta\\mathcal{L}_{SSIM}$$""",
+        "metadata": {
+            "year": 2024,
+            "domain": "computer_vision",
+            "citations": 142
+        }
+    },
+    "Quantum ML Advances": {
+        "content": """## Quantum Machine Learning Breakthroughs
+**Authors**: Quantum AI Lab
+
+### Achievements:
+- Quantum-enhanced SGD (40% faster convergence)
+- 5-qubit QNN achieving 98% accuracy
+- Hybrid quantum-classical GANs
+
+$$\\mathcal{H} = -\\sum_{i<j} J_{ij}\\sigma_i^z\\sigma_j^z - \\Gamma\\sum_i\\sigma_i^x$$""",
+        "metadata": {
+            "year": 2023,
+            "domain": "quantum_ml",
+            "citations": 89
+        }
+    }
+}
+
+class DocumentSchema(BaseModel):
+    content: str
+    metadata: dict
+    doc_id: str
+
+# ------------------------------
+# State Management
+# ------------------------------
+class ResearchState(TypedDict):
+    messages: Annotated[List[BaseMessage], add_messages]
+    context: Annotated[Dict[str, Any], "research_context"]
+    metadata: Annotated[Dict[str, str], "system_metadata"]
+
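+# add_messages is LangGraph's message reducer: it appends each node's returned
+# messages to the accumulated message list instead of overwriting it, which is
+# why the workflow nodes below return single-message lists.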
+# ------------------------------
+# Document Processing
+# ------------------------------
+class DocumentManager:
     def __init__(self):
         self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
         self.embeddings = OpenAIEmbeddings(
             model="text-embedding-3-large",
             dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
         )
+
+    def initialize_collections(self):
+        try:
+            self.research_col = self._create_collection("research")
+            self.dev_col = self._create_collection("development")
+        except Exception as e:
+            st.error(f"Collection initialization failed: {str(e)}")
+            traceback.print_exc()
+
+    def _create_collection(self, name: str) -> Chroma:
+        documents, metadatas, ids = [], [], []
 
+        for title, data in DOCUMENT_CONTENT.items():
+            try:
+                doc = DocumentSchema(
+                    content=data["content"],
+                    metadata=data["metadata"],
+                    doc_id=hashlib.sha256(title.encode()).hexdigest()[:16]
+                )
+                documents.append(doc.content)
+                metadatas.append(doc.metadata)
+                ids.append(doc.doc_id)
+            except ValidationError as e:
+                st.error(f"Invalid document format: {title} - {str(e)}")
+                continue
+
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
             chunk_overlap=ResearchConfig.CHUNK_OVERLAP,
+            separators=["\n## ", "\n### ", "\n\n", "\n• "]
         )
+
+        try:
+            docs = splitter.create_documents(documents, metadatas=metadatas)
+            # Chroma needs one id per chunk, and the splitter usually emits more
+            # chunks than source documents, so derive per-chunk ids here instead
+            # of reusing the per-document ids collected above.
+            chunk_ids = [
+                f"{name}-{i}-{hashlib.sha256(d.page_content.encode()).hexdigest()[:16]}"
+                for i, d in enumerate(docs)
+            ]
+            return Chroma.from_documents(
+                docs,
+                self.embeddings,
+                client=self.client,
+                collection_name=name,
+                ids=chunk_ids
+            )
+        except Exception as e:
+            raise RuntimeError(f"Failed creating {name} collection: {str(e)}")
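+
+# Note: initialize_collections() indexes the same DOCUMENT_CONTENT corpus into
+# both the "research" and "development" collections; only the name differs.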
 
 # ------------------------------
+# Retrieval System
 # ------------------------------
 class ResearchRetriever:
     def __init__(self):
+        self.dm = DocumentManager()
+        self.dm.initialize_collections()
 
+    def retrieve(self, query: str, domain: str) -> List[DocumentSchema]:
         try:
+            collection = self.dm.research_col if domain == "research" else self.dm.dev_col
+            if not collection:
+                return []
+
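+            # MMR (maximal marginal relevance) re-ranks the fetch_k candidates
+            # and keeps k chunks that balance similarity to the query against
+            # diversity among the selected chunks.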
+            results = collection.as_retriever(
+                search_type="mmr",
+                search_kwargs={'k': 4, 'fetch_k': 20}
+            ).invoke(query)
+
+            return [DocumentSchema(
+                content=doc.page_content,
+                metadata=doc.metadata,
+                doc_id=doc.metadata.get("doc_id", "")
+            ) for doc in results if doc.page_content]
+
+        except Exception as e:
+            st.error(f"Retrieval failure: {str(e)}")
+            traceback.print_exc()
             return []
 
 # ------------------------------
+# Analysis Processor
 # ------------------------------
+class AnalysisEngine:
     def __init__(self):
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
+        self.session_hash = hashlib.sha256(str(time.time()).encode()).hexdigest()[:12]
+
+    def analyze(self, prompt: str) -> Dict:
+        futures = [self.executor.submit(self._api_request, prompt) for _ in range(3)]
+        return self._validate_results([f.result() for f in as_completed(futures)])
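+
+    # Redundancy scheme: three identical requests race in the thread pool and
+    # the longest successful completion wins (see _validate_results); length is
+    # a crude proxy for completeness, not correctness.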
+
+    def _api_request(self, prompt: str) -> Dict:
         headers = {
             "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
+            "X-Session-ID": self.session_hash,
+            "Content-Type": "application/json"
         }
 
         try:
                 headers=headers,
                 json={
                     "model": "deepseek-chat",
+                    "messages": [{"role": "user", "content": prompt}],
                     "temperature": 0.7,
+                    "max_tokens": 2000
                 },
+                timeout=30
             )
             response.raise_for_status()
             return response.json()
+        except Exception as e:
+            return {"error": str(e), "status_code": 500}
+
+    def _validate_results(self, results: List[Dict]) -> Dict:
         valid = [r for r in results if "error" not in r]
         if not valid:
+            return {"error": "All analysis attempts failed", "results": results}
+
+        best = max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
+        return best
 
 # ------------------------------
+# Workflow Implementation
 # ------------------------------
 class ResearchWorkflow:
     def __init__(self):
+        self.retriever = ResearchRetriever()
+        self.engine = AnalysisEngine()
+        self.workflow = StateGraph(ResearchState)
+        self._build_graph()
 
+    def _build_graph(self):
+        self.workflow.add_node("ingest", self._ingest)
+        self.workflow.add_node("retrieve", self._retrieve)
+        self.workflow.add_node("analyze", self._analyze)
+        self.workflow.add_node("validate", self._validate)
+        self.workflow.add_node("refine", self._refine)
 
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.workflow.add_edge("retrieve", "analyze")
         self.workflow.add_conditional_edges(
             "analyze",
+            self._quality_gate,
             {"valid": "validate", "invalid": "refine"}
         )
         self.workflow.add_edge("validate", END)
         self.workflow.add_edge("refine", "retrieve")
 
+    def _ingest(self, state: ResearchState) -> ResearchState:
         try:
+            query = next(msg.content for msg in reversed(state["messages"])
+                         if isinstance(msg, HumanMessage))
             return {
+                "messages": [AIMessage(content="Query ingested")],
+                "context": {
+                    "query": query,
+                    "documents": [],
+                    "errors": []
+                },
+                "metadata": {
+                    "session_id": hashlib.sha256(str(time.time()).encode()).hexdigest()[:8],
+                    "timestamp": datetime.now().isoformat()
+                }
             }
         except Exception as e:
+            return self._handle_error(f"Ingest failed: {str(e)}", state)
 
+    def _retrieve(self, state: ResearchState) -> ResearchState:
         try:
+            docs = self.retriever.retrieve(state["context"]["query"], "research")
             return {
                 "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
                 "context": {
+                    **state["context"],
                     "documents": docs,
                     "retrieval_time": time.time()
+                },
+                "metadata": state["metadata"]
             }
         except Exception as e:
+            return self._handle_error(f"Retrieval error: {str(e)}", state)
 
+    def _analyze(self, state: ResearchState) -> ResearchState:
+        docs = state["context"].get("documents", [])
+        if not docs:
+            return self._handle_error("No documents for analysis", state)
+
         try:
+            context = "\n\n".join([d.content for d in docs])
+            prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=context)
+            result = self.engine.analyze(prompt)
+
+            if "error" in result:
+                raise RuntimeError(result["error"])
+
+            content = result['choices'][0]['message']['content']
 
+            if len(content) < 200 or not any(c.isalpha() for c in content):
+                raise ValueError("Insufficient analysis content")
 
             return {
+                "messages": [AIMessage(content=content)],
+                "context": state["context"],
+                "metadata": state["metadata"]
             }
         except Exception as e:
+            return self._handle_error(f"Analysis failed: {str(e)}", state)
 
+    def _validate(self, state: ResearchState) -> ResearchState:
+        # Implementation of validation logic
+        return state
 
+    def _refine(self, state: ResearchState) -> ResearchState:
+        # Implementation of refinement logic
+        return state
 
+    def _quality_gate(self, state: ResearchState) -> str:
+        content = state["messages"][-1].content if state["messages"] else ""
+        required = ["Innovations", "Results", "Evaluation"]
+        return "valid" if all(kw in content for kw in required) else "invalid"
 
+    def _handle_error(self, message: str, state: ResearchState) -> ResearchState:
         return {
+            "messages": [AIMessage(content=f"🚨 Error: {message}")],
+            "context": {
+                **state["context"],
+                "errors": state["context"].get("errors", []) + [message]
+            },
+            "metadata": state["metadata"]
         }
 
 # ------------------------------
+# User Interface
 # ------------------------------
 class ResearchInterface:
     def __init__(self):
+        self.workflow = ResearchWorkflow().workflow.compile()
+        self._setup_interface()
 
+    def _setup_interface(self):
         st.set_page_config(
+            page_title="Research Assistant",
             layout="wide",
             initial_sidebar_state="expanded"
         )
+        self._apply_styles()
         self._build_sidebar()
+        self._build_main()
 
+    def _apply_styles(self):
         st.markdown("""
         <style>
         .stApp {
+            background: #0a192f;
+            color: #64ffda;
         }
         .stTextArea textarea {
+            background: #172a45 !important;
+            color: #a8b2d1 !important;
         }
         .stButton>button {
+            background: #233554;
+            border: 1px solid #64ffda;
         }
+        .error-box {
+            border: 1px solid #ff4444;
+            border-radius: 5px;
+            padding: 1rem;
             margin: 1rem 0;
         }
         </style>
 
 
     def _build_sidebar(self):
         with st.sidebar:
+            st.title("🔍 Document Database")
+            for title, data in DOCUMENT_CONTENT.items():
+                with st.expander(title[:25]+"..."):
+                    st.markdown(f"```\n{data['content'][:300]}...\n```")
+
+    def _build_main(self):
+        st.title("🧠 Research Analysis System")
+        query = st.text_area("Enter your research query:", height=150)
 
+        if st.button("Start Analysis", type="primary"):
+            self._run_analysis(query)
 
+    def _run_analysis(self, query: str):
         try:
+            with st.spinner("🔍 Analyzing documents..."):
+                state = {
+                    "messages": [HumanMessage(content=query)],
+                    "context": {
+                        "query": "",
+                        "documents": [],
+                        "errors": []
+                    },
+                    "metadata": {}
+                }
 
+                # Stream the graph once and keep the last node update as the
+                # final state, rather than invoking the workflow a second time
+                # (which would repeat every retrieval and API call).
+                final_state = state
+                for event in self.workflow.stream(state):
+                    self._display_progress(event)
+                    final_state = next(iter(event.values()))
+
+                self._show_results(final_state)
 
         except Exception as e:
             st.error(f"""**Analysis Failed**
             {str(e)}
+            Common solutions:
+            - Simplify your query
+            - Check document database status
+            - Verify API connectivity""")
+
419
+ def _display_progress(self, event):
420
+ current_state = next(iter(event.values()))
421
+ with st.container():
422
+ st.markdown("---")
423
+ cols = st.columns([1,2,1])
424
+
425
+ with cols[0]:
426
+ st.subheader("Processing Stage")
427
+ stage = list(event.keys())[0].title()
428
+ st.code(stage)
429
 
430
+ with cols[1]:
431
+ st.subheader("Documents")
432
+ docs = current_state["context"].get("documents", [])
433
+ st.metric("Retrieved", len(docs))
434
+
435
+ with cols[2]:
436
+ st.subheader("Status")
437
+ if current_state["context"].get("errors"):
438
+ st.error("Errors detected")
 
 
 
 
 
 
 
 
 
 
 
 
 
439
  else:
440
+ st.success("Normal operation")
+
+    def _show_results(self, state: ResearchState):
+        if state["context"].get("errors"):
+            st.error("Analysis completed with errors")
+            with st.expander("Error Details"):
+                for error in state["context"]["errors"]:
+                    st.markdown(f"- {error}")
+        else:
+            st.success("Analysis completed successfully ✅")
+            with st.expander("Full Report"):
+                st.markdown(state["messages"][-1].content)
 
 if __name__ == "__main__":
     ResearchInterface()