mgbam committed on
Commit
3cf95b0
·
verified ·
1 Parent(s): bfe5a86

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +393 -495
app.py CHANGED
@@ -9,546 +9,444 @@ from langgraph.graph import END, StateGraph
9
  from langgraph.prebuilt import ToolNode
10
  from langgraph.graph.message import add_messages
11
  from typing_extensions import TypedDict, Annotated
12
- from typing import Sequence
13
  import chromadb
14
  import re
15
  import os
16
  import streamlit as st
17
  import requests
 
 
 
18
  from langchain.tools.retriever import create_retriever_tool
 
 
 
 
 
 
 
 
 
 
19
 
20
  # ------------------------------
21
  # Configuration
22
  # ------------------------------
23
- # Get DeepSeek API key from Hugging Face Space secrets
24
- DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- if not DEEPSEEK_API_KEY:
27
- st.error("""
28
- **Missing API Configuration**
29
- Please configure your DeepSeek API key in Hugging Face Space secrets:
30
- 1. Go to your Space's Settings
31
- 2. Click on 'Repository secrets'
32
- 3. Add a secret named DEEPSEEK_API_KEY
33
- """)
34
- st.stop()
35
 
36
- # Create directory for Chroma persistence
37
- os.makedirs("chroma_db", exist_ok=True)
38
 
39
- # ------------------------------
40
- # ChromaDB Client Configuration
41
- # ------------------------------
42
- chroma_client = chromadb.PersistentClient(path="chroma_db")
 
 
 
43
 
44
  # ------------------------------
45
- # Dummy Data: Research & Development Texts
46
  # ------------------------------
47
- research_texts = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
49
  "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
50
  "Latest Trends in Machine Learning Methods Using Quantum Computing"
51
- ]
52
 
53
- development_texts = [
54
  "Project A: UI Design Completed, API Integration in Progress",
55
  "Project B: Testing New Feature X, Bug Fixes Needed",
56
  "Product Y: In the Performance Optimization Stage Before Release"
57
- ]
58
-
59
- # ------------------------------
60
- # Text Splitting & Document Creation
61
- # ------------------------------
62
- splitter = RecursiveCharacterTextSplitter(
63
- chunk_size=300,
64
- chunk_overlap=30,
65
- separators=["\n\n", "\n", ". ", "! ", "? ", " "]
66
- )
67
-
68
- research_docs = splitter.create_documents(research_texts)
69
- development_docs = splitter.create_documents(development_texts)
70
-
71
- # ------------------------------
72
- # Creating Vector Stores with Embeddings
73
- # ------------------------------
74
- embeddings = OpenAIEmbeddings(
75
- model="text-embedding-3-large",
76
- # dimensions=1024 # Uncomment if needed
77
- )
78
-
79
- research_vectorstore = Chroma.from_documents(
80
- documents=research_docs,
81
- embedding=embeddings,
82
- client=chroma_client,
83
- collection_name="research_collection"
84
- )
85
-
86
- development_vectorstore = Chroma.from_documents(
87
- documents=development_docs,
88
- embedding=embeddings,
89
- client=chroma_client,
90
- collection_name="development_collection"
91
- )
92
 
93
  # ------------------------------
94
- # Creating Retriever Tools with MMR
95
  # ------------------------------
96
- research_retriever = research_vectorstore.as_retriever(
97
- search_type="mmr",
98
- search_kwargs={
99
- 'k': 3,
100
- 'fetch_k': 10,
101
- 'lambda_mult': 0.7
102
- }
103
- )
104
-
105
- development_retriever = development_vectorstore.as_retriever(
106
- search_type="mmr",
107
- search_kwargs={
108
- 'k': 3,
109
- 'fetch_k': 10,
110
- 'lambda_mult': 0.7
111
- }
112
- )
113
-
114
- research_tool = create_retriever_tool(
115
- research_retriever,
116
- "research_db_tool",
117
- "Search information from the research database."
118
- )
119
-
120
- development_tool = create_retriever_tool(
121
- development_retriever,
122
- "development_db_tool",
123
- "Search information from the development database."
124
- )
125
 
126
- tools = [research_tool, development_tool]
127
 
128
  # ------------------------------
129
- # Agent Function & Workflow Functions
130
  # ------------------------------
131
- class AgentState(TypedDict):
132
- messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
133
-
134
- def agent(state: AgentState):
135
- print("---CALL AGENT---")
136
- messages = state["messages"]
137
-
138
- if isinstance(messages[0], tuple):
139
- user_message = messages[0][1]
140
- else:
141
- user_message = messages[0].content
142
-
143
- prompt = f"""Given this user question: "{user_message}"
144
- If it's about research or academic topics, respond EXACTLY in this format:
145
- SEARCH_RESEARCH: <search terms>
146
-
147
- If it's about development status, respond EXACTLY in this format:
148
- SEARCH_DEV: <search terms>
149
-
150
- Otherwise, just answer directly.
151
- """
152
-
153
- headers = {
154
- "Accept": "application/json",
155
- "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
156
- "Content-Type": "application/json"
157
- }
158
-
159
- data = {
160
- "model": "deepseek-chat",
161
- "messages": [{"role": "user", "content": prompt}],
162
- "temperature": 0.7,
163
- "max_tokens": 1024
164
- }
165
 
166
- try:
167
- response = requests.post(
168
- "https://api.deepseek.com/v1/chat/completions",
169
- headers=headers,
170
- json=data,
171
- verify=False,
172
- timeout=30
173
- )
174
- response.raise_for_status()
175
 
176
- response_text = response.json()['choices'][0]['message']['content']
177
- print("Raw response:", response_text)
 
 
 
 
178
 
179
- if "SEARCH_RESEARCH:" in response_text:
180
- query = response_text.split("SEARCH_RESEARCH:")[1].strip()
181
- results = research_retriever.invoke(query)
182
- return {"messages": [AIMessage(content=f'Action: research_db_tool\n{{"query": "{query}"}}\n\nResults: {str(results)}')]}
183
-
184
- elif "SEARCH_DEV:" in response_text:
185
- query = response_text.split("SEARCH_DEV:")[1].strip()
186
- results = development_retriever.invoke(query)
187
- return {"messages": [AIMessage(content=f'Action: development_db_tool\n{{"query": "{query}"}}\n\nResults: {str(results)}')]}
188
-
189
- else:
190
- return {"messages": [AIMessage(content=response_text)]}
191
-
192
- except Exception as e:
193
- error_msg = f"API Error: {str(e)}"
194
- if "Insufficient Balance" in str(e):
195
- error_msg += "\n\nPlease check your DeepSeek API account balance."
196
- return {"messages": [AIMessage(content=error_msg)]}
197
-
198
- def simple_grade_documents(state: AgentState):
199
- messages = state["messages"]
200
- last_message = messages[-1]
201
- print("Evaluating message:", last_message.content)
202
-
203
- if "Results: [Document" in last_message.content:
204
- print("---DOCS FOUND, GO TO GENERATE---")
205
- return "generate"
206
- else:
207
- print("---NO DOCS FOUND, TRY REWRITE---")
208
- return "rewrite"
209
-
210
- def generate(state: AgentState):
211
- print("---GENERATE FINAL ANSWER---")
212
- messages = state["messages"]
213
- question = messages[0].content if isinstance(messages[0], tuple) else messages[0].content
214
- last_message = messages[-1]
215
-
216
- docs = ""
217
- if "Results: [" in last_message.content:
218
- results_start = last_message.content.find("Results: [")
219
- docs = last_message.content[results_start:]
220
- print("Documents found:", docs)
221
-
222
- headers = {
223
- "Accept": "application/json",
224
- "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
225
- "Content-Type": "application/json"
226
- }
227
-
228
- prompt = f"""Analyze these research documents and provide structured insights:
229
- Question: {question}
230
- Documents: {docs}
231
-
232
- Format your response with:
233
- 1. Key Findings section with bullet points
234
- 2. Technical Innovations section
235
- 3. Potential Applications
236
- 4. References to source documents (Doc1, Doc2, etc.)
237
-
238
- Focus on:
239
- - Distilling unique insights
240
- - Connecting different research aspects
241
- - Highlighting practical implications
242
- """
243
-
244
- data = {
245
- "model": "deepseek-chat",
246
- "messages": [{
247
- "role": "user",
248
- "content": prompt
249
- }],
250
- "temperature": 0.7,
251
- "max_tokens": 1024
252
- }
253
 
254
- try:
255
- print("Sending generate request to API...")
256
- response = requests.post(
257
- "https://api.deepseek.com/v1/chat/completions",
258
- headers=headers,
259
- json=data,
260
- verify=False,
261
- timeout=30
262
- )
263
- response.raise_for_status()
264
 
265
- response_text = response.json()['choices'][0]['message']['content']
266
- print("Final Answer:", response_text)
267
- return {"messages": [AIMessage(content=response_text)]}
268
- except Exception as e:
269
- error_msg = f"Generation Error: {str(e)}"
270
- return {"messages": [AIMessage(content=error_msg)]}
271
-
272
- def rewrite(state: AgentState):
273
- print("---REWRITE QUESTION---")
274
- messages = state["messages"]
275
- original_question = messages[0].content if len(messages) > 0 else "N/A"
276
-
277
- headers = {
278
- "Accept": "application/json",
279
- "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
280
- "Content-Type": "application/json"
281
- }
 
 
 
282
 
283
- data = {
284
- "model": "deepseek-chat",
285
- "messages": [{
286
- "role": "user",
287
- "content": f"Rewrite this question to be more specific and clearer: {original_question}"
288
- }],
289
- "temperature": 0.7,
290
- "max_tokens": 1024
291
- }
292
-
293
- try:
294
- print("Sending rewrite request...")
295
- response = requests.post(
296
- "https://api.deepseek.com/v1/chat/completions",
297
- headers=headers,
298
- json=data,
299
- verify=False,
300
- timeout=30
301
- )
302
- response.raise_for_status()
303
-
304
- response_text = response.json()['choices'][0]['message']['content']
305
- print("Rewritten question:", response_text)
306
- return {"messages": [AIMessage(content=response_text)]}
307
- except Exception as e:
308
- error_msg = f"Rewrite Error: {str(e)}"
309
- return {"messages": [AIMessage(content=error_msg)]}
310
-
311
- tools_pattern = re.compile(r"Action: .*")
312
-
313
- def custom_tools_condition(state: AgentState):
314
- messages = state["messages"]
315
- last_message = messages[-1]
316
- content = last_message.content
317
-
318
- print("Checking tools condition:", content)
319
- if tools_pattern.match(content):
320
- print("Moving to retrieve...")
321
- return "tools"
322
- print("Moving to END...")
323
- return END
324
-
325
- # ------------------------------
326
- # Workflow Configuration using LangGraph
327
- # ------------------------------
328
- workflow = StateGraph(AgentState)
329
-
330
- # Add nodes
331
- workflow.add_node("agent", agent)
332
- retrieve_node = ToolNode(tools)
333
- workflow.add_node("retrieve", retrieve_node)
334
- workflow.add_node("rewrite", rewrite)
335
- workflow.add_node("generate", generate)
336
-
337
- # Set entry point
338
- workflow.set_entry_point("agent")
339
-
340
- # Define transitions
341
- workflow.add_conditional_edges(
342
- "agent",
343
- custom_tools_condition,
344
- {
345
- "tools": "retrieve",
346
- END: END
347
- }
348
- )
349
-
350
- workflow.add_conditional_edges(
351
- "retrieve",
352
- simple_grade_documents,
353
- {
354
- "generate": "generate",
355
- "rewrite": "rewrite"
356
- }
357
- )
358
-
359
- workflow.add_edge("generate", END)
360
- workflow.add_edge("rewrite", "agent")
361
-
362
- # Compile the workflow
363
- app = workflow.compile()
364
 
365
  # ------------------------------
366
- # Processing Function
367
  # ------------------------------
368
- def process_question(user_question, app, config):
369
- """Process user question through the workflow"""
370
- events = []
371
- for event in app.stream({"messages": [("user", user_question)]}, config):
372
- events.append(event)
373
- return events
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
  # ------------------------------
376
- # Streamlit App UI (Enhanced Dark Theme)
377
  # ------------------------------
378
- def main():
379
- st.set_page_config(
380
- page_title="AI Research & Development Assistant",
381
- layout="wide",
382
- initial_sidebar_state="expanded"
383
- )
384
-
385
- st.markdown("""
386
- <style>
387
- .stApp {
388
- background-color: #1a1a1a;
389
- color: #ffffff;
390
- }
391
-
392
- .stTextArea textarea {
393
- background-color: #2d2d2d !important;
394
- color: #ffffff !important;
395
- border: 1px solid #3d3d3d;
396
- }
397
-
398
- .stButton > button {
399
- background-color: #4CAF50;
400
- color: white;
401
- border: none;
402
- padding: 12px 28px;
403
- border-radius: 6px;
404
- transition: all 0.3s;
405
- font-weight: 500;
406
- }
407
-
408
- .stButton > button:hover {
409
- background-color: #45a049;
410
- transform: scale(1.02);
411
- box-shadow: 0 2px 8px rgba(0,0,0,0.2);
412
- }
413
-
414
- .data-box {
415
- padding: 18px;
416
- margin: 12px 0;
417
- border-radius: 8px;
418
- background-color: #2d2d2d;
419
- border-left: 4px solid;
420
- }
421
-
422
- .research-box {
423
- border-color: #2196F3;
424
- }
425
-
426
- .dev-box {
427
- border-color: #4CAF50;
428
- }
429
-
430
- .st-expander {
431
- background-color: #2d2d2d;
432
- border: 1px solid #3d3d3d;
433
- border-radius: 6px;
434
- margin: 16px 0;
435
- }
436
-
437
- .st-expander .streamlit-expanderHeader {
438
- color: #ffffff !important;
439
- font-weight: 500;
440
- }
441
-
442
- .stAlert {
443
- background-color: #2d2d2d !important;
444
- border: 1px solid #3d3d3d;
445
- }
446
-
447
- h1, h2, h3 {
448
- color: #ffffff !important;
449
- border-bottom: 2px solid #3d3d3d;
450
- padding-bottom: 8px;
451
- }
452
-
453
- .stMarkdown {
454
- color: #e0e0e0;
455
- line-height: 1.6;
456
- }
457
- </style>
458
- """, unsafe_allow_html=True)
459
 
460
- with st.sidebar:
461
- st.header("📚 Available Data")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
 
463
- st.subheader("Research Database")
464
- for text in research_texts:
465
- st.markdown(f'<div class="data-box research-box">{text}</div>', unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
466
 
467
- st.subheader("Development Database")
468
- for text in development_texts:
469
- st.markdown(f'<div class="data-box dev-box">{text}</div>', unsafe_allow_html=True)
470
-
471
- st.title("🤖 AI Research & Development Assistant")
472
- st.markdown("---")
473
-
474
- query = st.text_area("Enter your question:", height=100, placeholder="e.g., What is the latest advancement in AI research?")
475
-
476
- col1, col2 = st.columns([1, 2])
477
- with col1:
478
- if st.button("🔍 Get Answer", use_container_width=True):
479
- if query:
480
- try:
481
- with st.spinner('Processing your question...'):
482
- events = process_question(query, app, {"configurable": {"thread_id": "1"}})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
483
 
484
- for event in events:
485
- if 'agent' in event:
486
- with st.expander("🔄 Processing Step", expanded=True):
487
- content = event['agent']['messages'][0].content
488
- if "Error" in content:
489
- st.error(content)
490
- elif "Results:" in content:
491
- st.markdown("### 📑 Retrieved Documents")
492
- docs = content.split("Results:")[1].strip()
493
-
494
- # Process and deduplicate documents
495
- unique_docs = list({
496
- doc.split('page_content=')[1].split(')')[0].strip("'")
497
- for doc in docs.split("Document(")[1:]
498
- })
499
-
500
- for i, doc in enumerate(unique_docs, 1):
501
- st.markdown(f"""
502
- **Document {i}**
503
- {doc}
504
- """)
505
- elif 'generate' in event:
506
- content = event['generate']['messages'][0].content
507
- if "Error" in content:
508
- st.error(content)
509
- else:
510
- st.markdown("### ✨ Final Answer")
511
- st.markdown(f"""
512
- <div style='
513
- background-color: #2d2d2d;
514
- padding: 20px;
515
- border-radius: 8px;
516
- margin-top: 16px;
517
- '>
518
- {content}
519
- </div>
520
- """, unsafe_allow_html=True)
521
- except Exception as e:
522
- st.error(f"""
523
- **Processing Error**
524
- {str(e)}
525
- Please check:
526
- - API key configuration
527
- - Account balance
528
- - Network connection
529
- """)
530
- else:
531
- st.warning("⚠️ Please enter a question first!")
532
-
533
- with col2:
534
- st.markdown("""
535
- ### 🎯 How to Use
536
- 1. **Enter** your question in the text box
537
- 2. **Click** the search button
538
- 3. **Review** processing steps
539
- 4. **Analyze** final structured answer
540
-
541
- ### 💡 Example Questions
542
- - What's new in quantum machine learning?
543
- - How is Project Y progressing?
544
- - Recent breakthroughs in AI image recognition?
545
-
546
- ### πŸ” Search Features
547
- - Automatic query optimization
548
- - Technical document analysis
549
- - Cross-project insights
550
- - Source-aware reporting
551
- """)
552
 
553
  if __name__ == "__main__":
554
- main()
 
9
  from langgraph.prebuilt import ToolNode
10
  from langgraph.graph.message import add_messages
11
  from typing_extensions import TypedDict, Annotated
12
+ from typing import Sequence, Dict, List, Optional, Any
13
  import chromadb
14
  import re
15
  import os
16
  import streamlit as st
17
  import requests
18
+ import hashlib
19
+ import json
20
+ import time
21
  from langchain.tools.retriever import create_retriever_tool
22
+ from concurrent.futures import ThreadPoolExecutor, as_completed
23
+ from datetime import datetime
24
+
25
+ # ------------------------------
26
+ # State Schema Definition
27
+ # ------------------------------
28
+ class AgentState(TypedDict):
29
+ messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
30
+ context: Dict[str, Any]
31
+ metadata: Dict[str, Any]
32
 
33
  # ------------------------------
34
  # Configuration
35
  # ------------------------------
36
+ class ResearchConfig:
37
+ DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
38
+ CHROMA_PATH = "chroma_db"
39
+ CHUNK_SIZE = 512
40
+ CHUNK_OVERLAP = 64
41
+ MAX_CONCURRENT_REQUESTS = 5
42
+ EMBEDDING_DIMENSIONS = 1536
43
+ DOCUMENT_MAP = {
44
+ "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
45
+ "CV-Transformer Hybrid Architecture",
46
+ "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing":
47
+ "Transformer Architecture Analysis",
48
+ "Latest Trends in Machine Learning Methods Using Quantum Computing":
49
+ "Quantum ML Frontiers"
50
+ }
51
+ ANALYSIS_TEMPLATE = """Analyze these technical documents with scientific rigor:
52
+ {context}
53
 
54
+ Respond with:
55
+ 1. Key Technical Contributions (bullet points)
56
+ 2. Novel Methodologies
57
+ 3. Empirical Results (with metrics)
58
+ 4. Potential Applications
59
+ 5. Limitations & Future Directions
 
 
 
60
 
61
+ Format: Markdown with LaTeX mathematical notation where applicable
62
+ """
63
 
64
+ # Validation
65
+ if not ResearchConfig.DEEPSEEK_API_KEY:
66
+ st.error("""**Research Portal Configuration Required**
67
+ 1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
68
+ 2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
69
+ 3. Rebuild deployment""")
70
+ st.stop()
71
 
72
  # ------------------------------
73
+ # Quantum Document Processing
74
  # ------------------------------
75
+ class QuantumDocumentManager:
76
+ def __init__(self):
77
+ self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
78
+ self.embeddings = OpenAIEmbeddings(
79
+ model="text-embedding-3-large",
80
+ dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
81
+ )
82
+
83
+ def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
84
+ splitter = RecursiveCharacterTextSplitter(
85
+ chunk_size=ResearchConfig.CHUNK_SIZE,
86
+ chunk_overlap=ResearchConfig.CHUNK_OVERLAP,
87
+ separators=["\n\n", "\n", "|||"]
88
+ )
89
+ docs = splitter.create_documents(documents)
90
+ return Chroma.from_documents(
91
+ documents=docs,
92
+ embedding=self.embeddings,
93
+ client=self.client,
94
+ collection_name=collection_name,
95
+ ids=[self._document_id(doc.page_content) for doc in docs]
96
+ )
97
+
98
+ def _document_id(self, content: str) -> str:
99
+ return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
100
+
101
+ # Initialize document collections
102
+ qdm = QuantumDocumentManager()
103
+ research_docs = qdm.create_collection([
104
  "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
105
  "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
106
  "Latest Trends in Machine Learning Methods Using Quantum Computing"
107
+ ], "research")
108
 
109
+ development_docs = qdm.create_collection([
110
  "Project A: UI Design Completed, API Integration in Progress",
111
  "Project B: Testing New Feature X, Bug Fixes Needed",
112
  "Product Y: In the Performance Optimization Stage Before Release"
113
+ ], "development")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
  # ------------------------------
116
+ # Advanced Retrieval System
117
  # ------------------------------
118
+ class ResearchRetriever:
119
+ def __init__(self):
120
+ self.retrievers = {
121
+ "research": research_docs.as_retriever(
122
+ search_type="mmr",
123
+ search_kwargs={
124
+ 'k': 4,
125
+ 'fetch_k': 20,
126
+ 'lambda_mult': 0.85
127
+ }
128
+ ),
129
+ "development": development_docs.as_retriever(
130
+ search_type="similarity",
131
+ search_kwargs={'k': 3}
132
+ )
133
+ }
134
+
135
+ def retrieve(self, query: str, domain: str) -> List[Any]:
136
+ try:
137
+ return self.retrievers[domain].invoke(query)
138
+ except KeyError:
139
+ return []
 
 
 
 
 
 
 
140
 
141
+ retriever = ResearchRetriever()
142
 
143
  # ------------------------------
144
+ # Cognitive Processing Unit
145
  # ------------------------------
146
+ class CognitiveProcessor:
147
+ def __init__(self):
148
+ self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
149
+ self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
+ def process_query(self, prompt: str) -> Dict:
152
+ futures = []
153
+ for _ in range(3): # Triple redundancy
154
+ futures.append(self.executor.submit(
155
+ self._execute_api_request,
156
+ prompt
157
+ ))
 
 
158
 
159
+ results = []
160
+ for future in as_completed(futures):
161
+ try:
162
+ results.append(future.result())
163
+ except Exception as e:
164
+ st.error(f"Processing Error: {str(e)}")
165
 
166
+ return self._consensus_check(results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
+ def _execute_api_request(self, prompt: str) -> Dict:
169
+ headers = {
170
+ "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
171
+ "Content-Type": "application/json",
172
+ "X-Research-Session": self.session_id
173
+ }
 
 
 
 
174
 
175
+ try:
176
+ response = requests.post(
177
+ "https://api.deepseek.com/v1/chat/completions",
178
+ headers=headers,
179
+ json={
180
+ "model": "deepseek-chat",
181
+ "messages": [{
182
+ "role": "user",
183
+ "content": f"Respond as Senior AI Researcher:\n{prompt}"
184
+ }],
185
+ "temperature": 0.7,
186
+ "max_tokens": 1500,
187
+ "top_p": 0.9
188
+ },
189
+ timeout=45
190
+ )
191
+ response.raise_for_status()
192
+ return response.json()
193
+ except requests.exceptions.RequestException as e:
194
+ return {"error": str(e)}
195
 
196
+ def _consensus_check(self, results: List[Dict]) -> Dict:
197
+ valid = [r for r in results if "error" not in r]
198
+ if not valid:
199
+ return {"error": "All API requests failed"}
200
+ return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
  # ------------------------------
203
+ # Research Workflow Engine
204
  # ------------------------------
205
+ class ResearchWorkflow:
206
+ def __init__(self):
207
+ self.processor = CognitiveProcessor()
208
+ self.workflow = StateGraph(AgentState)
209
+ self._build_workflow()
210
+
211
+ def _build_workflow(self):
212
+ self.workflow.add_node("ingest", self.ingest_query)
213
+ self.workflow.add_node("retrieve", self.retrieve_documents)
214
+ self.workflow.add_node("analyze", self.analyze_content)
215
+ self.workflow.add_node("validate", self.validate_output)
216
+ self.workflow.add_node("refine", self.refine_results)
217
+
218
+ self.workflow.set_entry_point("ingest")
219
+ self.workflow.add_edge("ingest", "retrieve")
220
+ self.workflow.add_edge("retrieve", "analyze")
221
+ self.workflow.add_conditional_edges(
222
+ "analyze",
223
+ self._quality_check,
224
+ {"valid": "validate", "invalid": "refine"}
225
+ )
226
+ self.workflow.add_edge("validate", END)
227
+ self.workflow.add_edge("refine", "retrieve")
228
+
229
+ self.app = self.workflow.compile()
230
+
231
+ def ingest_query(self, state: AgentState) -> Dict:
232
+ try:
233
+ query = state["messages"][-1].content
234
+ return {
235
+ "messages": [AIMessage(content="Query ingested successfully")],
236
+ "context": {"raw_query": query},
237
+ "metadata": {"timestamp": datetime.now().isoformat()}
238
+ }
239
+ except Exception as e:
240
+ return self._error_state(f"Ingestion Error: {str(e)}")
241
+
242
+ def retrieve_documents(self, state: AgentState) -> Dict:
243
+ try:
244
+ query = state["context"]["raw_query"]
245
+ docs = retriever.retrieve(query, "research")
246
+ return {
247
+ "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
248
+ "context": {
249
+ "documents": docs,
250
+ "retrieval_time": time.time()
251
+ }
252
+ }
253
+ except Exception as e:
254
+ return self._error_state(f"Retrieval Error: {str(e)}")
255
+
256
+ def analyze_content(self, state: AgentState) -> Dict:
257
+ try:
258
+ docs = "\n\n".join([d.page_content for d in state["context"]["documents"]])
259
+ prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
260
+ response = self.processor.process_query(prompt)
261
+
262
+ if "error" in response:
263
+ return self._error_state(response["error"])
264
+
265
+ return {
266
+ "messages": [AIMessage(content=response['choices'][0]['message']['content'])],
267
+ "context": {"analysis": response}
268
+ }
269
+ except Exception as e:
270
+ return self._error_state(f"Analysis Error: {str(e)}")
271
+
272
+ def validate_output(self, state: AgentState) -> Dict:
273
+ analysis = state["messages"][-1].content
274
+ validation_prompt = f"""Validate research analysis:
275
+ {analysis}
276
+
277
+ Check for:
278
+ 1. Technical accuracy
279
+ 2. Citation support
280
+ 3. Logical consistency
281
+ 4. Methodological soundness
282
+
283
+ Respond with 'VALID' or 'INVALID'"""
284
+
285
+ response = self.processor.process_query(validation_prompt)
286
+ return {
287
+ "messages": [AIMessage(content=analysis + f"\n\nValidation: {response.get('choices', [{}])[0].get('message', {}).get('content', '')}")]
288
+ }
289
+
290
+ def refine_results(self, state: AgentState) -> Dict:
291
+ refinement_prompt = f"""Refine this analysis:
292
+ {state["messages"][-1].content}
293
+
294
+ Improve:
295
+ 1. Technical precision
296
+ 2. Empirical grounding
297
+ 3. Theoretical coherence"""
298
+
299
+ response = self.processor.process_query(refinement_prompt)
300
+ return {
301
+ "messages": [AIMessage(content=response.get('choices', [{}])[0].get('message', {}).get('content', ''))],
302
+ "context": state["context"]
303
+ }
304
+
305
+ def _quality_check(self, state: AgentState) -> str:
306
+ content = state["messages"][-1].content
307
+ return "valid" if "VALID" in content else "invalid"
308
+
309
+ def _error_state(self, message: str) -> Dict:
310
+ return {
311
+ "messages": [AIMessage(content=f"❌ {message}")],
312
+ "context": {"error": True},
313
+ "metadata": {"status": "error"}
314
+ }
315
 
316
  # ------------------------------
317
+ # Research Interface
318
  # ------------------------------
319
+ class ResearchInterface:
320
+ def __init__(self):
321
+ self.workflow = ResearchWorkflow()
322
+ self._initialize_interface()
323
+
324
+ def _initialize_interface(self):
325
+ st.set_page_config(
326
+ page_title="NeuroResearch AI",
327
+ layout="wide",
328
+ initial_sidebar_state="expanded"
329
+ )
330
+ self._inject_styles()
331
+ self._build_sidebar()
332
+ self._build_main_interface()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
 
334
+ def _inject_styles(self):
335
+ st.markdown("""
336
+ <style>
337
+ :root {
338
+ --primary: #2ecc71;
339
+ --secondary: #3498db;
340
+ --background: #0a0a0a;
341
+ --text: #ecf0f1;
342
+ }
343
+
344
+ .stApp {
345
+ background: var(--background);
346
+ color: var(--text);
347
+ font-family: 'Roboto', sans-serif;
348
+ }
349
+
350
+ .stTextArea textarea {
351
+ background: #1a1a1a !important;
352
+ color: var(--text) !important;
353
+ border: 2px solid var(--secondary);
354
+ border-radius: 8px;
355
+ padding: 1rem;
356
+ }
357
+
358
+ .stButton>button {
359
+ background: linear-gradient(135deg, var(--primary), var(--secondary));
360
+ border: none;
361
+ border-radius: 8px;
362
+ padding: 1rem 2rem;
363
+ transition: all 0.3s;
364
+ }
365
+
366
+ .stButton>button:hover {
367
+ transform: translateY(-2px);
368
+ box-shadow: 0 4px 12px rgba(46, 204, 113, 0.3);
369
+ }
370
 
371
+ .stExpander {
372
+ background: #1a1a1a;
373
+ border: 1px solid #2a2a2a;
374
+ border-radius: 8px;
375
+ margin: 1rem 0;
376
+ }
377
+ </style>
378
+ """, unsafe_allow_html=True)
379
+
380
+ def _build_sidebar(self):
381
+ with st.sidebar:
382
+ st.title("🔍 Research Database")
383
+ st.subheader("Technical Papers")
384
+ for title, short in ResearchConfig.DOCUMENT_MAP.items():
385
+ with st.expander(short):
386
+ st.markdown(f"```\n{title}\n```")
387
 
388
+ st.subheader("Analysis Metrics")
389
+ st.metric("Vector Collections", 2)
390
+ st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
391
+
392
+ def _build_main_interface(self):
393
+ st.title("🧠 NeuroResearch AI")
394
+ query = st.text_area("Research Query:", height=200,
395
+ placeholder="Enter technical research question...")
396
+
397
+ if st.button("Execute Analysis", type="primary"):
398
+ self._execute_analysis(query)
399
+
400
+ def _execute_analysis(self, query: str):
401
+ try:
402
+ with st.spinner("Initializing Quantum Analysis..."):
403
+ results = self.workflow.app.stream(
404
+ {"messages": [HumanMessage(content=query)], "context": {}, "metadata": {}}
405
+ )
406
+
407
+ for event in results:
408
+ self._render_event(event)
409
+
410
+ st.success("✅ Analysis Completed Successfully")
411
+ except Exception as e:
412
+ st.error(f"""**Analysis Failed**
413
+ {str(e)}
414
+ Potential issues:
415
+ - Complex query structure
416
+ - Document correlation failure
417
+ - Temporal processing constraints""")
418
+
419
+ def _render_event(self, event: Dict):
420
+ if 'ingest' in event:
421
+ with st.container():
422
+ st.success("✅ Query Ingested")
423
+
424
+ elif 'retrieve' in event:
425
+ with st.container():
426
+ docs = event['retrieve']['context']['documents']
427
+ st.info(f"📚 Retrieved {len(docs)} documents")
428
+ with st.expander("View Retrieved Documents", expanded=False):
429
+ for i, doc in enumerate(docs, 1):
430
+ st.markdown(f"**Document {i}**")
431
+ st.code(doc.page_content, language='text')
432
 
433
+ elif 'analyze' in event:
434
+ with st.container():
435
+ content = event['analyze']['messages'][0].content
436
+ with st.expander("Technical Analysis Report", expanded=True):
437
+ st.markdown(content)
438
+
439
+ elif 'validate' in event:
440
+ with st.container():
441
+ content = event['validate']['messages'][0].content
442
+ if "VALID" in content:
443
+ st.success("✅ Validation Passed")
444
+ with st.expander("View Validated Analysis", expanded=True):
445
+ st.markdown(content.split("Validation:")[0])
446
+ else:
447
+ st.warning("⚠️ Validation Issues Detected")
448
+ with st.expander("View Validation Details", expanded=True):
449
+ st.markdown(content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
 
451
  if __name__ == "__main__":
452
+ ResearchInterface()