mgbam committed on
Commit
bfe5a86
·
verified ·
1 Parent(s): 9f9113f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +509 -409
app.py CHANGED
@@ -3,452 +3,552 @@
3
  # ------------------------------
4
  from langchain_openai import OpenAIEmbeddings
5
  from langchain_community.vectorstores import Chroma
6
- from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langgraph.graph import END, StateGraph
9
- from typing_extensions import TypedDict, Annotated
10
- from typing import Sequence, Dict, List, Optional, Any
11
  from langgraph.graph.message import add_messages
 
 
12
  import chromadb
 
13
  import os
14
  import streamlit as st
15
  import requests
16
- import hashlib
17
- import json
18
- import time
19
- from concurrent.futures import ThreadPoolExecutor, as_completed
20
- from datetime import datetime
21
- from pydantic import BaseModel, ValidationError
22
- import traceback
23
 
24
  # ------------------------------
25
- # Configuration & Constants
26
  # ------------------------------
27
- class ResearchConfig:
28
- DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
29
- CHROMA_PATH = "chroma_db"
30
- CHUNK_SIZE = 512
31
- CHUNK_OVERLAP = 64
32
- MAX_CONCURRENT_REQUESTS = 5
33
- EMBEDDING_DIMENSIONS = 1536
34
- ANALYSIS_TEMPLATE = """**Technical Analysis Request**
35
- {context}
36
-
37
- Respond with:
38
- 1. Key Technical Innovations (markdown table)
39
- 2. Methodological Breakdown (bullet points)
40
- 3. Quantitative Results (LaTeX equations)
41
- 4. Critical Evaluation
42
- 5. Research Impact Assessment
43
-
44
- Include proper academic citations where applicable."""
45
 
46
  # ------------------------------
47
- # Document Schema & Content
48
  # ------------------------------
49
- DOCUMENT_CONTENT = {
50
- "CV-Transformer Hybrid": {
51
- "content": """## Hybrid Architecture for Computer Vision
52
- **Authors**: DeepVision Research Team
53
- **Abstract**: Novel combination of convolutional layers with transformer attention mechanisms.
54
-
55
- ### Key Innovations:
56
- - Cross-attention feature fusion
57
- - Adaptive spatial pooling
58
- - Multi-scale gradient propagation
59
-
60
- $$\\mathcal{L}_{total} = \\alpha\\mathcal{L}_{CE} + \\beta\\mathcal{L}_{SSIM}$$""",
61
- "metadata": {
62
- "year": 2024,
63
- "domain": "computer_vision",
64
- "citations": 142
65
- }
66
- },
67
- "Quantum ML Advances": {
68
- "content": """## Quantum Machine Learning Breakthroughs
69
- **Authors**: Quantum AI Lab
70
-
71
- ### Achievements:
72
- - Quantum-enhanced SGD (40% faster convergence)
73
- - 5-qubit QNN achieving 98% accuracy
74
- - Hybrid quantum-classical GANs
75
-
76
- $$\\mathcal{H} = -\\sum_{i<j} J_{ij}\\sigma_i^z\\sigma_j^z - \\Gamma\\sum_i\\sigma_i^x$$""",
77
- "metadata": {
78
- "year": 2023,
79
- "domain": "quantum_ml",
80
- "citations": 89
81
- }
82
- }
83
- }
84
 
85
- class DocumentSchema(BaseModel):
86
- content: str
87
- metadata: dict
88
- doc_id: str
 
 
 
 
 
 
 
 
 
 
89
 
90
  # ------------------------------
91
- # State Management
92
  # ------------------------------
93
- class ResearchState(TypedDict):
94
- messages: Annotated[List[BaseMessage], add_messages]
95
- context: Annotated[Dict[str, Any], "research_context"]
96
- metadata: Annotated[Dict[str, str], "system_metadata"]
 
 
 
 
97
 
98
  # ------------------------------
99
- # Document Processing
100
  # ------------------------------
101
- class DocumentManager:
102
- def __init__(self):
103
- self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
104
- self.embeddings = OpenAIEmbeddings(
105
- model="text-embedding-3-large",
106
- dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
107
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- def initialize_collections(self):
110
- try:
111
- self.research_col = self._create_collection("research")
112
- self.dev_col = self._create_collection("development")
113
- except Exception as e:
114
- st.error(f"Collection initialization failed: {str(e)}")
115
- traceback.print_exc()
116
-
117
- def _create_collection(self, name: str) -> Chroma:
118
- documents, metadatas, ids = [], [], []
119
-
120
- for title, data in DOCUMENT_CONTENT.items():
121
- try:
122
- doc = DocumentSchema(
123
- content=data["content"],
124
- metadata=data["metadata"],
125
- doc_id=hashlib.sha256(title.encode()).hexdigest()[:16]
126
- )
127
- documents.append(doc.content)
128
- metadatas.append(doc.metadata)
129
- ids.append(doc.doc_id)
130
- except ValidationError as e:
131
- st.error(f"Invalid document format: {title} - {str(e)}")
132
- continue
133
-
134
- splitter = RecursiveCharacterTextSplitter(
135
- chunk_size=ResearchConfig.CHUNK_SIZE,
136
- chunk_overlap=ResearchConfig.CHUNK_OVERLAP,
137
- separators=["\n## ", "\n### ", "\n\n", "\n• "]
138
  )
 
139
 
140
- try:
141
- docs = splitter.create_documents(documents, metadatas=metadatas)
142
- return Chroma.from_documents(
143
- docs,
144
- self.embeddings,
145
- client=self.client,
146
- collection_name=name,
147
- ids=ids
148
- )
149
- except Exception as e:
150
- raise RuntimeError(f"Failed creating {name} collection: {str(e)}")
151
 
152
- # ------------------------------
153
- # Retrieval System
154
- # ------------------------------
155
- class ResearchRetriever:
156
- def __init__(self):
157
- self.dm = DocumentManager()
158
- self.dm.initialize_collections()
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- def retrieve(self, query: str, domain: str) -> List[DocumentSchema]:
161
- try:
162
- collection = self.dm.research_col if domain == "research" else self.dm.dev_col
163
- if not collection:
164
- return []
165
-
166
- results = collection.as_retriever(
167
- search_type="mmr",
168
- search_kwargs={'k': 4, 'fetch_k': 20}
169
- ).invoke(query)
170
-
171
- return [DocumentSchema(
172
- content=doc.page_content,
173
- metadata=doc.metadata,
174
- doc_id=doc.metadata.get("doc_id", "")
175
- ) for doc in results if doc.page_content]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
- except Exception as e:
178
- st.error(f"Retrieval failure: {str(e)}")
179
- traceback.print_exc()
180
- return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
  # ------------------------------
183
- # Analysis Processor
184
  # ------------------------------
185
- class AnalysisEngine:
186
- def __init__(self):
187
- self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
188
- self.session_hash = hashlib.sha256(str(time.time()).encode()).hexdigest()[:12]
189
-
190
- def analyze(self, prompt: str) -> Dict:
191
- futures = [self.executor.submit(self._api_request, prompt) for _ in range(3)]
192
- return self._validate_results([f.result() for f in as_completed(futures)])
193
-
194
- def _api_request(self, prompt: str) -> Dict:
195
- headers = {
196
- "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
197
- "X-Session-ID": self.session_hash,
198
- "Content-Type": "application/json"
199
- }
200
-
201
- try:
202
- response = requests.post(
203
- "https://api.deepseek.com/v1/chat/completions",
204
- headers=headers,
205
- json={
206
- "model": "deepseek-chat",
207
- "messages": [{"role": "user", "content": prompt}],
208
- "temperature": 0.7,
209
- "max_tokens": 2000
210
- },
211
- timeout=30
212
- )
213
- response.raise_for_status()
214
- return response.json()
215
- except Exception as e:
216
- return {"error": str(e), "status_code": 500}
217
-
218
- def _validate_results(self, results: List[Dict]) -> Dict:
219
- valid = [r for r in results if "error" not in r]
220
- if not valid:
221
- return {"error": "All analysis attempts failed", "results": results}
222
-
223
- # Corrected line with proper parenthesis closure
224
- best = max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
225
- return best
226
 
227
  # ------------------------------
228
- # Workflow Implementation
229
  # ------------------------------
230
- class ResearchWorkflow:
231
- def __init__(self):
232
- self.retriever = ResearchRetriever()
233
- self.engine = AnalysisEngine()
234
- self.workflow = StateGraph(ResearchState)
235
- self._build_graph()
236
-
237
- def _build_graph(self):
238
- self.workflow.add_node("ingest", self._ingest)
239
- self.workflow.add_node("retrieve", self._retrieve)
240
- self.workflow.add_node("analyze", self._analyze)
241
- self.workflow.add_node("validate", self._validate)
242
- self.workflow.add_node("refine", self._refine)
243
-
244
- self.workflow.set_entry_point("ingest")
245
- self.workflow.add_edge("ingest", "retrieve")
246
- self.workflow.add_edge("retrieve", "analyze")
247
- self.workflow.add_conditional_edges(
248
- "analyze",
249
- self._quality_gate,
250
- {"valid": "validate", "invalid": "refine"}
251
- )
252
- self.workflow.add_edge("validate", END)
253
- self.workflow.add_edge("refine", "retrieve")
254
-
255
- def _ingest(self, state: ResearchState) -> ResearchState:
256
- try:
257
- query = next(msg.content for msg in reversed(state["messages"])
258
- if isinstance(msg, HumanMessage))
259
- return {
260
- "messages": [AIMessage(content="Query ingested")],
261
- "context": {
262
- "query": query,
263
- "documents": [],
264
- "errors": []
265
- },
266
- "metadata": {
267
- "session_id": hashlib.sha256(str(time.time()).encode()).hexdigest()[:8],
268
- "timestamp": datetime.now().isoformat()
269
- }
270
- }
271
- except Exception as e:
272
- return self._handle_error(f"Ingest failed: {str(e)}", state)
273
-
274
- def _retrieve(self, state: ResearchState) -> ResearchState:
275
- try:
276
- docs = self.retriever.retrieve(state["context"]["query"], "research")
277
- return {
278
- "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
279
- "context": {
280
- **state["context"],
281
- "documents": docs,
282
- "retrieval_time": time.time()
283
- },
284
- "metadata": state["metadata"]
285
- }
286
- except Exception as e:
287
- return self._handle_error(f"Retrieval error: {str(e)}", state)
288
-
289
- def _analyze(self, state: ResearchState) -> ResearchState:
290
- docs = state["context"].get("documents", [])
291
- if not docs:
292
- return self._handle_error("No documents for analysis", state)
293
-
294
- try:
295
- context = "\n\n".join([d.content for d in docs])
296
- prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=context)
297
- result = self.engine.analyze(prompt)
298
-
299
- if "error" in result:
300
- raise RuntimeError(result["error"])
301
-
302
- content = result['choices'][0]['message']['content']
303
-
304
- if len(content) < 200 or not any(c.isalpha() for c in content):
305
- raise ValueError("Insufficient analysis content")
306
-
307
- return {
308
- "messages": [AIMessage(content=content)],
309
- "context": state["context"],
310
- "metadata": state["metadata"]
311
- }
312
- except Exception as e:
313
- return self._handle_error(f"Analysis failed: {str(e)}", state)
314
-
315
- def _validate(self, state: ResearchState) -> ResearchState:
316
- return state
317
-
318
- def _refine(self, state: ResearchState) -> ResearchState:
319
- return state
320
-
321
- def _quality_gate(self, state: ResearchState) -> str:
322
- content = state["messages"][-1].content if state["messages"] else ""
323
- required = ["Innovations", "Results", "Evaluation"]
324
- return "valid" if all(kw in content for kw in required) else "invalid"
325
-
326
- def _handle_error(self, message: str, state: ResearchState) -> ResearchState:
327
- return {
328
- "messages": [AIMessage(content=f"🚨 Error: {message}")],
329
- "context": {
330
- **state["context"],
331
- "errors": state["context"]["errors"] + [message]
332
- },
333
- "metadata": state["metadata"]
334
- }
335
 
336
  # ------------------------------
337
- # User Interface
338
  # ------------------------------
339
- class ResearchInterface:
340
- def __init__(self):
341
- self.workflow = ResearchWorkflow().workflow.compile()
342
- self._setup_interface()
343
-
344
- def _setup_interface(self):
345
- st.set_page_config(
346
- page_title="Research Assistant",
347
- layout="wide",
348
- initial_sidebar_state="expanded"
349
- )
350
- self._apply_styles()
351
- self._build_sidebar()
352
- self._build_main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
 
354
- def _apply_styles(self):
355
- st.markdown("""
356
- <style>
357
- .stApp {
358
- background: #0a192f;
359
- color: #64ffda;
360
- }
361
- .stTextArea textarea {
362
- background: #172a45 !important;
363
- color: #a8b2d1 !important;
364
- }
365
- .stButton>button {
366
- background: #233554;
367
- border: 1px solid #64ffda;
368
- }
369
- .error-box {
370
- border: 1px solid #ff4444;
371
- border-radius: 5px;
372
- padding: 1rem;
373
- margin: 1rem 0;
374
- }
375
- </style>
376
- """, unsafe_allow_html=True)
377
-
378
- def _build_sidebar(self):
379
- with st.sidebar:
380
- st.title("🔍 Document Database")
381
- for title, data in DOCUMENT_CONTENT.items():
382
- with st.expander(title[:25]+"..."):
383
- st.markdown(f"```\n{data['content'][:300]}...\n```")
384
-
385
- def _build_main(self):
386
- st.title("🧠 Research Analysis System")
387
- query = st.text_area("Enter your research query:", height=150)
388
 
389
- if st.button("Start Analysis", type="primary"):
390
- self._run_analysis(query)
391
-
392
- def _run_analysis(self, query: str):
393
- try:
394
- with st.spinner("🔍 Analyzing documents..."):
395
- state = {
396
- "messages": [HumanMessage(content=query)],
397
- "context": {
398
- "query": "",
399
- "documents": [],
400
- "errors": []
401
- },
402
- "metadata": {}
403
- }
404
-
405
- for event in self.workflow.stream(state):
406
- self._display_progress(event)
407
-
408
- final_state = self.workflow.invoke(state)
409
- self._show_results(final_state)
410
-
411
- except Exception as e:
412
- st.error(f"""**Analysis Failed**
413
- {str(e)}
414
- Common solutions:
415
- - Simplify your query
416
- - Check document database status
417
- - Verify API connectivity""")
418
-
419
- def _display_progress(self, event):
420
- current_state = next(iter(event.values()))
421
- with st.container():
422
- st.markdown("---")
423
- cols = st.columns([1,2,1])
424
 
425
- with cols[0]:
426
- st.subheader("Processing Stage")
427
- stage = list(event.keys())[0].title()
428
- st.code(stage)
429
-
430
- with cols[1]:
431
- st.subheader("Documents")
432
- docs = current_state["context"].get("documents", [])
433
- st.metric("Retrieved", len(docs))
434
-
435
- with cols[2]:
436
- st.subheader("Status")
437
- if current_state["context"].get("errors"):
438
- st.error("Errors detected")
439
- else:
440
- st.success("Normal operation")
441
-
442
- def _show_results(self, state: ResearchState):
443
- if state["context"].get("errors"):
444
- st.error("Analysis completed with errors")
445
- with st.expander("Error Details"):
446
- for error in state["context"]["errors"]:
447
- st.markdown(f"- {error}")
448
- else:
449
- st.success("Analysis completed successfully ✅")
450
- with st.expander("Full Report"):
451
- st.markdown(state["messages"][-1].content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
 
453
  if __name__ == "__main__":
454
- ResearchInterface()
 
3
  # ------------------------------
4
  from langchain_openai import OpenAIEmbeddings
5
  from langchain_community.vectorstores import Chroma
6
+ from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langgraph.graph import END, StateGraph
9
+ from langgraph.prebuilt import ToolNode
 
10
  from langgraph.graph.message import add_messages
11
+ from typing_extensions import TypedDict, Annotated
12
+ from typing import Sequence
13
  import chromadb
14
+ import re
15
  import os
16
  import streamlit as st
17
  import requests
18
+ from langchain.tools.retriever import create_retriever_tool
 
 
 
 
 
 
19
 
20
  # ------------------------------
21
+ # Configuration
22
  # ------------------------------
23
+ # Get DeepSeek API key from Hugging Face Space secrets
24
+ DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
25
+
26
+ if not DEEPSEEK_API_KEY:
27
+ st.error("""
28
+ **Missing API Configuration**
29
+ Please configure your DeepSeek API key in Hugging Face Space secrets:
30
+ 1. Go to your Space's Settings
31
+ 2. Click on 'Repository secrets'
32
+ 3. Add a secret named DEEPSEEK_API_KEY
33
+ """)
34
+ st.stop()
35
+
36
+ # Create directory for Chroma persistence
37
+ os.makedirs("chroma_db", exist_ok=True)
 
 
 
38
 
39
  # ------------------------------
40
+ # ChromaDB Client Configuration
41
  # ------------------------------
42
+ chroma_client = chromadb.PersistentClient(path="chroma_db")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ # ------------------------------
45
+ # Dummy Data: Research & Development Texts
46
+ # ------------------------------
47
+ research_texts = [
48
+ "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
49
+ "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
50
+ "Latest Trends in Machine Learning Methods Using Quantum Computing"
51
+ ]
52
+
53
+ development_texts = [
54
+ "Project A: UI Design Completed, API Integration in Progress",
55
+ "Project B: Testing New Feature X, Bug Fixes Needed",
56
+ "Product Y: In the Performance Optimization Stage Before Release"
57
+ ]
58
 
59
  # ------------------------------
60
+ # Text Splitting & Document Creation
61
  # ------------------------------
62
+ splitter = RecursiveCharacterTextSplitter(
63
+ chunk_size=300,
64
+ chunk_overlap=30,
65
+ separators=["\n\n", "\n", ". ", "! ", "? ", " "]
66
+ )
67
+
68
+ research_docs = splitter.create_documents(research_texts)
69
+ development_docs = splitter.create_documents(development_texts)
70
 
71
  # ------------------------------
72
+ # Creating Vector Stores with Embeddings
73
  # ------------------------------
74
+ embeddings = OpenAIEmbeddings(
75
+ model="text-embedding-3-large",
76
+ # dimensions=1024 # Uncomment if needed
77
+ )
78
+
79
+ research_vectorstore = Chroma.from_documents(
80
+ documents=research_docs,
81
+ embedding=embeddings,
82
+ client=chroma_client,
83
+ collection_name="research_collection"
84
+ )
85
+
86
+ development_vectorstore = Chroma.from_documents(
87
+ documents=development_docs,
88
+ embedding=embeddings,
89
+ client=chroma_client,
90
+ collection_name="development_collection"
91
+ )
92
+
93
+ # ------------------------------
94
+ # Creating Retriever Tools with MMR
95
+ # ------------------------------
96
+ research_retriever = research_vectorstore.as_retriever(
97
+ search_type="mmr",
98
+ search_kwargs={
99
+ 'k': 3,
100
+ 'fetch_k': 10,
101
+ 'lambda_mult': 0.7
102
+ }
103
+ )
104
+
105
+ development_retriever = development_vectorstore.as_retriever(
106
+ search_type="mmr",
107
+ search_kwargs={
108
+ 'k': 3,
109
+ 'fetch_k': 10,
110
+ 'lambda_mult': 0.7
111
+ }
112
+ )
113
+
114
+ research_tool = create_retriever_tool(
115
+ research_retriever,
116
+ "research_db_tool",
117
+ "Search information from the research database."
118
+ )
119
+
120
+ development_tool = create_retriever_tool(
121
+ development_retriever,
122
+ "development_db_tool",
123
+ "Search information from the development database."
124
+ )
125
+
126
+ tools = [research_tool, development_tool]
127
+
128
+ # ------------------------------
129
+ # Agent Function & Workflow Functions
130
+ # ------------------------------
131
+ class AgentState(TypedDict):
132
+ messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
133
+
134
+ def agent(state: AgentState):
135
+ print("---CALL AGENT---")
136
+ messages = state["messages"]
137
+
138
+ if isinstance(messages[0], tuple):
139
+ user_message = messages[0][1]
140
+ else:
141
+ user_message = messages[0].content
142
+
143
+ prompt = f"""Given this user question: "{user_message}"
144
+ If it's about research or academic topics, respond EXACTLY in this format:
145
+ SEARCH_RESEARCH: <search terms>
146
+
147
+ If it's about development status, respond EXACTLY in this format:
148
+ SEARCH_DEV: <search terms>
149
+
150
+ Otherwise, just answer directly.
151
+ """
152
+
153
+ headers = {
154
+ "Accept": "application/json",
155
+ "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
156
+ "Content-Type": "application/json"
157
+ }
158
 
159
+ data = {
160
+ "model": "deepseek-chat",
161
+ "messages": [{"role": "user", "content": prompt}],
162
+ "temperature": 0.7,
163
+ "max_tokens": 1024
164
+ }
165
+
166
+ try:
167
+ response = requests.post(
168
+ "https://api.deepseek.com/v1/chat/completions",
169
+ headers=headers,
170
+ json=data,
171
+ verify=False,
172
+ timeout=30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  )
174
+ response.raise_for_status()
175
 
176
+ response_text = response.json()['choices'][0]['message']['content']
177
+ print("Raw response:", response_text)
178
+
179
+ if "SEARCH_RESEARCH:" in response_text:
180
+ query = response_text.split("SEARCH_RESEARCH:")[1].strip()
181
+ results = research_retriever.invoke(query)
182
+ return {"messages": [AIMessage(content=f'Action: research_db_tool\n{{"query": "{query}"}}\n\nResults: {str(results)}')]}
 
 
 
 
183
 
184
+ elif "SEARCH_DEV:" in response_text:
185
+ query = response_text.split("SEARCH_DEV:")[1].strip()
186
+ results = development_retriever.invoke(query)
187
+ return {"messages": [AIMessage(content=f'Action: development_db_tool\n{{"query": "{query}"}}\n\nResults: {str(results)}')]}
188
+
189
+ else:
190
+ return {"messages": [AIMessage(content=response_text)]}
191
+
192
+ except Exception as e:
193
+ error_msg = f"API Error: {str(e)}"
194
+ if "Insufficient Balance" in str(e):
195
+ error_msg += "\n\nPlease check your DeepSeek API account balance."
196
+ return {"messages": [AIMessage(content=error_msg)]}
197
+
198
+ def simple_grade_documents(state: AgentState):
199
+ messages = state["messages"]
200
+ last_message = messages[-1]
201
+ print("Evaluating message:", last_message.content)
202
 
203
+ if "Results: [Document" in last_message.content:
204
+ print("---DOCS FOUND, GO TO GENERATE---")
205
+ return "generate"
206
+ else:
207
+ print("---NO DOCS FOUND, TRY REWRITE---")
208
+ return "rewrite"
209
+
210
+ def generate(state: AgentState):
211
+ print("---GENERATE FINAL ANSWER---")
212
+ messages = state["messages"]
213
+ question = messages[0].content if isinstance(messages[0], tuple) else messages[0].content
214
+ last_message = messages[-1]
215
+
216
+ docs = ""
217
+ if "Results: [" in last_message.content:
218
+ results_start = last_message.content.find("Results: [")
219
+ docs = last_message.content[results_start:]
220
+ print("Documents found:", docs)
221
+
222
+ headers = {
223
+ "Accept": "application/json",
224
+ "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
225
+ "Content-Type": "application/json"
226
+ }
227
+
228
+ prompt = f"""Analyze these research documents and provide structured insights:
229
+ Question: {question}
230
+ Documents: {docs}
231
+
232
+ Format your response with:
233
+ 1. Key Findings section with bullet points
234
+ 2. Technical Innovations section
235
+ 3. Potential Applications
236
+ 4. References to source documents (Doc1, Doc2, etc.)
237
+
238
+ Focus on:
239
+ - Distilling unique insights
240
+ - Connecting different research aspects
241
+ - Highlighting practical implications
242
+ """
243
+
244
+ data = {
245
+ "model": "deepseek-chat",
246
+ "messages": [{
247
+ "role": "user",
248
+ "content": prompt
249
+ }],
250
+ "temperature": 0.7,
251
+ "max_tokens": 1024
252
+ }
253
+
254
+ try:
255
+ print("Sending generate request to API...")
256
+ response = requests.post(
257
+ "https://api.deepseek.com/v1/chat/completions",
258
+ headers=headers,
259
+ json=data,
260
+ verify=False,
261
+ timeout=30
262
+ )
263
+ response.raise_for_status()
264
+
265
+ response_text = response.json()['choices'][0]['message']['content']
266
+ print("Final Answer:", response_text)
267
+ return {"messages": [AIMessage(content=response_text)]}
268
+ except Exception as e:
269
+ error_msg = f"Generation Error: {str(e)}"
270
+ return {"messages": [AIMessage(content=error_msg)]}
271
+
272
+ def rewrite(state: AgentState):
273
+ print("---REWRITE QUESTION---")
274
+ messages = state["messages"]
275
+ original_question = messages[0].content if len(messages) > 0 else "N/A"
276
+
277
+ headers = {
278
+ "Accept": "application/json",
279
+ "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
280
+ "Content-Type": "application/json"
281
+ }
282
+
283
+ data = {
284
+ "model": "deepseek-chat",
285
+ "messages": [{
286
+ "role": "user",
287
+ "content": f"Rewrite this question to be more specific and clearer: {original_question}"
288
+ }],
289
+ "temperature": 0.7,
290
+ "max_tokens": 1024
291
+ }
292
+
293
+ try:
294
+ print("Sending rewrite request...")
295
+ response = requests.post(
296
+ "https://api.deepseek.com/v1/chat/completions",
297
+ headers=headers,
298
+ json=data,
299
+ verify=False,
300
+ timeout=30
301
+ )
302
+ response.raise_for_status()
303
 
304
+ response_text = response.json()['choices'][0]['message']['content']
305
+ print("Rewritten question:", response_text)
306
+ return {"messages": [AIMessage(content=response_text)]}
307
+ except Exception as e:
308
+ error_msg = f"Rewrite Error: {str(e)}"
309
+ return {"messages": [AIMessage(content=error_msg)]}
310
+
311
+ tools_pattern = re.compile(r"Action: .*")
312
+
313
+ def custom_tools_condition(state: AgentState):
314
+ messages = state["messages"]
315
+ last_message = messages[-1]
316
+ content = last_message.content
317
+
318
+ print("Checking tools condition:", content)
319
+ if tools_pattern.match(content):
320
+ print("Moving to retrieve...")
321
+ return "tools"
322
+ print("Moving to END...")
323
+ return END
324
 
325
  # ------------------------------
326
+ # Workflow Configuration using LangGraph
327
  # ------------------------------
328
+ workflow = StateGraph(AgentState)
329
+
330
+ # Add nodes
331
+ workflow.add_node("agent", agent)
332
+ retrieve_node = ToolNode(tools)
333
+ workflow.add_node("retrieve", retrieve_node)
334
+ workflow.add_node("rewrite", rewrite)
335
+ workflow.add_node("generate", generate)
336
+
337
+ # Set entry point
338
+ workflow.set_entry_point("agent")
339
+
340
+ # Define transitions
341
+ workflow.add_conditional_edges(
342
+ "agent",
343
+ custom_tools_condition,
344
+ {
345
+ "tools": "retrieve",
346
+ END: END
347
+ }
348
+ )
349
+
350
+ workflow.add_conditional_edges(
351
+ "retrieve",
352
+ simple_grade_documents,
353
+ {
354
+ "generate": "generate",
355
+ "rewrite": "rewrite"
356
+ }
357
+ )
358
+
359
+ workflow.add_edge("generate", END)
360
+ workflow.add_edge("rewrite", "agent")
361
+
362
+ # Compile the workflow
363
+ app = workflow.compile()
 
 
 
 
 
364
 
365
  # ------------------------------
366
+ # Processing Function
367
  # ------------------------------
368
+ def process_question(user_question, app, config):
369
+ """Process user question through the workflow"""
370
+ events = []
371
+ for event in app.stream({"messages": [("user", user_question)]}, config):
372
+ events.append(event)
373
+ return events
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
  # ------------------------------
376
+ # Streamlit App UI (Enhanced Dark Theme)
377
  # ------------------------------
378
+ def main():
379
+ st.set_page_config(
380
+ page_title="AI Research & Development Assistant",
381
+ layout="wide",
382
+ initial_sidebar_state="expanded"
383
+ )
384
+
385
+ st.markdown("""
386
+ <style>
387
+ .stApp {
388
+ background-color: #1a1a1a;
389
+ color: #ffffff;
390
+ }
391
+
392
+ .stTextArea textarea {
393
+ background-color: #2d2d2d !important;
394
+ color: #ffffff !important;
395
+ border: 1px solid #3d3d3d;
396
+ }
397
+
398
+ .stButton > button {
399
+ background-color: #4CAF50;
400
+ color: white;
401
+ border: none;
402
+ padding: 12px 28px;
403
+ border-radius: 6px;
404
+ transition: all 0.3s;
405
+ font-weight: 500;
406
+ }
407
+
408
+ .stButton > button:hover {
409
+ background-color: #45a049;
410
+ transform: scale(1.02);
411
+ box-shadow: 0 2px 8px rgba(0,0,0,0.2);
412
+ }
413
+
414
+ .data-box {
415
+ padding: 18px;
416
+ margin: 12px 0;
417
+ border-radius: 8px;
418
+ background-color: #2d2d2d;
419
+ border-left: 4px solid;
420
+ }
421
+
422
+ .research-box {
423
+ border-color: #2196F3;
424
+ }
425
+
426
+ .dev-box {
427
+ border-color: #4CAF50;
428
+ }
429
+
430
+ .st-expander {
431
+ background-color: #2d2d2d;
432
+ border: 1px solid #3d3d3d;
433
+ border-radius: 6px;
434
+ margin: 16px 0;
435
+ }
436
+
437
+ .st-expander .streamlit-expanderHeader {
438
+ color: #ffffff !important;
439
+ font-weight: 500;
440
+ }
441
+
442
+ .stAlert {
443
+ background-color: #2d2d2d !important;
444
+ border: 1px solid #3d3d3d;
445
+ }
446
+
447
+ h1, h2, h3 {
448
+ color: #ffffff !important;
449
+ border-bottom: 2px solid #3d3d3d;
450
+ padding-bottom: 8px;
451
+ }
452
+
453
+ .stMarkdown {
454
+ color: #e0e0e0;
455
+ line-height: 1.6;
456
+ }
457
+ </style>
458
+ """, unsafe_allow_html=True)
459
 
460
+ with st.sidebar:
461
+ st.header("📚 Available Data")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
 
463
+ st.subheader("Research Database")
464
+ for text in research_texts:
465
+ st.markdown(f'<div class="data-box research-box">{text}</div>', unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
 
467
+ st.subheader("Development Database")
468
+ for text in development_texts:
469
+ st.markdown(f'<div class="data-box dev-box">{text}</div>', unsafe_allow_html=True)
470
+
471
+ st.title("🤖 AI Research & Development Assistant")
472
+ st.markdown("---")
473
+
474
+ query = st.text_area("Enter your question:", height=100, placeholder="e.g., What is the latest advancement in AI research?")
475
+
476
+ col1, col2 = st.columns([1, 2])
477
+ with col1:
478
+ if st.button("🔍 Get Answer", use_container_width=True):
479
+ if query:
480
+ try:
481
+ with st.spinner('Processing your question...'):
482
+ events = process_question(query, app, {"configurable": {"thread_id": "1"}})
483
+
484
+ for event in events:
485
+ if 'agent' in event:
486
+ with st.expander("🔄 Processing Step", expanded=True):
487
+ content = event['agent']['messages'][0].content
488
+ if "Error" in content:
489
+ st.error(content)
490
+ elif "Results:" in content:
491
+ st.markdown("### 📑 Retrieved Documents")
492
+ docs = content.split("Results:")[1].strip()
493
+
494
+ # Process and deduplicate documents
495
+ unique_docs = list({
496
+ doc.split('page_content=')[1].split(')')[0].strip("'")
497
+ for doc in docs.split("Document(")[1:]
498
+ })
499
+
500
+ for i, doc in enumerate(unique_docs, 1):
501
+ st.markdown(f"""
502
+ **Document {i}**
503
+ {doc}
504
+ """)
505
+ elif 'generate' in event:
506
+ content = event['generate']['messages'][0].content
507
+ if "Error" in content:
508
+ st.error(content)
509
+ else:
510
+ st.markdown("### ✨ Final Answer")
511
+ st.markdown(f"""
512
+ <div style='
513
+ background-color: #2d2d2d;
514
+ padding: 20px;
515
+ border-radius: 8px;
516
+ margin-top: 16px;
517
+ '>
518
+ {content}
519
+ </div>
520
+ """, unsafe_allow_html=True)
521
+ except Exception as e:
522
+ st.error(f"""
523
+ **Processing Error**
524
+ {str(e)}
525
+ Please check:
526
+ - API key configuration
527
+ - Account balance
528
+ - Network connection
529
+ """)
530
+ else:
531
+ st.warning("⚠️ Please enter a question first!")
532
+
533
+ with col2:
534
+ st.markdown("""
535
+ ### 🎯 How to Use
536
+ 1. **Enter** your question in the text box
537
+ 2. **Click** the search button
538
+ 3. **Review** processing steps
539
+ 4. **Analyze** final structured answer
540
+
541
+ ### 💡 Example Questions
542
+ - What's new in quantum machine learning?
543
+ - How is Project Y progressing?
544
+ - Recent breakthroughs in AI image recognition?
545
+
546
+ ### 🔍 Search Features
547
+ - Automatic query optimization
548
+ - Technical document analysis
549
+ - Cross-project insights
550
+ - Source-aware reporting
551
+ """)
552
 
553
  if __name__ == "__main__":
554
+ main()