mgbam committed on
Commit
e021e49
·
verified ·
1 Parent(s): dd92890

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +334 -261
app.py CHANGED
@@ -9,27 +9,30 @@ from langgraph.graph import END, StateGraph
9
  from langgraph.prebuilt import ToolNode
10
  from langgraph.graph.message import add_messages
11
  from typing_extensions import TypedDict, Annotated
12
- from typing import Sequence
13
  import chromadb
14
  import re
15
  import os
16
  import streamlit as st
17
  import requests
 
18
  from langchain.tools.retriever import create_retriever_tool
 
19
 
20
  # ------------------------------
21
  # Configuration
22
  # ------------------------------
23
- # Get DeepSeek API key from Hugging Face Space secrets
24
  DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
25
 
 
26
  if not DEEPSEEK_API_KEY:
27
  st.error("""
28
- **Missing API Configuration**
29
- Please configure your DeepSeek API key in Hugging Face Space secrets:
30
- 1. Go to your Space's Settings
31
- 2. Click on 'Repository secrets'
32
- 3. Add a secret named DEEPSEEK_API_KEY
33
  """)
34
  st.stop()
35
 
@@ -39,10 +42,26 @@ os.makedirs("chroma_db", exist_ok=True)
39
  # ------------------------------
40
  # ChromaDB Client Configuration
41
  # ------------------------------
42
- chroma_client = chromadb.PersistentClient(path="chroma_db")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  # ------------------------------
45
- # Dummy Data: Research & Development Texts
46
  # ------------------------------
47
  research_texts = [
48
  "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
@@ -56,407 +75,461 @@ development_texts = [
56
  "Product Y: In the Performance Optimization Stage Before Release"
57
  ]
58
 
59
- # ------------------------------
60
- # Text Splitting & Document Creation
61
- # ------------------------------
62
- splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)
63
- research_docs = splitter.create_documents(research_texts)
64
- development_docs = splitter.create_documents(development_texts)
 
 
 
 
 
 
 
 
 
 
 
65
 
66
  # ------------------------------
67
- # Creating Vector Stores with Embeddings
68
  # ------------------------------
69
  embeddings = OpenAIEmbeddings(
70
  model="text-embedding-3-large",
71
- # dimensions=1024 # Uncomment if needed
72
  )
73
 
74
  research_vectorstore = Chroma.from_documents(
75
  documents=research_docs,
76
  embedding=embeddings,
77
  client=chroma_client,
78
- collection_name="research_collection"
 
79
  )
80
 
81
  development_vectorstore = Chroma.from_documents(
82
  documents=development_docs,
83
  embedding=embeddings,
84
  client=chroma_client,
85
- collection_name="development_collection"
 
86
  )
87
 
88
- research_retriever = research_vectorstore.as_retriever()
89
- development_retriever = development_vectorstore.as_retriever()
90
-
91
  # ------------------------------
92
- # Creating Retriever Tools
93
  # ------------------------------
94
- research_tool = create_retriever_tool(
95
- research_retriever,
96
- "research_db_tool",
97
- "Search information from the research database."
98
  )
99
 
100
- development_tool = create_retriever_tool(
101
- development_retriever,
102
- "development_db_tool",
103
- "Search information from the development database."
104
  )
105
 
106
- tools = [research_tool, development_tool]
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  # ------------------------------
109
- # Agent Function & Workflow Functions
110
  # ------------------------------
111
  class AgentState(TypedDict):
112
  messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
113
 
 
 
 
114
  def agent(state: AgentState):
115
- print("---CALL AGENT---")
 
116
  messages = state["messages"]
117
-
118
- if isinstance(messages[0], tuple):
119
- user_message = messages[0][1]
120
- else:
121
- user_message = messages[0].content
122
-
123
- prompt = f"""Given this user question: "{user_message}"
124
- If it's about research or academic topics, respond EXACTLY in this format:
125
- SEARCH_RESEARCH: <search terms>
126
-
127
- If it's about development status, respond EXACTLY in this format:
128
- SEARCH_DEV: <search terms>
129
-
130
- Otherwise, just answer directly.
131
- """
132
-
133
- headers = {
134
- "Accept": "application/json",
135
- "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
136
- "Content-Type": "application/json"
137
- }
138
-
139
- data = {
140
- "model": "deepseek-chat",
141
- "messages": [{"role": "user", "content": prompt}],
142
- "temperature": 0.7,
143
- "max_tokens": 1024
144
- }
145
 
146
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  response = requests.post(
148
  "https://api.deepseek.com/v1/chat/completions",
149
  headers=headers,
150
  json=data,
151
- verify=False,
152
  timeout=30
153
  )
154
  response.raise_for_status()
155
 
 
156
  response_text = response.json()['choices'][0]['message']['content']
157
- print("Raw response:", response_text)
158
-
 
159
  if "SEARCH_RESEARCH:" in response_text:
160
  query = response_text.split("SEARCH_RESEARCH:")[1].strip()
161
  results = research_retriever.invoke(query)
162
- return {"messages": [AIMessage(content=f'Action: research_db_tool\n{{"query": "{query}"}}\n\nResults: {str(results)}')]}
 
 
 
 
 
 
 
 
163
 
164
  elif "SEARCH_DEV:" in response_text:
165
  query = response_text.split("SEARCH_DEV:")[1].strip()
166
  results = development_retriever.invoke(query)
167
- return {"messages": [AIMessage(content=f'Action: development_db_tool\n{{"query": "{query}"}}\n\nResults: {str(results)}')]}
 
 
 
 
 
 
 
 
168
 
169
  else:
170
  return {"messages": [AIMessage(content=response_text)]}
171
 
172
- except Exception as e:
173
- error_msg = f"API Error: {str(e)}"
174
- if "Insufficient Balance" in str(e):
175
- error_msg += "\n\nPlease check your DeepSeek API account balance."
176
  return {"messages": [AIMessage(content=error_msg)]}
 
 
177
 
 
 
 
178
  def simple_grade_documents(state: AgentState):
 
179
  messages = state["messages"]
180
  last_message = messages[-1]
181
- print("Evaluating message:", last_message.content)
182
 
183
- if "Results: [Document" in last_message.content:
184
- print("---DOCS FOUND, GO TO GENERATE---")
185
  return "generate"
186
  else:
187
- print("---NO DOCS FOUND, TRY REWRITE---")
188
  return "rewrite"
189
 
190
  def generate(state: AgentState):
191
- print("---GENERATE FINAL ANSWER---")
 
192
  messages = state["messages"]
193
- question = messages[0].content if isinstance(messages[0], tuple) else messages[0].content
194
- last_message = messages[-1]
195
-
196
- docs = ""
197
- if "Results: [" in last_message.content:
198
- results_start = last_message.content.find("Results: [")
199
- docs = last_message.content[results_start:]
200
- print("Documents found:", docs)
201
-
202
- headers = {
203
- "Accept": "application/json",
204
- "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
205
- "Content-Type": "application/json"
206
- }
207
-
208
- prompt = f"""Based on these research documents, summarize the latest advancements in AI:
209
- Question: {question}
210
- Documents: {docs}
211
- Focus on extracting and synthesizing the key findings from the research papers.
212
- """
213
-
214
- data = {
215
- "model": "deepseek-chat",
216
- "messages": [{
217
- "role": "user",
218
- "content": prompt
219
- }],
220
- "temperature": 0.7,
221
- "max_tokens": 1024
222
- }
223
 
224
  try:
225
- print("Sending generate request to API...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  response = requests.post(
227
  "https://api.deepseek.com/v1/chat/completions",
228
  headers=headers,
229
  json=data,
230
- verify=False,
231
- timeout=30
232
  )
233
  response.raise_for_status()
234
 
 
235
  response_text = response.json()['choices'][0]['message']['content']
236
- print("Final Answer:", response_text)
237
- return {"messages": [AIMessage(content=response_text)]}
 
 
238
  except Exception as e:
239
- error_msg = f"Generation Error: {str(e)}"
240
- return {"messages": [AIMessage(content=error_msg)]}
241
 
242
  def rewrite(state: AgentState):
243
- print("---REWRITE QUESTION---")
 
244
  messages = state["messages"]
245
- original_question = messages[0].content if len(messages) > 0 else "N/A"
246
-
247
- headers = {
248
- "Accept": "application/json",
249
- "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
250
- "Content-Type": "application/json"
251
- }
252
-
253
- data = {
254
- "model": "deepseek-chat",
255
- "messages": [{
256
- "role": "user",
257
- "content": f"Rewrite this question to be more specific and clearer: {original_question}"
258
- }],
259
- "temperature": 0.7,
260
- "max_tokens": 1024
261
- }
262
 
263
  try:
264
- print("Sending rewrite request...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  response = requests.post(
266
  "https://api.deepseek.com/v1/chat/completions",
267
  headers=headers,
268
  json=data,
269
- verify=False,
270
  timeout=30
271
  )
272
  response.raise_for_status()
273
 
274
- response_text = response.json()['choices'][0]['message']['content']
275
- print("Rewritten question:", response_text)
276
- return {"messages": [AIMessage(content=response_text)]}
277
- except Exception as e:
278
- error_msg = f"Rewrite Error: {str(e)}"
279
- return {"messages": [AIMessage(content=error_msg)]}
280
-
281
- tools_pattern = re.compile(r"Action: .*")
282
-
283
- def custom_tools_condition(state: AgentState):
284
- messages = state["messages"]
285
- last_message = messages[-1]
286
- content = last_message.content
287
 
288
- print("Checking tools condition:", content)
289
- if tools_pattern.match(content):
290
- print("Moving to retrieve...")
291
- return "tools"
292
- print("Moving to END...")
293
- return END
294
 
295
  # ------------------------------
296
- # Workflow Configuration using LangGraph
297
  # ------------------------------
298
  workflow = StateGraph(AgentState)
299
 
300
- # Add nodes
301
  workflow.add_node("agent", agent)
302
- retrieve_node = ToolNode(tools)
303
- workflow.add_node("retrieve", retrieve_node)
304
- workflow.add_node("rewrite", rewrite)
305
  workflow.add_node("generate", generate)
 
306
 
307
- # Set entry point
308
  workflow.set_entry_point("agent")
309
 
310
- # Define transitions
311
  workflow.add_conditional_edges(
312
  "agent",
313
- custom_tools_condition,
314
- {
315
- "tools": "retrieve",
316
- END: END
317
- }
318
  )
319
 
320
  workflow.add_conditional_edges(
321
  "retrieve",
322
  simple_grade_documents,
323
- {
324
- "generate": "generate",
325
- "rewrite": "rewrite"
326
- }
327
  )
328
 
329
  workflow.add_edge("generate", END)
330
  workflow.add_edge("rewrite", "agent")
331
 
332
- # Compile the workflow
333
  app = workflow.compile()
334
 
335
  # ------------------------------
336
- # Processing Function
337
- # ------------------------------
338
- def process_question(user_question, app, config):
339
- """Process user question through the workflow"""
340
- events = []
341
- for event in app.stream({"messages": [("user", user_question)]}, config):
342
- events.append(event)
343
- return events
344
-
345
- # ------------------------------
346
- # Streamlit App UI (Dark Theme)
347
  # ------------------------------
348
  def main():
 
349
  st.set_page_config(
350
- page_title="AI Research & Development Assistant",
351
- layout="wide",
352
  initial_sidebar_state="expanded"
353
  )
354
 
 
355
  st.markdown("""
356
  <style>
357
  .stApp {
358
- background-color: #1a1a1a;
359
- color: #ffffff;
360
  }
361
 
362
  .stTextArea textarea {
363
- background-color: #2d2d2d !important;
364
- color: #ffffff !important;
 
365
  }
366
 
367
- .stButton > button {
368
- background-color: #4CAF50;
369
  color: white;
 
 
370
  transition: all 0.3s;
371
  }
372
 
373
- .stButton > button:hover {
374
- background-color: #45a049;
375
  transform: scale(1.02);
376
  }
377
 
378
- .data-box {
379
- background-color: #2d2d2d;
380
- border-left: 5px solid #2196F3;
381
  }
382
 
383
- .dev-box {
384
- border-left: 5px solid #4CAF50;
 
385
  }
386
 
387
- .st-expander {
388
- background-color: #2d2d2d;
389
- border: 1px solid #3d3d3d;
 
 
 
390
  }
391
  </style>
392
  """, unsafe_allow_html=True)
393
 
 
394
  with st.sidebar:
395
- st.header("📚 Available Data")
396
- st.subheader("Research Database")
397
- for text in research_texts:
398
- st.markdown(f'<div class="data-box research-box" style="padding: 15px; margin: 10px 0; border-radius: 5px;">{text}</div>', unsafe_allow_html=True)
 
 
 
 
399
 
400
- st.subheader("Development Database")
401
- for text in development_texts:
402
- st.markdown(f'<div class="data-box dev-box" style="padding: 15px; margin: 10px 0; border-radius: 5px;">{text}</div>', unsafe_allow_html=True)
403
-
404
- st.title("🤖 AI Research & Development Assistant")
405
- st.markdown("---")
406
-
407
- query = st.text_area("Enter your question:", height=100, placeholder="e.g., What is the latest advancement in AI research?")
408
-
409
- col1, col2 = st.columns([1, 2])
410
- with col1:
411
- if st.button("🔍 Get Answer", use_container_width=True):
412
- if query:
413
- try:
414
- with st.spinner('Processing your question...'):
415
- events = process_question(query, app, {"configurable": {"thread_id": "1"}})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
 
417
- for event in events:
418
- if 'agent' in event:
419
- with st.expander("🔄 Processing Step", expanded=True):
420
- content = event['agent']['messages'][0].content
421
- if "Error" in content:
422
- st.error(content)
423
- elif "Results:" in content:
424
- st.markdown("### 📑 Retrieved Documents:")
425
- docs_start = content.find("Results:")
426
- docs = content[docs_start:]
427
- st.info(docs)
428
- elif 'generate' in event:
429
- content = event['generate']['messages'][0].content
430
- if "Error" in content:
431
- st.error(content)
432
- else:
433
- st.markdown("### ✨ Final Answer:")
434
- st.success(content)
435
- except Exception as e:
436
- st.error(f"""
437
- **Processing Error**
438
- {str(e)}
439
- Please check:
440
- - API key configuration
441
- - Account balance
442
- - Network connection
443
- """)
444
- else:
445
- st.warning("⚠️ Please enter a question first!")
446
-
447
- with col2:
448
- st.markdown("""
449
- ### 🎯 How to Use
450
- 1. Enter your question in the text box
451
- 2. Click the search button
452
- 3. Review processing steps
453
- 4. See final answer
454
-
455
- ### 💡 Example Questions
456
- - What's new in AI image recognition?
457
- - How is Project B progressing?
458
- - Recent machine learning trends?
459
- """)
460
 
461
  if __name__ == "__main__":
462
  main()
 
9
  from langgraph.prebuilt import ToolNode
10
  from langgraph.graph.message import add_messages
11
  from typing_extensions import TypedDict, Annotated
12
+ from typing import Sequence, List, Dict
13
  import chromadb
14
  import re
15
  import os
16
  import streamlit as st
17
  import requests
18
+ import hashlib
19
  from langchain.tools.retriever import create_retriever_tool
20
+ from langchain.schema import Document
21
 
22
# ------------------------------
# Configuration
# ------------------------------
# The DeepSeek API key must arrive via the environment (HF Space secret).
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")

# Fail fast with setup instructions when the key is absent.
if not DEEPSEEK_API_KEY:
    st.error("""
    **Critical Configuration Missing**
    DeepSeek API key not found. Please ensure you have:
    1. Created a Hugging Face Space secret named DEEPSEEK_API_KEY
    2. Added your valid API key to the Space secrets
    3. Restarted the Space after configuration
    """)
    st.stop()
38
 
 
42
# ------------------------------
# ChromaDB Client Configuration
# ------------------------------
# Persistent on-disk client so collections survive process restarts;
# telemetry disabled. NOTE: the committed code was missing the closing
# parenthesis of this call (a SyntaxError) — restored here.
chroma_client = chromadb.PersistentClient(
    path="chroma_db",
    settings=chromadb.config.Settings(anonymized_telemetry=False),
)
49
# ------------------------------
# Document Processing Utilities
# ------------------------------
def deduplicate_docs(docs: "List[Document]") -> "List[Document]":
    """Drop documents whose page_content duplicates an earlier one.

    Identity is the SHA-256 digest of each document's text; first-seen
    order is preserved.
    """
    seen_hashes = set()
    kept = []
    for doc in docs:
        digest = hashlib.sha256(doc.page_content.encode()).hexdigest()
        if digest in seen_hashes:
            continue
        seen_hashes.add(digest)
        kept.append(doc)
    return kept
62
 
63
  # ------------------------------
64
+ # Data Preparation
65
  # ------------------------------
66
  research_texts = [
67
  "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
 
75
  "Product Y: In the Performance Optimization Stage Before Release"
76
  ]
77
 
78
# Split the corpora into chunks, tagging each chunk with its source
# corpus and a stable per-text document id.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=150,
    chunk_overlap=20,
    length_function=len,
    add_start_index=True,
)

research_docs = splitter.create_documents(
    research_texts,
    metadatas=[
        {"source": "research", "doc_id": f"res_{idx}"}
        for idx in range(len(research_texts))
    ],
)

development_docs = splitter.create_documents(
    development_texts,
    metadatas=[
        {"source": "development", "doc_id": f"dev_{idx}"}
        for idx in range(len(development_texts))
    ],
)
95
 
96
# ------------------------------
# Vector Store Initialization
# ------------------------------
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
    # `dimensions` is a first-class OpenAIEmbeddings constructor parameter;
    # routing it through model_kwargs triggers a "should be specified
    # explicitly" warning and is not the supported spelling.
    dimensions=1024,
)

research_vectorstore = Chroma.from_documents(
    documents=research_docs,
    embedding=embeddings,
    client=chroma_client,
    collection_name="research_collection",
    # Cosine distance for the HNSW index (default is l2).
    collection_metadata={"hnsw:space": "cosine"},
)

development_vectorstore = Chroma.from_documents(
    documents=development_docs,
    embedding=embeddings,
    client=chroma_client,
    collection_name="development_collection",
    collection_metadata={"hnsw:space": "cosine"},
)
119
 
 
 
 
120
# ------------------------------
# Retriever Tools Configuration
# ------------------------------
# Research retrieval uses MMR to diversify hits; development retrieval
# uses plain similarity search.
research_retriever = research_vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5, "fetch_k": 10},
)

development_retriever = development_vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

_research_tool = create_retriever_tool(
    research_retriever,
    "research_database",
    "Searches through academic papers and research reports for technical AI advancements",
)
_development_tool = create_retriever_tool(
    development_retriever,
    "development_database",
    "Accesses current project statuses and development timelines",
)
tools = [_research_tool, _development_tool]
145
 
146
# ------------------------------
# Agent State Definition
# ------------------------------
class AgentState(TypedDict):
    """Shared LangGraph state: the accumulated conversation messages."""

    # add_messages appends (rather than replaces) on each node return.
    messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
151
 
152
# ------------------------------
# Core Agent Function
# ------------------------------
def agent(state: AgentState):
    """Main decision-making agent handling user queries.

    Asks DeepSeek to classify the latest query; on a SEARCH_RESEARCH /
    SEARCH_DEV directive it runs the matching retriever and attaches the
    deduplicated documents to the returned AIMessage, otherwise it passes
    the model's direct answer through.
    """
    print("\n--- AGENT EXECUTION START ---")
    messages = state["messages"]

    try:
        # Use the most recent message as the working query. FIX: the
        # previous isinstance(HumanMessage) guard blanked the query on the
        # rewrite -> agent loop, where the last message is an AIMessage
        # holding the revised query.
        user_message = messages[-1].content if messages else ""

        prompt = f"""Analyze this user query and determine the appropriate action:

Query: {user_message}

Response Format:
- If research-related (technical details, academic concepts), respond:
SEARCH_RESEARCH: [keywords]

- If development-related (project status, timelines), respond:
SEARCH_DEV: [keywords]

- If general question, answer directly
- If unclear, request clarification
"""

        headers = {
            "Accept": "application/json",
            "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
            "Content-Type": "application/json",
        }
        data = {
            "model": "deepseek-chat",
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.5,
            "max_tokens": 256,
        }

        response = requests.post(
            "https://api.deepseek.com/v1/chat/completions",
            headers=headers,
            json=data,
            timeout=30,
        )
        response.raise_for_status()

        response_text = response.json()['choices'][0]['message']['content']
        print(f"Agent Decision: {response_text}")

        if "SEARCH_RESEARCH:" in response_text:
            query = response_text.split("SEARCH_RESEARCH:")[1].strip()
            unique_results = deduplicate_docs(research_retriever.invoke(query))
            return {
                "messages": [
                    AIMessage(
                        content=f'Action: research_database\nQuery: "{query}"\nResults: {len(unique_results)} relevant documents',
                        additional_kwargs={"documents": unique_results},
                    )
                ]
            }

        elif "SEARCH_DEV:" in response_text:
            query = response_text.split("SEARCH_DEV:")[1].strip()
            unique_results = deduplicate_docs(development_retriever.invoke(query))
            return {
                "messages": [
                    AIMessage(
                        content=f'Action: development_database\nQuery: "{query}"\nResults: {len(unique_results)} relevant documents',
                        additional_kwargs={"documents": unique_results},
                    )
                ]
            }

        else:
            return {"messages": [AIMessage(content=response_text)]}

    except requests.exceptions.HTTPError as e:
        error_msg = f"API Error: {e.response.status_code} - {e.response.text}"
        if "insufficient balance" in e.response.text.lower():
            error_msg += "\n\nPlease check your DeepSeek account balance."
        return {"messages": [AIMessage(content=error_msg)]}
    except Exception as e:
        return {"messages": [AIMessage(content=f"Processing Error: {str(e)}")]}
244
 
245
# ------------------------------
# Document Evaluation Functions
# ------------------------------
def simple_grade_documents(state: "AgentState"):
    """Route to 'generate' when the last message carries retrieved
    documents, otherwise to 'rewrite'."""
    last_message = state["messages"][-1]
    if last_message.additional_kwargs.get("documents"):
        print("--- Relevant Documents Found ---")
        return "generate"
    print("--- No Valid Documents Found ---")
    return "rewrite"
259
 
260
def generate(state: AgentState):
    """Synthesize the final answer from the retrieved documents.

    Recovers the user's question and the documents attached to the last
    message, asks DeepSeek for a structured synthesis, and returns the
    answer suffixed with its corpus sources.
    """
    print("\n--- GENERATING FINAL ANSWER ---")
    messages = state["messages"]

    try:
        # Original question = first HumanMessage in the conversation.
        user_question = next(
            msg.content for msg in messages if isinstance(msg, HumanMessage)
        )
        documents = messages[-1].additional_kwargs.get("documents", [])

        # Distinct corpus labels ("research"/"development") for attribution.
        sources = list({doc.metadata.get('source', 'unknown') for doc in documents})

        prompt = f"""Synthesize a technical answer using these documents:

Question: {user_question}

Documents:
{[doc.page_content for doc in documents]}

Requirements:
1. Highlight quantitative metrics
2. Cite document sources (research/development)
3. Note temporal context
4. List potential applications
5. Mention limitations/gaps
"""

        headers = {
            "Accept": "application/json",
            "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": "deepseek-chat",
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.3,
            "max_tokens": 1024,
        }

        response = requests.post(
            "https://api.deepseek.com/v1/chat/completions",
            headers=headers,
            json=payload,
            # Synthesis is the longest call; allow extra time.
            timeout=45,
        )
        response.raise_for_status()

        answer_body = response.json()['choices'][0]['message']['content']
        formatted_answer = f"{answer_body}\n\nSources: {', '.join(sources)}"
        return {"messages": [AIMessage(content=formatted_answer)]}

    except Exception as e:
        return {"messages": [AIMessage(content=f"Generation Error: {str(e)}")]}
 
323
 
324
def rewrite(state: AgentState):
    """Ask the model to clarify the user's original query.

    Emits an AIMessage of the form "Revised Query: ..." which the graph
    loops back to the agent node for another routing attempt.
    """
    print("\n--- REWRITING QUERY ---")
    messages = state["messages"]

    try:
        # The query to clarify is the first HumanMessage in the state.
        original_query = next(
            msg.content for msg in messages if isinstance(msg, HumanMessage)
        )

        headers = {
            "Accept": "application/json",
            "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": "deepseek-chat",
            "messages": [{
                "role": "user",
                "content": f"Clarify this query while preserving technical intent: {original_query}"
            }],
            "temperature": 0.5,
            "max_tokens": 256,
        }

        response = requests.post(
            "https://api.deepseek.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=30,
        )
        response.raise_for_status()

        rewritten = response.json()['choices'][0]['message']['content']
        return {"messages": [AIMessage(content=f"Revised Query: {rewritten}")]}

    except Exception as e:
        return {"messages": [AIMessage(content=f"Rewriting Error: {str(e)}")]}
 
 
 
 
361
 
362
# ------------------------------
# Workflow Configuration
# ------------------------------
workflow = StateGraph(AgentState)

# Node registration.
workflow.add_node("agent", agent)
workflow.add_node("retrieve", ToolNode(tools))
workflow.add_node("generate", generate)
workflow.add_node("rewrite", rewrite)

workflow.set_entry_point("agent")


def _route_agent(state: AgentState):
    """Send the agent's reply to 'retrieve' when it names a tool, else END."""
    last_content = state["messages"][-1].content
    return "tools" if any(tool.name in last_content for tool in tools) else END


workflow.add_conditional_edges(
    "agent",
    _route_agent,
    {"tools": "retrieve", END: END},
)

workflow.add_conditional_edges(
    "retrieve",
    simple_grade_documents,
    {"generate": "generate", "rewrite": "rewrite"},
)

workflow.add_edge("generate", END)
workflow.add_edge("rewrite", "agent")

app = workflow.compile()
395
 
396
  # ------------------------------
397
+ # Streamlit UI Implementation
 
 
 
 
 
 
 
 
 
 
398
  # ------------------------------
399
def _inject_theme():
    """Apply the dark-theme CSS used across the app."""
    st.markdown("""
    <style>
    .stApp {
        background-color: #0E1117;
        color: #FAFAFA;
    }

    .stTextArea textarea {
        background-color: #262730 !important;
        color: #FAFAFA !important;
        border: 1px solid #3D4051;
    }

    .stButton>button {
        background-color: #2E8B57;
        color: white;
        border-radius: 4px;
        padding: 0.5rem 1rem;
        transition: all 0.3s;
    }

    .stButton>button:hover {
        background-color: #3CB371;
        transform: scale(1.02);
    }

    .stAlert {
        background-color: #1A1D23 !important;
        border: 1px solid #3D4051;
    }

    .stExpander {
        background-color: #1A1D23;
        border: 1px solid #3D4051;
    }

    .data-source {
        padding: 0.5rem;
        margin: 0.5rem 0;
        background-color: #1A1D23;
        border-left: 3px solid #2E8B57;
        border-radius: 4px;
    }
    </style>
    """, unsafe_allow_html=True)


def _render_sidebar():
    """Describe the two corpora available to the assistant."""
    with st.sidebar:
        st.header("Technical Databases")
        with st.expander("Research Corpus", expanded=True):
            st.markdown("""
            - AI Model Architectures
            - Machine Learning Advances
            - Quantum Computing Applications
            - Algorithmic Breakthroughs
            """)
        with st.expander("Development Tracking", expanded=True):
            st.markdown("""
            - Project Milestones
            - System Architecture
            - Deployment Status
            - Performance Metrics
            """)


def main():
    """Main application interface."""
    st.set_page_config(
        page_title="AI Research Assistant",
        layout="centered",
        initial_sidebar_state="expanded",
    )

    _inject_theme()
    _render_sidebar()

    st.title("🧠 AI Research Assistant")
    st.caption("Technical Analysis and Development Tracking System")

    query = st.text_area(
        "Enter Technical Query:",
        height=150,
        placeholder="Example: Compare transformer architectures for medical imaging analysis...",
    )

    if st.button("Execute Analysis", use_container_width=True):
        if not query:
            st.warning("Please input a technical query")
            return

        with st.status("Processing...", expanded=True) as status:
            try:
                events = []
                # Stream workflow events, surfacing progress per node and
                # stashing intermediate results in session_state so the
                # answer survives Streamlit reruns.
                for event in app.stream({"messages": [HumanMessage(content=query)]}):
                    events.append(event)

                    if 'agent' in event:
                        status.update(label="Decision Making", state="running")
                        st.session_state.agent_step = event['agent']

                    if 'retrieve' in event:
                        status.update(label="Document Retrieval", state="running")
                        st.session_state.retrieved = event['retrieve']

                    if 'generate' in event:
                        status.update(label="Synthesizing Answer", state="running")
                        st.session_state.final_answer = event['generate']

                status.update(label="Analysis Complete", state="complete")

            except Exception as e:
                status.update(label="Processing Failed", state="error")
                st.error(f"""
                **System Error**
                {str(e)}
                Please verify:
                - API key validity
                - Network connectivity
                - Query complexity
                """)

    if 'final_answer' in st.session_state:
        answer = st.session_state.final_answer['messages'][0].content

        with st.container():
            st.subheader("Technical Analysis")
            st.markdown("---")
            st.markdown(answer)

            if "Sources:" in answer:
                st.markdown("""
                <div class="data-source">
                ℹ️ Document sources are derived from the internal research database
                </div>
                """, unsafe_allow_html=True)


if __name__ == "__main__":
    main()