mgbam committed on
Commit
9ba4314
·
verified ·
1 Parent(s): 81de628

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +261 -336
app.py CHANGED
@@ -9,30 +9,27 @@ from langgraph.graph import END, StateGraph
9
  from langgraph.prebuilt import ToolNode
10
  from langgraph.graph.message import add_messages
11
  from typing_extensions import TypedDict, Annotated
12
- from typing import Sequence, List, Dict
13
  import chromadb
14
  import re
15
  import os
16
  import streamlit as st
17
  import requests
18
- import hashlib
19
  from langchain.tools.retriever import create_retriever_tool
20
- from langchain.schema import Document
21
 
22
  # ------------------------------
23
  # Configuration
24
  # ------------------------------
25
- # Get DeepSeek API key from environment variables
26
  DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
27
 
28
- # Validate API key configuration
29
  if not DEEPSEEK_API_KEY:
30
  st.error("""
31
- **Critical Configuration Missing**
32
- DeepSeek API key not found. Please ensure you have:
33
- 1. Created a Hugging Face Space secret named DEEPSEEK_API_KEY
34
- 2. Added your valid API key to the Space secrets
35
- 3. Restarted the Space after configuration
36
  """)
37
  st.stop()
38
 
@@ -42,28 +39,10 @@ os.makedirs("chroma_db", exist_ok=True)
42
  # ------------------------------
43
  # ChromaDB Client Configuration
44
  # ------------------------------
45
- # After (corrected)
46
- chroma_client = chromadb.PersistentClient(
47
- path="chroma_db",
48
- settings=chromadb.config.Settings(anonymized_telemetry=False)
49
- )
50
-
51
- # ------------------------------
52
- # Document Processing Utilities
53
- # ------------------------------
54
- def deduplicate_docs(docs: List[Document]) -> List[Document]:
55
- """Remove duplicate documents using content hashing"""
56
- seen = set()
57
- unique_docs = []
58
- for doc in docs:
59
- content_hash = hashlib.sha256(doc.page_content.encode()).hexdigest()
60
- if content_hash not in seen:
61
- seen.add(content_hash)
62
- unique_docs.append(doc)
63
- return unique_docs
64
 
65
  # ------------------------------
66
- # Data Preparation
67
  # ------------------------------
68
  research_texts = [
69
  "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
@@ -77,461 +56,407 @@ development_texts = [
77
  "Product Y: In the Performance Optimization Stage Before Release"
78
  ]
79
 
80
- # Create documents with metadata
81
- splitter = RecursiveCharacterTextSplitter(
82
- chunk_size=150,
83
- chunk_overlap=20,
84
- length_function=len,
85
- add_start_index=True
86
- )
87
-
88
- research_docs = splitter.create_documents(
89
- research_texts,
90
- metadatas=[{"source": "research", "doc_id": f"res_{i}"} for i in range(len(research_texts))]
91
- )
92
-
93
- development_docs = splitter.create_documents(
94
- development_texts,
95
- metadatas=[{"source": "development", "doc_id": f"dev_{i}"} for i in range(len(development_texts))]
96
- )
97
 
98
  # ------------------------------
99
- # Vector Store Initialization
100
  # ------------------------------
101
  embeddings = OpenAIEmbeddings(
102
  model="text-embedding-3-large",
103
- model_kwargs={"dimensions": 1024}
104
  )
105
 
106
  research_vectorstore = Chroma.from_documents(
107
  documents=research_docs,
108
  embedding=embeddings,
109
  client=chroma_client,
110
- collection_name="research_collection",
111
- collection_metadata={"hnsw:space": "cosine"}
112
  )
113
 
114
  development_vectorstore = Chroma.from_documents(
115
  documents=development_docs,
116
  embedding=embeddings,
117
  client=chroma_client,
118
- collection_name="development_collection",
119
- collection_metadata={"hnsw:space": "cosine"}
120
  )
121
 
 
 
 
122
  # ------------------------------
123
- # Retriever Tools Configuration
124
  # ------------------------------
125
- research_retriever = research_vectorstore.as_retriever(
126
- search_type="mmr",
127
- search_kwargs={"k": 5, "fetch_k": 10}
 
128
  )
129
 
130
- development_retriever = development_vectorstore.as_retriever(
131
- search_type="similarity",
132
- search_kwargs={"k": 5}
 
133
  )
134
 
135
- tools = [
136
- create_retriever_tool(
137
- research_retriever,
138
- "research_database",
139
- "Searches through academic papers and research reports for technical AI advancements"
140
- ),
141
- create_retriever_tool(
142
- development_retriever,
143
- "development_database",
144
- "Accesses current project statuses and development timelines"
145
- )
146
- ]
147
 
148
  # ------------------------------
149
- # Agent State Definition
150
  # ------------------------------
151
  class AgentState(TypedDict):
152
  messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
153
 
154
- # ------------------------------
155
- # Core Agent Function
156
- # ------------------------------
157
  def agent(state: AgentState):
158
- """Main decision-making agent handling user queries"""
159
- print("\n--- AGENT EXECUTION START ---")
160
  messages = state["messages"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
  try:
163
- # Extract user message content
164
- user_message = messages[-1].content if isinstance(messages[-1], HumanMessage) else ""
165
-
166
- # Construct analysis prompt
167
- prompt = f"""Analyze this user query and determine the appropriate action:
168
-
169
- Query: {user_message}
170
-
171
- Response Format:
172
- - If research-related (technical details, academic concepts), respond:
173
- SEARCH_RESEARCH: [keywords]
174
-
175
- - If development-related (project status, timelines), respond:
176
- SEARCH_DEV: [keywords]
177
-
178
- - If general question, answer directly
179
- - If unclear, request clarification
180
- """
181
-
182
- # API request configuration
183
- headers = {
184
- "Accept": "application/json",
185
- "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
186
- "Content-Type": "application/json"
187
- }
188
-
189
- data = {
190
- "model": "deepseek-chat",
191
- "messages": [{"role": "user", "content": prompt}],
192
- "temperature": 0.5,
193
- "max_tokens": 256
194
- }
195
-
196
- # Execute API call
197
  response = requests.post(
198
  "https://api.deepseek.com/v1/chat/completions",
199
  headers=headers,
200
  json=data,
 
201
  timeout=30
202
  )
203
  response.raise_for_status()
204
 
205
- # Process response
206
  response_text = response.json()['choices'][0]['message']['content']
207
- print(f"Agent Decision: {response_text}")
208
-
209
- # Handle different response types
210
  if "SEARCH_RESEARCH:" in response_text:
211
  query = response_text.split("SEARCH_RESEARCH:")[1].strip()
212
  results = research_retriever.invoke(query)
213
- unique_results = deduplicate_docs(results)
214
- return {
215
- "messages": [
216
- AIMessage(
217
- content=f'Action: research_database\nQuery: "{query}"\nResults: {len(unique_results)} relevant documents',
218
- additional_kwargs={"documents": unique_results}
219
- )
220
- ]
221
- }
222
 
223
  elif "SEARCH_DEV:" in response_text:
224
  query = response_text.split("SEARCH_DEV:")[1].strip()
225
  results = development_retriever.invoke(query)
226
- unique_results = deduplicate_docs(results)
227
- return {
228
- "messages": [
229
- AIMessage(
230
- content=f'Action: development_database\nQuery: "{query}"\nResults: {len(unique_results)} relevant documents',
231
- additional_kwargs={"documents": unique_results}
232
- )
233
- ]
234
- }
235
 
236
  else:
237
  return {"messages": [AIMessage(content=response_text)]}
238
 
239
- except requests.exceptions.HTTPError as e:
240
- error_msg = f"API Error: {e.response.status_code} - {e.response.text}"
241
- if "insufficient balance" in e.response.text.lower():
242
- error_msg += "\n\nPlease check your DeepSeek account balance."
243
- return {"messages": [AIMessage(content=error_msg)]}
244
  except Exception as e:
245
- return {"messages": [AIMessage(content=f"Processing Error: {str(e)}")]}
 
 
 
246
 
247
- # ------------------------------
248
- # Document Evaluation Functions
249
- # ------------------------------
250
  def simple_grade_documents(state: AgentState):
251
- """Evaluate retrieved document relevance"""
252
  messages = state["messages"]
253
  last_message = messages[-1]
 
254
 
255
- if last_message.additional_kwargs.get("documents"):
256
- print("--- Relevant Documents Found ---")
257
  return "generate"
258
  else:
259
- print("--- No Valid Documents Found ---")
260
  return "rewrite"
261
 
262
  def generate(state: AgentState):
263
- """Generate final answer from documents"""
264
- print("\n--- GENERATING FINAL ANSWER ---")
265
  messages = state["messages"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
  try:
268
- # Extract context
269
- user_question = next(msg.content for msg in messages if isinstance(msg, HumanMessage))
270
- documents = messages[-1].additional_kwargs.get("documents", [])
271
-
272
- # Format document sources
273
- sources = list(set(
274
- doc.metadata.get('source', 'unknown')
275
- for doc in documents
276
- ))
277
-
278
- # Create analysis prompt
279
- prompt = f"""Synthesize a technical answer using these documents:
280
-
281
- Question: {user_question}
282
-
283
- Documents:
284
- {[doc.page_content for doc in documents]}
285
-
286
- Requirements:
287
- 1. Highlight quantitative metrics
288
- 2. Cite document sources (research/development)
289
- 3. Note temporal context
290
- 4. List potential applications
291
- 5. Mention limitations/gaps
292
- """
293
-
294
- # API request configuration
295
- headers = {
296
- "Accept": "application/json",
297
- "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
298
- "Content-Type": "application/json"
299
- }
300
-
301
- data = {
302
- "model": "deepseek-chat",
303
- "messages": [{"role": "user", "content": prompt}],
304
- "temperature": 0.3,
305
- "max_tokens": 1024
306
- }
307
-
308
- # Execute API call
309
  response = requests.post(
310
  "https://api.deepseek.com/v1/chat/completions",
311
  headers=headers,
312
  json=data,
313
- timeout=45
 
314
  )
315
  response.raise_for_status()
316
 
317
- # Format final answer
318
  response_text = response.json()['choices'][0]['message']['content']
319
- formatted_answer = f"{response_text}\n\nSources: {', '.join(sources)}"
320
-
321
- return {"messages": [AIMessage(content=formatted_answer)]}
322
-
323
  except Exception as e:
324
- return {"messages": [AIMessage(content=f"Generation Error: {str(e)}")]}
 
325
 
326
  def rewrite(state: AgentState):
327
- """Rewrite unclear queries"""
328
- print("\n--- REWRITING QUERY ---")
329
  messages = state["messages"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
 
331
  try:
332
- original_query = next(msg.content for msg in messages if isinstance(msg, HumanMessage))
333
-
334
- headers = {
335
- "Accept": "application/json",
336
- "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
337
- "Content-Type": "application/json"
338
- }
339
-
340
- data = {
341
- "model": "deepseek-chat",
342
- "messages": [{
343
- "role": "user",
344
- "content": f"Clarify this query while preserving technical intent: {original_query}"
345
- }],
346
- "temperature": 0.5,
347
- "max_tokens": 256
348
- }
349
-
350
  response = requests.post(
351
  "https://api.deepseek.com/v1/chat/completions",
352
  headers=headers,
353
  json=data,
 
354
  timeout=30
355
  )
356
  response.raise_for_status()
357
 
358
- rewritten = response.json()['choices'][0]['message']['content']
359
- return {"messages": [AIMessage(content=f"Revised Query: {rewritten}")]}
360
-
361
  except Exception as e:
362
- return {"messages": [AIMessage(content=f"Rewriting Error: {str(e)}")]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
 
364
  # ------------------------------
365
- # Workflow Configuration
366
  # ------------------------------
367
  workflow = StateGraph(AgentState)
368
 
369
- # Node Registration
370
  workflow.add_node("agent", agent)
371
- workflow.add_node("retrieve", ToolNode(tools))
372
- workflow.add_node("generate", generate)
373
  workflow.add_node("rewrite", rewrite)
 
374
 
375
- # Workflow Structure
376
  workflow.set_entry_point("agent")
377
 
 
378
  workflow.add_conditional_edges(
379
  "agent",
380
- lambda state: "tools" if any(
381
- tool.name in state["messages"][-1].content
382
- for tool in tools
383
- ) else END,
384
- {"tools": "retrieve", END: END}
385
  )
386
 
387
  workflow.add_conditional_edges(
388
  "retrieve",
389
  simple_grade_documents,
390
- {"generate": "generate", "rewrite": "rewrite"}
 
 
 
391
  )
392
 
393
  workflow.add_edge("generate", END)
394
  workflow.add_edge("rewrite", "agent")
395
 
 
396
  app = workflow.compile()
397
 
398
  # ------------------------------
399
- # Streamlit UI Implementation
 
 
 
 
 
 
 
 
 
 
400
  # ------------------------------
401
  def main():
402
- """Main application interface"""
403
  st.set_page_config(
404
- page_title="AI Research Assistant",
405
- layout="centered",
406
  initial_sidebar_state="expanded"
407
  )
408
 
409
- # Dark Theme Configuration
410
  st.markdown("""
411
  <style>
412
  .stApp {
413
- background-color: #0E1117;
414
- color: #FAFAFA;
415
  }
416
 
417
  .stTextArea textarea {
418
- background-color: #262730 !important;
419
- color: #FAFAFA !important;
420
- border: 1px solid #3D4051;
421
  }
422
 
423
- .stButton>button {
424
- background-color: #2E8B57;
425
  color: white;
426
- border-radius: 4px;
427
- padding: 0.5rem 1rem;
428
  transition: all 0.3s;
429
  }
430
 
431
- .stButton>button:hover {
432
- background-color: #3CB371;
433
  transform: scale(1.02);
434
  }
435
 
436
- .stAlert {
437
- background-color: #1A1D23 !important;
438
- border: 1px solid #3D4051;
439
  }
440
 
441
- .stExpander {
442
- background-color: #1A1D23;
443
- border: 1px solid #3D4051;
444
  }
445
 
446
- .data-source {
447
- padding: 0.5rem;
448
- margin: 0.5rem 0;
449
- background-color: #1A1D23;
450
- border-left: 3px solid #2E8B57;
451
- border-radius: 4px;
452
  }
453
  </style>
454
  """, unsafe_allow_html=True)
455
 
456
- # Sidebar Configuration
457
  with st.sidebar:
458
- st.header("Technical Databases")
459
- with st.expander("Research Corpus", expanded=True):
460
- st.markdown("""
461
- - AI Model Architectures
462
- - Machine Learning Advances
463
- - Quantum Computing Applications
464
- - Algorithmic Breakthroughs
465
- """)
466
-
467
- with st.expander("Development Tracking", expanded=True):
468
- st.markdown("""
469
- - Project Milestones
470
- - System Architecture
471
- - Deployment Status
472
- - Performance Metrics
473
- """)
474
-
475
- # Main Interface
476
- st.title("🧠 AI Research Assistant")
477
- st.caption("Technical Analysis and Development Tracking System")
478
-
479
- query = st.text_area(
480
- "Enter Technical Query:",
481
- height=150,
482
- placeholder="Example: Compare transformer architectures for medical imaging analysis..."
483
- )
484
-
485
- if st.button("Execute Analysis", use_container_width=True):
486
- if not query:
487
- st.warning("Please input a technical query")
488
- return
489
 
490
- with st.status("Processing...", expanded=True) as status:
491
- try:
492
- events = []
493
- for event in app.stream({"messages": [HumanMessage(content=query)]}):
494
- events.append(event)
495
-
496
- if 'agent' in event:
497
- status.update(label="Decision Making", state="running")
498
- st.session_state.agent_step = event['agent']
499
-
500
- if 'retrieve' in event:
501
- status.update(label="Document Retrieval", state="running")
502
- st.session_state.retrieved = event['retrieve']
 
 
 
503
 
504
- if 'generate' in event:
505
- status.update(label="Synthesizing Answer", state="running")
506
- st.session_state.final_answer = event['generate']
507
-
508
- status.update(label="Analysis Complete", state="complete")
509
-
510
- except Exception as e:
511
- status.update(label="Processing Failed", state="error")
512
- st.error(f"""
513
- **System Error**
514
- {str(e)}
515
- Please verify:
516
- - API key validity
517
- - Network connectivity
518
- - Query complexity
519
- """)
520
-
521
- if 'final_answer' in st.session_state:
522
- answer = st.session_state.final_answer['messages'][0].content
523
-
524
- with st.container():
525
- st.subheader("Technical Analysis")
526
- st.markdown("---")
527
- st.markdown(answer)
528
-
529
- if "Sources:" in answer:
530
- st.markdown("""
531
- <div class="data-source">
532
- ℹ️ Document sources are derived from the internal research database
533
- </div>
534
- """, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
535
 
536
  if __name__ == "__main__":
537
  main()
 
9
  from langgraph.prebuilt import ToolNode
10
  from langgraph.graph.message import add_messages
11
  from typing_extensions import TypedDict, Annotated
12
+ from typing import Sequence
13
  import chromadb
14
  import re
15
  import os
16
  import streamlit as st
17
  import requests
 
18
  from langchain.tools.retriever import create_retriever_tool
 
19
 
20
  # ------------------------------
21
  # Configuration
22
  # ------------------------------
23
+ # Get DeepSeek API key from Hugging Face Space secrets
24
  DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
25
 
 
26
  if not DEEPSEEK_API_KEY:
27
  st.error("""
28
+ **Missing API Configuration**
29
+ Please configure your DeepSeek API key in Hugging Face Space secrets:
30
+ 1. Go to your Space's Settings
31
+ 2. Click on 'Repository secrets'
32
+ 3. Add a secret named DEEPSEEK_API_KEY
33
  """)
34
  st.stop()
35
 
 
39
  # ------------------------------
40
  # ChromaDB Client Configuration
41
  # ------------------------------
42
+ chroma_client = chromadb.PersistentClient(path="chroma_db")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  # ------------------------------
45
+ # Dummy Data: Research & Development Texts
46
  # ------------------------------
47
  research_texts = [
48
  "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
 
56
  "Product Y: In the Performance Optimization Stage Before Release"
57
  ]
58
 
59
+ # ------------------------------
60
+ # Text Splitting & Document Creation
61
+ # ------------------------------
62
+ splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)
63
+ research_docs = splitter.create_documents(research_texts)
64
+ development_docs = splitter.create_documents(development_texts)
 
 
 
 
 
 
 
 
 
 
 
65
 
66
  # ------------------------------
67
+ # Creating Vector Stores with Embeddings
68
  # ------------------------------
69
  embeddings = OpenAIEmbeddings(
70
  model="text-embedding-3-large",
71
+ # dimensions=1024 # Uncomment if needed
72
  )
73
 
74
  research_vectorstore = Chroma.from_documents(
75
  documents=research_docs,
76
  embedding=embeddings,
77
  client=chroma_client,
78
+ collection_name="research_collection"
 
79
  )
80
 
81
  development_vectorstore = Chroma.from_documents(
82
  documents=development_docs,
83
  embedding=embeddings,
84
  client=chroma_client,
85
+ collection_name="development_collection"
 
86
  )
87
 
88
+ research_retriever = research_vectorstore.as_retriever()
89
+ development_retriever = development_vectorstore.as_retriever()
90
+
91
  # ------------------------------
92
+ # Creating Retriever Tools
93
  # ------------------------------
94
+ research_tool = create_retriever_tool(
95
+ research_retriever,
96
+ "research_db_tool",
97
+ "Search information from the research database."
98
  )
99
 
100
+ development_tool = create_retriever_tool(
101
+ development_retriever,
102
+ "development_db_tool",
103
+ "Search information from the development database."
104
  )
105
 
106
+ tools = [research_tool, development_tool]
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  # ------------------------------
109
+ # Agent Function & Workflow Functions
110
  # ------------------------------
111
  class AgentState(TypedDict):
112
  messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
113
 
 
 
 
114
  def agent(state: AgentState):
115
+ print("---CALL AGENT---")
 
116
  messages = state["messages"]
117
+
118
+ if isinstance(messages[0], tuple):
119
+ user_message = messages[0][1]
120
+ else:
121
+ user_message = messages[0].content
122
+
123
+ prompt = f"""Given this user question: "{user_message}"
124
+ If it's about research or academic topics, respond EXACTLY in this format:
125
+ SEARCH_RESEARCH: <search terms>
126
+
127
+ If it's about development status, respond EXACTLY in this format:
128
+ SEARCH_DEV: <search terms>
129
+
130
+ Otherwise, just answer directly.
131
+ """
132
+
133
+ headers = {
134
+ "Accept": "application/json",
135
+ "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
136
+ "Content-Type": "application/json"
137
+ }
138
+
139
+ data = {
140
+ "model": "deepseek-chat",
141
+ "messages": [{"role": "user", "content": prompt}],
142
+ "temperature": 0.7,
143
+ "max_tokens": 1024
144
+ }
145
 
146
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  response = requests.post(
148
  "https://api.deepseek.com/v1/chat/completions",
149
  headers=headers,
150
  json=data,
151
+ verify=False,
152
  timeout=30
153
  )
154
  response.raise_for_status()
155
 
 
156
  response_text = response.json()['choices'][0]['message']['content']
157
+ print("Raw response:", response_text)
158
+
 
159
  if "SEARCH_RESEARCH:" in response_text:
160
  query = response_text.split("SEARCH_RESEARCH:")[1].strip()
161
  results = research_retriever.invoke(query)
162
+ return {"messages": [AIMessage(content=f'Action: research_db_tool\n{{"query": "{query}"}}\n\nResults: {str(results)}')]}
 
 
 
 
 
 
 
 
163
 
164
  elif "SEARCH_DEV:" in response_text:
165
  query = response_text.split("SEARCH_DEV:")[1].strip()
166
  results = development_retriever.invoke(query)
167
+ return {"messages": [AIMessage(content=f'Action: development_db_tool\n{{"query": "{query}"}}\n\nResults: {str(results)}')]}
 
 
 
 
 
 
 
 
168
 
169
  else:
170
  return {"messages": [AIMessage(content=response_text)]}
171
 
 
 
 
 
 
172
  except Exception as e:
173
+ error_msg = f"API Error: {str(e)}"
174
+ if "Insufficient Balance" in str(e):
175
+ error_msg += "\n\nPlease check your DeepSeek API account balance."
176
+ return {"messages": [AIMessage(content=error_msg)]}
177
 
 
 
 
178
  def simple_grade_documents(state: AgentState):
 
179
  messages = state["messages"]
180
  last_message = messages[-1]
181
+ print("Evaluating message:", last_message.content)
182
 
183
+ if "Results: [Document" in last_message.content:
184
+ print("---DOCS FOUND, GO TO GENERATE---")
185
  return "generate"
186
  else:
187
+ print("---NO DOCS FOUND, TRY REWRITE---")
188
  return "rewrite"
189
 
190
  def generate(state: AgentState):
191
+ print("---GENERATE FINAL ANSWER---")
 
192
  messages = state["messages"]
193
+ question = messages[0].content if isinstance(messages[0], tuple) else messages[0].content
194
+ last_message = messages[-1]
195
+
196
+ docs = ""
197
+ if "Results: [" in last_message.content:
198
+ results_start = last_message.content.find("Results: [")
199
+ docs = last_message.content[results_start:]
200
+ print("Documents found:", docs)
201
+
202
+ headers = {
203
+ "Accept": "application/json",
204
+ "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
205
+ "Content-Type": "application/json"
206
+ }
207
+
208
+ prompt = f"""Based on these research documents, summarize the latest advancements in AI:
209
+ Question: {question}
210
+ Documents: {docs}
211
+ Focus on extracting and synthesizing the key findings from the research papers.
212
+ """
213
+
214
+ data = {
215
+ "model": "deepseek-chat",
216
+ "messages": [{
217
+ "role": "user",
218
+ "content": prompt
219
+ }],
220
+ "temperature": 0.7,
221
+ "max_tokens": 1024
222
+ }
223
 
224
  try:
225
+ print("Sending generate request to API...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  response = requests.post(
227
  "https://api.deepseek.com/v1/chat/completions",
228
  headers=headers,
229
  json=data,
230
+ verify=False,
231
+ timeout=30
232
  )
233
  response.raise_for_status()
234
 
 
235
  response_text = response.json()['choices'][0]['message']['content']
236
+ print("Final Answer:", response_text)
237
+ return {"messages": [AIMessage(content=response_text)]}
 
 
238
  except Exception as e:
239
+ error_msg = f"Generation Error: {str(e)}"
240
+ return {"messages": [AIMessage(content=error_msg)]}
241
 
242
  def rewrite(state: AgentState):
243
+ print("---REWRITE QUESTION---")
 
244
  messages = state["messages"]
245
+ original_question = messages[0].content if len(messages) > 0 else "N/A"
246
+
247
+ headers = {
248
+ "Accept": "application/json",
249
+ "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
250
+ "Content-Type": "application/json"
251
+ }
252
+
253
+ data = {
254
+ "model": "deepseek-chat",
255
+ "messages": [{
256
+ "role": "user",
257
+ "content": f"Rewrite this question to be more specific and clearer: {original_question}"
258
+ }],
259
+ "temperature": 0.7,
260
+ "max_tokens": 1024
261
+ }
262
 
263
  try:
264
+ print("Sending rewrite request...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  response = requests.post(
266
  "https://api.deepseek.com/v1/chat/completions",
267
  headers=headers,
268
  json=data,
269
+ verify=False,
270
  timeout=30
271
  )
272
  response.raise_for_status()
273
 
274
+ response_text = response.json()['choices'][0]['message']['content']
275
+ print("Rewritten question:", response_text)
276
+ return {"messages": [AIMessage(content=response_text)]}
277
  except Exception as e:
278
+ error_msg = f"Rewrite Error: {str(e)}"
279
+ return {"messages": [AIMessage(content=error_msg)]}
280
+
281
+ tools_pattern = re.compile(r"Action: .*")
282
+
283
+ def custom_tools_condition(state: AgentState):
284
+ messages = state["messages"]
285
+ last_message = messages[-1]
286
+ content = last_message.content
287
+
288
+ print("Checking tools condition:", content)
289
+ if tools_pattern.match(content):
290
+ print("Moving to retrieve...")
291
+ return "tools"
292
+ print("Moving to END...")
293
+ return END
294
 
295
  # ------------------------------
296
+ # Workflow Configuration using LangGraph
297
  # ------------------------------
298
  workflow = StateGraph(AgentState)
299
 
300
+ # Add nodes
301
  workflow.add_node("agent", agent)
302
+ retrieve_node = ToolNode(tools)
303
+ workflow.add_node("retrieve", retrieve_node)
304
  workflow.add_node("rewrite", rewrite)
305
+ workflow.add_node("generate", generate)
306
 
307
+ # Set entry point
308
  workflow.set_entry_point("agent")
309
 
310
+ # Define transitions
311
  workflow.add_conditional_edges(
312
  "agent",
313
+ custom_tools_condition,
314
+ {
315
+ "tools": "retrieve",
316
+ END: END
317
+ }
318
  )
319
 
320
  workflow.add_conditional_edges(
321
  "retrieve",
322
  simple_grade_documents,
323
+ {
324
+ "generate": "generate",
325
+ "rewrite": "rewrite"
326
+ }
327
  )
328
 
329
  workflow.add_edge("generate", END)
330
  workflow.add_edge("rewrite", "agent")
331
 
332
+ # Compile the workflow
333
  app = workflow.compile()
334
 
335
  # ------------------------------
336
+ # Processing Function
337
+ # ------------------------------
338
+ def process_question(user_question, app, config):
339
+ """Process user question through the workflow"""
340
+ events = []
341
+ for event in app.stream({"messages": [("user", user_question)]}, config):
342
+ events.append(event)
343
+ return events
344
+
345
+ # ------------------------------
346
+ # Streamlit App UI (Dark Theme)
347
  # ------------------------------
348
  def main():
 
349
  st.set_page_config(
350
+ page_title="AI Research & Development Assistant",
351
+ layout="wide",
352
  initial_sidebar_state="expanded"
353
  )
354
 
 
355
  st.markdown("""
356
  <style>
357
  .stApp {
358
+ background-color: #1a1a1a;
359
+ color: #ffffff;
360
  }
361
 
362
  .stTextArea textarea {
363
+ background-color: #2d2d2d !important;
364
+ color: #ffffff !important;
 
365
  }
366
 
367
+ .stButton > button {
368
+ background-color: #4CAF50;
369
  color: white;
 
 
370
  transition: all 0.3s;
371
  }
372
 
373
+ .stButton > button:hover {
374
+ background-color: #45a049;
375
  transform: scale(1.02);
376
  }
377
 
378
+ .data-box {
379
+ background-color: #2d2d2d;
380
+ border-left: 5px solid #2196F3;
381
  }
382
 
383
+ .dev-box {
384
+ border-left: 5px solid #4CAF50;
 
385
  }
386
 
387
+ .st-expander {
388
+ background-color: #2d2d2d;
389
+ border: 1px solid #3d3d3d;
 
 
 
390
  }
391
  </style>
392
  """, unsafe_allow_html=True)
393
 
 
394
  with st.sidebar:
395
+ st.header("📚 Available Data")
396
+ st.subheader("Research Database")
397
+ for text in research_texts:
398
+ st.markdown(f'<div class="data-box research-box" style="padding: 15px; margin: 10px 0; border-radius: 5px;">{text}</div>', unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
+ st.subheader("Development Database")
401
+ for text in development_texts:
402
+ st.markdown(f'<div class="data-box dev-box" style="padding: 15px; margin: 10px 0; border-radius: 5px;">{text}</div>', unsafe_allow_html=True)
403
+
404
+ st.title("🤖 AI Research & Development Assistant")
405
+ st.markdown("---")
406
+
407
+ query = st.text_area("Enter your question:", height=100, placeholder="e.g., What is the latest advancement in AI research?")
408
+
409
+ col1, col2 = st.columns([1, 2])
410
+ with col1:
411
+ if st.button("🔍 Get Answer", use_container_width=True):
412
+ if query:
413
+ try:
414
+ with st.spinner('Processing your question...'):
415
+ events = process_question(query, app, {"configurable": {"thread_id": "1"}})
416
 
417
+ for event in events:
418
+ if 'agent' in event:
419
+ with st.expander("🔄 Processing Step", expanded=True):
420
+ content = event['agent']['messages'][0].content
421
+ if "Error" in content:
422
+ st.error(content)
423
+ elif "Results:" in content:
424
+ st.markdown("### 📑 Retrieved Documents:")
425
+ docs_start = content.find("Results:")
426
+ docs = content[docs_start:]
427
+ st.info(docs)
428
+ elif 'generate' in event:
429
+ content = event['generate']['messages'][0].content
430
+ if "Error" in content:
431
+ st.error(content)
432
+ else:
433
+ st.markdown("### ✨ Final Answer:")
434
+ st.success(content)
435
+ except Exception as e:
436
+ st.error(f"""
437
+ **Processing Error**
438
+ {str(e)}
439
+ Please check:
440
+ - API key configuration
441
+ - Account balance
442
+ - Network connection
443
+ """)
444
+ else:
445
+ st.warning("⚠️ Please enter a question first!")
446
+
447
+ with col2:
448
+ st.markdown("""
449
+ ### 🎯 How to Use
450
+ 1. Enter your question in the text box
451
+ 2. Click the search button
452
+ 3. Review processing steps
453
+ 4. See final answer
454
+
455
+ ### πŸ’‘ Example Questions
456
+ - What's new in AI image recognition?
457
+ - How is Project B progressing?
458
+ - Recent machine learning trends?
459
+ """)
460
 
461
  if __name__ == "__main__":
462
  main()