mgbam committed on
Commit
09db53f
·
verified ·
1 Parent(s): 37d0ffb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -231
app.py CHANGED
@@ -1,300 +1,233 @@
1
- # app.py
2
- # Advanced AI R&D Assistant for Hugging Face Spaces
3
- #
4
- # This app leverages LangGraph, DeepSeek-R1 via text-based function calling, and Agentic RAG.
5
- # API keys are securely loaded via environment variables.
6
- #
7
- # To deploy:
8
- # 1. Add your API key to Hugging Face Space secrets with the key DEEP_SEEK_API.
9
- # 2. Ensure your requirements.txt is properly configured.
10
- # 3. Run the app with Streamlit.
11
 
12
  import os
13
- import re
14
- import logging
15
- import streamlit as st
16
  import requests
17
- from typing import Sequence
18
- from typing_extensions import TypedDict, Annotated
19
-
20
- # Updated imports for LangChain
21
- from langchain.embeddings.openai import OpenAIEmbeddings
22
- from langchain.vectorstores import Chroma
23
- from langchain.schema import HumanMessage, AIMessage, ToolMessage
24
  from langchain.text_splitter import RecursiveCharacterTextSplitter
25
- from langchain.tools.retriever import create_retriever_tool
26
-
27
- # Imports for LangGraph remain the same
28
  from langgraph.graph import END, StateGraph, START
29
  from langgraph.prebuilt import ToolNode
30
  from langgraph.graph.message import add_messages
 
 
31
 
32
- # Configure logging
33
- logging.basicConfig(level=logging.INFO)
34
- logger = logging.getLogger(__name__)
35
-
36
- # --- Dummy Data Setup ---
37
# Sample "research" corpus; each string becomes one or more documents below.
research_texts = [
    "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
    "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
    "Latest Trends in Machine Learning Methods Using Quantum Computing"
]

# Sample "development status" corpus, kept separate so it gets its own collection.
development_texts = [
    "Project A: UI Design Completed, API Integration in Progress",
    "Project B: Testing New Feature X, Bug Fixes Needed",
    "Product Y: In the Performance Optimization Stage Before Release"
]

# --- Preprocessing & Embeddings ---
# Chunks of at most 100 characters with a 10-character overlap.
splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)
research_docs = splitter.create_documents(research_texts)
development_docs = splitter.create_documents(development_texts)

# NOTE(review): this runs at import time; presumably it needs OpenAI
# credentials in the environment -- confirm before deploying.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# One Chroma collection per corpus, both sharing the same embedding model.
research_vectorstore = Chroma.from_documents(
    documents=research_docs,
    embedding=embeddings,
    collection_name="research_collection"
)
development_vectorstore = Chroma.from_documents(
    documents=development_docs,
    embedding=embeddings,
    collection_name="development_collection"
)

research_retriever = research_vectorstore.as_retriever()
development_retriever = development_vectorstore.as_retriever()

# Wrap each retriever as a named tool; the names match the "Action: <name>"
# strings that agent() emits.
research_tool = create_retriever_tool(
    research_retriever,
    "research_db_tool",
    "Search information from the research database."
)
development_tool = create_retriever_tool(
    development_retriever,
    "development_db_tool",
    "Search information from the development database."
)
# Tool list handed to ToolNode when the workflow graph is built.
tools = [research_tool, development_tool]
81
 
82
- # --- Agent and Workflow Functions ---
83
class AgentState(TypedDict):
    # Shared graph state: the running message history. ``add_messages`` is
    # attached as the reducer LangGraph uses to combine each node's returned
    # messages with the existing list.
    messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
85
 
 
86
def agent(state: AgentState):
    """Classify the user's question with DeepSeek and run the matching retriever.

    The model is asked to answer either ``SEARCH_RESEARCH: <terms>``,
    ``SEARCH_DEV: <terms>`` or a direct answer.  For the two search forms the
    corresponding retriever is invoked and its results are embedded in an
    ``Action: ...`` message; otherwise the model's text is returned as-is.

    Args:
        state: Graph state; ``state["messages"][0]`` is the user's question,
            either a ``(role, text)`` tuple or a message object.

    Returns:
        ``{"messages": [AIMessage]}`` to be merged into the graph state.

    Raises:
        Exception: If the DeepSeek API answers with a non-200 status.
        requests.RequestException: On transport errors, including timeouts
            and TLS certificate failures.
    """
    logger.info("Agent invoked")
    messages = state["messages"]
    first = messages[0]
    user_message = first[1] if isinstance(first, tuple) else first.content

    prompt = f"""Given this user question: "{user_message}"
If it's about research or academic topics, respond EXACTLY in this format:
SEARCH_RESEARCH: <search terms>

If it's about development status, respond EXACTLY in this format:
SEARCH_DEV: <search terms>

Otherwise, just answer directly.
"""
    headers = {
        "Accept": "application/json",
        "Authorization": f"Bearer {os.environ.get('DEEP_SEEK_API')}",
        "Content-Type": "application/json"
    }
    data = {
        "model": "deepseek-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "max_tokens": 1024
    }
    # TLS verification stays enabled (the request carries the API key), and a
    # (connect, read) timeout keeps a stalled connection from hanging the app.
    response = requests.post(
        "https://api.deepseek.com/v1/chat/completions",
        headers=headers,
        json=data,
        timeout=(10, 120)
    )
    if response.status_code != 200:
        error_msg = f"DeepSeek API call failed: {response.text}"
        logger.error(error_msg)
        raise Exception(error_msg)

    response_text = response.json()['choices'][0]['message']['content']
    logger.info(f"DeepSeek response: {response_text}")

    # Checked in this order so SEARCH_RESEARCH wins if both markers appear.
    routes = (
        ("SEARCH_RESEARCH:", "research_db_tool", research_retriever),
        ("SEARCH_DEV:", "development_db_tool", development_retriever),
    )
    for marker, tool_name, retriever in routes:
        if marker in response_text:
            query = response_text.split(marker)[1].strip()
            results = retriever.invoke(query)
            return {"messages": [AIMessage(content=f'Action: {tool_name}\n{{"query": "{query}"}}\n\nResults: {str(results)}')]}
    return {"messages": [AIMessage(content=response_text)]}
134
 
 
135
def simple_grade_documents(state: AgentState):
    """Route to "generate" when the latest message carries retrieved documents.

    The check is textual: a message containing ``Results: [Document`` is
    treated as a successful retrieval; anything else routes to "rewrite".
    """
    latest = state["messages"][-1]
    logger.info(f"Grading message: {latest.content}")
    has_documents = "Results: [Document" in latest.content
    return "generate" if has_documents else "rewrite"
142
 
 
143
def generate(state: AgentState):
    """Ask DeepSeek to synthesize a final answer from the retrieved documents.

    Args:
        state: Graph state.  The first message is the user's question; the
            last message is expected to contain a ``Results: [`` section
            produced by ``agent``.

    Returns:
        ``{"messages": [AIMessage]}`` holding the model's answer.

    Raises:
        Exception: If the DeepSeek API answers with a non-200 status.
        requests.RequestException: On transport errors, including timeouts
            and TLS certificate failures.
    """
    logger.info("Generating final answer")
    messages = state["messages"]
    first = messages[0]
    question = first[1] if isinstance(first, tuple) else first.content
    last_text = messages[-1].content

    # Keep everything from the results marker onward; empty if none was retrieved.
    start = last_text.find("Results: [")
    docs = last_text[start:] if start != -1 else ""

    headers = {
        "Accept": "application/json",
        "Authorization": f"Bearer {os.environ.get('DEEP_SEEK_API')}",
        "Content-Type": "application/json"
    }
    prompt = f"""Based on these research documents, summarize the latest advancements in AI:
Question: {question}
Documents: {docs}
Focus on extracting and synthesizing the key findings from the research papers.
"""
    data = {
        "model": "deepseek-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "max_tokens": 1024
    }
    # TLS verification stays enabled (the request carries the API key), and a
    # (connect, read) timeout keeps a stalled connection from hanging the app.
    response = requests.post(
        "https://api.deepseek.com/v1/chat/completions",
        headers=headers,
        json=data,
        timeout=(10, 120)
    )
    if response.status_code != 200:
        error_msg = f"DeepSeek API generate call failed: {response.text}"
        logger.error(error_msg)
        raise Exception(error_msg)

    response_text = response.json()['choices'][0]['message']['content']
    return {"messages": [AIMessage(content=response_text)]}
180
-
181
def rewrite(state: AgentState):
    """Ask DeepSeek to rephrase the original question more specifically.

    Args:
        state: Graph state; the first message's content is the question to
            rewrite ("N/A" is used when the history is empty).

    Returns:
        ``{"messages": [AIMessage]}`` holding the rewritten question.

    Raises:
        Exception: If the DeepSeek API answers with a non-200 status.
        requests.RequestException: On transport errors, including timeouts
            and TLS certificate failures.
    """
    logger.info("Rewriting question")
    messages = state["messages"]
    original_question = messages[0].content if messages else "N/A"
    headers = {
        "Accept": "application/json",
        "Authorization": f"Bearer {os.environ.get('DEEP_SEEK_API')}",
        "Content-Type": "application/json"
    }
    data = {
        "model": "deepseek-chat",
        "messages": [{"role": "user", "content": f"Rewrite this question to be more specific and clearer: {original_question}"}],
        "temperature": 0.7,
        "max_tokens": 1024
    }
    # TLS verification stays enabled (the request carries the API key), and a
    # (connect, read) timeout keeps a stalled connection from hanging the app.
    response = requests.post(
        "https://api.deepseek.com/v1/chat/completions",
        headers=headers,
        json=data,
        timeout=(10, 120)
    )
    if response.status_code != 200:
        error_msg = f"DeepSeek API rewrite call failed: {response.text}"
        logger.error(error_msg)
        raise Exception(error_msg)

    response_text = response.json()['choices'][0]['message']['content']
    return {"messages": [AIMessage(content=response_text)]}
208
 
209
# Matches messages that begin with an "Action: <tool>" header.
tools_pattern = re.compile(r"Action: .*")


def custom_tools_condition(state: AgentState):
    """Return "tools" when the latest message starts with an action header, else END."""
    latest = state["messages"][-1]
    is_tool_call = tools_pattern.match(latest.content) is not None
    return "tools" if is_tool_call else END
215
 
216
- # Build the workflow with LangGraph's StateGraph
217
# Assemble the graph: four nodes operating on the shared AgentState.
workflow = StateGraph(AgentState)
workflow.add_node("agent", agent)
retrieve_node = ToolNode(tools)
workflow.add_node("retrieve", retrieve_node)
workflow.add_node("rewrite", rewrite)
workflow.add_node("generate", generate)
# Entry point: every run starts at the agent.
workflow.add_edge(START, "agent")
# After the agent: "tools" goes to the retrieve node, anything else ends the run.
workflow.add_conditional_edges("agent", custom_tools_condition, {"tools": "retrieve", END: END})
# After retrieval: simple_grade_documents returns the name of the next node
# ("generate" or "rewrite"), so no explicit mapping is supplied.
workflow.add_conditional_edges("retrieve", simple_grade_documents)
workflow.add_edge("generate", END)
# A rewritten question is sent back through the agent.
workflow.add_edge("rewrite", "agent")
app_workflow = workflow.compile()
229
 
230
def process_question(user_question, app, config):
    """Stream one user question through *app* and return every emitted event.

    The question is submitted as a single ``("user", text)`` message; events
    are collected in the order the graph yields them.
    """
    initial_state = {"messages": [("user", user_question)]}
    return list(app.stream(initial_state, config))
235
 
236
- # --- Streamlit UI ---
237
def main():
    """Render the Streamlit UI: sidebar data listing, question box and results."""
    st.set_page_config(page_title="Advanced AI R&D Assistant", layout="wide", initial_sidebar_state="expanded")
    # Page-wide CSS, including the box styles used by the sidebar entries below.
    st.markdown(
        """
        <style>
        .stApp { background-color: #f8f9fa; }
        .stButton > button { width: 100%; margin-top: 20px; }
        .data-box { padding: 20px; border-radius: 10px; margin: 10px 0; }
        .research-box { background-color: #e3f2fd; border-left: 5px solid #1976d2; }
        .dev-box { background-color: #e8f5e9; border-left: 5px solid #43a047; }
        </style>
        """, unsafe_allow_html=True
    )

    # Sidebar: Display available data
    with st.sidebar:
        st.header("📚 Available Data")
        st.subheader("Research Database")
        for text in research_texts:
            st.markdown(f'<div class="data-box research-box">{text}</div>', unsafe_allow_html=True)
        st.subheader("Development Database")
        for text in development_texts:
            st.markdown(f'<div class="data-box dev-box">{text}</div>', unsafe_allow_html=True)

    st.title("🤖 Advanced AI R&D Assistant")
    st.markdown("---")
    query = st.text_area("Enter your question:", height=100, placeholder="e.g., What is the latest advancement in AI research?")

    # Left column: action button and results; right column: usage help.
    col1, col2 = st.columns([1, 2])
    with col1:
        if st.button("🔍 Get Answer", use_container_width=True):
            if query:
                with st.spinner('Processing your question...'):
                    # Every run uses the same fixed thread_id.
                    events = process_question(query, app_workflow, {"configurable": {"thread_id": "1"}})
                    for event in events:
                        if 'agent' in event:
                            with st.expander("🔄 Processing Step", expanded=True):
                                content = event['agent']['messages'][0].content
                                # Only the retrieved-results portion of the agent message is shown.
                                if "Results:" in content:
                                    st.markdown("### 📑 Retrieved Documents:")
                                    docs = content[content.find("Results:"):]
                                    st.info(docs)
                        elif 'generate' in event:
                            st.markdown("### Final Answer:")
                            st.success(event['generate']['messages'][0].content)
            else:
                st.warning("⚠️ Please enter a question first!")
    with col2:
        st.markdown(
            """
            ### 🎯 How to Use
            1. Type your question in the text box.
            2. Click "Get Answer" to process.
            3. View retrieved documents and the final answer.

            ### 💡 Example Questions
            - What are the latest advancements in AI research?
            - What is the status of Project A?
            - What are the current trends in machine learning?
            """
        )


# Run the UI only when executed as a script.
if __name__ == "__main__":
    main()
 
1
+ # Drug Repurposing Advisor: A Multi-Agent Workflow Example
2
+ # This example uses dummy data for demonstration.
3
+ # In a production system, replace the dummy data with real pharmaceutical databases.
 
 
 
 
 
 
 
4
 
5
  import os
6
+ import json
 
 
7
  import requests
8
+ import streamlit as st
9
+ from typing import List, Union, Tuple
10
+ from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
 
 
 
 
11
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
 
12
  from langgraph.graph import END, StateGraph, START
13
  from langgraph.prebuilt import ToolNode
14
  from langgraph.graph.message import add_messages
15
+ from typing_extensions import TypedDict, Annotated
16
+ from typing import Sequence
17
 
18
+ # Dummy data for drug mechanism research and clinical trial outcomes
19
# Dummy mechanism-of-action notes, one string per drug.
drug_mechanism_texts = [
    "Drug A: Inhibits enzyme X and modulates receptor Y; potential anti-inflammatory effects.",
    "Drug B: Blocks ion channel Z; has been shown to reduce oxidative stress in preclinical models.",
    "Drug C: Activates nuclear receptor W; exhibits neuroprotective properties."
]

# Dummy clinical-trial summaries, one string per trial.
clinical_trials_texts = [
    "Trial 1: Drug A repurposed for rheumatoid arthritis showed a 30% improvement in joint function.",
    "Trial 2: Drug B evaluated in a pilot study for neurodegenerative disorders demonstrated a reduction in symptom severity.",
    "Trial 3: Drug C tested in a phase II trial for multiple sclerosis reported significant reduction in relapse rates."
]

# Text splitting settings
# Chunks of at most 100 characters with a 10-character overlap.
splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)
# NOTE(review): the retriever functions in this file search the raw text
# lists, not these chunked documents -- confirm whether they are still needed.
mechanism_docs = splitter.create_documents(drug_mechanism_texts)
clinical_docs = splitter.create_documents(clinical_trials_texts)
35
+
36
+ # Here you would typically create vector embeddings and vectorstores (e.g., using ChromaDB)
37
+ # For demonstration, we define simple retriever functions that return dummy results.
38
+ def mechanism_retriever(query: str) -> str:
39
+ # Dummy search: return first document that mentions a keyword from the query
40
+ for doc in drug_mechanism_texts:
41
+ if any(word.lower() in doc.lower() for word in query.split()):
42
+ return f"[Mechanism Doc]: {doc}"
43
+ return "No relevant mechanism data found."
44
+
45
+ def clinical_retriever(query: str) -> str:
46
+ for doc in clinical_trials_texts:
47
+ if any(word.lower() in doc.lower() for word in query.split()):
48
+ return f"[Clinical Trial Doc]: {doc}"
49
+ return "No relevant clinical trial data found."
50
+
51
+ # Define tools using a simple wrapper function
52
def create_retriever_tool(retriever_func, tool_name: str, description: str):
    """Wrap *retriever_func* as a named, described single-argument tool.

    Args:
        retriever_func: Callable taking a query string and returning results.
        tool_name: Name exposed as the wrapper's ``__name__``.
        description: Human-readable purpose of the tool.

    Returns:
        A function ``tool(query: str)`` that forwards to *retriever_func* and
        carries ``__name__``, ``__doc__`` and ``description`` metadata.
    """
    def tool(query: str):
        return retriever_func(query)

    tool.__name__ = tool_name
    tool.__qualname__ = tool_name
    # LangChain derives a function tool's description from its docstring and
    # rejects functions without one, so the description is set as the
    # docstring as well as the custom attribute.
    tool.__doc__ = description
    tool.description = description
    return tool
59
+
60
# Tool over the mechanism corpus; the name matches the "Action: mechanism_db_tool"
# string that agent() emits.
mechanism_tool = create_retriever_tool(
    mechanism_retriever,
    "mechanism_db_tool",
    "Search drug mechanism data for repurposing insights."
)
# Tool over the clinical-trial corpus; the name matches "Action: clinical_db_tool".
clinical_tool = create_retriever_tool(
    clinical_retriever,
    "clinical_db_tool",
    "Search clinical trial outcomes for repurposing evidence."
)
# Tool list handed to ToolNode when the workflow graph is built.
tools = [mechanism_tool, clinical_tool]
71
 
72
+ # Define the AgentState type for our workflow
73
class AgentState(TypedDict):
    # Shared graph state: the running message history. ``add_messages`` is
    # attached as the reducer LangGraph uses to combine each node's returned
    # messages with the existing list.
    messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
75
 
76
+ # Agent function: Classifies queries as targeting drug mechanisms or clinical outcomes
77
def agent(state: AgentState):
    """Classify the current question and run the matching dummy retriever.

    The latest message is classified, not the first one: this node is entered
    from START (latest message is the user's question) and from ``rewrite``
    (latest message is the rewritten question).  Reading the first message
    would make the rewrite loop repeat the same failed lookup.

    Args:
        state: Graph state holding the message history.

    Returns:
        ``{"messages": [AIMessage]}``.  For a classified query the content is
        ``Action: <tool>``, a JSON object with the query, and ``Results: ...``;
        otherwise it is a request to rephrase.
    """
    print("---CALL AGENT---")
    messages = state["messages"]
    latest = messages[-1]
    user_message = latest[1] if isinstance(latest, tuple) else latest.content
    lowered = user_message.lower()

    # Prompt intended for a real DeepSeek-R1 call.  The keyword classifier
    # below stands in for the model, so the prompt is not sent anywhere yet.
    prompt = f"""Given the user question: "{user_message}"
If the question is about the molecular mechanism or pharmacodynamics, respond EXACTLY in this format:
SEARCH_MECHANISM: <search terms>

If it's about clinical trial outcomes, efficacy, or safety evidence, respond EXACTLY in this format:
SEARCH_CLINICAL: <search terms>

Otherwise, answer directly with general repurposing insights.
"""
    if "mechanism" in lowered or "how it works" in lowered:
        response_text = f"SEARCH_MECHANISM: {user_message}"
    elif any(keyword in lowered for keyword in ("trial", "efficacy", "safety")):
        response_text = f"SEARCH_CLINICAL: {user_message}"
    else:
        response_text = "The system did not classify your query. Please rephrase to focus on drug mechanism or clinical data."

    print("Agent response:", response_text)

    # Checked in this order so SEARCH_MECHANISM wins if both markers appear.
    routes = (
        ("SEARCH_MECHANISM:", "mechanism_db_tool", mechanism_tool),
        ("SEARCH_CLINICAL:", "clinical_db_tool", clinical_tool),
    )
    for marker, tool_name, tool in routes:
        if marker in response_text:
            query = response_text.split(marker, 1)[1].strip()
            result = tool(query)
            # json.dumps escapes quotes and backslashes in the query, which
            # the hand-built string did not.
            payload = json.dumps({"query": query}, ensure_ascii=False)
            return {"messages": [AIMessage(content=f"Action: {tool_name}\n{payload}\n\nResults: {result}")]}
    return {"messages": [AIMessage(content=response_text)]}
 
 
115
 
116
+ # Grading function: Checks if retrieved documents were found
117
def simple_grade_documents(state: AgentState):
    """Decide whether retrieval produced usable data.

    Returns "generate" when the latest message has a ``Results:`` section that
    does not contain ``No relevant``; otherwise returns "rewrite".
    """
    text = state["messages"][-1].content
    print("Evaluating message:", text)
    if "Results:" not in text or "No relevant" in text:
        print("---NO DATA FOUND, TRY REWRITE---")
        return "rewrite"
    print("---DATA FOUND, PROCEED TO GENERATE INSIGHTS---")
    return "generate"
127
 
128
+ # Generate function: Synthesizes repurposing insights from retrieved data
129
def generate(state: AgentState):
    """Produce the final (currently canned) repurposing summary.

    The retrieved data and the question are assembled into a prompt, but the
    returned answer is a fixed demonstration string.
    """
    print("---GENERATE FINAL INSIGHTS---")
    history = state["messages"]
    question = history[0].content
    latest_text = history[-1].content

    # Everything from the "Results:" marker onward, or a placeholder if absent.
    _, marker, tail = latest_text.partition("Results:")
    retrieved_data = marker + tail if marker else "No data available"

    # Prompt for a future model call; the demo answer below does not use it.
    prompt = f"""Based on the following retrieved data:
{retrieved_data}
and considering the question:
{question}
Summarize potential drug repurposing opportunities and any recommended next steps for further investigation.
"""
    final_answer = (
        "Summary Insight: Considering the data, a promising repurposing opportunity is to explore "
        "Drug A for anti-inflammatory applications beyond its original use, and Drug B might be "
        "repurposed for neurodegenerative conditions. Further research should validate these hypotheses."
    )
    print("Final Answer:", final_answer)
    return {"messages": [AIMessage(content=final_answer)]}
148
+
149
+ # Rewrite function: If no data is found, help rephrase the query for clarity
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
def rewrite(state: AgentState):
    """Return the original question with a hint asking the user to narrow it.

    Uses the first message's content, or "N/A" when the history is empty.
    """
    print("---REWRITE QUESTION---")
    history = state["messages"]
    if history:
        original_question = history[0].content
    else:
        original_question = "N/A"
    hint = "(Please specify if you are asking about drug mechanism or clinical trial outcomes.)"
    rewritten = f"{original_question} {hint}"
    print("Rewritten question:", rewritten)
    return {"messages": [AIMessage(content=rewritten)]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
+ # Decision function: Determines next step based on last message content
160
def custom_tools_condition(state: AgentState):
    """Return "tools" when the latest message begins with "Action:", else END."""
    text = state["messages"][-1].content
    if not text.startswith("Action:"):
        return END
    print("Tool action detected. Proceed to retrieval.")
    return "tools"
168
 
169
+ # Create the workflow graph
170
# Assemble the graph: four nodes operating on the shared AgentState.
workflow = StateGraph(AgentState)
workflow.add_node("agent", agent)
retrieve_node = ToolNode(tools)
workflow.add_node("retrieve", retrieve_node)
workflow.add_node("rewrite", rewrite)
workflow.add_node("generate", generate)

# Define workflow edges
# Entry point: every run starts at the agent.
workflow.add_edge(START, "agent")
# After the agent: "tools" goes to the retrieve node, anything else ends the run.
workflow.add_conditional_edges("agent", custom_tools_condition, {"tools": "retrieve", END: END})
# After retrieval: simple_grade_documents returns the name of the next node
# ("generate" or "rewrite"), so no explicit mapping is supplied.
workflow.add_conditional_edges("retrieve", simple_grade_documents)
workflow.add_edge("generate", END)
# A rewritten question is sent back through the agent.
workflow.add_edge("rewrite", "agent")
app = workflow.compile()
184
 
185
+ # Function to process a query through the workflow
186
def process_question(user_question: str, app, config: dict):
    """Stream one user question through *app* and return every emitted event.

    The question is submitted as a single ``("user", text)`` message; events
    are collected in the order the graph yields them.
    """
    initial_state = {"messages": [("user", user_question)]}
    return list(app.stream(initial_state, config))
191
 
192
+ # Streamlit UI for the Drug Repurposing Advisor
193
def main():
    """Render the Streamlit UI: query box, per-step workflow output and help text."""
    st.set_page_config(
        page_title="Drug Repurposing Advisor",
        layout="wide",
        initial_sidebar_state="expanded"
    )
    st.title("💊 Drug Repurposing Advisor")
    st.markdown("### Explore potential drug repurposing opportunities with AI-driven insights.")
    query = st.text_area("Enter your research question:",
                         placeholder="e.g., Can Drug A be repurposed for neurodegenerative diseases?")
    # Left column: action button and results; right column: usage help.
    col1, col2 = st.columns([1, 2])
    with col1:
        if st.button("🔍 Get Insights", use_container_width=True):
            if query:
                with st.spinner("Processing your query..."):
                    # Every run uses the same fixed thread_id.
                    events = process_question(query, app, {"configurable": {"thread_id": "1"}})
                    # Each event is keyed by the node that produced it; only
                    # agent, generate and rewrite events are displayed.
                    for event in events:
                        if 'agent' in event:
                            with st.expander("Agent Processing Step", expanded=True):
                                content = event['agent']['messages'][0].content
                                st.markdown(f"**Agent Step Output:**\n\n{content}")
                        elif 'generate' in event:
                            st.markdown("### Final Insights:")
                            st.success(event['generate']['messages'][0].content)
                        elif 'rewrite' in event:
                            st.markdown("### Suggestion:")
                            st.warning(event['rewrite']['messages'][0].content)
            else:
                st.warning("⚠️ Please enter a query.")
    with col2:
        st.markdown("""
        **How to Use the Drug Repurposing Advisor:**
        1. **Input Query:** Describe your research question. Specify whether you are interested in drug mechanisms or clinical outcomes.
        2. **Get Insights:** Click "Get Insights" and let the system process your query.
        3. **Review Output:** Explore the retrieved data and the final synthesized insights.
        **Example Questions:**
        - "How does Drug A work and could its mechanism be useful in treating inflammatory diseases?"
        - "What are the clinical trial outcomes of Drug B and can it be repurposed for neurodegenerative conditions?"
        """)


# Run the UI only when executed as a script.
if __name__ == "__main__":
    main()