mgbam committed on
Commit
85e6b5b
·
verified ·
1 Parent(s): 1e0350f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -73
app.py CHANGED
@@ -1,14 +1,13 @@
1
  # app.py
2
- # Ultra-Dark Advanced AI R&D Assistant
3
  #
4
- # In the spirit of innovation and clarity, this app is built to be robust, scalable,
5
- # and visually striking. It leverages LangGraph, DeepSeek-R1, and local Chroma for fast, in-memory vector storage.
6
  #
7
- # Before deploying, make sure you set the following environment variables:
8
- # - DEEP_SEEK_API: Your DeepSeek API key.
9
- # - OPENAI_API_KEY: Your OpenAI API key.
10
- #
11
- # Written with a vision for tomorrow—by someone who believes in building the future.
12
 
13
  import os
14
  import re
@@ -18,65 +17,51 @@ import requests
18
  from typing import Sequence
19
  from typing_extensions import TypedDict, Annotated
20
 
21
- # Imports for LangChain (ensure langchain-community is installed)
22
  from langchain.embeddings.openai import OpenAIEmbeddings
23
  from langchain.vectorstores import Chroma
24
  from langchain.schema import HumanMessage, AIMessage
25
  from langchain.text_splitter import RecursiveCharacterTextSplitter
26
  from langchain.tools.retriever import create_retriever_tool
27
 
28
- # Imports for LangGraph
29
  from langgraph.graph import END, StateGraph, START
30
  from langgraph.prebuilt import ToolNode
31
  from langgraph.graph.message import add_messages
32
 
33
- # Import Chroma settings for local storage
34
- from chromadb.config import Settings
35
-
36
- # Set up logging
37
  logging.basicConfig(level=logging.INFO)
38
  logger = logging.getLogger(__name__)
39
 
40
- # --- Define our data ---
41
  research_texts = [
42
  "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
43
  "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
44
  "Latest Trends in Machine Learning Methods Using Quantum Computing"
45
  ]
 
46
  development_texts = [
47
  "Project A: UI Design Completed, API Integration in Progress",
48
  "Project B: Testing New Feature X, Bug Fixes Needed",
49
  "Product Y: In the Performance Optimization Stage Before Release"
50
  ]
51
 
52
- # --- Preprocess and create embeddings ---
53
  splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)
54
  research_docs = splitter.create_documents(research_texts)
55
  development_docs = splitter.create_documents(development_texts)
56
 
57
- # Initialize embeddings with your OpenAI API key
58
- embeddings = OpenAIEmbeddings(
59
- model="text-embedding-3-large",
60
- openai_api_key=os.environ.get("OPENAI_API_KEY")
61
- )
62
-
63
- # Use local in-memory settings to avoid tenant issues
64
- client_settings = Settings(
65
- chroma_api_impl="local",
66
- persist_directory=None # Set to a directory like ".chroma" if persistence is needed
67
- )
68
 
69
  research_vectorstore = Chroma.from_documents(
70
  documents=research_docs,
71
  embedding=embeddings,
72
- collection_name="research_collection",
73
- client_settings=client_settings
74
  )
75
  development_vectorstore = Chroma.from_documents(
76
  documents=development_docs,
77
  embedding=embeddings,
78
- collection_name="development_collection",
79
- client_settings=client_settings
80
  )
81
 
82
  research_retriever = research_vectorstore.as_retriever()
@@ -94,7 +79,8 @@ development_tool = create_retriever_tool(
94
  )
95
  tools = [research_tool, development_tool]
96
 
97
- # --- Define our agent and workflow functions ---
 
98
  class AgentState(TypedDict):
99
  messages: Annotated[Sequence[AIMessage | HumanMessage], add_messages]
100
 
@@ -102,6 +88,7 @@ def agent(state: AgentState):
102
  logger.info("Agent invoked")
103
  messages = state["messages"]
104
  user_message = messages[0][1] if isinstance(messages[0], tuple) else messages[0].content
 
105
  prompt = f"""Given this user question: "{user_message}"
106
  If it's about research or academic topics, respond EXACTLY in this format:
107
  SEARCH_RESEARCH: <search terms>
@@ -122,8 +109,12 @@ Otherwise, just answer directly.
122
  "temperature": 0.7,
123
  "max_tokens": 1024
124
  }
125
- response = requests.post("https://api.deepseek.com/v1/chat/completions",
126
- headers=headers, json=data, verify=False)
 
 
 
 
127
  if response.status_code == 200:
128
  response_text = response.json()['choices'][0]['message']['content']
129
  logger.info(f"DeepSeek response: {response_text}")
@@ -145,14 +136,19 @@ Otherwise, just answer directly.
145
  def simple_grade_documents(state: AgentState):
146
  last_message = state["messages"][-1]
147
  logger.info(f"Grading message: {last_message.content}")
148
- return "generate" if "Results: [Document" in last_message.content else "rewrite"
 
 
 
149
 
150
  def generate(state: AgentState):
151
  logger.info("Generating final answer")
152
  messages = state["messages"]
153
  question = messages[0].content if not isinstance(messages[0], tuple) else messages[0][1]
154
  last_message = messages[-1]
155
- docs = last_message.content[last_message.content.find("Results: ["):] if "Results: [" in last_message.content else ""
 
 
156
  headers = {
157
  "Accept": "application/json",
158
  "Authorization": f"Bearer {os.environ.get('DEEP_SEEK_API')}",
@@ -169,8 +165,12 @@ Focus on extracting and synthesizing the key findings from the research papers.
169
  "temperature": 0.7,
170
  "max_tokens": 1024
171
  }
172
- response = requests.post("https://api.deepseek.com/v1/chat/completions",
173
- headers=headers, json=data, verify=False)
 
 
 
 
174
  if response.status_code == 200:
175
  response_text = response.json()['choices'][0]['message']['content']
176
  return {"messages": [AIMessage(content=response_text)]}
@@ -193,8 +193,12 @@ def rewrite(state: AgentState):
193
  "temperature": 0.7,
194
  "max_tokens": 1024
195
  }
196
- response = requests.post("https://api.deepseek.com/v1/chat/completions",
197
- headers=headers, json=data, verify=False)
 
 
 
 
198
  if response.status_code == 200:
199
  response_text = response.json()['choices'][0]['message']['content']
200
  return {"messages": [AIMessage(content=response_text)]}
@@ -206,9 +210,11 @@ def rewrite(state: AgentState):
206
  tools_pattern = re.compile(r"Action: .*")
207
  def custom_tools_condition(state: AgentState):
208
  last_message = state["messages"][-1]
209
- return "tools" if tools_pattern.match(last_message.content) else END
 
 
210
 
211
- # Build the workflow using LangGraph's StateGraph
212
  workflow = StateGraph(AgentState)
213
  workflow.add_node("agent", agent)
214
  retrieve_node = ToolNode(tools)
@@ -228,36 +234,34 @@ def process_question(user_question, app, config):
228
  events.append(event)
229
  return events
230
 
231
- # --- Streamlit UI with Ultra-Dark Theme ---
232
  def main():
233
- st.set_page_config(page_title="Ultra-Dark AI R&D Assistant", layout="wide", initial_sidebar_state="expanded")
234
- st.markdown("""
235
- <style>
236
- /* Ultra-dark background for the app */
237
- .stApp { background-color: #121212 !important; }
238
- /* Force text to be light for maximum contrast */
239
- html, body, [class*="css"] { color: #e0e0e0 !important; }
240
- /* Override default Streamlit input labels and other text */
241
- .stTextArea label, .stMarkdown, .stHeader, .stTitle { color: #ffffff !important; }
242
- /* Sidebar styling */
243
- .css-1d391kg { background-color: #1f1f1f !important; }
244
- .data-box { background-color: #1e1e1e !important; color: #e0e0e0 !important; padding: 20px; border-radius: 10px; margin: 10px 0; }
245
- </style>
246
- """, unsafe_allow_html=True)
247
 
248
- # Sidebar: Display available data with dark styling
249
  with st.sidebar:
250
  st.header("📚 Available Data")
251
  st.subheader("Research Database")
252
  for text in research_texts:
253
- st.markdown(f'<div class="data-box">{text}</div>', unsafe_allow_html=True)
254
  st.subheader("Development Database")
255
  for text in development_texts:
256
- st.markdown(f'<div class="data-box">{text}</div>', unsafe_allow_html=True)
257
 
258
- st.title("🤖 Ultra-Dark AI R&D Assistant")
259
  st.markdown("---")
260
- query = st.text_area("Enter your question:", height=100, placeholder="e.g., What are the latest advancements in AI research?")
261
 
262
  col1, col2 = st.columns([1, 2])
263
  with col1:
@@ -279,17 +283,19 @@ def main():
279
  else:
280
  st.warning("⚠️ Please enter a question first!")
281
  with col2:
282
- st.markdown("""
283
- ### 🎯 How to Use
284
- 1. Type your question in the text box.
285
- 2. Click "Get Answer" to process.
286
- 3. View retrieved documents and the final answer.
287
-
288
- ### 💡 Example Questions
289
- - What are the latest advancements in AI research?
290
- - What is the status of Project A?
291
- - What are the current trends in machine learning?
292
- """)
 
 
293
 
294
  if __name__ == "__main__":
295
  main()
 
1
  # app.py
2
+ # Advanced AI R&D Assistant for Hugging Face Spaces
3
  #
4
+ # This app leverages LangGraph, DeepSeek-R1 via text-based function calling, and Agentic RAG.
5
+ # API keys are securely loaded via environment variables.
6
  #
7
+ # To deploy:
8
+ # 1. Add your API key to Hugging Face Space secrets with the key DEEP_SEEK_API.
9
+ # 2. Ensure your requirements.txt includes langchain-community.
10
+ # 3. Run the app with Streamlit.
 
11
 
12
  import os
13
  import re
 
17
  from typing import Sequence
18
  from typing_extensions import TypedDict, Annotated
19
 
20
+ # Updated imports for LangChain
21
  from langchain.embeddings.openai import OpenAIEmbeddings
22
  from langchain.vectorstores import Chroma
23
  from langchain.schema import HumanMessage, AIMessage
24
  from langchain.text_splitter import RecursiveCharacterTextSplitter
25
  from langchain.tools.retriever import create_retriever_tool
26
 
27
+ # Imports for LangGraph remain the same
28
  from langgraph.graph import END, StateGraph, START
29
  from langgraph.prebuilt import ToolNode
30
  from langgraph.graph.message import add_messages
31
 
32
+ # Configure logging
 
 
 
33
  logging.basicConfig(level=logging.INFO)
34
  logger = logging.getLogger(__name__)
35
 
36
# --- Dummy Data Setup ---
# Two small in-memory corpora: research material and project-status notes.
research_texts = [
    "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
    "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
    "Latest Trends in Machine Learning Methods Using Quantum Computing"
]

development_texts = [
    "Project A: UI Design Completed, API Integration in Progress",
    "Project B: Testing New Feature X, Bug Fixes Needed",
    "Product Y: In the Performance Optimization Stage Before Release"
]

# --- Preprocessing & Embeddings ---
# Chunk each corpus before embedding; the small overlap preserves context
# across chunk boundaries.
splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)
research_docs = splitter.create_documents(research_texts)
development_docs = splitter.create_documents(development_texts)

# Embedding client; the OpenAI API key is read from the environment by the
# underlying client (OPENAI_API_KEY) — confirm it is set before deploying.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Separate Chroma collections so each domain is retrievable independently.
research_vectorstore = Chroma.from_documents(
    documents=research_docs,
    embedding=embeddings,
    collection_name="research_collection"
)
development_vectorstore = Chroma.from_documents(
    documents=development_docs,
    embedding=embeddings,
    collection_name="development_collection"
)
66
 
67
  research_retriever = research_vectorstore.as_retriever()
 
79
  )
80
  tools = [research_tool, development_tool]
81
 
82
# --- Agent and Workflow Functions ---
class AgentState(TypedDict):
    """Workflow state carried between graph nodes.

    Only AIMessage and HumanMessage are used as message types; the
    add_messages reducer appends new messages rather than replacing them.
    """
    messages: Annotated[Sequence[AIMessage | HumanMessage], add_messages]
86
 
 
88
  logger.info("Agent invoked")
89
  messages = state["messages"]
90
  user_message = messages[0][1] if isinstance(messages[0], tuple) else messages[0].content
91
+
92
  prompt = f"""Given this user question: "{user_message}"
93
  If it's about research or academic topics, respond EXACTLY in this format:
94
  SEARCH_RESEARCH: <search terms>
 
109
  "temperature": 0.7,
110
  "max_tokens": 1024
111
  }
112
+ response = requests.post(
113
+ "https://api.deepseek.com/v1/chat/completions",
114
+ headers=headers,
115
+ json=data,
116
+ verify=False
117
+ )
118
  if response.status_code == 200:
119
  response_text = response.json()['choices'][0]['message']['content']
120
  logger.info(f"DeepSeek response: {response_text}")
 
136
def simple_grade_documents(state: AgentState):
    """Route the workflow based on whether retrieval produced documents.

    Returns "generate" when the last message contains retrieved documents
    (marked by the "Results: [Document" prefix), otherwise "rewrite" so the
    question can be reformulated.
    """
    latest = state["messages"][-1]
    logger.info(f"Grading message: {latest.content}")
    return "generate" if "Results: [Document" in latest.content else "rewrite"
143
 
144
  def generate(state: AgentState):
145
  logger.info("Generating final answer")
146
  messages = state["messages"]
147
  question = messages[0].content if not isinstance(messages[0], tuple) else messages[0][1]
148
  last_message = messages[-1]
149
+ docs = ""
150
+ if "Results: [" in last_message.content:
151
+ docs = last_message.content[last_message.content.find("Results: ["):]
152
  headers = {
153
  "Accept": "application/json",
154
  "Authorization": f"Bearer {os.environ.get('DEEP_SEEK_API')}",
 
165
  "temperature": 0.7,
166
  "max_tokens": 1024
167
  }
168
+ response = requests.post(
169
+ "https://api.deepseek.com/v1/chat/completions",
170
+ headers=headers,
171
+ json=data,
172
+ verify=False
173
+ )
174
  if response.status_code == 200:
175
  response_text = response.json()['choices'][0]['message']['content']
176
  return {"messages": [AIMessage(content=response_text)]}
 
193
  "temperature": 0.7,
194
  "max_tokens": 1024
195
  }
196
+ response = requests.post(
197
+ "https://api.deepseek.com/v1/chat/completions",
198
+ headers=headers,
199
+ json=data,
200
+ verify=False
201
+ )
202
  if response.status_code == 200:
203
  response_text = response.json()['choices'][0]['message']['content']
204
  return {"messages": [AIMessage(content=response_text)]}
 
210
# Matches agent replies that request a tool invocation ("Action: ...").
tools_pattern = re.compile(r"Action: .*")

def custom_tools_condition(state: AgentState):
    """Return "tools" when the last message requests an action, else END."""
    content = state["messages"][-1].content
    return "tools" if tools_pattern.match(content) else END
216
 
217
+ # Build the workflow with LangGraph's StateGraph
218
  workflow = StateGraph(AgentState)
219
  workflow.add_node("agent", agent)
220
  retrieve_node = ToolNode(tools)
 
234
  events.append(event)
235
  return events
236
 
237
+ # --- Streamlit UI ---
238
  def main():
239
+ st.set_page_config(page_title="Advanced AI R&D Assistant", layout="wide", initial_sidebar_state="expanded")
240
+ st.markdown(
241
+ """
242
+ <style>
243
+ .stApp { background-color: #f8f9fa; }
244
+ .stButton > button { width: 100%; margin-top: 20px; }
245
+ .data-box { padding: 20px; border-radius: 10px; margin: 10px 0; }
246
+ .research-box { background-color: #e3f2fd; border-left: 5px solid #1976d2; }
247
+ .dev-box { background-color: #e8f5e9; border-left: 5px solid #43a047; }
248
+ </style>
249
+ """, unsafe_allow_html=True
250
+ )
 
 
251
 
252
+ # Sidebar: Display available data
253
  with st.sidebar:
254
  st.header("📚 Available Data")
255
  st.subheader("Research Database")
256
  for text in research_texts:
257
+ st.markdown(f'<div class="data-box research-box">{text}</div>', unsafe_allow_html=True)
258
  st.subheader("Development Database")
259
  for text in development_texts:
260
+ st.markdown(f'<div class="data-box dev-box">{text}</div>', unsafe_allow_html=True)
261
 
262
+ st.title("🤖 Advanced AI R&D Assistant")
263
  st.markdown("---")
264
+ query = st.text_area("Enter your question:", height=100, placeholder="e.g., What is the latest advancement in AI research?")
265
 
266
  col1, col2 = st.columns([1, 2])
267
  with col1:
 
283
  else:
284
  st.warning("⚠️ Please enter a question first!")
285
  with col2:
286
+ st.markdown(
287
+ """
288
+ ### 🎯 How to Use
289
+ 1. Type your question in the text box.
290
+ 2. Click "Get Answer" to process.
291
+ 3. View retrieved documents and the final answer.
292
+
293
+ ### 💡 Example Questions
294
+ - What are the latest advancements in AI research?
295
+ - What is the status of Project A?
296
+ - What are the current trends in machine learning?
297
+ """
298
+ )
299
 
300
  if __name__ == "__main__":
301
  main()