mgbam committed on
Commit
85e6b5b
·
verified ·
1 Parent(s): 1e0350f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -73
app.py CHANGED
@@ -1,14 +1,13 @@
1
  # app.py
2
- # Ultra-Dark Advanced AI R&D Assistant
3
  #
4
- # In the spirit of innovation and clarity, this app is built to be robust, scalable,
5
- # and visually striking. It leverages LangGraph, DeepSeek-R1, and local Chroma for fast, in-memory vector storage.
6
  #
7
- # Before deploying, make sure you set the following environment variables:
8
- # - DEEP_SEEK_API: Your DeepSeek API key.
9
- # - OPENAI_API_KEY: Your OpenAI API key.
10
- #
11
- # Written with a vision for tomorrow—by someone who believes in building the future.
12
 
13
  import os
14
  import re
@@ -18,65 +17,51 @@ import requests
18
  from typing import Sequence
19
  from typing_extensions import TypedDict, Annotated
20
 
21
- # Imports for LangChain (ensure langchain-community is installed)
22
  from langchain.embeddings.openai import OpenAIEmbeddings
23
  from langchain.vectorstores import Chroma
24
  from langchain.schema import HumanMessage, AIMessage
25
  from langchain.text_splitter import RecursiveCharacterTextSplitter
26
  from langchain.tools.retriever import create_retriever_tool
27
 
28
- # Imports for LangGraph
29
  from langgraph.graph import END, StateGraph, START
30
  from langgraph.prebuilt import ToolNode
31
  from langgraph.graph.message import add_messages
32
 
33
- # Import Chroma settings for local storage
34
- from chromadb.config import Settings
35
-
36
- # Set up logging
37
  logging.basicConfig(level=logging.INFO)
38
  logger = logging.getLogger(__name__)
39
 
40
- # --- Define our data ---
41
  research_texts = [
42
  "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
43
  "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
44
  "Latest Trends in Machine Learning Methods Using Quantum Computing"
45
  ]
 
46
  development_texts = [
47
  "Project A: UI Design Completed, API Integration in Progress",
48
  "Project B: Testing New Feature X, Bug Fixes Needed",
49
  "Product Y: In the Performance Optimization Stage Before Release"
50
  ]
51
 
52
- # --- Preprocess and create embeddings ---
53
  splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)
54
  research_docs = splitter.create_documents(research_texts)
55
  development_docs = splitter.create_documents(development_texts)
56
 
57
- # Initialize embeddings with your OpenAI API key
58
- embeddings = OpenAIEmbeddings(
59
- model="text-embedding-3-large",
60
- openai_api_key=os.environ.get("OPENAI_API_KEY")
61
- )
62
-
63
- # Use local in-memory settings to avoid tenant issues
64
- client_settings = Settings(
65
- chroma_api_impl="local",
66
- persist_directory=None # Set to a directory like ".chroma" if persistence is needed
67
- )
68
 
69
  research_vectorstore = Chroma.from_documents(
70
  documents=research_docs,
71
  embedding=embeddings,
72
- collection_name="research_collection",
73
- client_settings=client_settings
74
  )
75
  development_vectorstore = Chroma.from_documents(
76
  documents=development_docs,
77
  embedding=embeddings,
78
- collection_name="development_collection",
79
- client_settings=client_settings
80
  )
81
 
82
  research_retriever = research_vectorstore.as_retriever()
@@ -94,7 +79,8 @@ development_tool = create_retriever_tool(
94
  )
95
  tools = [research_tool, development_tool]
96
 
97
- # --- Define our agent and workflow functions ---
 
98
  class AgentState(TypedDict):
99
  messages: Annotated[Sequence[AIMessage | HumanMessage], add_messages]
100
 
@@ -102,6 +88,7 @@ def agent(state: AgentState):
102
  logger.info("Agent invoked")
103
  messages = state["messages"]
104
  user_message = messages[0][1] if isinstance(messages[0], tuple) else messages[0].content
 
105
  prompt = f"""Given this user question: "{user_message}"
106
  If it's about research or academic topics, respond EXACTLY in this format:
107
  SEARCH_RESEARCH: <search terms>
@@ -122,8 +109,12 @@ Otherwise, just answer directly.
122
  "temperature": 0.7,
123
  "max_tokens": 1024
124
  }
125
- response = requests.post("https://api.deepseek.com/v1/chat/completions",
126
- headers=headers, json=data, verify=False)
 
 
 
 
127
  if response.status_code == 200:
128
  response_text = response.json()['choices'][0]['message']['content']
129
  logger.info(f"DeepSeek response: {response_text}")
@@ -145,14 +136,19 @@ Otherwise, just answer directly.
145
  def simple_grade_documents(state: AgentState):
146
  last_message = state["messages"][-1]
147
  logger.info(f"Grading message: {last_message.content}")
148
- return "generate" if "Results: [Document" in last_message.content else "rewrite"
 
 
 
149
 
150
  def generate(state: AgentState):
151
  logger.info("Generating final answer")
152
  messages = state["messages"]
153
  question = messages[0].content if not isinstance(messages[0], tuple) else messages[0][1]
154
  last_message = messages[-1]
155
- docs = last_message.content[last_message.content.find("Results: ["):] if "Results: [" in last_message.content else ""
 
 
156
  headers = {
157
  "Accept": "application/json",
158
  "Authorization": f"Bearer {os.environ.get('DEEP_SEEK_API')}",
@@ -169,8 +165,12 @@ Focus on extracting and synthesizing the key findings from the research papers.
169
  "temperature": 0.7,
170
  "max_tokens": 1024
171
  }
172
- response = requests.post("https://api.deepseek.com/v1/chat/completions",
173
- headers=headers, json=data, verify=False)
 
 
 
 
174
  if response.status_code == 200:
175
  response_text = response.json()['choices'][0]['message']['content']
176
  return {"messages": [AIMessage(content=response_text)]}
@@ -193,8 +193,12 @@ def rewrite(state: AgentState):
193
  "temperature": 0.7,
194
  "max_tokens": 1024
195
  }
196
- response = requests.post("https://api.deepseek.com/v1/chat/completions",
197
- headers=headers, json=data, verify=False)
 
 
 
 
198
  if response.status_code == 200:
199
  response_text = response.json()['choices'][0]['message']['content']
200
  return {"messages": [AIMessage(content=response_text)]}
@@ -206,9 +210,11 @@ def rewrite(state: AgentState):
206
  tools_pattern = re.compile(r"Action: .*")
207
  def custom_tools_condition(state: AgentState):
208
  last_message = state["messages"][-1]
209
- return "tools" if tools_pattern.match(last_message.content) else END
 
 
210
 
211
- # Build the workflow using LangGraph's StateGraph
212
  workflow = StateGraph(AgentState)
213
  workflow.add_node("agent", agent)
214
  retrieve_node = ToolNode(tools)
@@ -228,36 +234,34 @@ def process_question(user_question, app, config):
228
  events.append(event)
229
  return events
230
 
231
- # --- Streamlit UI with Ultra-Dark Theme ---
232
  def main():
233
- st.set_page_config(page_title="Ultra-Dark AI R&D Assistant", layout="wide", initial_sidebar_state="expanded")
234
- st.markdown("""
235
- <style>
236
- /* Ultra-dark background for the app */
237
- .stApp { background-color: #121212 !important; }
238
- /* Force text to be light for maximum contrast */
239
- html, body, [class*="css"] { color: #e0e0e0 !important; }
240
- /* Override default Streamlit input labels and other text */
241
- .stTextArea label, .stMarkdown, .stHeader, .stTitle { color: #ffffff !important; }
242
- /* Sidebar styling */
243
- .css-1d391kg { background-color: #1f1f1f !important; }
244
- .data-box { background-color: #1e1e1e !important; color: #e0e0e0 !important; padding: 20px; border-radius: 10px; margin: 10px 0; }
245
- </style>
246
- """, unsafe_allow_html=True)
247
 
248
- # Sidebar: Display available data with dark styling
249
  with st.sidebar:
250
  st.header("📚 Available Data")
251
  st.subheader("Research Database")
252
  for text in research_texts:
253
- st.markdown(f'<div class="data-box">{text}</div>', unsafe_allow_html=True)
254
  st.subheader("Development Database")
255
  for text in development_texts:
256
- st.markdown(f'<div class="data-box">{text}</div>', unsafe_allow_html=True)
257
 
258
- st.title("🤖 Ultra-Dark AI R&D Assistant")
259
  st.markdown("---")
260
- query = st.text_area("Enter your question:", height=100, placeholder="e.g., What are the latest advancements in AI research?")
261
 
262
  col1, col2 = st.columns([1, 2])
263
  with col1:
@@ -279,17 +283,19 @@ def main():
279
  else:
280
  st.warning("⚠️ Please enter a question first!")
281
  with col2:
282
- st.markdown("""
283
- ### 🎯 How to Use
284
- 1. Type your question in the text box.
285
- 2. Click "Get Answer" to process.
286
- 3. View retrieved documents and the final answer.
287
-
288
- ### 💡 Example Questions
289
- - What are the latest advancements in AI research?
290
- - What is the status of Project A?
291
- - What are the current trends in machine learning?
292
- """)
 
 
293
 
294
  if __name__ == "__main__":
295
  main()
 
1
  # app.py
2
+ # Advanced AI R&D Assistant for Hugging Face Spaces
3
  #
4
+ # This app leverages LangGraph, DeepSeek-R1 via text-based function calling, and Agentic RAG.
5
+ # API keys are securely loaded via environment variables.
6
  #
7
+ # To deploy:
8
+ # 1. Add your API key to Hugging Face Space secrets with the key DEEP_SEEK_API.
9
+ # 2. Ensure your requirements.txt includes langchain-community.
10
+ # 3. Run the app with Streamlit.
 
11
 
12
  import os
13
  import re
 
17
  from typing import Sequence
18
  from typing_extensions import TypedDict, Annotated
19
 
20
+ # Updated imports for LangChain
21
  from langchain.embeddings.openai import OpenAIEmbeddings
22
  from langchain.vectorstores import Chroma
23
  from langchain.schema import HumanMessage, AIMessage
24
  from langchain.text_splitter import RecursiveCharacterTextSplitter
25
  from langchain.tools.retriever import create_retriever_tool
26
 
27
+ # Imports for LangGraph remain the same
28
  from langgraph.graph import END, StateGraph, START
29
  from langgraph.prebuilt import ToolNode
30
  from langgraph.graph.message import add_messages
31
 
32
+ # Configure logging
 
 
 
33
  logging.basicConfig(level=logging.INFO)
34
  logger = logging.getLogger(__name__)
35
 
36
# --- Dummy Data Setup ---
# Two small in-memory corpora: research material and project-status notes.
research_texts = [
    "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
    "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
    "Latest Trends in Machine Learning Methods Using Quantum Computing"
]

development_texts = [
    "Project A: UI Design Completed, API Integration in Progress",
    "Project B: Testing New Feature X, Bug Fixes Needed",
    "Product Y: In the Performance Optimization Stage Before Release"
]

# --- Preprocessing & Embeddings ---
# Chunk each corpus before embedding; the small overlap preserves context
# across chunk boundaries.
splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)
research_docs = splitter.create_documents(research_texts)
development_docs = splitter.create_documents(development_texts)

# Embedding client; the OpenAI API key is read from the environment by the
# underlying client (OPENAI_API_KEY) — confirm it is set before deploying.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Separate Chroma collections so each domain is retrievable independently.
research_vectorstore = Chroma.from_documents(
    documents=research_docs,
    embedding=embeddings,
    collection_name="research_collection"
)
development_vectorstore = Chroma.from_documents(
    documents=development_docs,
    embedding=embeddings,
    collection_name="development_collection"
)
66
 
67
  research_retriever = research_vectorstore.as_retriever()
 
79
  )
80
  tools = [research_tool, development_tool]
81
 
82
# --- Agent and Workflow Functions ---
class AgentState(TypedDict):
    """Workflow state carried between graph nodes.

    Only AIMessage and HumanMessage are used as message types; the
    add_messages reducer appends new messages rather than replacing them.
    """
    messages: Annotated[Sequence[AIMessage | HumanMessage], add_messages]
86
 
 
88
  logger.info("Agent invoked")
89
  messages = state["messages"]
90
  user_message = messages[0][1] if isinstance(messages[0], tuple) else messages[0].content
91
+
92
  prompt = f"""Given this user question: "{user_message}"
93
  If it's about research or academic topics, respond EXACTLY in this format:
94
  SEARCH_RESEARCH: <search terms>
 
109
  "temperature": 0.7,
110
  "max_tokens": 1024
111
  }
112
+ response = requests.post(
113
+ "https://api.deepseek.com/v1/chat/completions",
114
+ headers=headers,
115
+ json=data,
116
+ verify=False
117
+ )
118
  if response.status_code == 200:
119
  response_text = response.json()['choices'][0]['message']['content']
120
  logger.info(f"DeepSeek response: {response_text}")
 
136
def simple_grade_documents(state: AgentState):
    """Route the workflow based on whether retrieval produced documents.

    Returns "generate" when the last message contains retrieved documents
    (marked by the "Results: [Document" prefix), otherwise "rewrite" so the
    question can be reformulated.
    """
    latest = state["messages"][-1]
    logger.info(f"Grading message: {latest.content}")
    return "generate" if "Results: [Document" in latest.content else "rewrite"
143
 
144
  def generate(state: AgentState):
145
  logger.info("Generating final answer")
146
  messages = state["messages"]
147
  question = messages[0].content if not isinstance(messages[0], tuple) else messages[0][1]
148
  last_message = messages[-1]
149
+ docs = ""
150
+ if "Results: [" in last_message.content:
151
+ docs = last_message.content[last_message.content.find("Results: ["):]
152
  headers = {
153
  "Accept": "application/json",
154
  "Authorization": f"Bearer {os.environ.get('DEEP_SEEK_API')}",
 
165
  "temperature": 0.7,
166
  "max_tokens": 1024
167
  }
168
+ response = requests.post(
169
+ "https://api.deepseek.com/v1/chat/completions",
170
+ headers=headers,
171
+ json=data,
172
+ verify=False
173
+ )
174
  if response.status_code == 200:
175
  response_text = response.json()['choices'][0]['message']['content']
176
  return {"messages": [AIMessage(content=response_text)]}
 
193
  "temperature": 0.7,
194
  "max_tokens": 1024
195
  }
196
+ response = requests.post(
197
+ "https://api.deepseek.com/v1/chat/completions",
198
+ headers=headers,
199
+ json=data,
200
+ verify=False
201
+ )
202
  if response.status_code == 200:
203
  response_text = response.json()['choices'][0]['message']['content']
204
  return {"messages": [AIMessage(content=response_text)]}
 
210
# Matches agent replies that request a tool invocation ("Action: ...").
tools_pattern = re.compile(r"Action: .*")

def custom_tools_condition(state: AgentState):
    """Return "tools" when the last message requests an action, else END."""
    content = state["messages"][-1].content
    return "tools" if tools_pattern.match(content) else END
216
 
217
+ # Build the workflow with LangGraph's StateGraph
218
  workflow = StateGraph(AgentState)
219
  workflow.add_node("agent", agent)
220
  retrieve_node = ToolNode(tools)
 
234
  events.append(event)
235
  return events
236
 
237
+ # --- Streamlit UI ---
238
  def main():
239
+ st.set_page_config(page_title="Advanced AI R&D Assistant", layout="wide", initial_sidebar_state="expanded")
240
+ st.markdown(
241
+ """
242
+ <style>
243
+ .stApp { background-color: #f8f9fa; }
244
+ .stButton > button { width: 100%; margin-top: 20px; }
245
+ .data-box { padding: 20px; border-radius: 10px; margin: 10px 0; }
246
+ .research-box { background-color: #e3f2fd; border-left: 5px solid #1976d2; }
247
+ .dev-box { background-color: #e8f5e9; border-left: 5px solid #43a047; }
248
+ </style>
249
+ """, unsafe_allow_html=True
250
+ )
 
 
251
 
252
+ # Sidebar: Display available data
253
  with st.sidebar:
254
  st.header("📚 Available Data")
255
  st.subheader("Research Database")
256
  for text in research_texts:
257
+ st.markdown(f'<div class="data-box research-box">{text}</div>', unsafe_allow_html=True)
258
  st.subheader("Development Database")
259
  for text in development_texts:
260
+ st.markdown(f'<div class="data-box dev-box">{text}</div>', unsafe_allow_html=True)
261
 
262
+ st.title("🤖 Advanced AI R&D Assistant")
263
  st.markdown("---")
264
+ query = st.text_area("Enter your question:", height=100, placeholder="e.g., What is the latest advancement in AI research?")
265
 
266
  col1, col2 = st.columns([1, 2])
267
  with col1:
 
283
  else:
284
  st.warning("⚠️ Please enter a question first!")
285
  with col2:
286
+ st.markdown(
287
+ """
288
+ ### 🎯 How to Use
289
+ 1. Type your question in the text box.
290
+ 2. Click "Get Answer" to process.
291
+ 3. View retrieved documents and the final answer.
292
+
293
+ ### 💡 Example Questions
294
+ - What are the latest advancements in AI research?
295
+ - What is the status of Project A?
296
+ - What are the current trends in machine learning?
297
+ """
298
+ )
299
 
300
  if __name__ == "__main__":
301
  main()