# app.py # Ultra-Dark Advanced AI R&D Assistant # # In the spirit of innovation and clarity, this app is built to be robust, scalable, # and visually striking. It leverages LangGraph, DeepSeek-R1, and local Chroma for fast, in-memory vector storage. # # Before deploying, make sure you set the following environment variables: # - DEEP_SEEK_API: Your DeepSeek API key. # - OPENAI_API_KEY: Your OpenAI API key. # # Written with a vision for tomorrowβ€”by someone who believes in building the future. import os import re import logging import streamlit as st import requests from typing import Sequence from typing_extensions import TypedDict, Annotated # Imports for LangChain (ensure langchain-community is installed) from langchain.embeddings.openai import OpenAIEmbeddings from langchain.vectorstores import Chroma from langchain.schema import HumanMessage, AIMessage from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.tools.retriever import create_retriever_tool # Imports for LangGraph from langgraph.graph import END, StateGraph, START from langgraph.prebuilt import ToolNode from langgraph.graph.message import add_messages # Import Chroma settings for local storage from chromadb.config import Settings # Set up logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # --- Define our data --- research_texts = [ "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%", "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing", "Latest Trends in Machine Learning Methods Using Quantum Computing" ] development_texts = [ "Project A: UI Design Completed, API Integration in Progress", "Project B: Testing New Feature X, Bug Fixes Needed", "Product Y: In the Performance Optimization Stage Before Release" ] # --- Preprocess and create embeddings --- splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10) research_docs = splitter.create_documents(research_texts) development_docs = splitter.create_documents(development_texts) # Initialize embeddings with your OpenAI API key embeddings = OpenAIEmbeddings( model="text-embedding-3-large", openai_api_key=os.environ.get("OPENAI_API_KEY") ) # Use local in-memory settings to avoid tenant issues client_settings = Settings( chroma_api_impl="local", persist_directory=None # Set to a directory like ".chroma" if persistence is needed ) research_vectorstore = Chroma.from_documents( documents=research_docs, embedding=embeddings, collection_name="research_collection", client_settings=client_settings ) development_vectorstore = Chroma.from_documents( documents=development_docs, embedding=embeddings, collection_name="development_collection", client_settings=client_settings ) research_retriever = research_vectorstore.as_retriever() development_retriever = development_vectorstore.as_retriever() research_tool = create_retriever_tool( research_retriever, "research_db_tool", "Search information from the research database." ) development_tool = create_retriever_tool( development_retriever, "development_db_tool", "Search information from the development database." ) tools = [research_tool, development_tool] # --- Define our agent and workflow functions --- class AgentState(TypedDict): messages: Annotated[Sequence[AIMessage | HumanMessage], add_messages] def agent(state: AgentState): logger.info("Agent invoked") messages = state["messages"] user_message = messages[0][1] if isinstance(messages[0], tuple) else messages[0].content prompt = f"""Given this user question: "{user_message}" If it's about research or academic topics, respond EXACTLY in this format: SEARCH_RESEARCH: If it's about development status, respond EXACTLY in this format: SEARCH_DEV: Otherwise, just answer directly. """ headers = { "Accept": "application/json", "Authorization": f"Bearer {os.environ.get('DEEP_SEEK_API')}", "Content-Type": "application/json" } data = { "model": "deepseek-chat", "messages": [{"role": "user", "content": prompt}], "temperature": 0.7, "max_tokens": 1024 } response = requests.post("https://api.deepseek.com/v1/chat/completions", headers=headers, json=data, verify=False) if response.status_code == 200: response_text = response.json()['choices'][0]['message']['content'] logger.info(f"DeepSeek response: {response_text}") if "SEARCH_RESEARCH:" in response_text: query = response_text.split("SEARCH_RESEARCH:")[1].strip() results = research_retriever.invoke(query) return {"messages": [AIMessage(content=f'Action: research_db_tool\n{{"query": "{query}"}}\n\nResults: {str(results)}')]} elif "SEARCH_DEV:" in response_text: query = response_text.split("SEARCH_DEV:")[1].strip() results = development_retriever.invoke(query) return {"messages": [AIMessage(content=f'Action: development_db_tool\n{{"query": "{query}"}}\n\nResults: {str(results)}')]} else: return {"messages": [AIMessage(content=response_text)]} else: error_msg = f"DeepSeek API call failed: {response.text}" logger.error(error_msg) raise Exception(error_msg) def simple_grade_documents(state: AgentState): last_message = state["messages"][-1] logger.info(f"Grading message: {last_message.content}") return "generate" if "Results: [Document" in last_message.content else "rewrite" def generate(state: AgentState): logger.info("Generating final answer") messages = state["messages"] question = messages[0].content if not isinstance(messages[0], tuple) else messages[0][1] last_message = messages[-1] docs = last_message.content[last_message.content.find("Results: ["):] if "Results: [" in last_message.content else "" headers = { "Accept": "application/json", "Authorization": f"Bearer {os.environ.get('DEEP_SEEK_API')}", "Content-Type": "application/json" } prompt = f"""Based on these research documents, summarize the latest advancements in AI: Question: {question} Documents: {docs} Focus on extracting and synthesizing the key findings from the research papers. """ data = { "model": "deepseek-chat", "messages": [{"role": "user", "content": prompt}], "temperature": 0.7, "max_tokens": 1024 } response = requests.post("https://api.deepseek.com/v1/chat/completions", headers=headers, json=data, verify=False) if response.status_code == 200: response_text = response.json()['choices'][0]['message']['content'] return {"messages": [AIMessage(content=response_text)]} else: error_msg = f"DeepSeek API generate call failed: {response.text}" logger.error(error_msg) raise Exception(error_msg) def rewrite(state: AgentState): logger.info("Rewriting question") original_question = state["messages"][0].content if state["messages"] else "N/A" headers = { "Accept": "application/json", "Authorization": f"Bearer {os.environ.get('DEEP_SEEK_API')}", "Content-Type": "application/json" } data = { "model": "deepseek-chat", "messages": [{"role": "user", "content": f"Rewrite this question to be more specific and clearer: {original_question}"}], "temperature": 0.7, "max_tokens": 1024 } response = requests.post("https://api.deepseek.com/v1/chat/completions", headers=headers, json=data, verify=False) if response.status_code == 200: response_text = response.json()['choices'][0]['message']['content'] return {"messages": [AIMessage(content=response_text)]} else: error_msg = f"DeepSeek API rewrite call failed: {response.text}" logger.error(error_msg) raise Exception(error_msg) tools_pattern = re.compile(r"Action: .*") def custom_tools_condition(state: AgentState): last_message = state["messages"][-1] return "tools" if tools_pattern.match(last_message.content) else END # Build the workflow using LangGraph's StateGraph workflow = StateGraph(AgentState) workflow.add_node("agent", agent) retrieve_node = ToolNode(tools) workflow.add_node("retrieve", retrieve_node) workflow.add_node("rewrite", rewrite) workflow.add_node("generate", generate) workflow.add_edge(START, "agent") workflow.add_conditional_edges("agent", custom_tools_condition, {"tools": "retrieve", END: END}) workflow.add_conditional_edges("retrieve", simple_grade_documents) workflow.add_edge("generate", END) workflow.add_edge("rewrite", "agent") app_workflow = workflow.compile() def process_question(user_question, app, config): events = [] for event in app.stream({"messages": [("user", user_question)]}, config): events.append(event) return events # --- Streamlit UI with Ultra-Dark Theme --- def main(): st.set_page_config(page_title="Ultra-Dark AI R&D Assistant", layout="wide", initial_sidebar_state="expanded") st.markdown(""" """, unsafe_allow_html=True) # Sidebar: Display available data with dark styling with st.sidebar: st.header("πŸ“š Available Data") st.subheader("Research Database") for text in research_texts: st.markdown(f'
{text}
', unsafe_allow_html=True) st.subheader("Development Database") for text in development_texts: st.markdown(f'
{text}
', unsafe_allow_html=True) st.title("πŸ€– Ultra-Dark AI R&D Assistant") st.markdown("---") query = st.text_area("Enter your question:", height=100, placeholder="e.g., What are the latest advancements in AI research?") col1, col2 = st.columns([1, 2]) with col1: if st.button("πŸ” Get Answer", use_container_width=True): if query: with st.spinner('Processing your question...'): events = process_question(query, app_workflow, {"configurable": {"thread_id": "1"}}) for event in events: if 'agent' in event: with st.expander("πŸ”„ Processing Step", expanded=True): content = event['agent']['messages'][0].content if "Results:" in content: st.markdown("### πŸ“‘ Retrieved Documents:") docs = content[content.find("Results:"):] st.info(docs) elif 'generate' in event: st.markdown("### ✨ Final Answer:") st.success(event['generate']['messages'][0].content) else: st.warning("⚠️ Please enter a question first!") with col2: st.markdown(""" ### 🎯 How to Use 1. Type your question in the text box. 2. Click "Get Answer" to process. 3. View retrieved documents and the final answer. ### πŸ’‘ Example Questions - What are the latest advancements in AI research? - What is the status of Project A? - What are the current trends in machine learning? """) if __name__ == "__main__": main()