""" AI Research Assistant """ # ------------------------------ # Core Imports & Configuration # ------------------------------ import os import re import time import chromadb import requests import streamlit as st from typing import Sequence, Tuple from typing_extensions import TypedDict, Annotated from langchain_core.messages import HumanMessage, AIMessage from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.vectorstores import Chroma from langchain.tools.retriever import create_retriever_tool from langgraph.graph import END, StateGraph from langgraph.prebuilt import ToolNode from langgraph.graph.message import add_messages from chromadb.config import Settings from langchain_openai import OpenAIEmbeddings # ------------------------------ # Type Definitions # ------------------------------ class AgentState(TypedDict): messages: Annotated[Sequence[AIMessage | HumanMessage], add_messages] # ------------------------------ # Configuration & Constants # ------------------------------ class Config: API_KEY = os.environ.get("DEEPSEEK_API_KEY") CHROMA_PATH = "chroma_db" TEXT_SPLITTER_CONFIG = { "chunk_size": 512, "chunk_overlap": 128, "separators": ["\n\n", "\n", ". ", "! ", "? "] } # ------------------------------ # Core System Components # ------------------------------ class ResearchAssistant: def __init__(self): self.embeddings = OpenAIEmbeddings(model="text-embedding-3-large") self.vector_stores = self._init_vector_stores() self.tools = self._create_tools() self.workflow = self._build_workflow() def _init_vector_stores(self) -> Tuple[Chroma, Chroma]: """Initialize vector stores with proper document processing""" splitter = RecursiveCharacterTextSplitter(**Config.TEXT_SPLITTER_CONFIG) research_docs = splitter.create_documents([ "Research Report: New AI Model Achieves 98% Image Recognition Accuracy", "Transformers: The New NLP Architecture Standard", "Quantum Machine Learning: Emerging Trends and Applications" ]) development_docs = splitter.create_documents([ "Project A: UI Design Finalized, API Integration Phase", "Project B: Feature Testing and Bug Fixes", "Product Y: Performance Optimization Pre-Release" ]) client = chromadb.PersistentClient( path=Config.CHROMA_PATH, settings=Settings(anonymized_telemetry=False) return ( Chroma.from_documents(research_docs, self.embeddings, client=client, collection_name="research"), Chroma.from_documents(development_docs, self.embeddings, client=client, collection_name="development") ) def _create_tools(self): """Create retrieval tools with optimized search parameters""" research_retriever = self.vector_stores[0].as_retriever( search_kwargs={"k": 3, "score_threshold": 0.7} ) development_retriever = self.vector_stores[1].as_retriever( search_kwargs={"k": 3, "score_threshold": 0.7} ) return [ create_retriever_tool( research_retriever, "research_db", "Access technical research papers and reports" ), create_retriever_tool( development_retriever, "development_db", "Retrieve project development status updates" ) ] def _build_workflow(self): """Construct and return the processing workflow""" workflow = StateGraph(AgentState) workflow.add_node("analyze", self.analyze_query) workflow.add_node("retrieve", ToolNode(self.tools)) workflow.add_node("synthesize", self.synthesize_response) workflow.set_entry_point("analyze") workflow.add_conditional_edges( "analyze", self._needs_retrieval, {"retrieve": "retrieve", "direct": "synthesize"} ) workflow.add_edge("retrieve", "synthesize") workflow.add_edge("synthesize", END) return workflow.compile() def _needs_retrieval(self, state: AgentState) -> str: """Determine if document retrieval is needed""" query = state["messages"][-1].content.lower() return "retrieve" if any(kw in query for kw in { "research", "study", "project", "develop", "trend" }) else "direct" def analyze_query(self, state: AgentState): """Analyze user query and determine next steps""" try: user_input = state["messages"][-1].content headers = { "Authorization": f"Bearer {Config.API_KEY}", "Content-Type": "application/json" } response = requests.post( "https://api.deepseek.com/v1/chat/completions", headers=headers, json={ "model": "deepseek-chat", "messages": [{ "role": "user", "content": f"""Analyze this query and format as: CATEGORY: [RESEARCH|DEVELOPMENT|GENERAL] KEY_TERMS: comma-separated list {user_input}""" }], "temperature": 0.3 }, timeout=15 ) response.raise_for_status() analysis = response.json()["choices"][0]["message"]["content"] return {"messages": [AIMessage(content=analysis)]} except Exception as e: return {"messages": [AIMessage( content=f"Analysis Error: {str(e)}. Please rephrase your question." )]} def synthesize_response(self, state: AgentState): """Generate final response with citations""" try: context = "\n".join([msg.content for msg in state["messages"]]) headers = { "Authorization": f"Bearer {Config.API_KEY}", "Content-Type": "application/json" } response = requests.post( "https://api.deepseek.com/v1/chat/completions", headers=headers, json={ "model": "deepseek-chat", "messages": [{ "role": "user", "content": f"""Synthesize this information: {context} Include: 1. Key findings 2. Supporting evidence 3. Technical details 4. Potential applications""" }], "temperature": 0.5 }, timeout=20 ) response.raise_for_status() return {"messages": [AIMessage( content=response.json()["choices"][0]["message"]["content"] )]} except Exception as e: return {"messages": [AIMessage( content=f"Synthesis Error: {str(e)}. Please try again later." )]} # ------------------------------ # Professional UI Interface # ------------------------------ def main(): st.set_page_config( page_title="Research Assistant Pro", layout="wide", initial_sidebar_state="expanded" ) # Dark theme implementation st.markdown(""" """, unsafe_allow_html=True) st.title("🔍 Research Assistant Pro") st.write("Advanced AI-Powered Research Analysis") col1, col2 = st.columns([1, 2]) with col1: with st.form("query_form"): query = st.text_area("Research Query:", height=150, placeholder="Enter your research question...") submitted = st.form_submit_button("Analyze") if submitted and query: with st.spinner("Processing..."): try: assistant = ResearchAssistant() result = assistant.workflow.invoke({"messages": [ HumanMessage(content=query) ]}) with st.expander("Analysis Details", expanded=True): st.markdown(f"""