Spaces:

mgbam
/

NeuroResearch_AI

Sleeping

App Files Files Community

mgbam committed on Mar 11

Commit

d94f105

verified ·

1 Parent(s): be1f54f

Update app.py

Browse files

Files changed (1) hide show

app.py +236 -99

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ------------------------------
-# Imports & Dependencies (Enhanced)
 # ------------------------------
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
@@ -21,7 +21,26 @@ from langchain.tools.retriever import create_retriever_tool
 from datetime import datetime
 # ------------------------------
-# Enhanced Configuration
 # ------------------------------
 class AppConfig:
     def __init__(self):
@@ -29,11 +48,10 @@ class AppConfig:
         self.CHROMA_PATH = "chroma_db"
         self.MAX_RETRIES = 3
         self.RETRY_DELAY = 1.5
-        self.DOCUMENT_CHUNK_SIZE = 300  # Increased from 100
-        self.DOCUMENT_OVERLAP = 50      # Added overlap for context preservation
-        self.SEARCH_K = 5               # Number of documents to retrieve
-        self.SEARCH_TYPE = "mmr"        # Maximal Marginal Relevance
         self.validate_config()
     def validate_config(self):
@@ -51,19 +69,26 @@ class AppConfig:
 config = AppConfig()
 # ------------------------------
-# Enhanced ChromaDB Setup
 # ------------------------------
 class ChromaManager:
-    def __init__(self):
         os.makedirs(config.CHROMA_PATH, exist_ok=True)
         self.client = chromadb.PersistentClient(path=config.CHROMA_PATH)
-        self.embeddings = OpenAIEmbeddings(
-            model="text-embedding-3-large",
-            # dimensions=1024  # Optional for large-scale deployments
-        )
-    def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
-        """Enhanced document processing with optimized chunking"""
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=config.DOCUMENT_CHUNK_SIZE,
             chunk_overlap=config.DOCUMENT_OVERLAP,
@@ -74,34 +99,19 @@ class ChromaManager:
             documents=docs,
             embedding=self.embeddings,
             client=self.client,
-            collection_name=collection_name
         )
-# Initialize Chroma with improved parameters
-chroma_manager = ChromaManager()
-research_collection = chroma_manager.create_collection(research_texts, "research_collection")
-dev_collection = chroma_manager.create_collection(development_texts, "development_collection")
-# ------------------------------
-# Enhanced Retriever Configuration
-# ------------------------------
-research_retriever = research_collection.as_retriever(
-    search_type=config.SEARCH_TYPE,
-    search_kwargs={"k": config.SEARCH_K, "fetch_k": config.SEARCH_K * 2}
-)
-development_retriever = dev_collection.as_retriever(
-    search_type=config.SEARCH_TYPE,
-    search_kwargs={"k": config.SEARCH_K, "fetch_k": config.SEARCH_K * 2}
-)
 # ------------------------------
-# Enhanced Document Processing
 # ------------------------------
 class DocumentProcessor:
     @staticmethod
     def deduplicate_documents(docs: List[Any]) -> List[Any]:
-        """Advanced deduplication using content hashing"""
         seen = set()
         unique_docs = []
         for doc in docs:
@@ -113,7 +123,6 @@ class DocumentProcessor:
     @staticmethod
     def extract_key_points(docs: List[Any]) -> str:
-        """Semantic analysis of retrieved documents"""
         key_points = []
         categories = {
             "quantum": ["quantum", "qpu", "qubit"],
@@ -123,7 +132,6 @@ class DocumentProcessor:
         for doc in docs:
             content = doc.page_content.lower()
-            # Categorization logic
             if any(kw in content for kw in categories["quantum"]):
                 key_points.append("- Quantum computing integration showing promising results")
             if any(kw in content for kw in categories["vision"]):
@@ -131,10 +139,10 @@ class DocumentProcessor:
             if any(kw in content for kw in categories["nlp"]):
                 key_points.append("- NLP architectures evolving with memory-augmented transformers")
-        return "\n".join(list(set(key_points)))  # Remove duplicates
 # ------------------------------
-# Enhanced Agent Workflow (Additions)
 # ------------------------------
 class EnhancedAgent:
     def __init__(self):
@@ -145,7 +153,6 @@ class EnhancedAgent:
         }
     def api_request_with_retry(self, endpoint: str, payload: Dict) -> Dict:
-        """Robust API handling with exponential backoff"""
         headers = {
             "Authorization": f"Bearer {config.DEEPSEEK_API_KEY}",
             "Content-Type": "application/json"
@@ -171,7 +178,175 @@ class EnhancedAgent:
         raise Exception(f"API request failed after {config.MAX_RETRIES} attempts")
 # ------------------------------
-# Enhanced Streamlit UI (Dark Professional Theme)
 # ------------------------------
 class UITheme:
     primary_color = "#2E86C1"
@@ -183,12 +358,9 @@ class UITheme:
     def apply(cls):
         st.markdown(f"""
         <style>
-        .stApp {{
-            background-color: {cls.background_color};
-            color: {cls.text_color};
-        }}
-        .stTextArea textarea {{
-            background-color: #2D2D2D !important;
             color: {cls.text_color} !important;
             border: 1px solid {cls.primary_color};
         }}
@@ -214,22 +386,9 @@ class UITheme:
             border-radius: 8px;
             box-shadow: 0 2px 8px rgba(0,0,0,0.15);
         }}
-        .st-expander {{
-            background-color: #2D2D2D;
-            border: 1px solid #3D3D3D;
-            border-radius: 6px;
-            margin: 12px 0;
-        }}
-        .stAlert {{
-            background-color: #423a2d !important;
-            border: 1px solid #E67E22 !important;
-        }}
         </style>
         """, unsafe_allow_html=True)
-# ------------------------------
-# Enhanced Main Application
-# ------------------------------
 def main():
     UITheme.apply()
@@ -248,23 +407,20 @@ def main():
         st.header("📂 Knowledge Bases")
         with st.expander("Research Database", expanded=True):
             for text in research_texts:
-                st.markdown(f'<div class="data-box research-box">{text}</div>',
-                          unsafe_allow_html=True)
         with st.expander("Development Database"):
             for text in development_texts:
-                st.markdown(f'<div class="data-box dev-box">{text}</div>',
-                          unsafe_allow_html=True)
     st.title("🔬 AI Research Assistant Pro")
     st.markdown("---")
-    # Enhanced query input with examples
     query = st.text_area(
         "Research Query Input",
         height=120,
-        placeholder="Enter your research question...\nExample: What are recent breakthroughs in quantum machine learning?",
-        help="Be specific about domains (e.g., computer vision, NLP) for better results"
     )
     col1, col2 = st.columns([1, 2])
@@ -277,72 +433,53 @@ def main():
             with st.status("Processing Workflow...", expanded=True) as status:
                 try:
                     start_time = time.time()
-                    # Document Retrieval Phase
-                    status.update(label="🔍 Retrieving Relevant Documents", state="running")
                     events = process_question(query, app, {"configurable": {"thread_id": "1"}})
-                    # Processing Phase
-                    status.update(label="📊 Analyzing Content", state="running")
                     processed_data = []
                     for event in events:
                         if 'agent' in event:
                             content = event['agent']['messages'][0].content
                             if "Results:" in content:
-                                docs_str = content.split("Results: ")[1]
-                                docs = eval(docs_str)
                                 unique_docs = DocumentProcessor.deduplicate_documents(docs)
                                 key_points = DocumentProcessor.extract_key_points(unique_docs)
                                 processed_data.append(key_points)
                                 with st.expander("📄 Retrieved Documents", expanded=False):
                                     st.info(f"Found {len(unique_docs)} unique documents")
-                                    st.write(docs_str)
                         elif 'generate' in event:
                             final_answer = event['generate']['messages'][0].content
                             status.update(label="✅ Analysis Complete", state="complete")
                             st.markdown("## 📝 Research Summary")
                             st.markdown(final_answer)
-                    # Performance metrics
-                    proc_time = time.time() - start_time
-                    st.caption(f"⏱️ Processed in {proc_time:.2f}s | {len(processed_data)} document clusters")
                 except Exception as e:
                     status.update(label="❌ Processing Failed", state="error")
-                    st.error(f"""
-                    **Critical Error**
-                    {str(e)}
-                    Recommended Actions:
-                    - Verify API key configuration
-                    - Check service status
-                    - Simplify query complexity
-                    """)
-                    # Log error with timestamp
-                    error_log = f"{datetime.now()} | {str(e)}\n"
                     with open("error_log.txt", "a") as f:
-                        f.write(error_log)
     with col2:
         st.markdown("""
         ## 📘 Usage Guide
         **1. Query Formulation**
-        - Be domain-specific (e.g., "quantum NLP")
-        - Include timeframes (e.g., "2023-2024 advances")
         **2. Results Interpretation**
-        - Expand document sections for sources
-        - Key points highlight technical breakthroughs
-        - Summary shows commercial implications
         **3. Advanced Features**
-        - `CTRL+Enter` for quick reruns
         - Click documents for raw context
-        - Export results via screenshot
         """)
 if __name__ == "__main__":
-    main()

 # ------------------------------
+# Imports & Dependencies
 # ------------------------------
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
 from datetime import datetime
 # ------------------------------
+# Data Definitions
+# ------------------------------
+# Seed documents for the "research" knowledge base. The list is handed to
+# ChromaManager below and is also rendered verbatim in the sidebar by main().
+research_texts = [
+    "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
+    "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
+    "Latest Trends in Machine Learning Methods Using Quantum Computing",
+    "Advancements in Neuromorphic Computing for Energy-Efficient AI Systems",
+    "Cross-Modal Learning: Integrating Visual and Textual Representations for Multimodal AI"
+]
+# Seed documents for the "development" knowledge base; used the same way as
+# research_texts (ChromaManager input and sidebar display).
+development_texts = [
+    "Project A: UI Design Completed, API Integration in Progress",
+    "Project B: Testing New Feature X, Bug Fixes Needed",
+    "Product Y: In the Performance Optimization Stage Before Release",
+    "Framework Z: Version 3.2 Released with Enhanced Distributed Training Support",
+    "DevOps Pipeline: Automated CI/CD Implementation for ML Model Deployment"
+]
+# ------------------------------
+# Configuration Class
 # ------------------------------
 class AppConfig:
     def __init__(self):
         self.CHROMA_PATH = "chroma_db"
         self.MAX_RETRIES = 3
         self.RETRY_DELAY = 1.5
+        self.DOCUMENT_CHUNK_SIZE = 300
+        self.DOCUMENT_OVERLAP = 50
+        self.SEARCH_K = 5
+        self.SEARCH_TYPE = "mmr"
         self.validate_config()
     def validate_config(self):
 config = AppConfig()
 # ------------------------------
+# ChromaDB Manager
 # ------------------------------
 class ChromaManager:
+    def __init__(self, research_data: List[str], development_data: List[str]):
         os.makedirs(config.CHROMA_PATH, exist_ok=True)
         self.client = chromadb.PersistentClient(path=config.CHROMA_PATH)
+        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
+        self.research_collection = self.create_collection(
+            research_data,
+            "research_collection",
+            {"category": "research", "version": "1.2"}
+        )
+        self.dev_collection = self.create_collection(
+            development_data,
+            "development_collection",
+            {"category": "development", "version": "1.1"}
+        )
+    def create_collection(self, documents: List[str], name: str, metadata: dict) -> Chroma:
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=config.DOCUMENT_CHUNK_SIZE,
             chunk_overlap=config.DOCUMENT_OVERLAP,
             documents=docs,
             embedding=self.embeddings,
             client=self.client,
+            collection_name=name,
+            collection_metadata=metadata
         )
+# Initialize Chroma with data
+chroma_manager = ChromaManager(research_texts, development_texts)
 # ------------------------------
+# Document Processing
 # ------------------------------
 class DocumentProcessor:
     @staticmethod
     def deduplicate_documents(docs: List[Any]) -> List[Any]:
         seen = set()
         unique_docs = []
         for doc in docs:
     @staticmethod
     def extract_key_points(docs: List[Any]) -> str:
         key_points = []
         categories = {
             "quantum": ["quantum", "qpu", "qubit"],
         for doc in docs:
             content = doc.page_content.lower()
             if any(kw in content for kw in categories["quantum"]):
                 key_points.append("- Quantum computing integration showing promising results")
             if any(kw in content for kw in categories["vision"]):
             if any(kw in content for kw in categories["nlp"]):
                 key_points.append("- NLP architectures evolving with memory-augmented transformers")
+        return "\n".join(list(set(key_points)))
 # ------------------------------
+# Enhanced Agent Components
 # ------------------------------
 class EnhancedAgent:
     def __init__(self):
         }
     def api_request_with_retry(self, endpoint: str, payload: Dict) -> Dict:
         headers = {
             "Authorization": f"Bearer {config.DEEPSEEK_API_KEY}",
             "Content-Type": "application/json"
         raise Exception(f"API request failed after {config.MAX_RETRIES} attempts")
 # ------------------------------
+# Workflow Configuration
+# ------------------------------
+class AgentState(TypedDict):
+    """State dictionary passed between the workflow graph nodes."""
+    # Conversation so far; index 0 is read as the user's question and index -1
+    # as the most recent node output by the node functions below.
+    # NOTE(review): `add_messages` is presumably LangGraph's message-merging
+    # reducer -- confirm against the import at the top of the file.
+    messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
+def agent(state: AgentState):
+    print("---CALL AGENT---")
+    messages = state["messages"]
+    user_message = messages[0].content if not isinstance(messages[0], tuple) else messages[0][1]
+    prompt = f"""Given this user question: "{user_message}"
+If about research/academic topics, respond EXACTLY:
+SEARCH_RESEARCH: <search terms>
+If about development status, respond EXACTLY:
+SEARCH_DEV: <search terms>
+Otherwise, answer directly."""
+    headers = {
+        "Accept": "application/json",
+        "Authorization": f"Bearer {config.DEEPSEEK_API_KEY}",
+        "Content-Type": "application/json"
+    }
+    data = {
+        "model": "deepseek-chat",
+        "messages": [{"role": "user", "content": prompt}],
+        "temperature": 0.7,
+        "max_tokens": 1024
+    }
+    try:
+        response = requests.post(
+            "https://api.deepseek.com/v1/chat/completions",
+            headers=headers,
+            json=data,
+            verify=False,
+            timeout=30
+        )
+        response.raise_for_status()
+        response_text = response.json()['choices'][0]['message']['content']
+        if "SEARCH_RESEARCH:" in response_text:
+            query = response_text.split("SEARCH_RESEARCH:")[1].strip()
+            results = chroma_manager.research_collection.as_retriever().invoke(query)
+            return {"messages": [AIMessage(content=f'Action: research_db_tool\n{{"query": "{query}"}}\n\nResults: {str(results)}')]
+        elif "SEARCH_DEV:" in response_text:
+            query = response_text.split("SEARCH_DEV:")[1].strip()
+            results = chroma_manager.dev_collection.as_retriever().invoke(query)
+            return {"messages": [AIMessage(content=f'Action: development_db_tool\n{{"query": "{query}"}}\n\nResults: {str(results)}')]
+        return {"messages": [AIMessage(content=response_text)]}
+    except Exception as e:
+        error_msg = f"API Error: {str(e)}"
+        if "Insufficient Balance" in str(e):
+            error_msg += "\n\nPlease check your DeepSeek API account balance."
+        return {"messages": [AIMessage(content=error_msg)]}
+def simple_grade_documents(state: AgentState):
+    messages = state["messages"]
+    last_message = messages[-1]
+    return "generate" if "Results: [Document" in last_message.content else "rewrite"
+def generate(state: AgentState):
+    messages = state["messages"]
+    question = messages[0].content
+    last_message = messages[-1]
+    docs_content = []
+    if "Results: [" in last_message.content:
+        docs_str = last_message.content.split("Results: ")[1]
+        docs_content = eval(docs_str)
+    processed_info = DocumentProcessor.extract_key_points(
+        DocumentProcessor.deduplicate_documents(docs_content)
+    )
+    prompt = f"""Generate structured research summary:
+    Key Information:
+    {processed_info}
+    Include:
+    1. Section headings
+    2. Bullet points
+    3. Significance
+    4. Applications"""
+    try:
+        response = requests.post(
+            "https://api.deepseek.com/v1/chat/completions",
+            headers={
+                "Authorization": f"Bearer {config.DEEPSEEK_API_KEY}",
+                "Content-Type": "application/json"
+            },
+            json={
+                "model": "deepseek-chat",
+                "messages": [{"role": "user", "content": prompt}],
+                "temperature": 0.7,
+                "max_tokens": 1024
+            },
+            timeout=30
+        )
+        response.raise_for_status()
+        return {"messages": [AIMessage(content=response.json()['choices'][0]['message']['content'])}
+    except Exception as e:
+        return {"messages": [AIMessage(content=f"Generation Error: {str(e)}")]}
+def rewrite(state: AgentState):
+    messages = state["messages"]
+    original_question = messages[0].content
+    try:
+        response = requests.post(
+            "https://api.deepseek.com/v1/chat/completions",
+            headers={
+                "Authorization": f"Bearer {config.DEEPSEEK_API_KEY}",
+                "Content-Type": "application/json"
+            },
+            json={
+                "model": "deepseek-chat",
+                "messages": [{
+                    "role": "user",
+                    "content": f"Rewrite for clarity: {original_question}"
+                }],
+                "temperature": 0.7,
+                "max_tokens": 1024
+            },
+            timeout=30
+        )
+        response.raise_for_status()
+        return {"messages": [AIMessage(content=response.json()['choices'][0]['message']['content'])}
+    except Exception as e:
+        return {"messages": [AIMessage(content=f"Rewrite Error: {str(e)}")]}
+tools_pattern = re.compile(r"Action: .*")
+def custom_tools_condition(state: AgentState):
+    content = state["messages"][-1].content
+    return "tools" if tools_pattern.match(content) else END
+# ------------------------------
+# Workflow Graph Setup
+# ------------------------------
+# Build the graph over AgentState: agent -> (retrieve | END),
+# retrieve -> (generate | rewrite), generate -> END, rewrite -> agent.
+workflow = StateGraph(AgentState)
+workflow.add_node("agent", agent)
+# The "retrieve" node exposes one retriever tool per Chroma collection.
+workflow.add_node("retrieve", ToolNode([
+    create_retriever_tool(
+        chroma_manager.research_collection.as_retriever(),
+        "research_db_tool",
+        "Search research database"
+    ),
+    create_retriever_tool(
+        chroma_manager.dev_collection.as_retriever(),
+        "development_db_tool",
+        "Search development database"
+    )
+]))
+workflow.add_node("rewrite", rewrite)
+workflow.add_node("generate", generate)
+workflow.set_entry_point("agent")
+# custom_tools_condition returns "tools" (go to retrieve) or END (finish).
+workflow.add_conditional_edges("agent", custom_tools_condition, {"tools": "retrieve", END: END})
+# simple_grade_documents returns "generate" or "rewrite".
+workflow.add_conditional_edges("retrieve", simple_grade_documents, {"generate": "generate", "rewrite": "rewrite"})
+workflow.add_edge("generate", END)
+# NOTE(review): rewrite feeds back into agent and nothing in this section
+# bounds that cycle -- confirm a recursion limit is applied where the graph
+# is invoked.
+workflow.add_edge("rewrite", "agent")
+app = workflow.compile()
+# ------------------------------
+# Streamlit UI
 # ------------------------------
 class UITheme:
     primary_color = "#2E86C1"
     def apply(cls):
         st.markdown(f"""
         <style>
+        .stApp {{ background-color: {cls.background_color}; color: {cls.text_color}; }}
+        .stTextArea textarea {{
+            background-color: #2D2D2D !important;
             color: {cls.text_color} !important;
             border: 1px solid {cls.primary_color};
         }}
             border-radius: 8px;
             box-shadow: 0 2px 8px rgba(0,0,0,0.15);
         }}
         </style>
         """, unsafe_allow_html=True)
 def main():
     UITheme.apply()
         st.header("📂 Knowledge Bases")
         with st.expander("Research Database", expanded=True):
             for text in research_texts:
+                st.markdown(f'<div class="data-box research-box">{text}</div>', unsafe_allow_html=True)
         with st.expander("Development Database"):
             for text in development_texts:
+                st.markdown(f'<div class="data-box dev-box">{text}</div>', unsafe_allow_html=True)
     st.title("🔬 AI Research Assistant Pro")
     st.markdown("---")
     query = st.text_area(
         "Research Query Input",
         height=120,
+        placeholder="Enter your research question...",
+        help="Be specific about domains for better results"
     )
     col1, col2 = st.columns([1, 2])
             with st.status("Processing Workflow...", expanded=True) as status:
                 try:
                     start_time = time.time()
                     events = process_question(query, app, {"configurable": {"thread_id": "1"}})
                     processed_data = []
                     for event in events:
                         if 'agent' in event:
                             content = event['agent']['messages'][0].content
                             if "Results:" in content:
+                                docs = eval(content.split("Results: ")[1])
                                 unique_docs = DocumentProcessor.deduplicate_documents(docs)
                                 key_points = DocumentProcessor.extract_key_points(unique_docs)
                                 processed_data.append(key_points)
                                 with st.expander("📄 Retrieved Documents", expanded=False):
                                     st.info(f"Found {len(unique_docs)} unique documents")
+                                    st.write(docs)
                         elif 'generate' in event:
                             final_answer = event['generate']['messages'][0].content
                             status.update(label="✅ Analysis Complete", state="complete")
                             st.markdown("## 📝 Research Summary")
                             st.markdown(final_answer)
+                    st.caption(f"⏱️ Processed in {time.time()-start_time:.2f}s | {len(processed_data)} clusters")
                 except Exception as e:
                     status.update(label="❌ Processing Failed", state="error")
+                    st.error(f"**Error:** {str(e)}\n\nCheck API key and network connection")
                     with open("error_log.txt", "a") as f:
+                        f.write(f"{datetime.now()} | {str(e)}\n")
     with col2:
         st.markdown("""
         ## 📘 Usage Guide
         **1. Query Formulation**
+        - Specify domains (e.g., "quantum NLP")
+        - Include timeframes for recent advances
         **2. Results Interpretation**
+        - Expand sections for source documents
+        - Key points show technical breakthroughs
+        - Summary includes commercial implications
         **3. Advanced Features**
+        - Use keyboard shortcuts for efficiency
         - Click documents for raw context
+        - Export via screenshot/PDF
         """)
 if __name__ == "__main__":
+    main()