Spaces:

mgbam
/

NeuroResearch_AI

Sleeping

App Files Files Community

mgbam committed on Mar 13

Commit

b31058d

verified ·

1 Parent(s): 92b5f8b

Update app.py

Browse files

Files changed (1) hide show

app.py +184 -172

app.py CHANGED Viewed

@@ -3,30 +3,29 @@
 # ------------------------------
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
-from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langgraph.graph import END, StateGraph
-from langgraph.prebuilt import ToolNode
-from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict, Annotated
 from typing import Sequence, Dict, List, Optional, Any
 import chromadb
-import re
 import os
 import streamlit as st
 import requests
 import hashlib
-import json
 import time
-from langchain.tools.retriever import create_retriever_tool
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
 # ------------------------------
 # State Schema Definition
 # ------------------------------
 class AgentState(TypedDict):
-    messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
     context: Dict[str, Any]
     metadata: Dict[str, Any]
@@ -40,37 +39,56 @@ class ResearchConfig:
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
     DOCUMENT_MAP = {
-        "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
-            "CV-Transformer Hybrid Architecture",
-        "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing":
-            "Transformer Architecture Analysis",
-        "Latest Trends in Machine Learning Methods Using Quantum Computing":
-            "Quantum ML Frontiers"
     }
-    ANALYSIS_TEMPLATE = """Analyze these technical documents with scientific rigor:
 {context}
-Respond with:
-1. Key Technical Contributions (bullet points)
-2. Novel Methodologies
-3. Empirical Results (with metrics)
-4. Potential Applications
-5. Limitations & Future Directions
-Format: Markdown with LaTeX mathematical notation where applicable
-"""
-# Validation
 if not ResearchConfig.DEEPSEEK_API_KEY:
-    st.error("""**Research Portal Configuration Required**
-    1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
-    2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
     3. Rebuild deployment""")
     st.stop()
 # ------------------------------
-# Quantum Document Processing
 # ------------------------------
 class QuantumDocumentManager:
     def __init__(self):
@@ -80,17 +98,29 @@ class QuantumDocumentManager:
             dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
         )
-    def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
             chunk_overlap=ResearchConfig.CHUNK_OVERLAP,
             separators=["\n\n", "\n", "|||"]
         )
-        docs = splitter.create_documents(documents)
         return Chroma.from_documents(
             documents=docs,
             embedding=self.embeddings,
-            client=self.client,
             collection_name=collection_name,
             ids=[self._document_id(doc.page_content) for doc in docs]
         )
@@ -98,78 +128,49 @@ class QuantumDocumentManager:
     def _document_id(self, content: str) -> str:
         return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
-# Initialize document collections
 qdm = QuantumDocumentManager()
-research_docs = qdm.create_collection([
-    "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
-    "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
-    "Latest Trends in Machine Learning Methods Using Quantum Computing"
-], "research")
-development_docs = qdm.create_collection([
-    "Project A: UI Design Completed, API Integration in Progress",
-    "Project B: Testing New Feature X, Bug Fixes Needed",
-    "Product Y: In the Performance Optimization Stage Before Release"
-], "development")
 # ------------------------------
-# Advanced Retrieval System
 # ------------------------------
 class ResearchRetriever:
     def __init__(self):
-        self.retrievers = {
-            "research": research_docs.as_retriever(
-                search_type="mmr",
-                search_kwargs={
-                    'k': 4,
-                    'fetch_k': 20,
-                    'lambda_mult': 0.85
-                }
-            ),
-            "development": development_docs.as_retriever(
-                search_type="similarity",
-                search_kwargs={'k': 3}
-            )
-        }
-    def retrieve(self, query: str, domain: str) -> List[Any]:
         try:
-            return self.retrievers[domain].invoke(query)
-        except KeyError:
             return []
-retriever = ResearchRetriever()
 # ------------------------------
-# Cognitive Processing Unit
 # ------------------------------
 class CognitiveProcessor:
     def __init__(self):
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
-        self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]
     def process_query(self, prompt: str) -> Dict:
-        futures = []
-        for _ in range(3):  # Triple redundancy
-            futures.append(self.executor.submit(
-                self._execute_api_request,
-                prompt
-            ))
-        results = []
-        for future in as_completed(futures):
-            try:
-                results.append(future.result())
-            except Exception as e:
-                st.error(f"Processing Error: {str(e)}")
-        return self._consensus_check(results)
-    def _execute_api_request(self, prompt: str) -> Dict:
         headers = {
             "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
-            "Content-Type": "application/json",
-            "X-Research-Session": self.session_id
         }
         try:
@@ -190,20 +191,25 @@ class CognitiveProcessor:
             )
             response.raise_for_status()
             return response.json()
-        except requests.exceptions.RequestException as e:
             return {"error": str(e)}
-    def _consensus_check(self, results: List[Dict]) -> Dict:
         valid = [r for r in results if "error" not in r]
         if not valid:
             return {"error": "All API requests failed"}
-        return max(valid, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
 # ------------------------------
-# Research Workflow Engine
 # ------------------------------
 class ResearchWorkflow:
     def __init__(self):
         self.processor = CognitiveProcessor()
         self.workflow = StateGraph(AgentState)
         self._build_workflow()
@@ -241,70 +247,87 @@ class ResearchWorkflow:
     def retrieve_documents(self, state: AgentState) -> Dict:
         try:
-            query = state["context"]["raw_query"]
-            docs = retriever.retrieve(query, "research")
             return {
                 "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
-                "context": {
-                    "documents": docs,
-                    "retrieval_time": time.time()
-                }
             }
         except Exception as e:
             return self._error_state(f"Retrieval Error: {str(e)}")
     def analyze_content(self, state: AgentState) -> Dict:
         try:
-            docs = "\n\n".join([d.page_content for d in state["context"]["documents"]])
-            prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs)
             response = self.processor.process_query(prompt)
             if "error" in response:
-                return self._error_state(response["error"])
             return {
-                "messages": [AIMessage(content=response['choices'][0]['message']['content'])],
-                "context": {"analysis": response}
             }
         except Exception as e:
             return self._error_state(f"Analysis Error: {str(e)}")
     def validate_output(self, state: AgentState) -> Dict:
-        analysis = state["messages"][-1].content
-        validation_prompt = f"""Validate research analysis:
-        {analysis}
-        Check for:
-        1. Technical accuracy
-        2. Citation support
-        3. Logical consistency
-        4. Methodological soundness
-        Respond with 'VALID' or 'INVALID'"""
         response = self.processor.process_query(validation_prompt)
         return {
-            "messages": [AIMessage(content=analysis + f"\n\nValidation: {response.get('choices', [{}])[0].get('message', {}).get('content', '')}")]
         }
     def refine_results(self, state: AgentState) -> Dict:
-        refinement_prompt = f"""Refine this analysis:
-        {state["messages"][-1].content}
-        Improve:
-        1. Technical precision
-        2. Empirical grounding
-        3. Theoretical coherence"""
         response = self.processor.process_query(refinement_prompt)
         return {
-            "messages": [AIMessage(content=response.get('choices', [{}])[0].get('message', {}).get('content', ''))],
             "context": state["context"]
         }
     def _quality_check(self, state: AgentState) -> str:
-        content = state["messages"][-1].content
-        return "valid" if "VALID" in content else "invalid"
     def _error_state(self, message: str) -> Dict:
         return {
@@ -374,20 +397,24 @@ class ResearchInterface:
             border-radius: 8px;
             margin: 1rem 0;
         }
         </style>
         """, unsafe_allow_html=True)
     def _build_sidebar(self):
         with st.sidebar:
             st.title("🔍 Research Database")
-            st.subheader("Technical Papers")
-            for title, short in ResearchConfig.DOCUMENT_MAP.items():
-                with st.expander(short):
-                    st.markdown(f"```\n{title}\n```")
-            st.subheader("Analysis Metrics")
-            st.metric("Vector Collections", 2)
             st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
     def _build_main_interface(self):
         st.title("🧠 NeuroResearch AI")
@@ -399,54 +426,39 @@ class ResearchInterface:
     def _execute_analysis(self, query: str):
         try:
-            with st.spinner("Initializing Quantum Analysis..."):
-                results = self.workflow.app.stream(
-                    {"messages": [HumanMessage(content=query)], "context": {}, "metadata": {}}
                 )
-                for event in results:
-                    self._render_event(event)
-                st.success("✅ Analysis Completed Successfully")
-        except Exception as e:
-            st.error(f"""**Analysis Failed**
-            {str(e)}
-            Potential issues:
-            - Complex query structure
-            - Document correlation failure
-            - Temporal processing constraints""")
-    def _render_event(self, event: Dict):
-        if 'ingest' in event:
-            with st.container():
-                st.success("✅ Query Ingested")
-        elif 'retrieve' in event:
-            with st.container():
-                docs = event['retrieve']['context']['documents']
-                st.info(f"📚 Retrieved {len(docs)} documents")
-                with st.expander("View Retrieved Documents", expanded=False):
-                    for i, doc in enumerate(docs, 1):
-                        st.markdown(f"**Document {i}**")
-                        st.code(doc.page_content, language='text')
-        elif 'analyze' in event:
-            with st.container():
-                content = event['analyze']['messages'][0].content
-                with st.expander("Technical Analysis Report", expanded=True):
-                    st.markdown(content)
-        elif 'validate' in event:
-            with st.container():
-                content = event['validate']['messages'][0].content
-                if "VALID" in content:
-                    st.success("✅ Validation Passed")
-                    with st.expander("View Validated Analysis", expanded=True):
-                        st.markdown(content.split("Validation:")[0])
                 else:
-                    st.warning("⚠️ Validation Issues Detected")
-                    with st.expander("View Validation Details", expanded=True):
-                        st.markdown(content)
 if __name__ == "__main__":
     ResearchInterface()

 # ------------------------------
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
+from langchain_core.messages import HumanMessage, AIMessage
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_core.documents import Document
 from langgraph.graph import END, StateGraph
 from typing_extensions import TypedDict, Annotated
 from typing import Sequence, Dict, List, Optional, Any
 import chromadb
+import numpy as np
 import os
 import streamlit as st
 import requests
 import hashlib
+import re
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
+from sklearn.metrics.pairwise import cosine_similarity
 # ------------------------------
 # State Schema Definition
 # ------------------------------
 class AgentState(TypedDict):
+    messages: Annotated[Sequence[AIMessage | HumanMessage], add_messages]
     context: Dict[str, Any]
     metadata: Dict[str, Any]
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
+    RESEARCH_EMBEDDING = np.random.randn(1536)
     DOCUMENT_MAP = {
+        "CV-Transformer Hybrid Architecture": {
+            "title": "Research Report: CV-Transformer Model (98% Accuracy)",
+            "content": """
+            Hybrid architecture combining CNNs and Transformers achieves 98% image recognition accuracy.
+            Key equation: $f(x) = \text{Attention}(\text{CNN}(x))$
+            Validation on ImageNet-1k: Top-1 Accuracy 98.2%, Inference Speed 42ms/img
+            """
+        },
+        "Transformer Architecture Analysis": {
+            "title": "Academic Paper: Transformers in NLP",
+            "content": """
+            Self-attention mechanism remains core innovation:
+            $\text{Attention}(Q, K, V) = \text{softmax}(\frac{QK^T}{\sqrt{d_k}})V$
+            GLUE Benchmark Score: 92.4%, Training Efficiency: 1.8x vs RNNs
+            """
+        },
+        "Quantum ML Frontiers": {
+            "title": "Quantum Machine Learning Review",
+            "content": """
+            Quantum gradient descent enables faster optimization:
+            $\theta_{t+1} = \theta_t - \eta \nabla_\theta \mathcal{L}(\theta_t)$
+            100x speedup on optimization tasks, 58% energy reduction
+            """
+        }
     }
+    ANALYSIS_TEMPLATE = """Analyze these technical documents:
 {context}
+Respond in MARKDOWN with:
+1. **Key Technical Contributions** (bullet points with equations)
+2. **Novel Methodologies** (algorithms with math notation)
+3. **Empirical Results** (comparative metrics)
+4. **Applications** (domain-specific implementations)
+5. **Limitations** (theoretical/practical boundaries)
+Include LaTeX equations where applicable."""
 if not ResearchConfig.DEEPSEEK_API_KEY:
+    st.error("""**Configuration Required**
+    1. Get DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
+    2. Set secret: `DEEPSEEK_API_KEY`
     3. Rebuild deployment""")
     st.stop()
 # ------------------------------
+# Document Processing System
 # ------------------------------
 class QuantumDocumentManager:
     def __init__(self):
             dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
         )
+    def create_collection(self, document_map: Dict[str, Dict[str, str]], collection_name: str) -> Chroma:
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
             chunk_overlap=ResearchConfig.CHUNK_OVERLAP,
             separators=["\n\n", "\n", "|||"]
         )
+        docs = []
+        for key, data in document_map.items():
+            chunks = splitter.split_text(data["content"])
+            for chunk in chunks:
+                docs.append(Document(
+                    page_content=chunk,
+                    metadata={
+                        "title": data["title"],
+                        "source": collection_name,
+                        "hash": hashlib.sha256(chunk.encode()).hexdigest()[:16]
+                    }
+                ))
         return Chroma.from_documents(
             documents=docs,
             embedding=self.embeddings,
             collection_name=collection_name,
             ids=[self._document_id(doc.page_content) for doc in docs]
         )
     def _document_id(self, content: str) -> str:
         return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
+# Initialize document system
 qdm = QuantumDocumentManager()
+research_docs = qdm.create_collection(ResearchConfig.DOCUMENT_MAP, "research")
 # ------------------------------
+# Intelligent Retrieval System
 # ------------------------------
 class ResearchRetriever:
     def __init__(self):
+        self.retriever = research_docs.as_retriever(
+            search_type="mmr",
+            search_kwargs={
+                'k': 4,
+                'fetch_k': 20,
+                'lambda_mult': 0.85
+            }
+        )
+    def retrieve(self, query: str) -> List[Document]:
         try:
+            docs = self.retriever.invoke(query)
+            if not docs:
+                raise ValueError("No relevant documents found")
+            return docs
+        except Exception as e:
+            st.error(f"Retrieval Error: {str(e)}")
             return []
 # ------------------------------
+# Robust Processing Core
 # ------------------------------
 class CognitiveProcessor:
     def __init__(self):
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
     def process_query(self, prompt: str) -> Dict:
+        futures = [self.executor.submit(self._api_request, prompt) for _ in range(3)]
+        return self._best_result([f.result() for f in as_completed(futures)])
+    def _api_request(self, prompt: str) -> Dict:
         headers = {
             "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
+            "Content-Type": "application/json"
         }
         try:
             )
             response.raise_for_status()
             return response.json()
+        except Exception as e:
             return {"error": str(e)}
+    def _best_result(self, results: List[Dict]) -> Dict:
         valid = [r for r in results if "error" not in r]
         if not valid:
             return {"error": "All API requests failed"}
+        # Select response with most technical content
+        contents = [r.get('choices', [{}])[0].get('message', {}).get('content', '') for r in valid]
+        tech_scores = [len(re.findall(r"\$.*?\$", c)) for c in contents]
+        return valid[np.argmax(tech_scores)]
 # ------------------------------
+# Validation Workflow Engine
 # ------------------------------
 class ResearchWorkflow:
     def __init__(self):
+        self.retriever = ResearchRetriever()
         self.processor = CognitiveProcessor()
         self.workflow = StateGraph(AgentState)
         self._build_workflow()
     def retrieve_documents(self, state: AgentState) -> Dict:
         try:
+            docs = self.retriever.retrieve(state["context"]["raw_query"])
+            if not docs:
+                return self._error_state("Document correlation failure - no relevant papers found")
             return {
                 "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
+                "context": {"documents": docs}
             }
         except Exception as e:
             return self._error_state(f"Retrieval Error: {str(e)}")
     def analyze_content(self, state: AgentState) -> Dict:
         try:
+            docs = state["context"]["documents"]
+            context = "\n\n".join([f"### {doc.metadata['title']}\n{doc.page_content}" for doc in docs])
+            prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(context=context)
             response = self.processor.process_query(prompt)
             if "error" in response:
+                raise RuntimeError(response["error"])
+            analysis = response['choices'][0]['message']['content']
+            self._validate_analysis_structure(analysis)
             return {
+                "messages": [AIMessage(content=analysis)],
+                "context": {"analysis": analysis}
             }
         except Exception as e:
             return self._error_state(f"Analysis Error: {str(e)}")
     def validate_output(self, state: AgentState) -> Dict:
+        validation_prompt = f"""Validate this technical analysis:
+{state["messages"][-1].content}
+Check for:
+1. Mathematical accuracy
+2. Empirical evidence
+3. Technical depth
+4. Logical consistency
+Respond with 'VALID' or 'INVALID'"""
         response = self.processor.process_query(validation_prompt)
+        content = response.get('choices', [{}])[0].get('message', {}).get('content', '')
         return {
+            "messages": [AIMessage(content=f"{state['messages'][-1].content}\n\n## Validation\n{content}")],
+            "context": {"valid": "VALID" in content}
         }
     def refine_results(self, state: AgentState) -> Dict:
+        refinement_prompt = f"""Improve this analysis:
+{state["messages"][-1].content}
+Focus on:
+1. Enhancing mathematical rigor
+2. Adding empirical references
+3. Strengthening technical arguments"""
         response = self.processor.process_query(refinement_prompt)
         return {
+            "messages": [AIMessage(content=response['choices'][0]['message']['content'])],
             "context": state["context"]
         }
     def _quality_check(self, state: AgentState) -> str:
+        return "valid" if state.get("context", {}).get("valid", False) else "invalid"
+    def _validate_analysis_structure(self, content: str):
+        required_sections = [
+            "Key Technical Contributions",
+            "Novel Methodologies",
+            "Empirical Results",
+            "Applications",
+            "Limitations"
+        ]
+        missing = [s for s in required_sections if f"## {s}" not in content]
+        if missing:
+            raise ValueError(f"Missing critical sections: {', '.join(missing)}")
+        if not re.search(r"\$.*?\$", content):
+            raise ValueError("Analysis lacks required mathematical notation")
     def _error_state(self, message: str) -> Dict:
         return {
             border-radius: 8px;
             margin: 1rem 0;
         }
+        code {
+            color: #2ecc71;
+            background: #002200;
+            padding: 2px 4px;
+            border-radius: 4px;
+        }
         </style>
         """, unsafe_allow_html=True)
     def _build_sidebar(self):
         with st.sidebar:
             st.title("🔍 Research Database")
+            for key, data in ResearchConfig.DOCUMENT_MAP.items():
+                with st.expander(data["title"]):
+                    st.markdown(f"```\n{data['content']}\n```")
             st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
+            st.metric("Document Chunks", len(research_docs.get()['ids']))
     def _build_main_interface(self):
         st.title("🧠 NeuroResearch AI")
     def _execute_analysis(self, query: str):
         try:
+            with st.spinner("Performing deep technical analysis..."):
+                result = self.workflow.app.invoke(
+                    {"messages": [HumanMessage(content=query)]}
                 )
+                if result.get("context", {}).get("error"):
+                    self._show_error(result["context"].get("error", "Unknown error"))
                 else:
+                    self._display_results(result)
+        except Exception as e:
+            self._show_error(str(e))
+    def _display_results(self, result):
+        content = result["messages"][-1].content
+        with st.expander("Technical Analysis Report", expanded=True):
+            st.markdown(content)
+        with st.expander("Source Documents", expanded=False):
+            for doc in result["context"].get("documents", []):
+                st.markdown(f"**{doc.metadata['title']}**")
+                st.code(doc.page_content, language='latex')
+    def _show_error(self, message):
+        st.error(f"""
+        ⚠️ Analysis Failed: {message}
+        Troubleshooting Steps:
+        1. Check query specificity
+        2. Verify document connections
+        3. Ensure mathematical notation in sources
+        4. Review API key validity
+        5. Simplify complex query structures
+        """)
 if __name__ == "__main__":
     ResearchInterface()