Spaces:

mgbam
/

NeuroResearch_AI

Sleeping

App Files Files Community

mgbam commited on Mar 15

Commit

ab8d4bc

verified ·

1 Parent(s): 801f405

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -52

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # ------------------------------
-# UniversalResearch AI System with Refinement Counter and DEEPSEEK_API_KEY
 # ------------------------------
 import logging
 import os
@@ -27,7 +27,7 @@ from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict, Annotated
 from langchain.tools.retriever import create_retriever_tool
-# Increase Python's recursion limit at the very start (if needed)
 sys.setrecursionlimit(10000)
 # ------------------------------
@@ -43,6 +43,11 @@ logger = logging.getLogger(__name__)
 # State Schema Definition
 # ------------------------------
 class AgentState(TypedDict):
     messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
     context: Dict[str, Any]
     metadata: Dict[str, Any]
@@ -52,28 +57,37 @@ class AgentState(TypedDict):
 # ------------------------------
 class ResearchConfig:
     """
-    Generic configuration for the UniversalResearch AI System.
-    Make sure to set DEEPSEEK_API_KEY in your environment or HF Space secrets.
     """
-    DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")  # Updated to reference DEEPSEEK_API_KEY
     CHROMA_PATH = "chroma_db"
     CHUNK_SIZE = 512
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
     DOCUMENT_MAP = {
         "Sample Research Document 1": "Topic A Overview",
         "Sample Research Document 2": "Topic B Analysis",
         "Sample Research Document 3": "Topic C Innovations"
     }
     ANALYSIS_TEMPLATE = (
         "Analyze the following research documents with scientific rigor:\n{context}\n\n"
-        "Provide your analysis with the following structure:\n"
-        "1. Key Contributions (bullet points)\n"
-        "2. Novel Methodologies\n"
-        "3. Empirical Results (with metrics)\n"
-        "4. Potential Applications\n"
-        "5. Limitations & Future Directions\n\n"
         "Format your response in Markdown with LaTeX mathematical notation where applicable."
     )
@@ -93,7 +107,7 @@ if not ResearchConfig.DEEPSEEK_API_KEY:
 class UniversalDocumentManager:
     """
     Manages creation of document collections for any research domain.
-    Documents are split into chunks and embedded using OpenAI embeddings.
     """
     def __init__(self) -> None:
         try:
@@ -110,7 +124,7 @@ class UniversalDocumentManager:
     def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
         """
-        Splits documents into chunks and stores them in a Chroma collection.
         """
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
@@ -134,11 +148,11 @@ class UniversalDocumentManager:
     def _document_id(self, content: str) -> str:
         """
-        Generates a unique document ID using a SHA256 hash combined with the current timestamp.
         """
         return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
-# Initialize document collections for multiple research domains
 udm = UniversalDocumentManager()
 research_docs = udm.create_collection([
     "Research Report: Novel AI Techniques in Renewable Energy",
@@ -157,8 +171,8 @@ development_docs = udm.create_collection([
 # ------------------------------
 class ResearchRetriever:
     """
-    Provides retrieval methods for research documents.
-    This class supports multiple domains, such as academic research and development.
     """
     def __init__(self) -> None:
         try:
@@ -178,6 +192,7 @@ class ResearchRetriever:
     def retrieve(self, query: str, domain: str) -> List[Any]:
         """
         Retrieves documents for a given query and domain.
         """
         try:
             if domain == "research":
@@ -198,8 +213,8 @@ retriever = ResearchRetriever()
 # ------------------------------
 class CognitiveProcessor:
     """
-    Executes API requests to the DeepSeek backend using redundant parallel requests.
-    The responses are consolidated via a consensus mechanism.
     """
     def __init__(self) -> None:
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
@@ -207,10 +222,10 @@ class CognitiveProcessor:
     def process_query(self, prompt: str) -> Dict:
         """
-        Processes a query by sending multiple API requests in parallel.
         """
         futures = []
-        for _ in range(3):  # Triple redundancy for improved reliability
             futures.append(self.executor.submit(self._execute_api_request, prompt))
         results = []
@@ -225,7 +240,7 @@ class CognitiveProcessor:
     def _execute_api_request(self, prompt: str) -> Dict:
         """
-        Executes a single API request to the DeepSeek endpoint.
         """
         headers = {
             "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
@@ -258,7 +273,7 @@ class CognitiveProcessor:
     def _consensus_check(self, results: List[Dict]) -> Dict:
         """
-        Consolidates multiple API responses by selecting the one with the most content.
         """
         valid_results = [r for r in results if "error" not in r]
         if not valid_results:
@@ -267,12 +282,19 @@ class CognitiveProcessor:
         return max(valid_results, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
 # ------------------------------
-# Research Workflow Engine
 # ------------------------------
 class ResearchWorkflow:
     """
-    Defines a multi-step research workflow using a state graph.
-    This workflow is designed to be domain-agnostic, working for any research area.
     """
     def __init__(self) -> None:
         self.processor = CognitiveProcessor()
@@ -287,7 +309,8 @@ class ResearchWorkflow:
         self.workflow.add_node("analyze", self.analyze_content)
         self.workflow.add_node("validate", self.validate_output)
         self.workflow.add_node("refine", self.refine_results)
-        # Set entry point and define transitions
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.workflow.add_edge("retrieve", "analyze")
@@ -301,7 +324,7 @@ class ResearchWorkflow:
     def ingest_query(self, state: AgentState) -> Dict:
         """
-        Ingests the research query and initializes the refinement counter.
         """
         try:
             query = state["messages"][-1].content
@@ -317,7 +340,8 @@ class ResearchWorkflow:
     def retrieve_documents(self, state: AgentState) -> Dict:
         """
-        Retrieves research documents for the given query.
         """
         try:
             query = state["context"]["raw_query"]
@@ -336,7 +360,10 @@ class ResearchWorkflow:
     def analyze_content(self, state: AgentState) -> Dict:
         """
-        Analyzes the retrieved research documents using the DeepSeek API.
         """
         try:
             docs = state["context"].get("documents", [])
@@ -362,12 +389,13 @@ class ResearchWorkflow:
     def validate_output(self, state: AgentState) -> Dict:
         """
-        Validates the analysis report for technical accuracy and consistency.
         """
         analysis = state["messages"][-1].content
         validation_prompt = (
-            f"Validate the following research analysis:\n{analysis}\n\n"
-            "Check for:\n1. Technical accuracy\n2. Adequate citation support\n3. Logical consistency\n4. Methodological soundness\n\n"
             "Respond with 'VALID' or 'INVALID'."
         )
         response = self.processor.process_query(validation_prompt)
@@ -382,15 +410,22 @@ class ResearchWorkflow:
     def refine_results(self, state: AgentState) -> Dict:
         """
-        Refines the analysis report if validation fails.
-        Increments the refinement counter to avoid infinite loops.
         """
         current_count = state["context"].get("refine_count", 0)
         state["context"]["refine_count"] = current_count + 1
-        logger.info(f"Refinement iteration: {state['context']['refine_count']}")
         refinement_prompt = (
-            f"Refine this analysis:\n{state['messages'][-1].content}\n\n"
-            "Improve by enhancing technical precision, empirical grounding, and theoretical coherence."
         )
         response = self.processor.process_query(refinement_prompt)
         logger.info("Refinement completed.")
@@ -405,21 +440,19 @@ class ResearchWorkflow:
     def _quality_check(self, state: AgentState) -> str:
         """
-        Checks whether the analysis report is valid.
-        Forces a valid state if the refinement counter exceeds a preset threshold.
         """
         refine_count = state["context"].get("refine_count", 0)
         if refine_count >= 3:
             logger.warning("Refinement limit reached. Forcing valid outcome to prevent infinite recursion.")
             return "valid"
         content = state["messages"][-1].content
-        quality = "valid" if "VALID" in content else "invalid"
-        logger.info(f"Quality check returned: {quality}")
-        return quality
     def _error_state(self, message: str) -> Dict:
         """
-        Returns a standardized error state.
         """
         logger.error(message)
         return {
@@ -433,8 +466,8 @@ class ResearchWorkflow:
 # ------------------------------
 class ResearchInterface:
     """
-    Provides a Streamlit-based interface for executing the UniversalResearch AI workflow.
-    The interface is domain-agnostic, making it suitable for research in any field.
     """
     def __init__(self) -> None:
         self.workflow = ResearchWorkflow()
@@ -442,7 +475,7 @@ class ResearchInterface:
     def _initialize_interface(self) -> None:
         st.set_page_config(
-            page_title="UniversalResearch AI",
             layout="wide",
             initial_sidebar_state="expanded"
         )
@@ -496,7 +529,7 @@ class ResearchInterface:
     def _build_sidebar(self) -> None:
         with st.sidebar:
-            st.title("🔍 Research Database")
             st.subheader("Featured Research Topics")
             for title, short in ResearchConfig.DOCUMENT_MAP.items():
                 with st.expander(short):
@@ -506,19 +539,23 @@ class ResearchInterface:
             st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
     def _build_main_interface(self) -> None:
-        st.title("🧠 UniversalResearch AI")
         query = st.text_area(
             "Research Query:",
             height=200,
-            placeholder="Enter a research question or topic from any domain..."
         )
         if st.button("Execute Analysis", type="primary"):
             self._execute_analysis(query)
     def _execute_analysis(self, query: str) -> None:
         try:
-            with st.spinner("Initializing Universal Analysis..."):
-                # Invoke the workflow with an increased recursion limit configuration.
                 results = self.workflow.app.stream({
                     "messages": [HumanMessage(content=query)],
                     "context": {},
@@ -539,6 +576,9 @@ Potential issues:
             )
     def _render_event(self, event: Dict) -> None:
         if 'ingest' in event:
             with st.container():
                 st.success("✅ Query Ingested")
@@ -561,6 +601,7 @@ Potential issues:
                 if "VALID" in content:
                     st.success("✅ Validation Passed")
                     with st.expander("View Validated Analysis", expanded=True):
                         st.markdown(content.split("Validation:")[0])
                 else:
                     st.warning("⚠️ Validation Issues Detected")

 # ------------------------------
+# UniversalResearch AI System + LADDER (Tufa Labs)
 # ------------------------------
 import logging
 import os
 from typing_extensions import TypedDict, Annotated
 from langchain.tools.retriever import create_retriever_tool
+# Increase Python's recursion limit if needed
 sys.setrecursionlimit(10000)
 # ------------------------------
 # State Schema Definition
 # ------------------------------
 class AgentState(TypedDict):
+    """
+    Stores the messages and context for each step in the workflow.
+    'messages' contain the conversation so far,
+    'context' can hold domain-specific data, 'metadata' for additional info.
+    """
     messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
     context: Dict[str, Any]
     metadata: Dict[str, Any]
 # ------------------------------
 class ResearchConfig:
     """
+    Universal configuration for the research system, referencing Tufa Labs' LADDER approach.
+    Make sure to set DEEPSEEK_API_KEY in your environment or HF Space secrets
+    to enable the external LLM calls.
     """
+    DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
     CHROMA_PATH = "chroma_db"
     CHUNK_SIZE = 512
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
+    # Example map for featured research docs
     DOCUMENT_MAP = {
         "Sample Research Document 1": "Topic A Overview",
         "Sample Research Document 2": "Topic B Analysis",
         "Sample Research Document 3": "Topic C Innovations"
     }
+    # Template referencing a general approach for analyzing research documents
     ANALYSIS_TEMPLATE = (
         "Analyze the following research documents with scientific rigor:\n{context}\n\n"
+        "Using the LADDER approach from Tufa Labs, the model should:\n"
+        "1. Break down the problem into simpler subproblems.\n"
+        "2. Iteratively refine the solution.\n"
+        "3. Provide thorough analysis, including:\n"
+        "   a. Key Contributions\n"
+        "   b. Novel Methodologies\n"
+        "   c. Empirical Results (with metrics)\n"
+        "   d. Potential Applications\n"
+        "   e. Limitations & Future Directions\n\n"
         "Format your response in Markdown with LaTeX mathematical notation where applicable."
     )
 class UniversalDocumentManager:
     """
     Manages creation of document collections for any research domain.
+    Uses OpenAI embeddings for vector-based semantic search.
     """
     def __init__(self) -> None:
         try:
     def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
         """
+        Splits documents into manageable chunks and stores them in a Chroma collection.
         """
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
     def _document_id(self, content: str) -> str:
         """
+        Generates a unique document ID using SHA256 + timestamp.
         """
         return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
+# Create example collections (can be replaced with domain-specific docs)
 udm = UniversalDocumentManager()
 research_docs = udm.create_collection([
     "Research Report: Novel AI Techniques in Renewable Energy",
 # ------------------------------
 class ResearchRetriever:
     """
+    Provides retrieval methods for multiple domains (research, development, etc.).
+    Uses MMR (Maximal Marginal Relevance) or similarity-based retrieval from Chroma.
     """
     def __init__(self) -> None:
         try:
     def retrieve(self, query: str, domain: str) -> List[Any]:
         """
         Retrieves documents for a given query and domain.
+        Defaults to 'research' if domain is unrecognized.
         """
         try:
             if domain == "research":
 # ------------------------------
 class CognitiveProcessor:
     """
+    Sends parallel requests to the DeepSeek API to reduce failures.
+    Implements a consensus mechanism to pick the most comprehensive response.
     """
     def __init__(self) -> None:
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
     def process_query(self, prompt: str) -> Dict:
         """
+        Processes a query by sending multiple parallel requests (triple redundancy).
         """
         futures = []
+        for _ in range(3):
             futures.append(self.executor.submit(self._execute_api_request, prompt))
         results = []
     def _execute_api_request(self, prompt: str) -> Dict:
         """
+        Executes a single request to the DeepSeek endpoint.
         """
         headers = {
             "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
     def _consensus_check(self, results: List[Dict]) -> Dict:
         """
+        Chooses the best response by comparing the length of the message content.
         """
         valid_results = [r for r in results if "error" not in r]
         if not valid_results:
         return max(valid_results, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))
 # ------------------------------
+# Research Workflow Engine (LADDER Integration)
 # ------------------------------
 class ResearchWorkflow:
     """
+    Defines a multi-step workflow using LangGraph with Tufa Labs' LADDER approach:
+    1. Ingest Query
+    2. Retrieve Documents
+    3. Analyze Content
+    4. Validate Output
+    5. Refine (Recursive Self-Learning + TTRL)
+    The 'refine_results' node applies LADDER’s idea of iteratively
+    breaking down problems and re-solving them with no external data.
     """
     def __init__(self) -> None:
         self.processor = CognitiveProcessor()
         self.workflow.add_node("analyze", self.analyze_content)
         self.workflow.add_node("validate", self.validate_output)
         self.workflow.add_node("refine", self.refine_results)
+        # Entry point and transitions
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.workflow.add_edge("retrieve", "analyze")
     def ingest_query(self, state: AgentState) -> Dict:
         """
+        Ingests the research query and initializes the LADDER-based refinement counter.
         """
         try:
             query = state["messages"][-1].content
     def retrieve_documents(self, state: AgentState) -> Dict:
         """
+        Retrieves relevant documents based on the query.
+        The system can handle any domain (math, code generation, theorem proving, etc.).
         """
         try:
             query = state["context"]["raw_query"]
     def analyze_content(self, state: AgentState) -> Dict:
         """
+        Analyzes the retrieved documents using Tufa Labs' LADDER principles:
+        - Break down the documents,
+        - Provide structured analysis,
+        - Return a refined solution.
         """
         try:
             docs = state["context"].get("documents", [])
     def validate_output(self, state: AgentState) -> Dict:
         """
+        Validates the analysis. If invalid, the system can refine the solution
+        (potentially multiple times) using LADDER’s iterative approach.
         """
         analysis = state["messages"][-1].content
         validation_prompt = (
+            f"Validate this analysis:\n{analysis}\n\n"
+            "Check for:\n1. Technical accuracy\n2. Citation support\n3. Logical consistency\n4. Methodological soundness\n\n"
             "Respond with 'VALID' or 'INVALID'."
         )
         response = self.processor.process_query(validation_prompt)
     def refine_results(self, state: AgentState) -> Dict:
         """
+        Applies Tufa Labs' LADDER principle:
+        - Recursively break down the problem,
+        - Re-solve with no external data,
+        - Potentially leverage TTRL for dynamic updates.
+        This method increments a refinement counter to avoid infinite recursion.
         """
         current_count = state["context"].get("refine_count", 0)
         state["context"]["refine_count"] = current_count + 1
+        logger.info(f"LADDER refinement iteration: {state['context']['refine_count']}")
         refinement_prompt = (
+            f"Refine this analysis using LADDER's self-improvement approach:\n"
+            f"{state['messages'][-1].content}\n\n"
+            "Focus on breaking down complex points further and re-solving them.\n"
+            "Enhance technical precision, empirical grounding, and theoretical coherence."
         )
         response = self.processor.process_query(refinement_prompt)
         logger.info("Refinement completed.")
     def _quality_check(self, state: AgentState) -> str:
         """
+        Determines if the analysis is 'valid' or 'invalid'.
+        If refine_count exceeds 3, forcibly accept the result to prevent infinite loops.
         """
         refine_count = state["context"].get("refine_count", 0)
         if refine_count >= 3:
             logger.warning("Refinement limit reached. Forcing valid outcome to prevent infinite recursion.")
             return "valid"
         content = state["messages"][-1].content
+        return "valid" if "VALID" in content else "invalid"
     def _error_state(self, message: str) -> Dict:
         """
+        Returns an error state if any node fails.
         """
         logger.error(message)
         return {
 # ------------------------------
 class ResearchInterface:
     """
+    Provides a Streamlit-based interface for the UniversalResearch AI with LADDER.
+    The system is domain-agnostic, handling math, code generation, theorem proving, etc.
     """
     def __init__(self) -> None:
         self.workflow = ResearchWorkflow()
     def _initialize_interface(self) -> None:
         st.set_page_config(
+            page_title="UniversalResearch AI (LADDER)",
             layout="wide",
             initial_sidebar_state="expanded"
         )
     def _build_sidebar(self) -> None:
         with st.sidebar:
+            st.title("🔍 Research Database (LADDER)")
             st.subheader("Featured Research Topics")
             for title, short in ResearchConfig.DOCUMENT_MAP.items():
                 with st.expander(short):
             st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
     def _build_main_interface(self) -> None:
+        st.title("🧠 UniversalResearch AI with Tufa Labs’ LADDER")
+        st.write(
+            "Leverage the power of Tufa Labs' LADDER approach for recursive self-improvement. "
+            "No external data required—just a structured difficulty gradient and test-time reinforcement."
+        )
         query = st.text_area(
             "Research Query:",
             height=200,
+            placeholder="Enter a research question, from math to code generation..."
         )
         if st.button("Execute Analysis", type="primary"):
             self._execute_analysis(query)
     def _execute_analysis(self, query: str) -> None:
         try:
+            with st.spinner("Initializing LADDER-based Analysis..."):
+                # The recursion_limit config ensures we can handle multiple refine iterations
                 results = self.workflow.app.stream({
                     "messages": [HumanMessage(content=query)],
                     "context": {},
             )
     def _render_event(self, event: Dict) -> None:
+        """
+        Renders each event in the Streamlit UI.
+        """
         if 'ingest' in event:
             with st.container():
                 st.success("✅ Query Ingested")
                 if "VALID" in content:
                     st.success("✅ Validation Passed")
                     with st.expander("View Validated Analysis", expanded=True):
+                        # Remove "Validation: ..." for a cleaner final result
                         st.markdown(content.split("Validation:")[0])
                 else:
                     st.warning("⚠️ Validation Issues Detected")