mgbam committed · Commit f7afb44 · verified · 1 Parent(s): 4839711

Update app.py

Files changed (1)
  1. app.py +68 -59
app.py CHANGED
@@ -1,5 +1,5 @@
 # ------------------------------
-# Enhanced NeuroResearch AI System with Refinement Counter and Increased Recursion Limit
+# UniversalResearch AI System with Refinement Counter and Increased Recursion Limit
 # ------------------------------
 import logging
 import os
@@ -11,6 +11,7 @@ import sys
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import List, Dict, Any, Optional, Sequence
+
 import chromadb
 import requests
 import streamlit as st
@@ -50,46 +51,49 @@ class AgentState(TypedDict):
 # Configuration
 # ------------------------------
 class ResearchConfig:
+    """
+    Generic configuration for the UniversalResearch AI System.
+    This configuration is designed to be applicable to any research domain.
+    """
     DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
     CHROMA_PATH = "chroma_db"
     CHUNK_SIZE = 512
     CHUNK_OVERLAP = 64
     MAX_CONCURRENT_REQUESTS = 5
     EMBEDDING_DIMENSIONS = 1536
+    # An optional map can be used to list pre-loaded or featured research topics.
     DOCUMENT_MAP = {
-        "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
-            "CV-Transformer Hybrid Architecture",
-        "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing":
-            "Transformer Architecture Analysis",
-        "Latest Trends in Machine Learning Methods Using Quantum Computing":
-            "Quantum ML Frontiers"
+        "Sample Research Document 1": "Topic A Overview",
+        "Sample Research Document 2": "Topic B Analysis",
+        "Sample Research Document 3": "Topic C Innovations"
     }
     ANALYSIS_TEMPLATE = (
-        "Analyze these technical documents with scientific rigor:\n{context}\n\n"
-        "Respond with:\n"
-        "1. Key Technical Contributions (bullet points)\n"
+        "Analyze the following research documents with scientific rigor:\n{context}\n\n"
+        "Provide your analysis with the following structure:\n"
+        "1. Key Contributions (bullet points)\n"
         "2. Novel Methodologies\n"
         "3. Empirical Results (with metrics)\n"
         "4. Potential Applications\n"
         "5. Limitations & Future Directions\n\n"
-        "Format: Markdown with LaTeX mathematical notation where applicable"
+        "Format your response in Markdown with LaTeX mathematical notation where applicable."
     )
 
 if not ResearchConfig.DEEPSEEK_API_KEY:
     st.error(
         """**Research Portal Configuration Required**
-1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)
-2. Configure secret: `DEEPSEEK_API_KEY` in Space settings
-3. Rebuild deployment"""
+1. Obtain your DeepSeek API key from [platform.deepseek.com](https://platform.deepseek.com/)
+2. Set the secret: `DEEPSEEK_API_KEY` in your deployment settings
+3. Rebuild your deployment."""
     )
     st.stop()
 
 # ------------------------------
-# Quantum Document Processing
+# Universal Document Processing
 # ------------------------------
-class QuantumDocumentManager:
+class UniversalDocumentManager:
     """
-    Manages creation of Chroma collections from raw document texts.
+    Manages the creation of document collections for any research domain.
+    Documents are split into manageable chunks and embedded using OpenAI embeddings.
     """
     def __init__(self) -> None:
         try:
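`ANALYSIS_TEMPLATE` is consumed later by `analyze_content`, which fills the `{context}` placeholder with the text of the retrieved documents. A minimal sketch of that step, assuming plain strings (in the app the retrieved items are LangChain documents, so their `page_content` would be joined):

```python
# Hypothetical illustration of how the template is filled before dispatch.
docs_text = ["First retrieved chunk...", "Second retrieved chunk..."]
prompt = ResearchConfig.ANALYSIS_TEMPLATE.format(
    context="\n\n".join(docs_text)  # separator is an assumption
)
# `prompt` then goes through CognitiveProcessor.process_query below.
```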
@@ -98,6 +102,7 @@ class QuantumDocumentManager:
         except Exception as e:
             logger.error(f"Error initializing PersistentClient: {e}")
             self.client = chromadb.Client()  # Fallback to in-memory client
+
         self.embeddings = OpenAIEmbeddings(
             model="text-embedding-3-large",
             dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
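The hunk above keeps the existing fallback: try to open an on-disk Chroma store, and drop to an ephemeral in-memory client if that fails. The same pattern as a self-contained sketch:

```python
import logging

import chromadb

logger = logging.getLogger(__name__)

def make_chroma_client(path: str = "chroma_db"):
    """Prefer a persistent on-disk store; fall back to in-memory."""
    try:
        return chromadb.PersistentClient(path=path)
    except Exception as e:
        logger.error(f"Error initializing PersistentClient: {e}")
        return chromadb.Client()  # ephemeral: data is lost on restart
```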
@@ -105,7 +110,7 @@
 
     def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
         """
-        Splits documents into chunks and stores them as a Chroma collection.
+        Splits documents into chunks and stores them in a Chroma collection.
         """
         splitter = RecursiveCharacterTextSplitter(
             chunk_size=ResearchConfig.CHUNK_SIZE,
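The diff elides the rest of `create_collection`'s body. A plausible reconstruction under the LangChain APIs the hunk implies, written as a free function over an explicit client and embeddings rather than a method; the import paths vary across LangChain versions, and everything past the splitter is an assumption:

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

def create_collection(client, embeddings, documents, collection_name):
    # Chunk the raw texts using the sizes from ResearchConfig.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=512,    # ResearchConfig.CHUNK_SIZE
        chunk_overlap=64,  # ResearchConfig.CHUNK_OVERLAP
    )
    docs = splitter.create_documents(documents)
    # Embed the chunks and store them in the named Chroma collection.
    return Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        client=client,
        collection_name=collection_name,
    )
```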
@@ -129,22 +134,23 @@ class QuantumDocumentManager:
 
     def _document_id(self, content: str) -> str:
         """
-        Generates a unique document ID using SHA256 and the current timestamp.
+        Generates a unique document ID using a SHA256 hash combined with the current timestamp.
         """
         return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
 
-# Initialize document collections
-qdm = QuantumDocumentManager()
-research_docs = qdm.create_collection([
-    "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
-    "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
-    "Latest Trends in Machine Learning Methods Using Quantum Computing"
+# Initialize document collections for multiple research domains
+udm = UniversalDocumentManager()
+# Example collections – these can be updated with any research domain documents.
+research_docs = udm.create_collection([
+    "Research Report: Novel AI Techniques in Renewable Energy",
+    "Academic Paper: Advances in Quantum Computing for Data Analysis",
+    "Survey: Emerging Trends in Biomedical Research"
 ], "research")
 
-development_docs = qdm.create_collection([
-    "Project A: UI Design Completed, API Integration in Progress",
-    "Project B: Testing New Feature X, Bug Fixes Needed",
-    "Product Y: In the Performance Optimization Stage Before Release"
+development_docs = udm.create_collection([
+    "Project Update: New Algorithms in Software Engineering",
+    "Development Report: Innovations in User Interface Design",
+    "Case Study: Agile Methodologies in Large-Scale Software Projects"
 ], "development")
 
 # ------------------------------
@@ -152,7 +158,8 @@ development_docs = qdm.create_collection([
 # ------------------------------
 class ResearchRetriever:
     """
-    Provides retrieval methods for different domains.
+    Provides retrieval methods for research documents.
+    This class supports multiple domains, such as academic research and development.
     """
     def __init__(self) -> None:
         try:
@@ -171,7 +178,7 @@ class ResearchRetriever:
 
     def retrieve(self, query: str, domain: str) -> List[Any]:
         """
-        Retrieves documents based on the query and domain.
+        Retrieves documents for a given query and domain.
         """
         try:
             if domain == "research":
@@ -179,8 +186,8 @@
             elif domain == "development":
                 return self.development_retriever.invoke(query)
             else:
-                logger.warning(f"Domain '{domain}' not recognized.")
-                return []
+                logger.warning(f"Domain '{domain}' not recognized. Defaulting to research.")
+                return self.research_retriever.invoke(query)
         except Exception as e:
             logger.error(f"Retrieval error for domain '{domain}': {e}")
             return []
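The constructor that builds `research_retriever` and `development_retriever` falls outside the hunk; presumably each wraps one of the collections created above. A hedged sketch of that initialization (the `k` value is an assumption):

```python
# Hypothetical ResearchRetriever.__init__ body; `k` is an assumption.
self.research_retriever = research_docs.as_retriever(
    search_kwargs={"k": 4}
)
self.development_retriever = development_docs.as_retriever(
    search_kwargs={"k": 4}
)
# Each retriever's .invoke(query) then returns a List[Document].
```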
@@ -192,8 +199,8 @@ retriever = ResearchRetriever()
 # ------------------------------
 class CognitiveProcessor:
     """
-    Executes API requests to the DeepSeek backend using triple redundancy
-    and consolidates results via a consensus mechanism.
+    Executes API requests to the DeepSeek backend using redundant parallel requests.
+    The responses are consolidated via a consensus mechanism.
     """
     def __init__(self) -> None:
         self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
@@ -204,7 +211,7 @@ class CognitiveProcessor:
         Processes a query by sending multiple API requests in parallel.
         """
         futures = []
-        for _ in range(3):  # Triple redundancy for reliability
+        for _ in range(3):  # Triple redundancy for improved reliability
             futures.append(self.executor.submit(self._execute_api_request, prompt))
 
         results = []
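The result-collection loop continues past the end of this hunk. For reference, the standard fan-out/fan-in shape with `as_completed` (both names are already imported at the top of app.py) looks roughly like this; the timeout is an assumption:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def fan_out(executor: ThreadPoolExecutor, fn, prompt: str, n: int = 3) -> list:
    """Submit the same request n times and collect results as they finish."""
    futures = [executor.submit(fn, prompt) for _ in range(n)]
    results = []
    for future in as_completed(futures, timeout=60):  # timeout assumed
        try:
            results.append(future.result())
        except Exception as e:
            results.append({"error": str(e)})  # keep failures for the consensus step
    return results
```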
@@ -230,7 +237,7 @@ class CognitiveProcessor:
             "model": "deepseek-chat",
             "messages": [{
                 "role": "user",
-                "content": f"Respond as Senior AI Researcher:\n{prompt}"
+                "content": f"Respond as a Senior Researcher:\n{prompt}"
             }],
             "temperature": 0.7,
             "max_tokens": 1500,
@@ -252,7 +259,7 @@
 
     def _consensus_check(self, results: List[Dict]) -> Dict:
         """
-        Consolidates multiple API responses, selecting the one with the most content.
+        Consolidates multiple API responses by selecting the one with the most content.
         """
         valid_results = [r for r in results if "error" not in r]
         if not valid_results:
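Per the docstring, "consensus" here is a longest-response heuristic rather than majority voting. A sketch consistent with that description, assuming OpenAI-style response dictionaries:

```python
from typing import Dict, List

def consensus_check(results: List[Dict]) -> Dict:
    """Pick the successful response with the longest message content."""
    valid_results = [r for r in results if "error" not in r]
    if not valid_results:
        return {"error": "All API requests failed"}
    return max(
        valid_results,
        key=lambda r: len(r["choices"][0]["message"]["content"]),  # shape assumed
    )
```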
@@ -265,7 +272,8 @@
 # ------------------------------
 class ResearchWorkflow:
     """
-    Defines the multi-step research workflow using a state graph.
+    Defines a multi-step research workflow using a state graph.
+    This workflow is designed to be domain-agnostic, working for any research area.
     """
     def __init__(self) -> None:
         self.processor = CognitiveProcessor()
@@ -274,13 +282,13 @@ class ResearchWorkflow:
         self.app = self.workflow.compile()
 
     def _build_workflow(self) -> None:
-        # Define nodes
+        # Define workflow nodes
         self.workflow.add_node("ingest", self.ingest_query)
         self.workflow.add_node("retrieve", self.retrieve_documents)
         self.workflow.add_node("analyze", self.analyze_content)
         self.workflow.add_node("validate", self.validate_output)
         self.workflow.add_node("refine", self.refine_results)
-        # Set entry point and edges
+        # Set entry point and define transitions
         self.workflow.set_entry_point("ingest")
         self.workflow.add_edge("ingest", "retrieve")
         self.workflow.add_edge("retrieve", "analyze")
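The edges listed here are only the linear part of the graph; the validate→refine loop implied by `_quality_check` is wired outside the hunk. In LangGraph that branch is typically declared with a conditional edge, sketched here as an assumption:

```python
from langgraph.graph import END

# Assumed wiring for the validation loop (not visible in this hunk):
self.workflow.add_conditional_edges(
    "validate",
    self._quality_check,  # returns "valid" or "invalid"
    {"valid": END, "invalid": "refine"},
)
self.workflow.add_edge("refine", "validate")  # loop until valid or capped
```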
@@ -298,7 +306,6 @@ class ResearchWorkflow:
         """
         try:
             query = state["messages"][-1].content
-            # Initialize context with raw query and refinement counter
             new_context = {"raw_query": query, "refine_count": 0}
             logger.info("Query ingested.")
             return {
@@ -311,7 +318,7 @@
 
     def retrieve_documents(self, state: AgentState) -> Dict:
         """
-        Retrieves research documents based on the query.
+        Retrieves research documents for the given query.
         """
         try:
             query = state["context"]["raw_query"]
@@ -326,7 +333,7 @@
 
     def analyze_content(self, state: AgentState) -> Dict:
         """
-        Analyzes the retrieved documents using the DeepSeek API.
+        Analyzes the retrieved research documents using the DeepSeek API.
         """
         try:
             docs = state["context"].get("documents", [])
@@ -345,13 +352,13 @@ class ResearchWorkflow:
 
     def validate_output(self, state: AgentState) -> Dict:
         """
-        Validates the technical analysis report.
+        Validates the analysis report for technical accuracy and consistency.
         """
         analysis = state["messages"][-1].content
         validation_prompt = (
-            f"Validate research analysis:\n{analysis}\n\n"
-            "Check for:\n1. Technical accuracy\n2. Citation support\n3. Logical consistency\n4. Methodological soundness\n\n"
-            "Respond with 'VALID' or 'INVALID'"
+            f"Validate the following research analysis:\n{analysis}\n\n"
+            "Check for:\n1. Technical accuracy\n2. Adequate citation support\n3. Logical consistency\n4. Methodological soundness\n\n"
+            "Respond with 'VALID' or 'INVALID'."
         )
         response = self.processor.process_query(validation_prompt)
         logger.info("Output validation completed.")
@@ -362,14 +369,14 @@
 
     def refine_results(self, state: AgentState) -> Dict:
         """
         Refines the analysis report if validation fails.
-        Increments the refinement counter to limit infinite loops.
+        Increments the refinement counter to avoid infinite loops.
         """
         current_count = state["context"].get("refine_count", 0)
         state["context"]["refine_count"] = current_count + 1
         logger.info(f"Refinement iteration: {state['context']['refine_count']}")
         refinement_prompt = (
             f"Refine this analysis:\n{state['messages'][-1].content}\n\n"
-            "Improve:\n1. Technical precision\n2. Empirical grounding\n3. Theoretical coherence"
+            "Improve by enhancing technical precision, empirical grounding, and theoretical coherence."
         )
         response = self.processor.process_query(refinement_prompt)
         logger.info("Refinement completed.")
@@ -381,7 +388,7 @@
     def _quality_check(self, state: AgentState) -> str:
         """
         Checks whether the analysis report is valid.
-        Forces a valid state if the refinement count exceeds a threshold.
+        Forces a valid state if the refinement counter exceeds a preset threshold.
         """
         refine_count = state["context"].get("refine_count", 0)
         if refine_count >= 3:
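Combining this docstring with the guard on the next line, the whole of `_quality_check` plausibly reads as below; the `'VALID'` substring test mirrors the response format demanded by the validation prompt:

```python
def _quality_check(self, state: AgentState) -> str:
    # Hypothetical full body; only the counter guard is visible in the diff.
    if state["context"].get("refine_count", 0) >= 3:
        logger.warning("Refinement limit reached; forcing valid state.")
        return "valid"
    content = state["messages"][-1].content
    return "valid" if "VALID" in content else "invalid"
```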
@@ -408,7 +415,8 @@
 # ------------------------------
 class ResearchInterface:
     """
-    Provides the Streamlit-based interface for executing the research workflow.
+    Provides a Streamlit-based interface for executing the UniversalResearch AI workflow.
+    The interface is domain-agnostic, making it suitable for research in any field.
     """
     def __init__(self) -> None:
         self.workflow = ResearchWorkflow()
@@ -416,7 +424,7 @@ class ResearchInterface:
 
     def _initialize_interface(self) -> None:
         st.set_page_config(
-            page_title="NeuroResearch AI",
+            page_title="UniversalResearch AI",
             layout="wide",
             initial_sidebar_state="expanded"
         )
@@ -471,7 +479,8 @@
     def _build_sidebar(self) -> None:
         with st.sidebar:
             st.title("🔍 Research Database")
-            st.subheader("Technical Papers")
+            st.subheader("Featured Research Topics")
+            # Display featured research topics from the DOCUMENT_MAP.
             for title, short in ResearchConfig.DOCUMENT_MAP.items():
                 with st.expander(short):
                     st.markdown(f"```\n{title}\n```")
@@ -480,19 +489,19 @@
             st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)
 
     def _build_main_interface(self) -> None:
-        st.title("🧠 NeuroResearch AI")
+        st.title("🧠 UniversalResearch AI")
         query = st.text_area(
             "Research Query:",
             height=200,
-            placeholder="Enter technical research question..."
+            placeholder="Enter a research question or topic from any domain..."
         )
         if st.button("Execute Analysis", type="primary"):
             self._execute_analysis(query)
 
     def _execute_analysis(self, query: str) -> None:
         try:
-            with st.spinner("Initializing Quantum Analysis..."):
-                # Pass a recursion limit configuration into the graph invocation
+            with st.spinner("Initializing Universal Analysis..."):
+                # Invoke the workflow with an increased recursion limit configuration.
                 results = self.workflow.app.stream({
                     "messages": [HumanMessage(content=query)],
                     "context": {},
@@ -527,7 +536,7 @@ Potential issues:
             elif 'analyze' in event:
                 with st.container():
                     content = event['analyze']['messages'][0].content
-                    with st.expander("Technical Analysis Report", expanded=True):
+                    with st.expander("Research Analysis Report", expanded=True):
                         st.markdown(content)
             elif 'validate' in event:
                 with st.container():