mgbam committed on
Commit
b294f9c
·
verified ·
1 Parent(s): 9dc7678

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -54
app.py CHANGED
@@ -79,12 +79,80 @@ class ResearchConfig:
79
  "5. Limitations & Future Directions\n\n"
80
  "Format: Markdown with LaTeX mathematical notation where applicable"
81
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  DOMAIN_PROMPTS = {
83
- "Biomedical Research": "Consider clinical trial design, patient outcomes, and recent biomedical breakthroughs.",
84
- "Legal Research": "Emphasize legal precedents, case law, and nuanced statutory interpretations.",
85
- "Environmental and Energy Studies": "Highlight renewable energy technologies, efficiency metrics, and policy implications.",
86
- "Competitive Programming and Theoretical Computer Science": "Focus on algorithmic complexity, innovative proofs, and computational techniques.",
87
- "Social Sciences": "Concentrate on economic trends, sociological data, and correlations impacting public policy."
88
  }
89
  ENSEMBLE_MODELS = {
90
  "deepseek-chat": {"max_tokens": 2000, "temp": 0.7},
@@ -117,16 +185,13 @@ class QuantumDocumentManager:
117
  logger.info("Initialized PersistentClient for Chroma.")
118
  except Exception as e:
119
  logger.exception("Error initializing PersistentClient; falling back to in-memory client.")
120
- self.client = chromadb.Client() # Fallback to in-memory client
121
  self.embeddings = OpenAIEmbeddings(
122
  model="text-embedding-3-large",
123
  dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
124
  )
125
 
126
  def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
127
- """
128
- Splits documents into chunks and stores them as a Chroma collection.
129
- """
130
  splitter = RecursiveCharacterTextSplitter(
131
  chunk_size=ResearchConfig.CHUNK_SIZE,
132
  chunk_overlap=ResearchConfig.CHUNK_OVERLAP,
@@ -138,7 +203,6 @@ class QuantumDocumentManager:
138
  except Exception as e:
139
  logger.exception("Error during document splitting.")
140
  raise e
141
-
142
  return Chroma.from_documents(
143
  documents=docs,
144
  embedding=self.embeddings,
@@ -148,9 +212,6 @@ class QuantumDocumentManager:
148
  )
149
 
150
  def _document_id(self, content: str) -> str:
151
- """
152
- Generates a unique document ID using SHA256 and the current timestamp.
153
- """
154
  return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
155
 
156
  # ------------------------------
@@ -170,7 +231,7 @@ class ExtendedQuantumDocumentManager(QuantumDocumentManager):
170
  embeddings.append(emb.numpy())
171
  valid_images.append(img_path)
172
  except FileNotFoundError:
173
- logger.warning(f"Image file not found: {img_path}. Skipping this file.")
174
  except Exception as e:
175
  logger.exception(f"Error processing image {img_path}: {str(e)}")
176
  if not embeddings:
@@ -189,7 +250,6 @@ research_docs = qdm.create_collection([
189
  "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
190
  "Latest Trends in Machine Learning Methods Using Quantum Computing"
191
  ], "research")
192
-
193
  development_docs = qdm.create_collection([
194
  "Project A: UI Design Completed, API Integration in Progress",
195
  "Project B: Testing New Feature X, Bug Fixes Needed",
@@ -219,10 +279,6 @@ class ResearchRetriever:
219
  raise e
220
 
221
  def retrieve(self, query: str, domain: str) -> List[Any]:
222
- """
223
- Retrieves documents based on the query and domain.
224
- For demonstration, always using the "research" retriever.
225
- """
226
  try:
227
  return self.research_retriever.invoke(query)
228
  except Exception as e:
@@ -243,9 +299,6 @@ class CognitiveProcessor:
243
  self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]
244
 
245
  def process_query(self, prompt: str) -> Dict:
246
- """
247
- Processes a query by sending multiple API requests in parallel.
248
- """
249
  futures = []
250
  for _ in range(3):
251
  futures.append(self.executor.submit(self._execute_api_request, prompt))
@@ -259,9 +312,6 @@ class CognitiveProcessor:
259
  return self._consensus_check(results)
260
 
261
  def _execute_api_request(self, prompt: str) -> Dict:
262
- """
263
- Executes a single API request to the backend endpoint.
264
- """
265
  headers = {
266
  "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
267
  "Content-Type": "application/json",
@@ -292,9 +342,6 @@ class CognitiveProcessor:
292
  return {"error": str(e)}
293
 
294
  def _consensus_check(self, results: List[Dict]) -> Dict:
295
- """
296
- Consolidates multiple API responses, selecting the one with the most content.
297
- """
298
  valid_results = [r for r in results if "error" not in r]
299
  if not valid_results:
300
  logger.error("All API requests failed.")
@@ -374,7 +421,7 @@ class QuantumKnowledgeGraph:
374
  self.nodes = {}
375
  self.relations = []
376
  self.node_counter = 0
377
-
378
  def create_node(self, content: Dict, node_type: str) -> int:
379
  self.node_counter += 1
380
  self.nodes[self.node_counter] = {
@@ -384,7 +431,7 @@ class QuantumKnowledgeGraph:
384
  "connections": []
385
  }
386
  return self.node_counter
387
-
388
  def create_relation(self, source: int, target: int, rel_type: str, strength: float = 1.0):
389
  self.relations.append({
390
  "source": source,
@@ -393,7 +440,7 @@ class QuantumKnowledgeGraph:
393
  "strength": strength
394
  })
395
  self.nodes[source]["connections"].append(target)
396
-
397
  def visualize_graph(self, focus_node: int = None) -> str:
398
  dot = Digraph(engine="neato")
399
  for nid, node in self.nodes.items():
@@ -404,7 +451,7 @@ class QuantumKnowledgeGraph:
404
  if focus_node:
405
  dot.node(str(focus_node), color="red", style="filled")
406
  return dot.source
407
-
408
  def _truncate_content(self, content: Dict) -> str:
409
  return json.dumps(content)[:50] + "..."
410
 
@@ -415,7 +462,7 @@ class MultiModalRetriever:
415
  self.clip_model = clip_model
416
  self.clip_processor = clip_processor
417
  self.code_retriever = create_retriever_tool([], "Code Retriever", "Retriever for code snippets")
418
-
419
  def retrieve(self, query: str, domain: str) -> Dict[str, List]:
420
  results = {
421
  "text": self._retrieve_text(query),
@@ -423,16 +470,16 @@ class MultiModalRetriever:
423
  "code": self._retrieve_code(query)
424
  }
425
  return results
426
-
427
  def _retrieve_text(self, query: str) -> List[Any]:
428
  return self.text_retriever.invoke(query)
429
-
430
  def _retrieve_images(self, query: str) -> List[str]:
431
  inputs = self.clip_processor(text=query, return_tensors="pt")
432
  with torch.no_grad():
433
  _ = self.clip_model.get_text_features(**inputs)
434
  return ["image_result_1.png", "image_result_2.png"]
435
-
436
  def _retrieve_code(self, query: str) -> List[str]:
437
  return self.code_retriever.invoke(query)
438
 
@@ -450,7 +497,6 @@ class ResearchWorkflow:
450
  self.app = self.workflow.compile()
451
 
452
  def _build_workflow(self) -> None:
453
- # Base workflow nodes
454
  self.workflow.add_node("ingest", self.ingest_query)
455
  self.workflow.add_node("retrieve", self.retrieve_documents)
456
  self.workflow.add_node("analyze", self.analyze_content)
@@ -507,13 +553,14 @@ class ResearchWorkflow:
507
 
508
  def analyze_content(self, state: AgentState) -> Dict:
509
  """
510
- Analyzes the retrieved documents. Injects a domain-specific fallback analysis for each supported domain.
 
 
511
  """
512
  try:
513
- domain = state["context"].get("domain", "Biomedical Research")
514
- query = state["context"].get("raw_query", "")
515
- fallback_analyses = {
516
- "Biomedical Research": """
517
  # Biomedical Research Analysis
518
  ## Key Contributions
519
  - Integration of clinical trial design with digital biomarkers.
@@ -526,7 +573,7 @@ class ResearchWorkflow:
526
  ## Applications
527
  - Personalized medicine, early diagnosis, treatment optimization.
528
  """,
529
- "Legal Research": """
530
  # Legal Research Analysis
531
  ## Key Contributions
532
  - Analysis of legal precedents using NLP.
@@ -539,7 +586,7 @@ class ResearchWorkflow:
539
  ## Applications
540
  - Legal analytics, risk assessment, regulatory compliance.
541
  """,
542
- "Environmental and Energy Studies": """
543
  # Environmental and Energy Studies Analysis
544
  ## Key Contributions
545
  - Novel approaches to renewable energy efficiency.
@@ -550,9 +597,9 @@ class ResearchWorkflow:
550
  ## Empirical Results
551
  - Enhanced performance in energy forecasting.
552
  ## Applications
553
- - Sustainable urban planning, energy policy formulation.
554
  """,
555
- "Competitive Programming and Theoretical Computer Science": """
556
  # Competitive Programming & Theoretical CS Analysis
557
  ## Key Contributions
558
  - Advanced approximation algorithms for NP-hard problems.
@@ -565,7 +612,7 @@ class ResearchWorkflow:
565
  ## Applications
566
  - Optimization in competitive programming and algorithm design.
567
  """,
568
- "Social Sciences": """
569
  # Social Sciences Analysis
570
  ## Key Contributions
571
  - Identification of economic trends through data analytics.
@@ -579,7 +626,6 @@ class ResearchWorkflow:
579
  - Policy design, urban studies, social impact analysis.
580
  """
581
  }
582
-
583
  if domain in fallback_analyses:
584
  logger.info(f"Using fallback analysis for domain: {domain}")
585
  return {
@@ -630,7 +676,6 @@ class ResearchWorkflow:
630
  refinement_history.append(current_analysis)
631
  difficulty_level = max(0, 3 - state["context"]["refine_count"])
632
  logger.info(f"Refinement iteration: {state['context']['refine_count']}, Difficulty level: {difficulty_level}")
633
-
634
  if state["context"]["refine_count"] >= 3:
635
  meta_prompt = (
636
  "You are given the following series of refinement outputs:\n" +
@@ -678,10 +723,6 @@ class ResearchWorkflow:
678
  }
679
 
680
  def enhance_analysis(self, state: AgentState) -> Dict:
681
- """
682
- Augments the analysis with multi-modal insights.
683
- If images or code snippets are available in the context, they are appended to the analysis.
684
- """
685
  try:
686
  analysis = state["messages"][-1].content
687
  enhanced = f"{analysis}\n\n## Multi-Modal Insights\n"
@@ -863,7 +904,6 @@ Potential issues:
863
  st.markdown(content)
864
 
865
  def _display_knowledge_graph(self) -> None:
866
- # Placeholder for knowledge graph visualization
867
  st.write("Knowledge Graph visualization is not implemented yet.")
868
 
869
  # ------------------------------
 
79
  "5. Limitations & Future Directions\n\n"
80
  "Format: Markdown with LaTeX mathematical notation where applicable"
81
  )
82
+ # Lowercase keys for fallback analyses
83
+ DOMAIN_FALLBACKS = {
84
+ "biomedical research": """
85
+ # Biomedical Research Analysis
86
+ ## Key Contributions
87
+ - Integration of clinical trial design with digital biomarkers.
88
+ - Multi-omics data used for precise patient stratification.
89
+ ## Methodologies
90
+ - Machine learning for precision medicine.
91
+ - Federated learning for multi-center trials.
92
+ ## Empirical Results
93
+ - Significant improvements in patient outcomes.
94
+ ## Applications
95
+ - Personalized medicine, early diagnosis, and treatment optimization.
96
+ """,
97
+ "legal research": """
98
+ # Legal Research Analysis
99
+ ## Key Contributions
100
+ - Analysis of legal precedents using NLP.
101
+ - Advanced case law retrieval and summarization.
102
+ ## Methodologies
103
+ - Automated legal reasoning with transformer models.
104
+ - Sentiment analysis on judicial opinions.
105
+ ## Empirical Results
106
+ - Improved accuracy in predicting case outcomes.
107
+ ## Applications
108
+ - Legal analytics, risk assessment, and regulatory compliance.
109
+ """,
110
+ "environmental and energy studies": """
111
+ # Environmental and Energy Studies Analysis
112
+ ## Key Contributions
113
+ - Novel approaches to renewable energy efficiency.
114
+ - Integration of policy analysis with technical metrics.
115
+ ## Methodologies
116
+ - Simulation models for climate impact.
117
+ - Data fusion from sensor networks and satellite imagery.
118
+ ## Empirical Results
119
+ - Enhanced performance in energy forecasting.
120
+ ## Applications
121
+ - Sustainable urban planning and energy policy formulation.
122
+ """,
123
+ "competitive programming and theoretical computer science": """
124
+ # Competitive Programming & Theoretical CS Analysis
125
+ ## Key Contributions
126
+ - Advanced approximation algorithms for NP-hard problems.
127
+ - Use of parameterized complexity and fixed-parameter tractability.
128
+ ## Methodologies
129
+ - Branch-and-bound with dynamic programming.
130
+ - Quantum-inspired algorithms for optimization.
131
+ ## Empirical Results
132
+ - Significant improvements in computational efficiency.
133
+ ## Applications
134
+ - Optimization in competitive programming and algorithm design.
135
+ """,
136
+ "social sciences": """
137
+ # Social Sciences Analysis
138
+ ## Key Contributions
139
+ - Identification of economic trends through data analytics.
140
+ - Integration of sociological data with computational models.
141
+ ## Methodologies
142
+ - Advanced statistical modeling for behavioral analysis.
143
+ - Machine learning for trend forecasting.
144
+ ## Empirical Results
145
+ - High correlation with traditional survey methods.
146
+ ## Applications
147
+ - Policy design, urban studies, and social impact analysis.
148
+ """
149
+ }
150
  DOMAIN_PROMPTS = {
151
+ "biomedical research": "Consider clinical trial design, patient outcomes, and recent biomedical breakthroughs.",
152
+ "legal research": "Emphasize legal precedents, case law, and nuanced statutory interpretations.",
153
+ "environmental and energy studies": "Highlight renewable energy technologies, efficiency metrics, and policy implications.",
154
+ "competitive programming and theoretical computer science": "Focus on algorithmic complexity, innovative proofs, and computational techniques.",
155
+ "social sciences": "Concentrate on economic trends, sociological data, and correlations impacting public policy."
156
  }
157
  ENSEMBLE_MODELS = {
158
  "deepseek-chat": {"max_tokens": 2000, "temp": 0.7},
 
185
  logger.info("Initialized PersistentClient for Chroma.")
186
  except Exception as e:
187
  logger.exception("Error initializing PersistentClient; falling back to in-memory client.")
188
+ self.client = chromadb.Client()
189
  self.embeddings = OpenAIEmbeddings(
190
  model="text-embedding-3-large",
191
  dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
192
  )
193
 
194
  def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
 
 
 
195
  splitter = RecursiveCharacterTextSplitter(
196
  chunk_size=ResearchConfig.CHUNK_SIZE,
197
  chunk_overlap=ResearchConfig.CHUNK_OVERLAP,
 
203
  except Exception as e:
204
  logger.exception("Error during document splitting.")
205
  raise e
 
206
  return Chroma.from_documents(
207
  documents=docs,
208
  embedding=self.embeddings,
 
212
  )
213
 
214
  def _document_id(self, content: str) -> str:
 
 
 
215
  return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"
216
 
217
  # ------------------------------
 
231
  embeddings.append(emb.numpy())
232
  valid_images.append(img_path)
233
  except FileNotFoundError:
234
+ logger.warning(f"Image file not found: {img_path}. Skipping.")
235
  except Exception as e:
236
  logger.exception(f"Error processing image {img_path}: {str(e)}")
237
  if not embeddings:
 
250
  "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
251
  "Latest Trends in Machine Learning Methods Using Quantum Computing"
252
  ], "research")
 
253
  development_docs = qdm.create_collection([
254
  "Project A: UI Design Completed, API Integration in Progress",
255
  "Project B: Testing New Feature X, Bug Fixes Needed",
 
279
  raise e
280
 
281
  def retrieve(self, query: str, domain: str) -> List[Any]:
 
 
 
 
282
  try:
283
  return self.research_retriever.invoke(query)
284
  except Exception as e:
 
299
  self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]
300
 
301
  def process_query(self, prompt: str) -> Dict:
 
 
 
302
  futures = []
303
  for _ in range(3):
304
  futures.append(self.executor.submit(self._execute_api_request, prompt))
 
312
  return self._consensus_check(results)
313
 
314
  def _execute_api_request(self, prompt: str) -> Dict:
 
 
 
315
  headers = {
316
  "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
317
  "Content-Type": "application/json",
 
342
  return {"error": str(e)}
343
 
344
  def _consensus_check(self, results: List[Dict]) -> Dict:
 
 
 
345
  valid_results = [r for r in results if "error" not in r]
346
  if not valid_results:
347
  logger.error("All API requests failed.")
 
421
  self.nodes = {}
422
  self.relations = []
423
  self.node_counter = 0
424
+
425
  def create_node(self, content: Dict, node_type: str) -> int:
426
  self.node_counter += 1
427
  self.nodes[self.node_counter] = {
 
431
  "connections": []
432
  }
433
  return self.node_counter
434
+
435
  def create_relation(self, source: int, target: int, rel_type: str, strength: float = 1.0):
436
  self.relations.append({
437
  "source": source,
 
440
  "strength": strength
441
  })
442
  self.nodes[source]["connections"].append(target)
443
+
444
  def visualize_graph(self, focus_node: int = None) -> str:
445
  dot = Digraph(engine="neato")
446
  for nid, node in self.nodes.items():
 
451
  if focus_node:
452
  dot.node(str(focus_node), color="red", style="filled")
453
  return dot.source
454
+
455
  def _truncate_content(self, content: Dict) -> str:
456
  return json.dumps(content)[:50] + "..."
457
 
 
462
  self.clip_model = clip_model
463
  self.clip_processor = clip_processor
464
  self.code_retriever = create_retriever_tool([], "Code Retriever", "Retriever for code snippets")
465
+
466
  def retrieve(self, query: str, domain: str) -> Dict[str, List]:
467
  results = {
468
  "text": self._retrieve_text(query),
 
470
  "code": self._retrieve_code(query)
471
  }
472
  return results
473
+
474
  def _retrieve_text(self, query: str) -> List[Any]:
475
  return self.text_retriever.invoke(query)
476
+
477
  def _retrieve_images(self, query: str) -> List[str]:
478
  inputs = self.clip_processor(text=query, return_tensors="pt")
479
  with torch.no_grad():
480
  _ = self.clip_model.get_text_features(**inputs)
481
  return ["image_result_1.png", "image_result_2.png"]
482
+
483
  def _retrieve_code(self, query: str) -> List[str]:
484
  return self.code_retriever.invoke(query)
485
 
 
497
  self.app = self.workflow.compile()
498
 
499
  def _build_workflow(self) -> None:
 
500
  self.workflow.add_node("ingest", self.ingest_query)
501
  self.workflow.add_node("retrieve", self.retrieve_documents)
502
  self.workflow.add_node("analyze", self.analyze_content)
 
553
 
554
  def analyze_content(self, state: AgentState) -> Dict:
555
  """
556
+ Analyzes the retrieved documents using a domain-specific fallback analysis.
557
+ If the domain matches one of the predefined domains, a hardcoded analysis is returned.
558
+ Otherwise, the normal backend analysis pipeline is used.
559
  """
560
  try:
561
+ domain = state["context"].get("domain", "Biomedical Research").lower()
562
+ fallback_analyses = {
563
+ "biomedical research": """
 
564
  # Biomedical Research Analysis
565
  ## Key Contributions
566
  - Integration of clinical trial design with digital biomarkers.
 
573
  ## Applications
574
  - Personalized medicine, early diagnosis, treatment optimization.
575
  """,
576
+ "legal research": """
577
  # Legal Research Analysis
578
  ## Key Contributions
579
  - Analysis of legal precedents using NLP.
 
586
  ## Applications
587
  - Legal analytics, risk assessment, regulatory compliance.
588
  """,
589
+ "environmental and energy studies": """
590
  # Environmental and Energy Studies Analysis
591
  ## Key Contributions
592
  - Novel approaches to renewable energy efficiency.
 
597
  ## Empirical Results
598
  - Enhanced performance in energy forecasting.
599
  ## Applications
600
+ - Sustainable urban planning and energy policy formulation.
601
  """,
602
+ "competitive programming and theoretical computer science": """
603
  # Competitive Programming & Theoretical CS Analysis
604
  ## Key Contributions
605
  - Advanced approximation algorithms for NP-hard problems.
 
612
  ## Applications
613
  - Optimization in competitive programming and algorithm design.
614
  """,
615
+ "social sciences": """
616
  # Social Sciences Analysis
617
  ## Key Contributions
618
  - Identification of economic trends through data analytics.
 
626
  - Policy design, urban studies, social impact analysis.
627
  """
628
  }
 
629
  if domain in fallback_analyses:
630
  logger.info(f"Using fallback analysis for domain: {domain}")
631
  return {
 
676
  refinement_history.append(current_analysis)
677
  difficulty_level = max(0, 3 - state["context"]["refine_count"])
678
  logger.info(f"Refinement iteration: {state['context']['refine_count']}, Difficulty level: {difficulty_level}")
 
679
  if state["context"]["refine_count"] >= 3:
680
  meta_prompt = (
681
  "You are given the following series of refinement outputs:\n" +
 
723
  }
724
 
725
  def enhance_analysis(self, state: AgentState) -> Dict:
 
 
 
 
726
  try:
727
  analysis = state["messages"][-1].content
728
  enhanced = f"{analysis}\n\n## Multi-Modal Insights\n"
 
904
  st.markdown(content)
905
 
906
  def _display_knowledge_graph(self) -> None:
 
907
  st.write("Knowledge Graph visualization is not implemented yet.")
908
 
909
  # ------------------------------