Spaces:

mgbam
/

NeuroResearch_AI

Sleeping

App Files Files Community

mgbam committed on Mar 25

Commit

3c1cff1

verified ·

1 Parent(s): c2e09d4

Update workflow.py

Browse files

Files changed (1) hide show

workflow.py +21 -15

workflow.py CHANGED Viewed

@@ -23,13 +23,18 @@ class AgentState(TypedDict):
 class ResearchWorkflow:
     """
-    Defines a multi-step research workflow using a state graph.
-    This workflow now employs a Retrieval-Augmented Generation (RAG) approach.
-    When no documents are retrieved, the model dynamically synthesizes an analysis using its internal knowledge.
     """
     def __init__(self) -> None:
         self.processor = EnhancedCognitiveProcessor()
-        # Provide the state schema to the StateGraph constructor
         self.workflow = StateGraph(AgentState)
         self._build_workflow()
         self.app = self.workflow.compile()
@@ -50,7 +55,7 @@ class ResearchWorkflow:
         )
         self.workflow.add_edge("validate", END)
         self.workflow.add_edge("refine", "retrieve")
-        # Extended node for multi-modal enhancement
         self.workflow.add_node("enhance", self.enhance_analysis)
         self.workflow.add_edge("validate", "enhance")
         self.workflow.add_edge("enhance", END)
@@ -58,7 +63,7 @@ class ResearchWorkflow:
     def ingest_query(self, state: Dict) -> Dict:
         try:
             query = state["messages"][-1].content
-            # Retrieve the domain from the state's context (defaulting to Biomedical Research)
             domain = state.get("context", {}).get("domain", "Biomedical Research")
             new_context = {
                 "raw_query": query,
@@ -79,9 +84,9 @@ class ResearchWorkflow:
     def retrieve_documents(self, state: Dict) -> Dict:
         try:
             query = state["context"]["raw_query"]
-            # For demonstration, we use an empty document list.
-            # In a full RAG system, you would retrieve relevant documents from an external index.
-            docs = []
             logger.info(f"Retrieved {len(docs)} documents for query.")
             return {
                 "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
@@ -101,15 +106,17 @@ class ResearchWorkflow:
         try:
             domain = state["context"].get("domain", "Biomedical Research").strip().lower()
             docs = state["context"].get("documents", [])
-            # Use retrieved documents if available; otherwise, use the raw query.
             if docs:
                 docs_text = "\n\n".join([d.page_content for d in docs])
             else:
                 docs_text = state["context"].get("raw_query", "")
-                logger.info("No documents retrieved; switching to dynamic RAG mode.")
             domain_prompt = ResearchConfig.DOMAIN_PROMPTS.get(domain, "")
-            # RAG approach: combine domain prompt with retrieved docs or raw query.
-            full_prompt = f"{domain_prompt}\n\n" + ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
             response = self.processor.process_query(full_prompt)
             if "error" in response:
                 logger.error("Backend response error during analysis.")
@@ -155,7 +162,6 @@ class ResearchWorkflow:
             difficulty_level = max(0, 3 - state["context"]["refine_count"])
             domain = state["context"].get("domain", "Biomedical Research")
             logger.info(f"Refinement iteration: {state['context']['refine_count']}, Difficulty level: {difficulty_level}")
             if state["context"]["refine_count"] >= 3:
                 meta_prompt = (
                     f"Domain: {domain}\n"
@@ -179,7 +185,7 @@ class ResearchWorkflow:
                     "1. Technical precision\n"
                     "2. Empirical grounding\n"
                     "3. Theoretical coherence\n\n"
-                    "Use a structured difficulty gradient approach (similar to LADDER) to produce a simpler yet more accurate variant, addressing the weaknesses identified."
                 )
                 response = self.processor.process_query(refinement_prompt)
                 logger.info("Refinement completed.")

 class ResearchWorkflow:
     """
+    A multi-step research workflow that leverages a Retrieval-Augmented Generation (RAG) strategy.
+    It dynamically retrieves external data and integrates it with the raw query to generate domain-specific analyses.
+    Supported domains include:
+      - Biomedical Research
+      - Legal Research
+      - Environmental and Energy Studies
+      - Competitive Programming and Theoretical Computer Science
+      - Social Sciences
     """
     def __init__(self) -> None:
         self.processor = EnhancedCognitiveProcessor()
+        # Provide the state schema to the StateGraph constructor.
         self.workflow = StateGraph(AgentState)
         self._build_workflow()
         self.app = self.workflow.compile()
         )
         self.workflow.add_edge("validate", END)
         self.workflow.add_edge("refine", "retrieve")
+        # Extended node for multi-modal enhancement.
         self.workflow.add_node("enhance", self.enhance_analysis)
         self.workflow.add_edge("validate", "enhance")
         self.workflow.add_edge("enhance", END)
     def ingest_query(self, state: Dict) -> Dict:
         try:
             query = state["messages"][-1].content
+            # Get the domain from state; default to Biomedical Research if not provided.
             domain = state.get("context", {}).get("domain", "Biomedical Research")
             new_context = {
                 "raw_query": query,
     def retrieve_documents(self, state: Dict) -> Dict:
         try:
             query = state["context"]["raw_query"]
+            # For demonstration, we use an empty list to simulate retrieval failure.
+            # In a full implementation, integrate a retriever (e.g., via LangChain, LlamaIndex, or a vector DB).
+            docs = []
             logger.info(f"Retrieved {len(docs)} documents for query.")
             return {
                 "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
         try:
             domain = state["context"].get("domain", "Biomedical Research").strip().lower()
             docs = state["context"].get("documents", [])
+            # If documents are present, use their content; otherwise, fall back to the raw query.
             if docs:
                 docs_text = "\n\n".join([d.page_content for d in docs])
             else:
                 docs_text = state["context"].get("raw_query", "")
+                logger.info("No documents retrieved; switching to dynamic synthesis using RAG.")
             domain_prompt = ResearchConfig.DOMAIN_PROMPTS.get(domain, "")
+            # Combine the domain prompt with either retrieved text or raw query.
+            full_prompt = f"Domain: {state['context'].get('domain', 'Biomedical Research')}\n" \
+                          f"{domain_prompt}\n\n" + \
+                          ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
             response = self.processor.process_query(full_prompt)
             if "error" in response:
                 logger.error("Backend response error during analysis.")
             difficulty_level = max(0, 3 - state["context"]["refine_count"])
             domain = state["context"].get("domain", "Biomedical Research")
             logger.info(f"Refinement iteration: {state['context']['refine_count']}, Difficulty level: {difficulty_level}")
             if state["context"]["refine_count"] >= 3:
                 meta_prompt = (
                     f"Domain: {domain}\n"
                     "1. Technical precision\n"
                     "2. Empirical grounding\n"
                     "3. Theoretical coherence\n\n"
+                    "Use a structured difficulty gradient approach to produce a simpler yet more accurate variant, addressing the identified weaknesses."
                 )
                 response = self.processor.process_query(refinement_prompt)
                 logger.info("Refinement completed.")