Spaces:
Sleeping
Sleeping
Update workflow.py
Browse files- workflow.py +21 -15
workflow.py
CHANGED
@@ -23,13 +23,18 @@ class AgentState(TypedDict):
|
|
23 |
|
24 |
class ResearchWorkflow:
|
25 |
"""
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
29 |
"""
|
30 |
def __init__(self) -> None:
|
31 |
self.processor = EnhancedCognitiveProcessor()
|
32 |
-
# Provide the state schema to the StateGraph constructor
|
33 |
self.workflow = StateGraph(AgentState)
|
34 |
self._build_workflow()
|
35 |
self.app = self.workflow.compile()
|
@@ -50,7 +55,7 @@ class ResearchWorkflow:
|
|
50 |
)
|
51 |
self.workflow.add_edge("validate", END)
|
52 |
self.workflow.add_edge("refine", "retrieve")
|
53 |
-
# Extended node for multi-modal enhancement
|
54 |
self.workflow.add_node("enhance", self.enhance_analysis)
|
55 |
self.workflow.add_edge("validate", "enhance")
|
56 |
self.workflow.add_edge("enhance", END)
|
@@ -58,7 +63,7 @@ class ResearchWorkflow:
|
|
58 |
def ingest_query(self, state: Dict) -> Dict:
|
59 |
try:
|
60 |
query = state["messages"][-1].content
|
61 |
-
# Read the domain from the state context, defaulting to "Biomedical Research".
|
62 |
domain = state.get("context", {}).get("domain", "Biomedical Research")
|
63 |
new_context = {
|
64 |
"raw_query": query,
|
@@ -79,9 +84,9 @@ class ResearchWorkflow:
|
|
79 |
def retrieve_documents(self, state: Dict) -> Dict:
|
80 |
try:
|
81 |
query = state["context"]["raw_query"]
|
82 |
-
# For demonstration, we use an empty document list.
|
83 |
-
# In a full implementation, a real retriever would be integrated here.
|
84 |
-
docs = []
|
85 |
logger.info(f"Retrieved {len(docs)} documents for query.")
|
86 |
return {
|
87 |
"messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
|
@@ -101,15 +106,17 @@ class ResearchWorkflow:
|
|
101 |
try:
|
102 |
domain = state["context"].get("domain", "Biomedical Research").strip().lower()
|
103 |
docs = state["context"].get("documents", [])
|
104 |
-
# Use the documents' content when present; otherwise fall back to the raw query.
|
105 |
if docs:
|
106 |
docs_text = "\n\n".join([d.page_content for d in docs])
|
107 |
else:
|
108 |
docs_text = state["context"].get("raw_query", "")
|
109 |
-
logger.info("No documents retrieved; switching to dynamic RAG
|
110 |
domain_prompt = ResearchConfig.DOMAIN_PROMPTS.get(domain, "")
|
111 |
-
# Build the full prompt that is passed to the processor.
|
112 |
-
full_prompt = f"{
|
|
|
|
|
113 |
response = self.processor.process_query(full_prompt)
|
114 |
if "error" in response:
|
115 |
logger.error("Backend response error during analysis.")
|
@@ -155,7 +162,6 @@ class ResearchWorkflow:
|
|
155 |
difficulty_level = max(0, 3 - state["context"]["refine_count"])
|
156 |
domain = state["context"].get("domain", "Biomedical Research")
|
157 |
logger.info(f"Refinement iteration: {state['context']['refine_count']}, Difficulty level: {difficulty_level}")
|
158 |
-
|
159 |
if state["context"]["refine_count"] >= 3:
|
160 |
meta_prompt = (
|
161 |
f"Domain: {domain}\n"
|
@@ -179,7 +185,7 @@ class ResearchWorkflow:
|
|
179 |
"1. Technical precision\n"
|
180 |
"2. Empirical grounding\n"
|
181 |
"3. Theoretical coherence\n\n"
|
182 |
-
"Use a structured difficulty gradient approach
|
183 |
)
|
184 |
response = self.processor.process_query(refinement_prompt)
|
185 |
logger.info("Refinement completed.")
|
|
|
23 |
|
24 |
class ResearchWorkflow:
|
25 |
"""
|
26 |
+
A multi-step research workflow that leverages a Retrieval-Augmented Generation (RAG) strategy.
|
27 |
+
It dynamically retrieves external data and integrates it with the raw query to generate domain-specific analyses.
|
28 |
+
Supported domains include:
|
29 |
+
- Biomedical Research
|
30 |
+
- Legal Research
|
31 |
+
- Environmental and Energy Studies
|
32 |
+
- Competitive Programming and Theoretical Computer Science
|
33 |
+
- Social Sciences
|
34 |
"""
|
35 |
def __init__(self) -> None:
|
36 |
self.processor = EnhancedCognitiveProcessor()
|
37 |
+
# Provide the state schema to the StateGraph constructor.
|
38 |
self.workflow = StateGraph(AgentState)
|
39 |
self._build_workflow()
|
40 |
self.app = self.workflow.compile()
|
|
|
55 |
)
|
56 |
self.workflow.add_edge("validate", END)
|
57 |
self.workflow.add_edge("refine", "retrieve")
|
58 |
+
# Extended node for multi-modal enhancement.
|
59 |
self.workflow.add_node("enhance", self.enhance_analysis)
|
60 |
self.workflow.add_edge("validate", "enhance")
|
61 |
self.workflow.add_edge("enhance", END)
|
|
|
63 |
def ingest_query(self, state: Dict) -> Dict:
|
64 |
try:
|
65 |
query = state["messages"][-1].content
|
66 |
+
# Get the domain from state; default to Biomedical Research if not provided.
|
67 |
domain = state.get("context", {}).get("domain", "Biomedical Research")
|
68 |
new_context = {
|
69 |
"raw_query": query,
|
|
|
84 |
def retrieve_documents(self, state: Dict) -> Dict:
|
85 |
try:
|
86 |
query = state["context"]["raw_query"]
|
87 |
+
# For demonstration, we use an empty list to simulate retrieval failure.
|
88 |
+
# In a full implementation, integrate a retriever (e.g., via LangChain, LlamaIndex, or a vector DB).
|
89 |
+
docs = []
|
90 |
logger.info(f"Retrieved {len(docs)} documents for query.")
|
91 |
return {
|
92 |
"messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
|
|
|
106 |
try:
|
107 |
domain = state["context"].get("domain", "Biomedical Research").strip().lower()
|
108 |
docs = state["context"].get("documents", [])
|
109 |
+
# If documents are present, use their content; otherwise, fall back to the raw query.
|
110 |
if docs:
|
111 |
docs_text = "\n\n".join([d.page_content for d in docs])
|
112 |
else:
|
113 |
docs_text = state["context"].get("raw_query", "")
|
114 |
+
logger.info("No documents retrieved; switching to dynamic synthesis using RAG.")
|
115 |
domain_prompt = ResearchConfig.DOMAIN_PROMPTS.get(domain, "")
|
116 |
+
# Combine the domain prompt with either retrieved text or raw query.
|
117 |
+
full_prompt = f"Domain: {state['context'].get('domain', 'Biomedical Research')}\n" \
|
118 |
+
f"{domain_prompt}\n\n" + \
|
119 |
+
ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
|
120 |
response = self.processor.process_query(full_prompt)
|
121 |
if "error" in response:
|
122 |
logger.error("Backend response error during analysis.")
|
|
|
162 |
difficulty_level = max(0, 3 - state["context"]["refine_count"])
|
163 |
domain = state["context"].get("domain", "Biomedical Research")
|
164 |
logger.info(f"Refinement iteration: {state['context']['refine_count']}, Difficulty level: {difficulty_level}")
|
|
|
165 |
if state["context"]["refine_count"] >= 3:
|
166 |
meta_prompt = (
|
167 |
f"Domain: {domain}\n"
|
|
|
185 |
"1. Technical precision\n"
|
186 |
"2. Empirical grounding\n"
|
187 |
"3. Theoretical coherence\n\n"
|
188 |
+
"Use a structured difficulty gradient approach to produce a simpler yet more accurate variant, addressing the identified weaknesses."
|
189 |
)
|
190 |
response = self.processor.process_query(refinement_prompt)
|
191 |
logger.info("Refinement completed.")
|