mgbam committed on
Commit
3c1cff1
·
verified ·
1 Parent(s): c2e09d4

Update workflow.py

Browse files
Files changed (1) hide show
  1. workflow.py +21 -15
workflow.py CHANGED
@@ -23,13 +23,18 @@ class AgentState(TypedDict):
23
 
24
  class ResearchWorkflow:
25
  """
26
- Defines a multi-step research workflow using a state graph.
27
- This workflow now employs a Retrieval-Augmented Generation (RAG) approach.
28
- When no documents are retrieved, the model dynamically synthesizes an analysis using its internal knowledge.
 
 
 
 
 
29
  """
30
  def __init__(self) -> None:
31
  self.processor = EnhancedCognitiveProcessor()
32
- # Provide the state schema to the StateGraph constructor
33
  self.workflow = StateGraph(AgentState)
34
  self._build_workflow()
35
  self.app = self.workflow.compile()
@@ -50,7 +55,7 @@ class ResearchWorkflow:
50
  )
51
  self.workflow.add_edge("validate", END)
52
  self.workflow.add_edge("refine", "retrieve")
53
- # Extended node for multi-modal enhancement
54
  self.workflow.add_node("enhance", self.enhance_analysis)
55
  self.workflow.add_edge("validate", "enhance")
56
  self.workflow.add_edge("enhance", END)
@@ -58,7 +63,7 @@ class ResearchWorkflow:
58
  def ingest_query(self, state: Dict) -> Dict:
59
  try:
60
  query = state["messages"][-1].content
61
- # Retrieve the domain from the state's context (defaulting to Biomedical Research)
62
  domain = state.get("context", {}).get("domain", "Biomedical Research")
63
  new_context = {
64
  "raw_query": query,
@@ -79,9 +84,9 @@ class ResearchWorkflow:
79
  def retrieve_documents(self, state: Dict) -> Dict:
80
  try:
81
  query = state["context"]["raw_query"]
82
- # For demonstration, we use an empty document list.
83
- # In a full RAG system, you would retrieve relevant documents from an external index.
84
- docs = []
85
  logger.info(f"Retrieved {len(docs)} documents for query.")
86
  return {
87
  "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
@@ -101,15 +106,17 @@ class ResearchWorkflow:
101
  try:
102
  domain = state["context"].get("domain", "Biomedical Research").strip().lower()
103
  docs = state["context"].get("documents", [])
104
- # Use retrieved documents if available; otherwise, use the raw query.
105
  if docs:
106
  docs_text = "\n\n".join([d.page_content for d in docs])
107
  else:
108
  docs_text = state["context"].get("raw_query", "")
109
- logger.info("No documents retrieved; switching to dynamic RAG mode.")
110
  domain_prompt = ResearchConfig.DOMAIN_PROMPTS.get(domain, "")
111
- # RAG approach: combine domain prompt with retrieved docs or raw query.
112
- full_prompt = f"{domain_prompt}\n\n" + ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
 
 
113
  response = self.processor.process_query(full_prompt)
114
  if "error" in response:
115
  logger.error("Backend response error during analysis.")
@@ -155,7 +162,6 @@ class ResearchWorkflow:
155
  difficulty_level = max(0, 3 - state["context"]["refine_count"])
156
  domain = state["context"].get("domain", "Biomedical Research")
157
  logger.info(f"Refinement iteration: {state['context']['refine_count']}, Difficulty level: {difficulty_level}")
158
-
159
  if state["context"]["refine_count"] >= 3:
160
  meta_prompt = (
161
  f"Domain: {domain}\n"
@@ -179,7 +185,7 @@ class ResearchWorkflow:
179
  "1. Technical precision\n"
180
  "2. Empirical grounding\n"
181
  "3. Theoretical coherence\n\n"
182
- "Use a structured difficulty gradient approach (similar to LADDER) to produce a simpler yet more accurate variant, addressing the weaknesses identified."
183
  )
184
  response = self.processor.process_query(refinement_prompt)
185
  logger.info("Refinement completed.")
 
23
 
24
  class ResearchWorkflow:
25
  """
26
+ A multi-step research workflow that leverages a Retrieval-Augmented Generation (RAG) strategy.
27
+ It dynamically retrieves external data and integrates it with the raw query to generate domain-specific analyses.
28
+ Supported domains include:
29
+ - Biomedical Research
30
+ - Legal Research
31
+ - Environmental and Energy Studies
32
+ - Competitive Programming and Theoretical Computer Science
33
+ - Social Sciences
34
  """
35
  def __init__(self) -> None:
36
  self.processor = EnhancedCognitiveProcessor()
37
+ # Provide the state schema to the StateGraph constructor.
38
  self.workflow = StateGraph(AgentState)
39
  self._build_workflow()
40
  self.app = self.workflow.compile()
 
55
  )
56
  self.workflow.add_edge("validate", END)
57
  self.workflow.add_edge("refine", "retrieve")
58
+ # Extended node for multi-modal enhancement.
59
  self.workflow.add_node("enhance", self.enhance_analysis)
60
  self.workflow.add_edge("validate", "enhance")
61
  self.workflow.add_edge("enhance", END)
 
63
  def ingest_query(self, state: Dict) -> Dict:
64
  try:
65
  query = state["messages"][-1].content
66
+ # Get the domain from state; default to Biomedical Research if not provided.
67
  domain = state.get("context", {}).get("domain", "Biomedical Research")
68
  new_context = {
69
  "raw_query": query,
 
84
  def retrieve_documents(self, state: Dict) -> Dict:
85
  try:
86
  query = state["context"]["raw_query"]
87
+ # For demonstration, we use an empty list to simulate retrieval failure.
88
+ # In a full implementation, integrate a retriever (e.g., via LangChain, LlamaIndex, or a vector DB).
89
+ docs = []
90
  logger.info(f"Retrieved {len(docs)} documents for query.")
91
  return {
92
  "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
 
106
  try:
107
  domain = state["context"].get("domain", "Biomedical Research").strip().lower()
108
  docs = state["context"].get("documents", [])
109
+ # If documents are present, use their content; otherwise, fall back to the raw query.
110
  if docs:
111
  docs_text = "\n\n".join([d.page_content for d in docs])
112
  else:
113
  docs_text = state["context"].get("raw_query", "")
114
+ logger.info("No documents retrieved; switching to dynamic synthesis using RAG.")
115
  domain_prompt = ResearchConfig.DOMAIN_PROMPTS.get(domain, "")
116
+ # Combine the domain prompt with either retrieved text or raw query.
117
+ full_prompt = f"Domain: {state['context'].get('domain', 'Biomedical Research')}\n" \
118
+ f"{domain_prompt}\n\n" + \
119
+ ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
120
  response = self.processor.process_query(full_prompt)
121
  if "error" in response:
122
  logger.error("Backend response error during analysis.")
 
162
  difficulty_level = max(0, 3 - state["context"]["refine_count"])
163
  domain = state["context"].get("domain", "Biomedical Research")
164
  logger.info(f"Refinement iteration: {state['context']['refine_count']}, Difficulty level: {difficulty_level}")
 
165
  if state["context"]["refine_count"] >= 3:
166
  meta_prompt = (
167
  f"Domain: {domain}\n"
 
185
  "1. Technical precision\n"
186
  "2. Empirical grounding\n"
187
  "3. Theoretical coherence\n\n"
188
+ "Use a structured difficulty gradient approach to produce a simpler yet more accurate variant, addressing the identified weaknesses."
189
  )
190
  response = self.processor.process_query(refinement_prompt)
191
  logger.info("Refinement completed.")