Spaces:
Sleeping
Sleeping
File size: 15,461 Bytes
4067b64 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 |
from src.models.analysis_models import MLTaskType, ModelResponseStatus, RequirementsAnalysis, TechnicalResearch, ComponentType, ParameterSpec, ConfigParam, FunctionSpec, ComponentSpec, ImplementationPlan
from typing import Iterator, List, Optional
from phi.workflow import Workflow, RunResponse, RunEvent
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.storage.workflow.sqlite import SqlWorkflowStorage
from phi.storage.agent.sqlite import SqlAgentStorage
# from phi.memory.db.sqlite import SqliteMemoryDb
from phi.tools.duckduckgo import DuckDuckGo
from phi.utils.log import logger
from dotenv import load_dotenv
import json
import os
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
class MLAnalysisWorkflow(Workflow):
    """Workflow for analyzing ML business requirements and creating technical specifications.

    Pipeline: analyze the business problem into a structured RequirementsAnalysis,
    stream a formatted write-up, research any missing/unclear points with a web-search
    agent, then re-run the analyst with the research to produce final specifications.
    """

    # Agent that turns a business problem into a structured RequirementsAnalysis.
    requirements_analyst: Agent = Agent(
        name="ML Requirements Analyst",
        model=OpenAIChat(id="gpt-4o", api_key=api_key),
        description="Expert ML Solutions Architect specialized in analyzing business requirements",
        instructions=[
            "Analyze business problems and translate them into technical ML specifications.",
            "1. Understand the core business problem and objectives",
            "2. Identify the type of ML task required",
            "3. Determine data requirements and constraints",
            "4. List unclear points that need clarification",
            "5. Specify areas that need technical research",
            "Be precise in identifying what information is missing or needs validation."
        ],
        response_model=RequirementsAnalysis,
        structured_outputs=True,
        reasoning=True,
        storage=SqlAgentStorage(
            table_name="requirements_sessions",
            db_file="storage/agent_storage.db"
        ),
        debug_mode=True,
        # memory=AgentMemory(memory_db=requirements_db)
    )

    # Agent that validates/fills technical gaps via web search, returning TechnicalResearch.
    technical_researcher: Agent = Agent(
        name="ML Technical Researcher",
        model=OpenAIChat(id="gpt-4o", api_key=api_key),
        description="ML Expert specialized in researching technical implementations",
        tools=[DuckDuckGo(search=True, news=False)],
        instructions=[
            "Research and validate technical aspects of ML solutions.",
            "1. Search for similar ML implementations and best practices",
            "2. Find recommended models and architectures",
            "3. Research typical hyperparameters and evaluation metrics",
            "4. Look for implementation constraints and requirements",
            "5. Validate technical feasibility",
            "Provide sources for all technical information.",
            "Focus on recent and reliable technical sources."
        ],
        response_model=TechnicalResearch,
        structured_outputs=True,
        prevent_hallucination=True,
        reasoning=True,
        storage=SqlAgentStorage(
            table_name="researcher_sessions",
            db_file="storage/agent_storage.db"
        ),
        debug_mode=True,
        # memory=AgentMemory(memory_db=researcher_db)
    )

    # Presentation-only agent: renders structured output as markdown without altering content.
    writer: Agent = Agent(
        model=OpenAIChat(id="gpt-4o", api_key=api_key),
        instructions=[
            # FIX: adjacent string literals are concatenated by Python, so each
            # fragment must end with a space — the original produced
            # "...display thisin a nicely ... all the linksas they are...".
            "You will be provided with lots of structured outputs. Your work is to display this "
            "in a nicely formatted manner without changing any of the content. Present all the links "
            "as they are, with explicitly mentioned hyperlinks. Do not change any content."
        ],
        markdown=True,
    )

    def validate_model_response(self, response: ModelResponseStatus) -> List[str]:
        """Return the names of fields in *response* that are missing or placeholders.

        A field counts as missing when it still holds the literal placeholder
        "..." (or ["..."]) or is an empty list.
        """
        logger.info("Checking for missing or incomplete fields in ModelResponseStatus...")
        missing_fields = []
        response_dict = response.model_dump()
        for field, value in response_dict.items():
            if value == "..." or value == ["..."]:
                missing_fields.append(field)
            elif isinstance(value, list) and not value:
                missing_fields.append(field)
        return missing_fields

    def analyze_requirements(self, user_query: str) -> Optional[RequirementsAnalysis]:
        """Run the requirements analyst on *user_query* and return its structured content."""
        logger.info("Analyzing requirements...")
        prompt = f"Analyze this business problem and provide initial technical specifications: {user_query}"
        analyse_stream = self.requirements_analyst.run(prompt)
        return analyse_stream.content

    def conduct_research(self, research_prompt: str) -> Optional[TechnicalResearch]:
        """Run the technical researcher on *research_prompt* and return its structured content."""
        logger.info("Conducting technical research...")
        conduct_stream = self.technical_researcher.run(research_prompt)
        return conduct_stream.content

    def finalize_analysis(self, final_prompt: str) -> Optional[RequirementsAnalysis]:
        """Re-run the requirements analyst with research context to produce final specs."""
        logger.info("Finalizing analysis...")
        finalise_stream = self.requirements_analyst.run(final_prompt)
        return finalise_stream.content

    def write_requirements_post(self, requirements_results: RequirementsAnalysis) -> Iterator[RunResponse]:
        """Stream a formatted write-up of a RequirementsAnalysis via the writer agent.

        :param requirements_results: requirements_analyst response
        :return: iterator of streamed RunResponse chunks
        """
        logger.info("Writing requirements analysis...")
        writer_input = {"model_response": requirements_results.model_response.model_dump(),
                        "unclear_points": requirements_results.unclear_points,
                        "search_queries": requirements_results.search_queries,
                        "business_understanding": requirements_results.business_understanding
                        }
        yield from self.writer.run(json.dumps(writer_input, indent=4), stream=True)

    def write_research_post(self, research_results: TechnicalResearch) -> Iterator[RunResponse]:
        """Stream a formatted write-up of a TechnicalResearch via the writer agent.

        :param research_results: research content
        :return: iterator of streamed RunResponse chunks
        """
        logger.info("Writing research findings...")
        writer_input = {"research_findings": research_results.research_findings,
                        "reference_implementations": research_results.reference_implementations,
                        "sources": research_results.sources
                        }
        yield from self.writer.run(json.dumps(writer_input, indent=4), stream=True)

    def run(self, user_query: str) -> Iterator[RunResponse]:
        """Run the ML analysis workflow.

        Args:
            user_query: Description of the business problem

        Yields:
            RunResponse chunks for the initial analysis, optional research
            findings, and the final analysis; on failure, a single
            workflow_completed response carrying the error message.
        """
        try:
            # Initial requirements analysis
            requirements_result: Optional[RequirementsAnalysis] = self.analyze_requirements(user_query)
            if not requirements_result:
                yield RunResponse(
                    event=RunEvent.workflow_completed,
                    content="Error: Requirements analysis failed to produce valid results."
                )
                return
            logger.info("Writing initial requirements analysis...")
            yield from self.write_requirements_post(requirements_result)

            # Determine what still needs research.
            missing_fields = self.validate_model_response(requirements_result.model_response)
            logger.info("Missing fields found!")
            search_queries = requirements_result.search_queries
            logger.info("Search queries found!")
            unclear_points = requirements_result.unclear_points
            logger.info("Unclear points found!")

            if missing_fields or search_queries:
                # Conduct technical research on the gaps.
                logger.info("Researching technical specifications...")
                research_prompt = (
                    f"Research the following for this ML problem: {user_query}\n"
                    f"Missing information needed for: {', '.join(missing_fields)}\n"
                    f"Specific topics to research: {', '.join(search_queries)}\n"
                    f"Points needing clarification: {', '.join(unclear_points)}\n"
                    f"Current understanding: {requirements_result.business_understanding}"
                )
                logger.info("Conducting research...")
                research_result: Optional[TechnicalResearch] = self.conduct_research(research_prompt)
                # FIX: research_result is Optional — guard before dereferencing it,
                # mirroring the guard on requirements_result above.
                if not research_result:
                    yield RunResponse(
                        event=RunEvent.workflow_completed,
                        content="Error: Technical research failed to produce valid results."
                    )
                    return
                logger.info("Sharing research findings...")
                yield from self.write_research_post(research_result)

                final_prompt = (
                    f"Original problem: {user_query}\n"
                    f"Research findings: {research_result.research_findings}\n"
                    "Please provide final technical specifications incorporating this research."
                )
                logger.info("Obtaining final requirements")
                final_result: Optional[RequirementsAnalysis] = self.finalize_analysis(final_prompt)
                # FIX: final_result may also be None; only write it when present.
                if final_result:
                    logger.info("Writing final requirements...")
                    yield from self.write_requirements_post(final_result)

        except Exception as e:
            logger.error(f"Workflow error: {str(e)}")
            yield RunResponse(
                event=RunEvent.workflow_completed,
                content=f"Error in analysis workflow: {str(e)}"
            )
class MLImplementationPlanner(Workflow):
    """Workflow for creating detailed ML implementation plans.

    Takes a RequirementsAnalysis (and optional TechnicalResearch), asks the
    architect agent for a component-level ImplementationPlan, streams a
    formatted write-up, then runs a second architect pass to validate the
    interfaces between components.
    """

    # Agent that produces a structured ImplementationPlan from the analysis context.
    architect: Agent = Agent(
        name="ML System Architect",
        model=OpenAIChat(id="gpt-4o", api_key=api_key),
        description="Expert ML System Architect specialized in detailed implementation planning",
        instructions=[
            "Create detailed technical implementation plans for ML systems.",
            "1. Break down the system into logical components",
            "2. Define detailed function specifications for each component",
            "3. Specify clear interfaces between components",
            "4. Consider error handling and edge cases",
            "5. Plan testing and deployment strategies",
            "Be extremely specific about function signatures and component interactions.",
            "Focus on maintainability and scalability in the design."
        ],
        response_model=ImplementationPlan,
        structured_outputs=True,
        reasoning=True,
        storage=SqlAgentStorage(
            table_name="architect_sessions",
            db_file="storage/agent_storage.db"
        ),
        debug_mode=True,
        # memory=AgentMemory(memory_db=architect_db)
    )

    # Presentation-only agent: renders structured output as markdown without altering content.
    writer: Agent = Agent(
        model=OpenAIChat(id="gpt-4o", api_key=api_key),
        instructions=[
            # FIX: adjacent string literals are concatenated by Python, so the
            # first fragment must end with a space — the original produced
            # "...display thisin a nicely formatted manner...".
            "You will be provided with lots of structured outputs. Your work is to display this "
            "in a nicely formatted manner without changing any of the content."
        ],
        markdown=True,
    )

    def create_implementation_plan(self, planning_prompt: str) -> Optional[ImplementationPlan]:
        """Run the architect on *planning_prompt* and return its structured plan."""
        logger.info("Creating implementation plan...")
        planning_stream = self.architect.run(planning_prompt)
        return planning_stream.content

    def validate_interfaces(self, validation_prompt: str) -> Optional[ImplementationPlan]:
        """Run the architect a second time to validate component interfaces."""
        logger.info("Validating interfaces...")
        architect_stream = self.architect.run(validation_prompt)
        return architect_stream.content

    def write_implementation_post(self, implementation_results: ImplementationPlan) -> Iterator[RunResponse]:
        """Stream a formatted write-up of an ImplementationPlan via the writer agent.

        :param implementation_results: implementation plan results
        :return: iterator of streamed RunResponse chunks
        """
        logger.info("Writing implementation plan...")
        writer_input = {"components": [comp.model_dump() for comp in implementation_results.components],
                        "system_requirements": implementation_results.system_requirements,
                        "deployment_notes": implementation_results.deployment_notes,
                        "testing_strategy": implementation_results.testing_strategy,
                        "implementation_order": implementation_results.implementation_order
                        }
        yield from self.writer.run(json.dumps(writer_input, indent=4), stream=True)

    def run(
        self,
        requirements_analysis: RequirementsAnalysis,
        technical_research: Optional[TechnicalResearch] = None
    ) -> Iterator[RunResponse]:
        """Create an implementation plan based on requirements analysis and research.

        Args:
            requirements_analysis: Results from requirements analysis
            technical_research: Optional results from technical research

        Yields:
            RunResponse chunks for the plan and its validated revision; on
            failure, a single workflow_completed response with the error.
        """
        try:
            logger.info("Starting planning workflow...")
            # Prepare comprehensive prompt for the architect.
            planning_prompt = (
                f"Create a detailed implementation plan for this ML system.\n\n"
                f"Business Understanding:\n{requirements_analysis.business_understanding}\n\n"
                f"Technical Specifications:\n"
                f"- Task Type: {requirements_analysis.model_response.task}\n"
                f"- Models: {', '.join(requirements_analysis.model_response.models)}\n"
                f"- Data Requirements: {requirements_analysis.model_response.data_source}\n"
                f"- Technical Requirements: {requirements_analysis.model_response.technical_requirements}\n"
            )
            if technical_research:
                logger.info("Technical Research found! Modifying context...")
                planning_prompt += (
                    f"\nResearch Findings:\n{technical_research.research_findings}\n"
                    f"Reference Implementations:\n"
                    f"{chr(10).join(technical_research.reference_implementations)}"
                )

            logger.info("generating implementation plan...")
            plan_result: Optional[ImplementationPlan] = self.create_implementation_plan(planning_prompt)
            # FIX: guard BEFORE writing — the original wrote the plan first and
            # only then checked it, so a None plan crashed inside the writer.
            if not plan_result:
                yield RunResponse(
                    event=RunEvent.workflow_completed,
                    content="Error: Implementation planning failed to produce valid results."
                )
                return
            logger.info("writing implementation plan...")
            yield from self.write_implementation_post(plan_result)

            validation_prompt = (
                "Validate the interfaces between these components "
                "and ensure all dependencies are properly specified:\n"
                f"{plan_result.components}"
            )
            logger.info("validating results...")
            validate_result: Optional[ImplementationPlan] = self.validate_interfaces(validation_prompt)
            # FIX: validate_result may be None as well; only write it when present.
            if validate_result:
                logger.info("writing validated implementation plan...")
                yield from self.write_implementation_post(validate_result)

        except Exception as e:
            # FIX: the original used "...".format(e) on a string with no
            # placeholder, silently discarding the exception detail.
            logger.error(f"Error in planning workflow: {e}")
            # Restored (was commented out) for consistency with
            # MLAnalysisWorkflow: callers receive the failure instead of the
            # generator ending silently.
            yield RunResponse(
                event=RunEvent.workflow_completed,
                content=f"Error in planning workflow: {str(e)}"
            )
|