# testAgentForHFSpace / modules / task_decomposer.py
# tugaa's picture
# Create modules/task_decomposer.py
# 21f9bfa verified
# modules/task_decomposer.py
import os
from openai import AsyncOpenAI
# Module-level async OpenAI client used by TaskDecomposer.decompose. It is
# created when this module is imported, with the API key read from the
# OPENAI_API_KEY environment variable at that moment.
# NOTE(review): os.getenv returns None when the variable is unset — confirm how
# the installed openai SDK reacts to api_key=None at import time.
client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
class TaskDecomposer:
    """Turn a multimodal user goal into a list of subtasks using a chat model.

    The decomposer builds a textual prompt from whichever of the ``text``,
    ``image_summary`` and ``video_summary`` entries are present in the context,
    sends it to the module-level ``client`` and decodes the JSON reply.
    """

    # Model used when the caller does not choose one.
    DEFAULT_MODEL = "gpt-4o"

    # System message sent with every decomposition request.
    SYSTEM_PROMPT = (
        "You are a task planner that breaks down multimodal user goals "
        "into executable subtasks."
    )

    # Instruction paragraph placed ahead of the described inputs.
    _INSTRUCTION = (
        "Given the following multimodal input, generate a list of clear, "
        "web-searchable subtasks needed to achieve the user's goal. "
        "Output the list in JSON array format, with each item as an object "
        "containing 'query', 'language', and 'modality'."
    )

    # (context key, human-readable label) pairs, in the order they are rendered.
    _CONTEXT_FIELDS = (
        ("text", "Text"),
        ("image_summary", "Image summary"),
        ("video_summary", "Video summary"),
    )

    def __init__(self, model=DEFAULT_MODEL):
        """Create a decomposer.

        Args:
            model: Name of the chat model to query. Defaults to ``"gpt-4o"``,
                which is the model this class has always used.
        """
        self.model = model

    async def decompose(self, context):
        """Ask the chat model to split the goal described by *context* into subtasks.

        Args:
            context: Mapping that may contain ``text``, ``image_summary`` and
                ``video_summary`` entries; missing entries are simply omitted
                from the prompt.

        Returns:
            The decoded JSON reply (a list of subtask objects when the model
            follows the prompt), or ``[]`` when the reply cannot be decoded.
        """
        base_prompt = self._build_prompt(context)
        response = await client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": base_prompt},
            ],
        )
        content = response.choices[0].message.content
        return self._parse_subtasks(content)

    def _build_prompt(self, context) -> str:
        """Render the user prompt: the instruction followed by the known inputs.

        Each context entry that is present becomes one ``"<Label>: <value>"``
        line; the lines appear in the fixed order text, image summary, video
        summary.
        """
        description = [
            f"{label}: {context[key]}"
            for key, label in self._CONTEXT_FIELDS
            if key in context
        ]
        combined = "\n".join(description)
        return f"\n{self._INSTRUCTION}\n{combined}\n"

    @staticmethod
    def _strip_code_fence(text):
        """Return *text* without a surrounding Markdown code fence, if it has one.

        Text that does not start with a fence is returned unchanged.
        """
        stripped = text.strip()
        if not stripped.startswith("```"):
            return text
        # Drop the opening fence line, which may carry a language tag ("```json").
        _, _, body = stripped.partition("\n")
        body = body.rstrip()
        if body.endswith("```"):
            body = body[:-3]
        return body

    def _parse_subtasks(self, llm_output):
        """Decode the model reply as JSON, tolerating a Markdown code fence.

        Args:
            llm_output: Raw reply content; may be ``None`` when the model
                returned no text.

        Returns:
            The decoded JSON value, or ``[]`` when *llm_output* is not
            decodable JSON (including ``None`` and empty strings).
        """
        import json

        payload = llm_output
        if isinstance(payload, str):
            # Replies wrapped in ```json ... ``` are valid answers that a bare
            # json.loads would reject, so unwrap them first.
            payload = self._strip_code_fence(payload)
        try:
            return json.loads(payload)
        except (TypeError, ValueError):
            # TypeError: non-text input such as None.
            # ValueError: malformed JSON (json.JSONDecodeError) or undecodable bytes.
            return []