# modules/task_decomposer.py import os from openai import AsyncOpenAI client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) class TaskDecomposer: def __init__(self): pass async def decompose(self, context): base_prompt = self._build_prompt(context) response = await client.chat.completions.create( model="gpt-4o", messages=[ {"role": "system", "content": "You are a task planner that breaks down multimodal user goals into executable subtasks."}, {"role": "user", "content": base_prompt} ] ) content = response.choices[0].message.content return self._parse_subtasks(content) def _build_prompt(self, context): description = [] if "text" in context: description.append(f"Text: {context['text']}") if "image_summary" in context: description.append(f"Image summary: {context['image_summary']}") if "video_summary" in context: description.append(f"Video summary: {context['video_summary']}") combined = "\n".join(description) prompt = f""" Given the following multimodal input, generate a list of clear, web-searchable subtasks needed to achieve the user's goal. Output the list in JSON array format, with each item as an object containing 'query', 'language', and 'modality'. {combined} """ return prompt def _parse_subtasks(self, llm_output): import json try: return json.loads(llm_output) except: return []