tugaa committed on
Commit
21f9bfa
·
verified ·
1 Parent(s): 4a13218

Create modules/task_decomposer.py

Browse files
Files changed (1) hide show
  1. modules/task_decomposer.py +47 -0
modules/task_decomposer.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modules/task_decomposer.py
2
+ import os
3
+ from openai import AsyncOpenAI
4
+
5
+ client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))  # Shared async client built at import time; os.getenv yields None when OPENAI_API_KEY is unset.
6
+
7
class TaskDecomposer:
    """Break a multimodal user goal into web-searchable subtasks via an LLM.

    The class holds no per-instance state; `decompose` relies on the
    module-level ``client`` to reach the chat-completions API.
    """

    # (context key, human-readable label) pairs, in the order they appear
    # in the generated prompt.
    _CONTEXT_LABELS = (
        ("text", "Text"),
        ("image_summary", "Image summary"),
        ("video_summary", "Video summary"),
    )

    def __init__(self):
        """Create a decomposer; there is nothing to configure."""
        pass

    async def decompose(self, context):
        """Ask the model for subtasks describing how to fulfil *context*.

        Args:
            context: Mapping that may contain any of the keys ``"text"``,
                ``"image_summary"`` and ``"video_summary"``.

        Returns:
            The decoded JSON value from the model's reply (the prompt asks
            for an array of objects with ``query``, ``language`` and
            ``modality``), or ``[]`` when the reply is not valid JSON.
        """
        base_prompt = self._build_prompt(context)

        response = await client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a task planner that breaks down multimodal user goals into executable subtasks."},
                {"role": "user", "content": base_prompt},
            ],
        )

        content = response.choices[0].message.content
        return self._parse_subtasks(content)

    def _build_prompt(self, context) -> str:
        """Render the user prompt from whichever modalities *context* has.

        Only keys present in *context* contribute a line; absent keys are
        skipped. The returned prompt starts and ends with a newline.
        """
        description = [
            f"{label}: {context[key]}"
            for key, label in self._CONTEXT_LABELS
            if key in context
        ]
        combined = "\n".join(description)

        return (
            "\n"
            "Given the following multimodal input, generate a list of clear, "
            "web-searchable subtasks needed to achieve the user's goal. "
            "Output the list in JSON array format, with each item as an "
            "object containing 'query', 'language', and 'modality'.\n"
            "\n"
            f"{combined}\n"
        )

    def _parse_subtasks(self, llm_output):
        """Decode the model reply as JSON, tolerating a Markdown code fence.

        Args:
            llm_output: Raw reply text; may be ``None`` when the model
                returned no content.

        Returns:
            The decoded JSON value, or ``[]`` if *llm_output* is missing or
            cannot be decoded.
        """
        import json

        payload = llm_output
        if isinstance(payload, str):
            payload = payload.strip()
            if payload.startswith("```"):
                # Drop the opening fence line (``` or ```json) and the
                # closing fence so that only the JSON body remains.
                payload = payload.partition("\n")[2].rstrip()
                if payload.endswith("```"):
                    payload = payload[:-3]

        try:
            return json.loads(payload)
        except (ValueError, TypeError):
            # ValueError covers json.JSONDecodeError; TypeError covers a
            # None / non-text reply. Best-effort: fall back to no subtasks.
            return []