"""
https://en.wikipedia.org/wiki/Work_breakdown_structure
https://en.wikipedia.org/wiki/Program_evaluation_and_review_technique
"""
import os
import json
import time
from math import ceil
from typing import Optional
from dataclasses import dataclass
from pydantic import BaseModel, Field
from llama_index.core.llms.llm import LLM
from src.format_json_for_use_in_query import format_json_for_use_in_query

class TaskTimeEstimateDetail(BaseModel):
    """
    Details about a task's duration: lower/upper bounds, potential risks that may delay it, and strategies to mitigate those risks.
    """
    task_id: str = Field(
        description="UUID that uniquely identifies the task."
    )
    delay_risks: str = Field(
        description="Possible issues that may delay the task. Example: ['Weather-related disruptions', 'Third-party vendors might fail to deliver on time', 'Key team members might be unavailable']. **This field MUST be filled with a meaningful description. Do not leave it empty.**"
    )
    mitigation_strategy: str = Field(
        description="Actions or strategies to minimize the risk of delays. Example: ['Engage backup vendors', 'Schedule regular progress reviews', 'Establish clear communication channels']. **This field MUST be filled with a meaningful and specific strategy. Do not leave it empty.**"
    )
    days_min: int = Field(
        description="Number of days in the best-case scenario. If not applicable, use -1."
    )
    days_max: int = Field(
        description="Number of days in the worst-case scenario. If not applicable, use -1."
    )
    days_realistic: int = Field(
        description="Number of days in the realistic scenario. If not applicable, use -1."
    )

class TimeEstimates(BaseModel):
    """
    Estimating realistic durations for each task and appropriately assigning resources 
    ensures that the project stays on schedule and within budget.
    """
    task_details: list[TaskTimeEstimateDetail] = Field(
        description="List with tasks with time estimates."
    )
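
# Illustrative helper, not part of the original module: with the three-point
# estimates above, a PERT-style expected duration is (O + 4M + P) / 6, where
# O = days_min, M = days_realistic, P = days_max. The function name and the
# handling of the -1 "not applicable" sentinel are assumptions for this sketch.
def pert_expected_days(detail: TaskTimeEstimateDetail) -> Optional[float]:
    """Return the PERT expected duration in days, or None if any estimate is missing."""
    if -1 in (detail.days_min, detail.days_realistic, detail.days_max):
        return None
    return (detail.days_min + 4 * detail.days_realistic + detail.days_max) / 6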

QUERY_PREAMBLE = """
Assign estimated durations for each task and subtask.
Ensure a consistent voice and phrasing across tasks.

**For each task, you MUST provide a meaningful description for both 'delay_risks' and 'mitigation_strategy'. Do not leave these fields as empty strings.**

**Example of good 'delay_risks' and 'mitigation_strategy':**
For the task of "Define project scope and objectives":
- delay_risks: "Lack of clear initial requirements from stakeholders, potential for scope creep later in the project."
- mitigation_strategy: "Conduct thorough initial meetings with all key stakeholders to gather requirements, establish a clear change management process."

"""

@dataclass
class EstimateWBSTaskDurations:
    """
    Enrich an existing Work Breakdown Structure (WBS) with task duration estimates.
    """
    query: str
    response: dict
    metadata: dict

    @classmethod
    def format_query(cls, plan_json: dict, wbs_level2_json: list, task_ids: list[str]) -> str:
        if not isinstance(plan_json, dict):
            raise ValueError("Invalid plan_json.")
        if not isinstance(wbs_level2_json, list):
            raise ValueError("Invalid wbs_level1_json.")
        if not isinstance(task_ids, list):
            raise ValueError("Invalid task_ids.")

        """
        Wrap the task ids in quotes, so it looks like this:
        "0ca58751-3abd-44d0-b24b-ebcf14c794e7"
        "86f0ed30-ba23-46e4-83d9-ef53d95ff054"
        "58d5dcc3-7385-4919-adc1-e1f84727e9d2"
        """
        task_ids_in_quotes = [f'"{task_id}"' for task_id in task_ids]
        task_id_strings = "\n".join(task_ids_in_quotes)

        query = f"""
The project plan:
{format_json_for_use_in_query(plan_json)}

The Work Breakdown Structure (WBS):
{format_json_for_use_in_query(wbs_level2_json)}

Only estimate these {len(task_ids)} tasks:
{task_id_strings}
"""
        return query
    
    @classmethod
    def execute(cls, llm: LLM, query: str) -> 'EstimateWBSTaskDurations':
        """
        Invoke the LLM to estimate task durations from a JSON representation of a project plan and Work Breakdown Structure (WBS).

        Executing with too many task_ids may result in a timeout, where the LLM cannot complete the request within a reasonable time.
        Split the task_ids into chunks of around 3 each and process them one at a time (see the sketch after this class).
        """
        if not isinstance(llm, LLM):
            raise ValueError("Invalid LLM instance.")
        if not isinstance(query, str):
            raise ValueError("Invalid query.")

        start_time = time.perf_counter()

        sllm = llm.as_structured_llm(TimeEstimates)
        response = sllm.complete(QUERY_PREAMBLE + query)
        json_response = json.loads(response.text)

        end_time = time.perf_counter()
        duration = int(ceil(end_time - start_time))

        metadata = dict(llm.metadata)
        metadata["llm_classname"] = llm.class_name()
        metadata["duration"] = duration

        result = cls(
            query=query,
            response=json_response,
            metadata=metadata,
        )
        return result
    
    def raw_response_dict(self, include_metadata: bool = True, include_query: bool = True) -> dict:
        d = self.response.copy()
        if include_metadata:
            d['metadata'] = self.metadata
        if include_query:
            d['query'] = self.query
        return d
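
# Illustrative sketch, not part of the original module: the execute() docstring
# advises splitting task_ids into chunks of roughly 3 before querying the LLM.
# The helper name and default chunk size are assumptions for this example.
def execute_in_chunks(llm: LLM, plan_json: dict, wbs_level2_json: list, task_ids: list[str], chunk_size: int = 3) -> list[EstimateWBSTaskDurations]:
    """Process task_ids in small chunks, one LLM call per chunk, to avoid timeouts."""
    results = []
    for start in range(0, len(task_ids), chunk_size):
        chunk = task_ids[start:start + chunk_size]
        query = EstimateWBSTaskDurations.format_query(plan_json, wbs_level2_json, chunk)
        results.append(EstimateWBSTaskDurations.execute(llm, query))
    return results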

if __name__ == "__main__":
    from llama_index.llms.ollama import Ollama

    # TODO: Eliminate hardcoded paths
    basepath = '/Users/neoneye/Desktop/planexe_data'

    def load_json(relative_path: str) -> dict:
        path = os.path.join(basepath, relative_path)
        print(f"loading file: {path}")
        with open(path, 'r', encoding='utf-8') as f:
            the_json = json.load(f)
        return the_json

    plan_json = load_json('002-project_plan.json')
    wbs_level2_json = load_json('006-wbs_level2.json')

    task_ids = [
        "c6a249af-b8d3-4d4c-b3ef-8a5caa8793d4",
        "622fa6f1-6252-445e-8b5a-2a5c75683a80",
        "fdaa706e-3d3b-4166-9730-7ea3e238d0cf"
    ]

    query = EstimateWBSTaskDurations.format_query(plan_json, wbs_level2_json, task_ids)

    model_name = "llama3.1:latest"
    # model_name = "qwen2.5-coder:latest"
    # model_name = "phi4:latest"
    llm = Ollama(model=model_name, request_timeout=120.0, temperature=0.5, is_function_calling_model=False)

    print(f"Query: {query}")
    result = EstimateWBSTaskDurations.execute(llm, query)

    print("\n\nResponse:")
    response_dict = result.raw_response_dict(include_query=False)
    print(json.dumps(response_dict, indent=2))