"""
PROMPT> python -m src.expert.expert_finder

Find experts who can take a look at a document, such as a 'SWOT analysis', and provide feedback.

IDEA: Make the number of experts configurable; it's currently hardcoded to 8.
When it's 4 or fewer, there is no need for a second call to the LLM model.
When it's 9 or more, make multiple calls to the LLM model to get more experts.
"""
import json
import time
import logging
from math import ceil
from uuid import uuid4
from typing import Optional, Any
from dataclasses import dataclass
from pydantic import BaseModel, Field
from llama_index.core.llms.llm import LLM
from llama_index.core.llms import ChatMessage, MessageRole

logger = logging.getLogger(__name__)

class Expert(BaseModel):
    expert_title: str = Field(description="Job title of the expert.")
    expert_knowledge: str = Field(description="Industry Knowledge/Specialization: specific industries or subfields where they have focused their career, such as the tech industry for an IT consultant, or the healthcare sector for a medical expert. **Must be a brief comma-separated list**.")
    expert_why: str = Field(description="Why can this expert be of help. Area of expertise.")
    expert_what: str = Field(description="Describe what area of this document the role should advise about.")
    expert_relevant_skills: str = Field(description="Skills that are relevant to the document.")
    expert_search_query: str = Field(description="What query to use when searching for this expert.")

class ExpertDetails(BaseModel):
    experts: list[Expert] = Field(description="List of experts.")

EXPERT_FINDER_SYSTEM_PROMPT = """
Suggest professionals who can offer specialized perspectives and recommendations based on the document.

Ensure that each expert role directly aligns with specific sections or themes within the document.
This could involve linking particular strengths, weaknesses, opportunities, threats, or extra sections to the expertise required.

Ensure diversity in the types of experts by considering interdisciplinary insights that might not be
immediately obvious but could offer unique perspectives on the document.

Account for geographical and contextual relevance, and for variations in terminology or regional differences that may affect the search outcome.

The "expert_search_query" field is human readable text for searching on Google/DuckDuckGo/LinkedIn.

Find exactly 4 experts.
"""

@dataclass
class ExpertFinder:
    """
    Find experts that can advise about the particular domain.
    """
    system_prompt: Optional[str]
    user_prompt: str
    response: dict
    metadata: dict
    expert_list: list[dict]

    @classmethod
    def execute(cls, llm: LLM, user_prompt: str, **kwargs: Any) -> 'ExpertFinder':
        """
        Invoke LLM to find the best suited experts that can advise about attached file.
        """
        if not isinstance(llm, LLM):
            raise ValueError("Invalid LLM instance.")
        if not isinstance(user_prompt, str):
            raise ValueError("Invalid user_prompt.")

        default_args = {
            'system_prompt': EXPERT_FINDER_SYSTEM_PROMPT.strip()
        }
        default_args.update(kwargs)

        system_prompt = default_args.get('system_prompt')
        if system_prompt and not isinstance(system_prompt, str):
            raise ValueError("Invalid system prompt.")
        logger.debug(f"System Prompt:\n{system_prompt}")

        chat_message_list1 = []
        if system_prompt:
            chat_message_list1.append(
                ChatMessage(
                    role=MessageRole.SYSTEM,
                    content=system_prompt,
                )
            )
        
        logger.debug(f"User Prompt:\n{user_prompt}")
        chat_message_user1 = ChatMessage(
            role=MessageRole.USER,
            content=user_prompt,
        )
        chat_message_list1.append(chat_message_user1)

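        # as_structured_llm() wraps the LLM so each chat response is parsed and
        # validated against the ExpertDetails schema; message.content holds the JSON.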
        sllm = llm.as_structured_llm(ExpertDetails)

        logger.debug("Starting LLM chat interaction 1.")
        start_time = time.perf_counter()
        chat_response1 = sllm.chat(chat_message_list1)
        end_time = time.perf_counter()
        duration1 = int(ceil(end_time - start_time))
        response_byte_count1 = len(chat_response1.message.content.encode('utf-8'))
        logger.info(f"LLM chat interaction completed in {duration1} seconds. Response byte count: {response_byte_count1}")

        # Ask a follow-up question to obtain more experts.
        chat_message_assistant2 = ChatMessage(
            role=MessageRole.ASSISTANT,
            content=chat_response1.message.content,
        )
        chat_message_user2 = ChatMessage(
            role=MessageRole.USER,
            content="4 more please",
        )
        chat_message_list2 = chat_message_list1.copy()
        chat_message_list2.append(chat_message_assistant2)
        chat_message_list2.append(chat_message_user2)
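        # The full history (system prompt, user prompt, first structured answer)
        # is resent, so the model sees the first batch and is less likely to
        # repeat the same roles.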

        logger.debug("Starting LLM chat interaction 2.")
        start_time = time.perf_counter()
        chat_response2 = sllm.chat(chat_message_list2)
        end_time = time.perf_counter()
        duration2 = int(ceil(end_time - start_time))
        response_byte_count2 = len(chat_response2.message.content.encode('utf-8'))
        logger.info(f"LLM chat interaction completed in {duration2} seconds. Response byte count: {response_byte_count2}")

        metadata = dict(llm.metadata)
        metadata["llm_classname"] = llm.class_name()
        metadata["duration1"] = duration1
        metadata["duration2"] = duration2
        metadata["response_byte_count1"] = response_byte_count1
        metadata["response_byte_count2"] = response_byte_count2

        json_response1 = json.loads(chat_response1.message.content)
        json_response2 = json.loads(chat_response2.message.content)

        json_response_merged = {}
        experts1 = json_response1.get('experts', [])
        experts2 = json_response2.get('experts', [])
        json_response_merged['experts'] = experts1 + experts2
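        # Note: no de-duplication is applied, so the second batch may repeat
        # roles from the first and the merged list can contain near-duplicates.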

        # Clean up the JSON responses from the LLM and extract the experts.
        expert_list = []
        for expert in json_response_merged['experts']:
            uuid = str(uuid4())
            expert_dict = {
                "id": uuid,
                "title": expert['expert_title'],
                "knowledge": expert['expert_knowledge'],
                "why": expert['expert_why'],
                "what": expert['expert_what'],
                "skills": expert['expert_relevant_skills'],
                "search_query": expert['expert_search_query'],
            }
            expert_list.append(expert_dict)
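
        # Each entry is shaped like (illustrative values only):
        #   {"id": "<uuid4>", "title": "...", "knowledge": "...", "why": "...",
        #    "what": "...", "skills": "...", "search_query": "..."}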

        logger.info(f"Found {len(expert_list)} experts.")

        result = ExpertFinder(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            response=json_response_merged,
            metadata=metadata,
            expert_list=expert_list,
        )
        logger.debug("CreateProjectPlan instance created successfully.")
        return result    

    def to_dict(self, include_metadata=True, include_system_prompt=True, include_user_prompt=True) -> dict:
        d = self.response.copy()
        if include_metadata:
            d['metadata'] = self.metadata
        if include_system_prompt:
            d['system_prompt'] = self.system_prompt
        if include_user_prompt:
            d['user_prompt'] = self.user_prompt
        return d

    def save_raw(self, file_path: str) -> None:
        with open(file_path, 'w') as f:
            f.write(json.dumps(self.to_dict(), indent=2))

    def save_cleanedup(self, file_path: str) -> None:
        with open(file_path, 'w') as f:
            f.write(json.dumps(self.expert_list, indent=2))
        

if __name__ == "__main__":
    import os
    from src.llm_factory import get_llm

    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.StreamHandler()
        ]
    )

    path = os.path.join(os.path.dirname(__file__), 'test_data', 'solarfarm_swot_analysis.md')
    with open(path, 'r', encoding='utf-8') as f:
        swot_markdown = f.read()

    query = f"SWOT Analysis:\n{swot_markdown}"

    llm = get_llm("ollama-llama3.1")
    # llm = get_llm("deepseek-chat", max_tokens=8192)

    print(f"Query: {query}")
    result = ExpertFinder.execute(llm, query)

    print("\n\nResponse:")
    print(json.dumps(result.to_dict(include_system_prompt=False, include_user_prompt=False), indent=2))

    print("\n\nExperts:")
    print(json.dumps(result.expert_list, indent=2))