File size: 3,233 Bytes
3afb4b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
049ef04
 
 
 
 
 
 
 
 
 
 
 
3afb4b6
 
 
 
 
 
049ef04
181a05c
3afb4b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181a05c
 
 
 
 
 
 
3afb4b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181a05c
 
3afb4b6
 
181a05c
 
 
3afb4b6
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import copy
from PCAgent.api import resize_encode_image


def init_subtask_chat():
    """Create a fresh chat history for the subtask conversation.

    Returns:
        A new list holding a single ``["system", content]`` turn, where
        ``content`` is a one-element list with the system prompt as a text
        part.
    """
    return [
        [
            "system",
            [{"type": "text", "text": "You are a helpful AI assistant."}],
        ]
    ]


def init_action_chat():
    """Create a fresh chat history for the action conversation.

    Returns:
        A new list holding a single ``["system", content]`` turn, where
        ``content`` is a one-element list with the system prompt as a text
        part.
    """
    prompt_part = {
        "type": "text",
        "text": "You are a helpful AI PC operating assistant. You need to help me operate the PC to complete the user\'s instruction.",
    }
    return [["system", [prompt_part]]]


def init_reflect_chat():
    """Create a fresh chat history for the reflection conversation.

    Returns:
        A new list holding a single ``["system", content]`` turn, where
        ``content`` is a one-element list with the system prompt as a text
        part.
    """
    system_turn = [
        "system",
        [{"type": "text", "text": "You are a helpful AI PC operating assistant."}],
    ]
    return [system_turn]


def init_memory_chat():
    """Create a fresh chat history for the memory conversation.

    Returns:
        A new list holding a single ``["system", content]`` turn, where
        ``content`` is a one-element list with the system prompt as a text
        part.
    """
    content = [
        {"type": "text", "text": "You are a helpful AI PC operating assistant."},
    ]
    history = [["system", content]]
    return history


def add_response_old(role, prompt, chat_history, image=None):
    """Return a copy of ``chat_history`` with one more ``[role, content]`` turn.

    The history is deep-copied first, so the list passed in is left untouched.
    ``content`` always begins with a text part carrying ``prompt``; when
    ``image`` is truthy, an image part referencing ``image`` is added after it.

    Args:
        role: Role label stored as the first element of the new turn.
        prompt: Text stored in the text part.
        chat_history: Existing list of turns to extend (not modified).
        image: Optional image to attach; any falsy value attaches nothing.

    Returns:
        The new, extended history list.
    """
    updated_history = copy.deepcopy(chat_history)

    content = [{"type": "text", "text": prompt}]
    if image:
        # The encoded value is not placed in the message (the image itself is
        # attached below); the call is kept so any effect it has, including
        # raising for an unusable image, stays the same.
        resize_encode_image(image)
        content.append({"type": "image", "image": image})

    updated_history.append([role, content])
    return updated_history


def add_response(role, prompt, chat_history, image=None, use_qwen=False):
    """Return a copy of ``chat_history`` with one more ``[role, content]`` turn.

    The history is deep-copied first, so the list passed in is left untouched.
    ``content`` begins with a text part carrying ``prompt`` and gains one
    extra part per entry of ``image``, in order.

    Args:
        role: Role label stored as the first element of the new turn.
        prompt: Text stored in the leading text part.
        chat_history: Existing list of turns to extend (not modified).
        image: Iterable of images to attach. ``None`` (the default) or an
            empty iterable attaches nothing.
        use_qwen: Accepted so existing callers keep working; this function
            does not read it.

    Returns:
        The new, extended history list.
    """
    # A ``None`` default replaces the former shared mutable ``[]`` default and
    # also lets callers pass ``None`` explicitly to mean "no images".
    images = [] if image is None else image

    new_chat_history = copy.deepcopy(chat_history)
    content = [{"type": "text", "text": prompt}]

    for item in images:
        # The encoded value is not placed in the message (the image itself is
        # attached below); the call is kept so any effect it has, including
        # raising for an unusable image, stays the same.
        resize_encode_image(item)
        # NOTE(review): the type tag here is "image_", while the sibling
        # helpers in this file emit "image". It looks like a typo, but the
        # consumers of this structure are not visible from here, so the value
        # is left as-is - confirm before changing.
        content.append({"type": "image_", "image": item})

    new_chat_history.append([role, content])
    return new_chat_history


def add_response_two_image(role, prompt, chat_history, image):
    """Return a copy of ``chat_history`` with a text-plus-two-images turn added.

    The history is deep-copied first, so the list passed in is left untouched.
    The new turn's content is a text part carrying ``prompt`` followed by
    image parts for ``image[0]`` and ``image[1]``.

    Args:
        role: Role label stored as the first element of the new turn.
        prompt: Text stored in the text part.
        chat_history: Existing list of turns to extend (not modified).
        image: Indexable holding at least two images; only indices 0 and 1
            are read.

    Returns:
        The new, extended history list.
    """
    updated_history = copy.deepcopy(chat_history)

    first_image = image[0]
    # The encoded values are not placed in the message (the images themselves
    # are attached below); the calls are kept, in the same order, so any
    # effect they have stays the same.
    resize_encode_image(first_image)
    second_image = image[1]
    resize_encode_image(second_image)

    content = [{"type": "text", "text": prompt}]
    content.extend(
        {"type": "image", "image": attached}
        for attached in (first_image, second_image)
    )

    updated_history.append([role, content])
    return updated_history


def print_status(chat_history):
    """Print a text summary of every turn in ``chat_history`` to stdout.

    The output is framed by two lines of 100 asterisks. For each turn it
    prints the role, then the text of the first content part followed by one
    ``<image>`` marker for every additional content part and a blank line.

    Args:
        chat_history: List of ``[role, content]`` turns, where ``content`` is
            a list whose first element is a dict with a ``"text"`` key.
    """
    banner = "*" * 100
    print(banner)
    for turn in chat_history:
        print("role:", turn[0])
        parts = turn[1]
        # Every part after the leading text part is shown as a placeholder.
        image_markers = "<image>" * (len(parts) - 1)
        print(parts[0]["text"] + image_markers + "\n")
    print(banner)