File size: 5,933 Bytes
3fdcc70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import codecs
import io
import os
import pickle
from pathlib import Path
from PIL import Image
import requests
import os
import sys

sys.path.append(os.getcwd())
from cllm.services.utils import get_bytes_value
from cllm.services.nlp.api import openai_chat_model

# NOTE(review): Python only gives special meaning to the lowercase name
# ``__all__``. Spelled ``__ALL__`` this is an ordinary list and does not
# restrict what ``from <module> import *`` exports. Renaming it would narrow
# the star-import surface to these five names, so confirm with callers first.
__ALL__ = [
    "object_detection",
    "image_classification",
    "ocr",
    "image_to_text",
    "segment_objects",
]


# Default address of the service the functions in this module post to.
# Both values can be replaced at runtime via ``setup()`` or overridden per
# call with ``host=`` / ``port=`` keyword arguments.
HOST = os.environ.get("CLLM_SERVICES_HOST", "localhost")
# NOTE: a port taken from the environment is a ``str`` while the fallback is
# an ``int``; the functions visible here only interpolate it into URL
# f-strings, where both render the same way.
PORT = os.environ.get("CLLM_SERVICES_PORT", 10056)


def setup(host="localhost", port=10049):
    """Rebind the module-level ``HOST`` and ``PORT`` defaults.

    Args:
        host: New value for the module-level ``HOST``.
        port: New value for the module-level ``PORT``.

    Functions in this module that read ``HOST`` / ``PORT`` at call time pick
    up the new values on their next invocation.
    """
    # globals() inside a function is the defining module's namespace, so this
    # rebinds the same two names a ``global`` statement would.
    globals().update(HOST=host, PORT=port)


def object_detection(image, **kwargs):
    """POST ``image`` to the service's ``/object_detection`` endpoint.

    Args:
        image: Passed to ``get_bytes_value`` to obtain the upload payload and
            also sent as the multipart filename.
        **kwargs: Optional ``host`` and ``port`` that override the
            module-level ``HOST`` / ``PORT`` for this call only.

    Returns:
        The response body parsed as JSON.
    """
    server = kwargs.get("host", HOST)
    server_port = kwargs.get("port", PORT)
    upload = {"image": (image, get_bytes_value(image))}
    reply = requests.post(
        f"http://{server}:{server_port}/object_detection",
        files=upload,
    )
    return reply.json()


def image_classification(image, **kwargs):
    """POST ``image`` to the service's ``/image_classification`` endpoint.

    Args:
        image: Passed to ``get_bytes_value`` to obtain the upload payload and
            also sent as the multipart filename.
        **kwargs: Optional ``host`` and ``port`` that override the
            module-level ``HOST`` / ``PORT`` for this call only.

    Returns:
        The response body parsed as JSON.
    """
    host, port = kwargs.get("host", HOST), kwargs.get("port", PORT)
    target = f"http://{host}:{port}/image_classification"
    image_bytes = get_bytes_value(image)
    return requests.post(target, files={"image": (image, image_bytes)}).json()


def image_to_text(image, **kwargs):
    """POST ``image`` to the service's ``/image_to_text`` endpoint.

    Args:
        image: Passed to ``get_bytes_value`` to obtain the upload payload and
            also sent as the multipart filename.
        **kwargs: Optional ``host`` and ``port`` that override the
            module-level ``HOST`` / ``PORT`` for this call only.

    Returns:
        The response body parsed as JSON.
    """
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    multipart = {"image": (image, get_bytes_value(image))}
    result = requests.post(
        f"http://{host}:{port}/image_to_text", files=multipart
    )
    return result.json()


def ocr(image, **kwargs):
    """POST ``image`` to the service's ``/ocr`` endpoint.

    Args:
        image: Passed to ``get_bytes_value`` to obtain the upload payload and
            also sent as the multipart filename.
        **kwargs: Optional ``host`` and ``port`` that override the
            module-level ``HOST`` / ``PORT`` for this call only.

    Returns:
        The response body parsed as JSON.
    """
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    raw_image = get_bytes_value(image)
    http_reply = requests.post(
        f"http://{host}:{port}/ocr",
        files={"image": (image, raw_image)},
    )
    return http_reply.json()


def segment_objects(image, **kwargs):
    """POST ``image`` to ``/segment_objects`` and return the decoded segments.

    The JSON reply carries a ``"data"`` field holding a base64-encoded pickle.
    After unpickling, every item's ``"mask"`` entry is replaced by the PNG
    bytes produced by calling its ``save(..., format="png")`` method.

    Args:
        image: Passed to ``get_bytes_value`` to obtain the upload payload and
            also sent as the multipart filename.
        **kwargs: Optional ``host`` and ``port`` that override the
            module-level ``HOST`` / ``PORT`` for this call only.

    Returns:
        The unpickled iterable, with each item's ``"mask"`` now PNG bytes.
    """
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    reply = requests.post(
        f"http://{host}:{port}/segment_objects",
        files={"image": (image, get_bytes_value(image))},
    )
    encoded = reply.json()["data"]
    # NOTE(security): pickle.loads can execute arbitrary code while loading.
    # Only point this client at a service you trust.
    segments = pickle.loads(codecs.decode(encoded.encode(), "base64"))
    for segment in segments:
        # getvalue() returns the whole buffer regardless of stream position,
        # so no rewind is needed before reading the PNG bytes back.
        with io.BytesIO() as png_buffer:
            segment["mask"].save(png_buffer, format="png")
            segment["mask"] = png_buffer.getvalue()

    return segments


def visual_grounding(image, query, **kwargs):
    """Extract the target phrase from ``query`` and POST it with ``image``.

    ``query`` is first embedded in a few-shot prompt and sent to
    ``openai_chat_model``; the model's reply is then posted as the ``query``
    form field to the service's ``/visual_grounding`` endpoint together with
    the image bytes.

    Args:
        image: Passed to ``get_bytes_value`` to obtain the upload payload.
        query: Free-form instruction from which the phrase is extracted.
        **kwargs: Optional ``host`` and ``port`` that override the
            module-level ``HOST`` / ``PORT`` for this call only.

    Returns:
        The response body parsed as JSON.
    """
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    # The few-shot prompt below is runtime text sent to the chat model; it is
    # kept exactly as written.
    extraction_request = f"""Your task is to extract the prompt from input. Here is examples:

    Input:
    find the regin of interest in the da9619_image.png: \"An elephant in right corner\"

    Answer:
    An elephant in right corner

    Input:
    locate \"A maintenance vehicle on a railway\" in the image

    Answer:
    A maintenance vehicle on a railway

    Input:
    use visual grounding method to detect the regin of interest in the 1ba6e2_image.png: The motorcycle with the rainbow flag"

    Answer:
    The motorcycle with the rainbow flag

    Input:
    for given image, find A little baby girl with brunette hair, a pink and white dress, and is being fed frosting from her mom."

    Answer:
    A little baby girl with brunette hair, a pink and white dress, and is being fed frosting from her mom

    Input:
    find the policeman on the motorcycle in the 851522_image.png"

    Answer:
    the policeman on the motorcycle

    Input:
    The legs of a zebra shown under the neck of another zebra.

    Answer:
    The legs of a zebra shown under the neck of another zebra.

    Input:
    {query}

    Answer:
    """

    target_phrase = openai_chat_model(extraction_request)
    reply = requests.post(
        f"http://{host}:{port}/visual_grounding",
        data={"query": target_phrase},
        files={"image": get_bytes_value(image)},
    )

    return reply.json()


def image_captioning(image, endpoint="llava", **kwargs):
    """POST ``image`` to a captioning endpoint and return the reply as text.

    Args:
        image: Passed to ``get_bytes_value`` to obtain the upload payload and
            also sent as the multipart filename.
        endpoint: Path segment appended to the service URL. Only for
            ``"llava"`` is a ``text`` form field with a fixed instruction
            sent along with the image.
        **kwargs: Optional ``host`` and ``port`` that override the
            module-level ``HOST`` / ``PORT`` for this call only.

    Returns:
        The raw response body decoded as UTF-8.
    """
    host, port = kwargs.get("host", HOST), kwargs.get("port", PORT)
    form_fields = (
        {"text": "Please describe the image in details."}
        if endpoint == "llava"
        else None
    )
    upload = {"image": (image, get_bytes_value(image))}
    reply = requests.post(
        f"http://{host}:{port}/{endpoint}", files=upload, data=form_fields
    )
    return reply.content.decode("utf-8")


def segment_all(image: str | Path, **kwargs):
    """POST ``image`` to the service's ``/segment_all`` endpoint.

    Args:
        image: Passed to ``get_bytes_value`` to obtain the upload payload and
            also sent as the multipart filename.
        **kwargs: Optional ``host`` and ``port`` that override the
            module-level ``HOST`` / ``PORT`` for this call only.

    Returns:
        The raw response body as bytes.
    """
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    image_bytes = get_bytes_value(image)
    reply = requests.post(
        f"http://{host}:{port}/segment_all",
        files={"image": (image, image_bytes)},
    )
    return reply.content


def set_image(image: str | Path, **kwargs):
    """POST ``image`` to the service's ``/set_image`` endpoint.

    Args:
        image: Passed to ``get_bytes_value`` to obtain the upload payload and
            also sent as the multipart filename.
        **kwargs: Optional ``host`` and ``port`` that override the
            module-level ``HOST`` / ``PORT`` for this call only.

    Returns:
        The response body decoded to ``str`` with the default codec.
    """
    host, port = kwargs.get("host", HOST), kwargs.get("port", PORT)
    upload = {"image": (image, get_bytes_value(image))}
    body = requests.post(f"http://{host}:{port}/set_image", files=upload).content
    return body.decode()


def segment_by_mask(mask: str | Path, image_id: str, **kwargs):
    """POST a prompt mask to the service's ``/segment_by_mask`` endpoint.

    Args:
        mask: Passed to ``get_bytes_value`` to obtain the upload payload and
            also sent as the multipart filename.
        image_id: Sent as the ``image_id`` form field.
        **kwargs: Optional ``host`` and ``port`` that override the
            module-level ``HOST`` / ``PORT`` for this call only.

    Returns:
        The raw response body as bytes.
    """
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    form_fields = {"image_id": image_id}
    upload = {"mask": (mask, get_bytes_value(mask))}
    reply = requests.post(
        f"http://{host}:{port}/segment_by_mask",
        files=upload,
        data=form_fields,
    )
    return reply.content


def segment_by_points(points: list | tuple | str, image_id: str, **kwargs):
    """POST prompt points to the service's ``/segment_by_points`` endpoint.

    Args:
        points: Sent as the ``points`` form field.
        image_id: Sent as the ``image_id`` form field.
        **kwargs: Optional ``host`` and ``port`` that override the
            module-level ``HOST`` / ``PORT`` for this call only.

    Returns:
        The raw response body as bytes.
    """
    host, port = kwargs.get("host", HOST), kwargs.get("port", PORT)
    form_fields = {"points": points, "image_id": image_id}
    return requests.post(
        f"http://{host}:{port}/segment_by_points", data=form_fields
    ).content


def seg_by_mask(image, prompt_mask, **kwargs):
    """Register ``image`` with the service, then segment it using a mask.

    Calls ``set_image`` to upload the image and uses the returned id in a
    follow-up ``segment_by_mask`` request.

    Args:
        image: Image forwarded to ``set_image``.
        prompt_mask: Mask forwarded to ``segment_by_mask``.
        **kwargs: Optional ``host`` and ``port`` overrides. They are applied
            to both requests; any other keyword argument is ignored.

    Returns:
        The raw response body from ``segment_by_mask`` as bytes.
    """
    # Previously the overrides were accepted but dropped, so both requests
    # always went to the module-level HOST/PORT. Forward only the two keys the
    # helpers read, so unrelated keyword arguments stay ignored as before.
    connection = {key: kwargs[key] for key in ("host", "port") if key in kwargs}
    image_id = set_image(image, **connection)
    return segment_by_mask(mask=prompt_mask, image_id=image_id, **connection)


def seg_by_points(image, prompt_points, **kwargs):
    """Register ``image`` with the service, then segment it using points.

    Calls ``set_image`` to upload the image and uses the returned id in a
    follow-up ``segment_by_points`` request.

    Args:
        image: Image forwarded to ``set_image``.
        prompt_points: Points forwarded to ``segment_by_points``.
        **kwargs: Optional ``host`` and ``port`` overrides. They are applied
            to both requests; any other keyword argument is ignored.

    Returns:
        The raw response body from ``segment_by_points`` as bytes.
    """
    # Previously the overrides were accepted but dropped, so both requests
    # always went to the module-level HOST/PORT. Forward only the two keys the
    # helpers read, so unrelated keyword arguments stay ignored as before.
    connection = {key: kwargs[key] for key in ("host", "port") if key in kwargs}
    image_id = set_image(image, **connection)
    return segment_by_points(
        points=prompt_points, image_id=image_id, **connection
    )