import codecs
import io
import os
import pickle
import sys
from pathlib import Path

from PIL import Image
import requests

sys.path.append(os.getcwd())

from cllm.services.utils import get_bytes_value
from cllm.services.nlp.api import openai_chat_model

__ALL__ = [
    "object_detection",
    "image_classification",
    "ocr",
    "image_to_text",
    "segment_objects",
]

HOST = os.environ.get("CLLM_SERVICES_HOST", "localhost")
PORT = os.environ.get("CLLM_SERVICES_PORT", 10056)


def setup(host="localhost", port=10049):
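    """Override the default service host and port used by the client helpers in this module."""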
    global HOST, PORT
    HOST = host
    PORT = port


def object_detection(image, **kwargs):
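    """POST an image to the /object_detection endpoint and return the parsed JSON result."""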
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    url = f"http://{host}:{port}/object_detection"
    files = {"image": (image, get_bytes_value(image))}
    response = requests.post(url, files=files)
    return response.json()


def image_classification(image, **kwargs):
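    """POST an image to the /image_classification endpoint and return the parsed JSON result."""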
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    url = f"http://{host}:{port}/image_classification"
    files = {"image": (image, get_bytes_value(image))}
    response = requests.post(url, files=files)
    return response.json()


def image_to_text(image, **kwargs):
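    """POST an image to the /image_to_text endpoint and return the parsed JSON result."""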
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    url = f"http://{host}:{port}/image_to_text"
    files = {"image": (image, get_bytes_value(image))}
    response = requests.post(url, files=files)
    return response.json()


def ocr(image, **kwargs):
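    """POST an image to the /ocr endpoint and return the parsed JSON result."""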
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    url = f"http://{host}:{port}/ocr"
    files = {"image": (image, get_bytes_value(image))}
    response = requests.post(url, files=files)
    return response.json()


def segment_objects(image, **kwargs):
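    """POST an image to the /segment_objects endpoint, decode the base64-pickled
    response, and replace each PIL mask with its raw PNG bytes."""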
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    url = f"http://{host}:{port}/segment_objects"
    files = {"image": (image, get_bytes_value(image))}
    response = requests.post(url, files=files)
    pickled = response.json()["data"]
    output = pickle.loads(codecs.decode(pickled.encode(), "base64"))
    for o in output:
        stream = io.BytesIO()
        o["mask"].save(stream, format="png")
        stream.seek(0)
        o["mask"] = stream.getvalue()
    return output


def visual_grounding(image, query, **kwargs):
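    """Extract the grounding phrase from the free-form query via the chat model,
    then POST it together with the image to the /visual_grounding endpoint."""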
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    url = rf"http://{host}:{port}/visual_grounding"
    human_msg = f"""Your task is to extract the prompt from the input. Here are some examples:
Input:
find the region of interest in the da9619_image.png: \"An elephant in right corner\"
Answer:
An elephant in right corner
Input:
locate \"A maintenance vehicle on a railway\" in the image
Answer:
A maintenance vehicle on a railway
Input:
use visual grounding method to detect the region of interest in the 1ba6e2_image.png: \"The motorcycle with the rainbow flag\"
Answer:
The motorcycle with the rainbow flag
Input:
for given image, find \"A little baby girl with brunette hair, a pink and white dress, and is being fed frosting from her mom.\"
Answer:
A little baby girl with brunette hair, a pink and white dress, and is being fed frosting from her mom
Input:
find the policeman on the motorcycle in the 851522_image.png
Answer:
the policeman on the motorcycle
Input:
The legs of a zebra shown under the neck of another zebra.
Answer:
The legs of a zebra shown under the neck of another zebra.
Input:
{query}
Answer:
"""
    extracted_prompt = openai_chat_model(human_msg)
    files = {"image": get_bytes_value(image)}
    data = {"query": extracted_prompt}
    # image = Image.open(io.BytesIO(image)).convert("RGB")
    response = requests.post(url, data=data, files=files)
    return response.json()


def image_captioning(image, endpoint="llava", **kwargs):
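    """Request a caption for the image from the given endpoint ("llava" by default) and return the decoded text."""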
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    url = f"http://{host}:{port}/{endpoint}"
    data = None
    if endpoint == "llava":
        data = {"text": "Please describe the image in detail."}
    files = {"image": (image, get_bytes_value(image))}
    response = requests.post(url, files=files, data=data)
    return response.content.decode("utf-8")


def segment_all(image: str | Path, **kwargs):
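    """POST an image to the /segment_all endpoint and return the raw response bytes."""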
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    url = f"http://{host}:{port}/segment_all"
    files = {"image": (image, get_bytes_value(image))}
    response = requests.post(url, files=files)
    return response.content


def set_image(image: str | Path, **kwargs):
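    """Upload an image to the segmentation service and return the image id assigned by the server."""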
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    url = f"http://{host}:{port}/set_image"
    files = {"image": (image, get_bytes_value(image))}
    response = requests.post(url, files=files)
    return response.content.decode()


def segment_by_mask(mask: str | Path, image_id: str, **kwargs):
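    """Segment the previously uploaded image identified by image_id using a mask prompt."""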
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    url = f"http://{host}:{port}/segment_by_mask"
    data = {"image_id": image_id}
    files = {"mask": (mask, get_bytes_value(mask))}
    response = requests.post(url, files=files, data=data)
    return response.content


def segment_by_points(points: list | tuple | str, image_id: str, **kwargs):
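    """Segment the previously uploaded image identified by image_id using point prompts."""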
    host = kwargs.get("host", HOST)
    port = kwargs.get("port", PORT)
    url = f"http://{host}:{port}/segment_by_points"
    data = {"points": points, "image_id": image_id}
    response = requests.post(url, data=data)
    return response.content


def seg_by_mask(image, prompt_mask, **kwargs):
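    """Upload the image, then segment it with a mask prompt in a single call."""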
    image_id = set_image(image, **kwargs)
    mask = segment_by_mask(mask=prompt_mask, image_id=image_id, **kwargs)
    return mask


def seg_by_points(image, prompt_points, **kwargs):
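    """Upload the image, then segment it with point prompts in a single call."""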
    image_id = set_image(image, **kwargs)
    mask = segment_by_points(points=prompt_points, image_id=image_id, **kwargs)
    return mask
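

# Minimal usage sketch (illustrative only): assumes the cllm vision services are
# reachable at the configured host/port and that "example.png" is a placeholder
# path for a local image file.
if __name__ == "__main__":
    setup(host="localhost", port=10056)
    print(object_detection("example.png"))
    print(image_captioning("example.png"))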