import os
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
import gradio as gr
import re
import openai
import base64
from io import BytesIO
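# Gradio demo: builds inspection prompts (detection, angle shift, leg lift, polarity) for
# circuit-board components and sends the uploaded image plus the selected prompt to an OpenAI
# vision model, which is asked to answer with JSON.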
# model_name = "arjunanand13/Florence-enphase2"
# model_name = "Stardragon2099/florence-adlp-40e"
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(f"Using device: {device}")
# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
# model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch_dtype, trust_remote_code=True).to(device)
# processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
# Retrieve the API key from Hugging Face Secrets
openai.api_key = os.environ.get("Chatgpt_api")
# torch.cuda.empty_cache()
DEFAULT_PROMPT = ("You are a Leg Lift Classifier. You are given an image of a through-hole component "
                  "and need to identify whether the leg is inserted in the hole or not. Return 'True' "
                  "if any leg is not completely seated in the hole; return 'False' if the leg is inserted "
                  "in the hole. Return only the required JSON in this format: {Leg_lift: , Reason: }.")
# def predict(img, prompt):
# inputs = processor(text=prompt, images=img, return_tensors="pt").to(device, torch_dtype)
# generated_ids = model.generate(
# input_ids=inputs["input_ids"],
# pixel_values=inputs["pixel_values"],
# max_new_tokens=1024,
# do_sample=False,
# num_beams=3
# )
# generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
# parsed_answer = processor.post_process_generation(generated_text, task= prompt, image_size=(img.width, img.height))
# return parsed_answer
# if not isinstance(image, Image.Image):
# raise ValueError(f"Expected image to be PIL.Image, but got {type(image)}")
# encoding = processor(images=image, text=question, return_tensors="pt").to(device)
# with torch.no_grad():
# outputs = model.generate(**encoding, max_length=725)
# answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
# return answer
def predict(img, prompt):
    # Encode the Pillow image as base64 so it can be sent inline to the vision model
    image_bytes = BytesIO()
    img.save(image_bytes, format='PNG')  # Save as PNG (can be changed to JPEG)
    image_bytes.seek(0)  # Rewind the BytesIO buffer
    image_b64 = base64.b64encode(image_bytes.read()).decode('utf-8')

    # Send the prompt and the image together. The Chat Completions API accepts images as
    # base64 data-URL "image_url" content parts, so no separate file upload is needed.
    messages = [
        {"role": "system", "content": prompt + " Respond with JSON only."},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Analyze the attached image."},
                {"type": "image_url",
                 "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
            ],
        },
    ]

    # Make the API call and return the model's raw text response
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=messages
        )
        return response['choices'][0]['message']['content']
    except Exception as e:
        return f"Error: {e}"
def get_prompt(method, component_label=None):
    prompt = "Not a valid combination"
    if method == "leg_lift":
        if component_label in ["Y_capacitor", "varistor_red", "varistor_grey"]:
            prompt = """You are a Leg Lift Length Analyzer. Analyze the provided cropped image of a through-hole component and determine the length of the metal leg tip, measured from its curve.
Focus on the metal leg tip and the hole.
The length will not exceed 2.5 millimeters.
Return a JSON object with the following keys:
- 'Leg_lift': 'True' if the curve of the metal leg is away from the hole, or if only the hole or only the leg is visible; 'False' if the metal leg is inserted in the hole or is connected to the hole surface.
- 'Measurements': a measured value ranging from 1 millimeter to 2.5 millimeters.
Format the response exactly as specified:
{ "Leg_lift": True/False, "Measurements": "Measured length" }"""
else:
prompt = "Not a valid combination"
elif method == "polarity":
if component_label in ["choke_full_rd_gd"]:
prompt = """You are a Choke Polarity Classifier. Analyze the provided cropped image of a choke component.
Focus on the color pattern and orientation. For choke components, the correct polarity is indicated by the gold color being on the left and the red color being on the right.
Return 'True' if the gold color is on the left and the red color is on the right; return 'False' if the colors are reversed.
Format the response exactly as specified: { "Polarity": True/False, "Measurements": None }"""
elif component_label in ["choke_full_gn_ye"]:
prompt = """You are a Choke Polarity Classifier. Analyze the provided cropped image of a choke component.
Focus on the color pattern and orientation. For choke components, the correct polarity is indicated by the green color being on the left and the yellow color being on the right.
Return 'True' if the green color is on the left and the yellow color is on the right; return 'False' if the colors are reversed.
Format the response exactly as specified: { "Polarity": True/False, "Measurements": None }"""
elif component_label in ["DC_cap_blue", "DC_cap_black"]:
prompt = """You are a Polarity Classifier. Analyze the provided cropped image of capacitor. Consider the black capacitor that takes largest portion of the image, ignore others.
Focus on the long white strip on the largest capacitor. Return 'True' if there is a white strip on the left side of the capacitor(correct polarity); return 'False' if the white strip
is on the right side of the capacitor(Incorrect polarity). Format the response exactly as specified: { "Polarity": True/False, "Measurements": None }"""
else:
prompt = "Not a valid combination"
elif method == "angle":
if component_label in ["Y_capacitor","varistor_red","varistor_grey"]:
prompt = """You are an Angle Shift Classifier. Analyze the provided image focusing on the Y capacitor or varistor.
The Y capacitor or varistor can be blue, white, or red in color, and it is almost circular in shape.
Consider only the largest Y capacitor or varistor.
Determine if the largest component is tilted or on the verge of falling. Use the green boundary as a reference for checking whether the largest capacitor is tilted.
If the largest Y capacitor or varistor is tilted and is outside the green boundary, return 'True'; otherwise, return 'False'.
Format the response exactly as specified:
{ "Angle": True/False, "Measurements": None}"""
elif component_label in ["DC_cap_blue", "DC_cap_black"]:
prompt = """You are a Capacitor shift classifier. Analyze the provided image of the capacitor and check whether it's touching the red line.
Consider the capacitor that takes up the largest portion of the image and is located inside a green rectangle. Do not approximate; use only the red line as drawn.
Use the red line as a reference. If the base of the largest capacitor is under the red line, return 'False'; otherwise, return 'True'.
Format the response exactly as specified:
{ "Angle": True/False, "Measurements": None }"""
else:
prompt = "Not a valid combination"
elif method == "detection":
if component_label in ["Y_capacitor"]:
prompt = """You are a Y Capacitor Object Detector. Analyze the provided image and focus on identifying the Y capacitor.
The Y capacitor is a roughly circular, blue object. It may appear in different views (top view, side view, etc.) and may sometimes occupy only 20-30% of the image.
Determine if a blue object resembling a Y capacitor is present in the center of the image.
Return 'False' if you are not confident that a Y capacitor is present; return 'True' if it is present.
Format the response exactly as specified:
{ "Detection": True/False, "Measurements": None} """
elif component_label in ["DC_cap_blue"]:
prompt = """You are a Blue DC Capacitor Object Detector . Analyze the provided cropped image and focus on the object.
The DC capacitor is a blue, cylindrical object.
Determine if the object is present in the image.
Return 'False' if the object is missing; return 'True' if it is present.
Format the response exactly as specified:
{ "Detection": True/False, "Measurements": None} """
elif component_label in ["DC_cap_black"]:
prompt = """You are a Black DC Capacitor Object Detector . Analyze the provided cropped image and focus on the object.
The DC capacitor is a black, cylindrical object.
Determine if the object is present in the image.
Return 'False' if the object is missing; return 'True' if it is present.
Format the response exactly as specified:
{ "Detection": True/False, "Measurements": None} """
elif component_label in ["choke_full_rd_gd"]:
prompt = """You are a Red-Gold Choke Object Detector. Analyze the provided cropped image and focus on the choke component.
The choke component should have distinct red and gold colors.
Determine if the choke component with red on one side and gold on the other side is present in the image.
Return 'False' if the choke component is missing; return 'True' if it is present.
Format the response exactly as specified:
{ "Detection": True/False, "Measurements": None} """
elif component_label in ["choke_full_gn_ye"]:
prompt = """You are a Green-Yellow Choke Object Detector. Analyze the provided cropped image and focus on the choke component.
The choke component should have distinct green and yellow colors.
Determine if the choke component with green on one side and yellow on the other side is present in the image.
Return 'False' if the choke component is missing; return 'True' if it is present.
Format the response exactly as specified:
{ "Detection": True/False, "Measurements": None} """
elif component_label in ["Inductor"]:
prompt = """You are an Inductor Object Detector. Analyze the provided cropped image and focus on the object.
The inductor is typically a component with a dark (black/brown/grey) body.
Determine if the inductor with the specified color is present in the image.
Return 'False' if the inductor object is missing; return 'True' if it is present.
Format the response exactly as specified:
{ "Detection": True/False, "Measurements": None} """
elif component_label in ["X2_capacitor"]:
prompt = """You are an X2 Capacitor Object Detector. Analyze the provided cropped image and focus on the object.
An X2 capacitor is typically a blue box-shaped cuboid.
Determine if the X2 capacitor with the specified blue color and box-like shape is present in the image.
Return 'False' if the X2 capacitor object is missing; return 'True' if it is present.
Format the response exactly as specified:
{ "Detection": True/False, "Measurements": None} """
elif component_label in ["Fuse_red"]:
prompt = """You are a Red Fuse Object Detector. Analyze the provided cropped image and focus on the object.
A red fuse is typically a red cuboid box-shaped object.
Determine if the red fuse with the specified color and box-like shape is present in the image.
Return 'False' if the red fuse object is missing; return 'True' if it is present.
Format the response exactly as specified:
{ "Detection": True/False, "Measurements": None} """
elif component_label in ["varistor_grey"]:
prompt = """You are a Varistor Object Detector. Analyze the provided image and focus on identifying the white/grey varistor.
A varistor is a roughly circular component in side view, with a white or greyish color. The object may appear in different views (top view, side view, etc.) and may sometimes occupy only 20-30% of the image.
Determine if a circular object resembling a varistor is present anywhere in the image.
Return 'False' if the white varistor object is missing; return 'True' if it is present.
Format the response exactly as specified:
{ "Detection": True/False, "Measurements": None} """
elif component_label in ["varistor_red"]:
prompt = """You are a Red Varistor Object Detector. Analyze the provided image and focus on identifying the red varistor.
A varistor is typically an almost circular component with a red color. The object may appear in different views (top view, side view, etc.) and may sometimes occupy only 20-30% of the image.
Determine if a red circular object resembling a varistor is present in the image.
Return 'False' if the red varistor object is missing; return 'True' if it is present.
Format the response exactly as specified:
{ "Detection": True/False, "Measurements": None} """
elif component_label in ["transformer"]:
prompt = """You are a Transformer Object Detector. Analyze the provided cropped image and focus on the object.
This particular transformer is characterized by its rectangular shape and metallic contacts.
Determine if this transformer is present in the image.
Return 'False' if the transformer object is missing; return 'True' if it is present.
Format the response exactly as specified:
{ "Detection": True/False, "Measurements": None} """
elif component_label in ["Transistor"]:
prompt = """You are a Transistor Object Detector. Analyze the provided cropped image and focus on the object.
The transistor has a blue cylindrical shape with metal contacts on the ends.
Determine if this blue cylindrical transistor is present in the image.
Return 'False' if the transistor object is missing; return 'True' if it is present.
Format the response exactly as specified:
{ "Detection": True/False, "Measurements": None} """
elif component_label in ["Fuse_black"]:
prompt = """You are a Black Fuse Object Detector. Analyze the provided cropped image and focus on the object.
A black fuse is typically a black cuboid box-shaped object.
Determine if the black fuse with the specified color and box-like shape is present in the image.
Return 'False' if the black fuse object is missing; return 'True' if it is present.
Format the response exactly as specified:
{ "Detection": True/False, "Measurements": None} """
else:
prompt = "Not a valid combination"
return prompt
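
# Illustrative usage: get_prompt("detection", "Y_capacitor") returns the Y-capacitor detection
# prompt, while an unsupported pairing such as get_prompt("leg_lift", "Inductor") returns
# "Not a valid combination".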
def gradio_interface(image, prompt):
question = prompt
if image.mode != "RGB":
image = image.convert("RGB")
image = image.resize((500,500))
answer = predict(image, question)
# answer = post_processing(answer, question)
return answer
def post_processing(decoded_output, question):
    # Only needed for the commented-out Florence pipeline above, whose processor returns a dict keyed by the task prompt.
    if question in decoded_output.keys():
        decoded_output = decoded_output[question]
    return decoded_output
Method =["detection", "angle", "leg_lift", "polarity"]
Component_list = ["choke_full_rd_gd", "choke_full_gn_ye", "Y_capacitor","varistor_red","varistor_grey", "DC_cap_blue", "DC_cap_black", "Inductor", "X2_capacitor", "Fuse_red", "transformer", "Transistor", "Fuse_black"]
with gr.Blocks() as demo:
gr.Markdown("Florence-enphase Leg Lift, Angle shift, Polarity and detection classifier")
image = gr.Image(type="pil", label="Upload Image", width=500, height=500) # Ensures image is passed as a PIL object
method_choice = gr.Dropdown(choices=Method, label="Method")
componet_choice = gr.Dropdown(choices = Component_list, label="Component label")
prompt = gr.Textbox(value=get_prompt(Method[0], Component_list[0]), label="Prompt")
submit_btn = gr.Button("Submit")
# @gr.render(inputs=[method_choice, componet_choice])
def generate_prompt(method, component_label):
prompts = get_prompt(method, component_label)
return prompts
method_choice.change(fn=generate_prompt, inputs=[method_choice, componet_choice], outputs=[prompt])
componet_choice.change(fn=generate_prompt, inputs=[method_choice, componet_choice], outputs=[prompt])
submit_btn.click(fn=gradio_interface, inputs=[image, prompt], outputs=gr.Textbox(label="Answer"))
demo.launch(debug=True)
# iface = gr.Interface(
# fn=gradio_interface,
# inputs=[
# gr.Image(type="pil", label="Upload Image"), # Ensures image is passed as a PIL object
# gr.Dropdown(choices=Method, label="Method"),
# gr.Dropdown(choices = Component_list, label="Component label")
# # gr.Textbox(label="Enter your question or edit the default prompt", lines=6, value=DEFAULT_PROMPT) # Default prompt pre-filled and editable
# ],
# outputs=gr.Textbox(label="Answer"),
# title="Florence-enphase Leg Lift Classifier",
# description=("Upload an image and ask a question about the leg lift. The model will classify whether "
# "the leg is inserted in the hole or not based on the image. You can edit the default prompt if needed.")
# )
# iface.launch(debug=True)
# img_path = '/content/choke_gn_1054.jpg'
# img = Image.open(img_path).convert('RGB')
# gradio_interface(img, "polarity", "choke_full_rd_gd")