File size: 18,555 Bytes
5d6090e
 
 
 
 
 
bbc1ea6
345e5f6
 
 
5d6090e
bbc1ea6
345e5f6
 
 
5d6090e
345e5f6
 
 
 
5d6090e
345e5f6
 
 
 
5d6090e
 
 
 
 
 
345e5f6
 
5d6090e
345e5f6
 
 
 
 
 
 
 
5d6090e
345e5f6
5d6090e
345e5f6
bbc1ea6
 
 
 
 
 
 
 
 
 
 
 
 
5d6090e
345e5f6
 
641b8ed
 
 
345e5f6
 
 
 
 
 
c7d73b1
 
 
67c7e92
c7d73b1
345e5f6
1896a77
c7d73b1
345e5f6
45e3fef
1896a77
345e5f6
 
 
 
33339ca
fcf926a
1bbbb4e
345e5f6
 
 
 
 
5d6090e
 
 
bbc1ea6
 
5d6090e
bbc1ea6
5d6090e
bbc1ea6
 
5d6090e
bbc1ea6
 
 
 
5d6090e
 
 
 
 
 
 
bbc1ea6
 
5d6090e
 
 
 
 
bbc1ea6
 
 
 
5d6090e
bbc1ea6
 
 
5d6090e
 
 
 
bbc1ea6
5d6090e
 
bbc1ea6
 
 
 
5d6090e
bbc1ea6
 
 
 
 
 
5d6090e
bbc1ea6
5d6090e
 
 
bbc1ea6
5d6090e
bbc1ea6
 
 
 
5d6090e
 
bbc1ea6
 
 
 
 
 
5d6090e
 
bbc1ea6
 
 
 
 
 
5d6090e
 
bbc1ea6
 
 
 
 
 
5d6090e
 
bbc1ea6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d6090e
bbc1ea6
 
 
 
5d6090e
bbc1ea6
 
5d6090e
bbc1ea6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d6090e
 
bbc1ea6
 
 
 
 
 
 
 
5d6090e
 
bbc1ea6
 
5d6090e
 
1800997
5d6090e
 
7ff9e80
5d6090e
 
bbc1ea6
 
 
 
 
5d6090e
 
 
bbc1ea6
 
6e5e0b9
bbc1ea6
 
 
 
 
 
 
 
 
d8012f6
bbc1ea6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
import os
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
import torch
import gradio as gr
import re
import openai
import base64
from io import BytesIO

# model_name = "arjunanand13/Florence-enphase2"
# model_name = "Stardragon2099/florence-adlp-40e"
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(f"Using device: {device}")

# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
# model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch_dtype, trust_remote_code=True).to(device)
# processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

# Retrieve the API key from Hugging Face Secrets.
# The key is read from the `Chatgpt_api` environment variable; `os.environ.get`
# yields None when the variable is not set, in which case no key is configured here.
openai.api_key = os.environ.get("Chatgpt_api")

# torch.cuda.empty_cache()

# Generic leg-lift prompt built from adjacent string literals.
# In the visible part of this file it is only mentioned in commented-out
# interface code; the active UI builds its prompts through get_prompt().
DEFAULT_PROMPT = ("You are a Leg Lift Classifier. There is an image of a throughput component "
                  "and we need to identify if the leg is inserted in the hole or not. Return 'True' "
                  "if any leg is not completely seated in the hole; return 'False' if the leg is inserted "
                  "in the hole. Return only the required JSON in this format: {Leg_lift: , Reason: }.")

# def predict(img, prompt):
#     inputs = processor(text=prompt, images=img, return_tensors="pt").to(device, torch_dtype)

#     generated_ids = model.generate(
#         input_ids=inputs["input_ids"],
#         pixel_values=inputs["pixel_values"],
#         max_new_tokens=1024,
#         do_sample=False,
#         num_beams=3
#     )
#     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

#     parsed_answer = processor.post_process_generation(generated_text, task= prompt, image_size=(img.width, img.height))

#     return parsed_answer


    # if not isinstance(image, Image.Image):
    #     raise ValueError(f"Expected image to be PIL.Image, but got {type(image)}")


    # encoding = processor(images=image, text=question, return_tensors="pt").to(device)

    # with torch.no_grad():
    #     outputs = model.generate(**encoding, max_length=725)

    # answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
    # return answer

def predict(img, prompt):
    """Send an image and an instruction prompt to the OpenAI chat model.

    The image is PNG-encoded in memory and embedded in the user message as a
    base64 ``data:`` URL, so the model actually receives the pixels. Sending
    only an uploaded file's ID as plain text gives the model nothing to look at.

    Args:
        img: Image object exposing ``save(fp, format=...)`` (a PIL image in
            this app).
        prompt: Instruction text; used as the system message with a
            "respond with JSON only" suffix appended.

    Returns:
        The text content of the first completion choice, or a string of the
        form ``"Error: <details>"`` if encoding the image or calling the API
        fails.
    """
    try:
        # Serialize the image to PNG bytes without touching the filesystem.
        buffer = BytesIO()
        img.save(buffer, format="PNG")
        encoded_image = base64.b64encode(buffer.getvalue()).decode("ascii")

        messages = [
            # The leading space keeps the suffix from fusing with the last
            # word of the prompt.
            {"role": "system", "content": prompt + " respond with JSON only"},
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{encoded_image}"
                        },
                    }
                ],
            },
        ]

        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=messages,
        )
        return response['choices'][0]['message']['content']
    except Exception as e:
        # UI boundary: report the failure as text in the answer box instead
        # of raising into the Gradio callback.
        return f"Error: {e}"

def get_prompt(method, component_label=None):
    """Return the inspection prompt for a method / component combination.

    Args:
        method: One of "leg_lift", "polarity", "angle" or "detection".
        component_label: Component identifier such as "Y_capacitor" or
            "DC_cap_blue"; which labels are supported depends on ``method``.

    Returns:
        The prompt text for the combination, or the string
        "Not a valid combination" when the method is unknown or the component
        is not supported for that method.
    """
    # A local variable with a default is used on purpose: writing to a module
    # global here would leak state between calls and make an unknown method
    # return whatever an earlier call (or another module-level assignment)
    # left behind.
    # NOTE: the prompt literals below keep their original internal whitespace,
    # since that whitespace is part of the text sent to the model.
    prompt_text = "Not a valid combination"

    if method == "leg_lift":
        if component_label in ["Y_capacitor","varistor_red","varistor_grey"]:
            prompt_text = """You are a Leg Lift length Analyzer. Analyze the provided cropped image of a throughput component and determine the length of the metal tip from the curve in it .
                Focus on the metal leg tip and the hole .
                The length will not exceed 2.5 milli meter .
                Return a JSON object with the following keys:
                - 'Leg_lift': 'True' if the curve of the metal leg is away from the hole or if only the hole or leg is visible.and 'False' if the metal leg is inserted in the hole or is connected to the hole surface.
                - 'Measurements': A Measumrement value ranging from 1 milli meter to 2.5 milli meter .

                Format the response exactly as specified:
                { "Leg_lift":True/False , "Measurements": "Measured length"}"""

    elif method == "polarity":
        if component_label in ["choke_full_rd_gd"]:
            prompt_text = """You are a Choke Polarity Classifier. Analyze the provided cropped image of a choke component.
            Focus on the color pattern and orientation. For choke components, the correct polarity is indicated by the gold color being on the left and the red color being on the right.
            Return 'True' if the gold color is on the left and the red color is on the right; return 'False' if the colors are reversed.
            Format the response exactly as specified: { "Polarity": True/False, "Measurements": None }"""

        elif component_label in ["choke_full_gn_ye"]:
            prompt_text = """You are a Choke Polarity Classifier. Analyze the provided cropped image of a choke component.
            Focus on the color pattern and orientation. For choke components, the correct polarity is indicated by the green color being on the left and the yellow color being on the right.
            Return 'True' if the green color is on the left and the yellow color is on the right; return 'False' if the colors are reversed.
            Format the response exactly as specified: { "Polarity": True/False, "Measurements": None }"""

        elif component_label in ["DC_cap_blue", "DC_cap_black"]:
            prompt_text = """You are a Polarity Classifier. Analyze the provided cropped image of capacitor. Consider the black capacitor that takes largest portion of the image, ignore others.
            Focus on the long white strip on the largest capacitor. Return 'True' if there is a white strip on the left side of the capacitor(correct polarity); return 'False' if the white strip
            is on the right side of the capacitor(Incorrect polarity). Format the response exactly as specified: { "Polarity": True/False, "Measurements": None }"""

    elif method == "angle":
        if component_label in ["Y_capacitor","varistor_red","varistor_grey"]:
            prompt_text = """You are an Angle Shift Classifier. Analyze the provided image focusing on the Y capacitor or varistor.
            the Y capacitor and varistor can be blue, white or red in color. it is allmost circular in shape.
            consider only the largest Y capacitor or varistor.
            Determine if the largest component is tilted or on the verge of falling. Use the green boundary as a reference for checking whether the largest capacitor is tilted.
            If the largest Y capacitor or varistor is tilted and is outside the green boundary, return 'True'; otherwise, return 'False'.
            Format the response exactly as specified:
            { "Angle": True/False, "Measurements": None}"""

        elif component_label in ["DC_cap_blue", "DC_cap_black"]:
            prompt_text = """You are a Capacitor shift classifier. Analyze the provided image of the capacitor and check whether it's touching the red line.
            Consider the capacitor that takes largest portion of the image and is located inside a green rectangle. Do not approximate, take the current red line only.
            Use the red line as a reference. if the base of the largest capacitor is under the red line, return 'False'; otherwise, return 'True'.
            Format the response exactly as specified:
            { "Angle": True/False, "Measurements": None }"""

    elif method == "detection":
        if component_label in ["Y_capacitor"]:
            prompt_text = """You are a Y Capacitor Object Detector. Analyze the provided image and focus on identifying the Y capacitor.
            The Y capacitor is sort of a circular object in blue color. The object may appear in different views (top view, side view, etc.) and may occupy only 20-30% of the image portion sometimes.
            Determine if a blue object resembling a Y capacitor is present in the center of the image.
            Return 'False' if you arent confident about Y capacitor object being present; return 'True' if it is present.
            Format the response exactly as specified:
            { "Detection": True/False, "Measurements": None} """

        elif component_label in ["DC_cap_blue"]:
            prompt_text = """You are a  Blue DC Capacitor Object Detector . Analyze the provided cropped image and focus on the object.
            DC capacitor is a blue color object which is in cylindrical shape .
            Determine if the object is present in the image.
            Return 'False' if the object is missing; return 'True' if it is present.
            Format the response exactly as specified:
            { "Detection": True/False, "Measurements": None} """

        elif component_label in ["DC_cap_black"]:
            prompt_text = """You are a  Black DC Capacitor Object Detector . Analyze the provided cropped image and focus on the object.
            DC capacitor is a black color object which is in cylindrical shape .
            Determine if the object is present in the image.
            Return 'False' if the object is missing; return 'True' if it is present.
            Format the response exactly as specified:
            { "Detection": True/False, "Measurements": None} """

        elif component_label in ["choke_full_rd_gd"]:
            prompt_text = """You are a Red-Gold Choke Object Detector. Analyze the provided cropped image and focus on the choke component.
            The choke component should have distinct red and gold colors.
            Determine if the choke component with red on one side and gold on the other side is present in the image.
            Return 'False' if the choke component is missing; return 'True' if it is present.
            Format the response exactly as specified:
            { "Detection": True/False, "Measurements": None} """

        elif component_label in ["choke_full_gn_ye"]:
            prompt_text = """You are a Green-Yellow Choke Object Detector. Analyze the provided cropped image and focus on the choke component.
            The choke component should have distinct green and yellow colors.
            Determine if the choke component with green on one side and yellow on the other side is present in the image.
            Return 'False' if the choke component is missing; return 'True' if it is present.
            Format the response exactly as specified:
            { "Detection": True/False, "Measurements": None} """
        elif component_label in ["Inductor"]:
            prompt_text = """You are an Inductor Object Detector. Analyze the provided cropped image and focus on the object.
            The inductor is typically a component with a dark (black/ brown/grey) body.
            Determine if the inductor with the specified color is present in the image.
            Return 'False' if the inductor object is missing; return 'True' if it is present.
            Format the response exactly as specified:
            { "Detection": True/False, "Measurements": None} """
        elif component_label in ["X2_capacitor"]:
            prompt_text = """You are an X2 Capacitor Object Detector. Analyze the provided cropped image and focus on the object.
            An X2 capacitor is typically a blue box-shaped cuboid.
            Determine if the X2 capacitor with the specified blue color and box-like shape is present in the image.
            Return 'False' if the X2 capacitor object is missing; return 'True' if it is present.
            Format the response exactly as specified:
            { "Detection": True/False, "Measurements": None} """
        elif component_label in ["Fuse_red"]:
            prompt_text = """You are a Red Fuse Object Detector. Analyze the provided cropped image and focus on the object.
            A red fuse is typically a red cuboid box-shaped object.
            Determine if the red fuse with the specified color and box-like shape is present in the image.
            Return 'False' if the red fuse object is missing; return 'True' if it is present.
            Format the response exactly as specified:
            { "Detection": True/False, "Measurements": None} """

        elif component_label in ["varistor_grey"]:
            prompt_text = """You are a  Varistor Object Detector. Analyze the provided image and focus on identifying the white/grey varistor.
            A varistor is sort of circular-shaped in side view with a white or greyish color. The object may appear in different views (top view, side view, etc.) and may occupy only 20-30% of the image sometimes.
            Determine if a circular object resembling a varistor is present anywhere in the image.
            Return 'False' if the white varistor object is missing; return 'True' if it is present.
            Format the response exactly as specified:
            { "Detection": True/False, "Measurements": None} """

        elif component_label in ["varistor_red"]:
            prompt_text = """You are a Red Varistor Object Detector. Analyze the provided image and focus on identifying the red varistor.
            A varistor is typically almost circular-shaped component with a red color. The object may appear in different views (top view, side view, etc.) and may occupy only 20-30% of the image sometimes.
            Determine if a red circular object resembling a varistor is present in the image.
            Return 'False' if the red varistor object is missing; return 'True' if it is present.
            Format the response exactly as specified:
            { "Detection": True/False, "Measurements": None} """

        elif component_label in ["transformer"]:
            prompt_text = """You are a Transformer Object Detector. Analyze the provided cropped image and focus on the object.
            This particular transformer is characterized by its rectangular shape and metallic contacts.
            Determine if this transformer is present in the image.
            Return 'False' if the transformer object is missing; return 'True' if it is present.
            Format the response exactly as specified:
            { "Detection": True/False, "Measurements": None} """

        elif component_label in ["Transistor"]:
            prompt_text = """You are a Transistor Object Detector. Analyze the provided cropped image and focus on the object.
            The transistor has a blue cylindrical shape with metal contacts on the ends.
            Determine if this blue cylindrical transistor is present in the image.
            Return 'False' if the transistor object is missing; return 'True' if it is present.
            Format the response exactly as specified:
            { "Detection": True/False, "Measurements": None} """

        elif component_label in ["Fuse_black"]:
            prompt_text = """You are a Black Fuse Object Detector. Analyze the provided cropped image and focus on the object.
            A black fuse is typically a black cuboid box-shaped object.
            Determine if the black fuse with the specified color and box-like shape is present in the image.
            Return 'False' if the black fuse object is missing; return 'True' if it is present.
            Format the response exactly as specified:
            { "Detection": True/False, "Measurements": None} """

    return prompt_text

def gradio_interface(image, prompt):
    """Normalize the uploaded image and forward it with the prompt to predict().

    Args:
        image: Image object from the upload widget (it must expose ``mode``,
            ``convert`` and ``resize``), or None when nothing was uploaded.
        prompt: Prompt text to send along with the image.

    Returns:
        Whatever predict() returns for the normalized image, or an
        "Error: ..." string when no image was supplied.
    """
    # Submit can be clicked with no image; report it as text (the same
    # "Error: ..." style predict() uses) instead of failing on `None.mode`.
    if image is None:
        return "Error: no image provided"

    if image.mode != "RGB":
        image = image.convert("RGB")
    # Every image is scaled to a fixed 500x500 before being sent.
    image = image.resize((500, 500))

    return predict(image, prompt)

def post_processing(decoded_output, question):
    """Unwrap the entry stored under ``question`` when the output has one.

    Args:
        decoded_output: Mapping-like object exposing ``keys()`` and item access.
        question: Key to look up.

    Returns:
        ``decoded_output[question]`` if ``question`` is one of its keys,
        otherwise ``decoded_output`` unchanged.
    """
    available_keys = decoded_output.keys()
    return decoded_output[question] if question in available_keys else decoded_output

# Choices offered in the "Method" dropdown; these are the method names get_prompt() branches on.
Method =["detection", "angle", "leg_lift", "polarity"]
# Choices offered in the "Component label" dropdown.
Component_list = ["choke_full_rd_gd", "choke_full_gn_ye", "Y_capacitor","varistor_red","varistor_grey", "DC_cap_blue", "DC_cap_black", "Inductor", "X2_capacitor", "Fuse_red", "transformer", "Transistor", "Fuse_black"]

# UI definition: an image upload, two dropdowns that select the prompt, an
# editable prompt box, and a submit button whose result goes to an "Answer" box.
with gr.Blocks() as demo:
    gr.Markdown("Florence-enphase Leg Lift, Angle shift, Polarity and detection classifier")
    image = gr.Image(type="pil", label="Upload Image", width=500, height=500)  # Ensures image is passed as a PIL object
    method_choice = gr.Dropdown(choices=Method, label="Method")
    componet_choice = gr.Dropdown(choices = Component_list, label="Component label")
    # The prompt box starts with the prompt for the first method and first component.
    prompt = gr.Textbox(value=get_prompt(Method[0], Component_list[0]), label="Prompt")
    submit_btn = gr.Button("Submit")

    # @gr.render(inputs=[method_choice, componet_choice])
    def generate_prompt(method, component_label):
        """Return the get_prompt() text for the current dropdown selections."""
        prompts = get_prompt(method, component_label)
        return prompts
    
    # Changing either dropdown regenerates the prompt text from both current selections.
    method_choice.change(fn=generate_prompt, inputs=[method_choice, componet_choice], outputs=[prompt])
    componet_choice.change(fn=generate_prompt, inputs=[method_choice, componet_choice], outputs=[prompt])


    # Submit passes the image and the current contents of the prompt box to
    # gradio_interface; the output Textbox is created inline here.
    submit_btn.click(fn=gradio_interface, inputs=[image, prompt], outputs=gr.Textbox(label="Answer"))

# Runs at module level (there is no __main__ guard), so the app launches when this file is executed.
demo.launch(debug=True)

# iface = gr.Interface(
#     fn=gradio_interface,
#     inputs=[
#         gr.Image(type="pil", label="Upload Image"),  # Ensures image is passed as a PIL object
#         gr.Dropdown(choices=Method, label="Method"),
#         gr.Dropdown(choices = Component_list, label="Component label")
#         # gr.Textbox(label="Enter your question or edit the default prompt", lines=6, value=DEFAULT_PROMPT)  # Default prompt pre-filled and editable
#     ],
#     outputs=gr.Textbox(label="Answer"),
#     title="Florence-enphase Leg Lift Classifier",
#     description=("Upload an image and ask a question about the leg lift. The model will classify whether "
#                  "the leg is inserted in the hole or not based on the image. You can edit the default prompt if needed.")
# )

# iface.launch(debug=True)
# img_path = '/content/choke_gn_1054.jpg'
# img = Image.open(img_path).convert('RGB')
# gradio_interface(img, "polarity", "choke_full_rd_gd")