"""Gradio demo: ask an OpenAI chat model to classify component images.

The UI lets the user pick an inspection method and a component label, shows
the matching prompt (editable), and sends the prompt plus the uploaded image
to the OpenAI chat-completions API.
"""

import base64
import os
import re
from io import BytesIO

import gradio as gr
import openai
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

# NOTE: torch / PIL / transformers are only referenced by the disabled
# local-model code kept in comments below; the imports are retained so that
# code can be re-enabled without further edits.

# model_name = "arjunanand13/Florence-enphase2"
# model_name = "Stardragon2099/florence-adlp-40e"
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(f"Using device: {device}")
# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
# model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch_dtype, trust_remote_code=True).to(device)
# processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

# Retrieve the API key from Hugging Face Secrets
openai.api_key = os.environ.get("Chatgpt_api")
# torch.cuda.empty_cache()

# Vision-capable chat model used by predict() unless the caller overrides it.
VISION_MODEL = "gpt-4o"

DEFAULT_PROMPT = (
    "You are a Leg Lift Classifier. There is an image of a throughput component "
    "and we need to identify if the leg is inserted in the hole or not. Return 'True' "
    "if any leg is not completely seated in the hole; return 'False' if the leg is inserted "
    "in the hole. Return only the required JSON in this format: {Leg_lift: , Reason: }."
)

# def predict(img, prompt):
#     inputs = processor(text=prompt, images=img, return_tensors="pt").to(device, torch_dtype)
#     generated_ids = model.generate(
#         input_ids=inputs["input_ids"],
#         pixel_values=inputs["pixel_values"],
#         max_new_tokens=1024,
#         do_sample=False,
#         num_beams=3
#     )
#     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
#     parsed_answer = processor.post_process_generation(generated_text, task= prompt, image_size=(img.width, img.height))
#     return parsed_answer
#     if not isinstance(image, Image.Image):
#         raise ValueError(f"Expected image to be PIL.Image, but got {type(image)}")
#     encoding = processor(images=image, text=question, return_tensors="pt").to(device)
#     with torch.no_grad():
#         outputs = model.generate(**encoding, max_length=725)
#     answer = processor.batch_decode(outputs, skip_special_tokens=True)[0]
#     return answer


def _request_completion(messages, model):
    """Send *messages* to the chat-completions endpoint and return the reply text.

    Works with both generations of the ``openai`` package: the >=1.0
    module-level client and the older ``ChatCompletion`` resource class.
    """
    if hasattr(openai, "OpenAI"):
        # openai>=1.0: attribute-style response objects.
        response = openai.chat.completions.create(model=model, messages=messages)
        return response.choices[0].message.content
    # openai<1.0: dict-style response.
    response = openai.ChatCompletion.create(model=model, messages=messages)
    return response["choices"][0]["message"]["content"]


def predict(img, prompt, model=VISION_MODEL):
    """Ask the chat model to answer *prompt* about the PIL image *img*.

    Args:
        img: PIL image to analyse; it is serialised to PNG in memory.
        prompt: Instruction text, sent as the system message.
        model: Name of the chat model to call (must accept image input).

    Returns:
        The model's reply text, or ``"Error: <details>"`` if encoding the
        image or calling the API fails (so the UI always gets a string).
    """
    try:
        # Encode the Pillow image as a base64 PNG data URL.
        buffer = BytesIO()
        img.save(buffer, format="PNG")  # can change to JPEG if needed
        encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")

        # system_prompt = (
        #     "You are an AI that can process text and images. The user has uploaded an image encoded in base64 "
        #     "format along with a text prompt. You need to consider both the image and the text while responding."
        # )
        messages = [
            {"role": "system", "content": prompt},
            {
                "role": "user",
                # The image must be an image_url content part; pasting the
                # base64 text into a plain string is read as text only.
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{encoded_image}"
                        },
                    }
                ],
            },
        ]
        return _request_completion(messages, model)
    except Exception as e:  # UI boundary: report the failure instead of crashing
        return f"Error: {e}"


# ---------------------------------------------------------------------------
# Prompt catalogue.
# Prompt wording is reproduced verbatim (typos and spacing included): it is
# the text sent to the model, so editing it changes runtime behaviour.
# ---------------------------------------------------------------------------

_INVALID_COMBINATION = "Not a valid combination"

_Y_CAP_AND_VARISTORS = ("Y_capacitor", "varistor_red", "varistor_grey")
_DC_CAPS = ("DC_cap_blue", "DC_cap_black")

_POLARITY_FORMAT = (
    "Format the response exactly as specified: "
    '{ "Polarity": True/False, "Measurements": None }'
)

_DETECTION_FORMAT = (
    "Format the response exactly as specified: "
    '{ "Detection": True/False, "Measurements": None} '
)

_LEG_LIFT_PROMPT = (
    "You are a Leg Lift length Analyzer. "
    "Analyze the provided cropped image of a throughput component and determine "
    "the length of the metal tip from the curve in it . "
    "Focus on the metal leg tip and the hole . "
    "The length will not exceed 2.5 milli meter . "
    "Return a JSON object with the following keys: "
    "- 'Leg_lift': 'True' if the curve of the metal leg is away from the hole or "
    "if only the hole or leg is visible.and 'False' if the metal leg is inserted "
    "in the hole or is connected to the hole surface. "
    "- 'Measurements': A Measumrement value ranging from 1 milli meter to "
    "2.5 milli meter . "
    "Format the response exactly as specified: "
    '{ "Leg_lift":True/False , "Measurements": "Measured length"}'
)

_CHOKE_RD_GD_POLARITY_PROMPT = (
    "You are a Choke Polarity Classifier. "
    "Analyze the provided cropped image of a choke component. "
    "Focus on the color pattern and orientation. "
    "For choke components, the correct polarity is indicated by the gold color "
    "being on the left and the red color being on the right. "
    "Return 'True' if the gold color is on the left and the red color is on the "
    "right; return 'False' if the colors are reversed. "
    + _POLARITY_FORMAT
)

_CHOKE_GN_YE_POLARITY_PROMPT = (
    "You are a Choke Polarity Classifier. "
    "Analyze the provided cropped image of a choke component. "
    "Focus on the color pattern and orientation. "
    "For choke components, the correct polarity is indicated by the green color "
    "being on the left and the yellow color being on the right. "
    "Return 'True' if the green color is on the left and the yellow color is on "
    "the right; return 'False' if the colors are reversed. "
    + _POLARITY_FORMAT
)

_DC_CAP_POLARITY_PROMPT = (
    "You are a Polarity Classifier. "
    "Analyze the provided cropped image of capacitor. "
    "Consider the black capacitor that takes largest portion of the image, "
    "ignore others. "
    "Focus on the long white strip on the largest capacitor. "
    "Return 'True' if there is a white strip on the left side of the "
    "capacitor(correct polarity); return 'False' if the white strip is on the "
    "right side of the capacitor(Incorrect polarity). "
    + _POLARITY_FORMAT
)

_Y_CAP_ANGLE_PROMPT = (
    "You are an Angle Shift Classifier. "
    "Analyze the provided image focusing on the Y capacitor or varistor. "
    "the Y capacitor and varistor can be blue, white or red in color. "
    "it is allmost circular in shape. "
    "consider only the largest Y capacitor or varistor. "
    "Determine if the largest component is tilted or on the verge of falling. "
    "Use the green boundary as a reference for checking whether the largest "
    "capacitor is tilted. "
    "If the largest Y capacitor or varistor is tilted and is outside the green "
    "boundary, return 'True'; otherwise, return 'False'. "
    "Format the response exactly as specified: "
    '{ "Angle": True/False, "Measurements": None}'
)

_DC_CAP_ANGLE_PROMPT = (
    "You are a Capacitor shift classifier. "
    "Analyze the provided image of the capacitor and check whether it's "
    "touching the red line. "
    "Consider the capacitor that takes largest portion of the image and is "
    "located inside a green rectangle. "
    "Do not approximate, take the current red line only. "
    "Use the red line as a reference. "
    "if the base of the largest capacitor is under the red line, return "
    "'False'; otherwise, return 'True'. "
    "Format the response exactly as specified: "
    '{ "Angle": True/False, "Measurements": None }'
)

_DETECTION_PROMPTS = {
    "Y_capacitor": (
        "You are a Y Capacitor Object Detector. "
        "Analyze the provided image and focus on identifying the Y capacitor. "
        "The Y capacitor is sort of a circular object in blue color. "
        "The object may appear in different views (top view, side view, etc.) "
        "and may occupy only 20-30% of the image portion sometimes. "
        "Determine if a blue object resembling a Y capacitor is present in the "
        "center of the image. "
        "Return 'False' if you arent confident about Y capacitor object being "
        "present; return 'True' if it is present. "
        + _DETECTION_FORMAT
    ),
    "DC_cap_blue": (
        "You are a Blue DC Capacitor Object Detector . "
        "Analyze the provided cropped image and focus on the object. "
        "DC capacitor is a blue color object which is in cylindrical shape . "
        "Determine if the object is present in the image. "
        "Return 'False' if the object is missing; return 'True' if it is "
        "present. "
        + _DETECTION_FORMAT
    ),
    "DC_cap_black": (
        "You are a Black DC Capacitor Object Detector . "
        "Analyze the provided cropped image and focus on the object. "
        "DC capacitor is a black color object which is in cylindrical shape . "
        "Determine if the object is present in the image. "
        "Return 'False' if the object is missing; return 'True' if it is "
        "present. "
        + _DETECTION_FORMAT
    ),
    "choke_full_rd_gd": (
        "You are a Red-Gold Choke Object Detector. "
        "Analyze the provided cropped image and focus on the choke component. "
        "The choke component should have distinct red and gold colors. "
        "Determine if the choke component with red on one side and gold on the "
        "other side is present in the image. "
        "Return 'False' if the choke component is missing; return 'True' if it "
        "is present. "
        + _DETECTION_FORMAT
    ),
    "choke_full_gn_ye": (
        "You are a Green-Yellow Choke Object Detector. "
        "Analyze the provided cropped image and focus on the choke component. "
        "The choke component should have distinct green and yellow colors. "
        "Determine if the choke component with green on one side and yellow on "
        "the other side is present in the image. "
        "Return 'False' if the choke component is missing; return 'True' if it "
        "is present. "
        + _DETECTION_FORMAT
    ),
    "Inductor": (
        "You are an Inductor Object Detector. "
        "Analyze the provided cropped image and focus on the object. "
        "The inductor is typically a component with a dark (black/ brown/grey) "
        "body. "
        "Determine if the inductor with the specified color is present in the "
        "image. "
        "Return 'False' if the inductor object is missing; return 'True' if it "
        "is present. "
        + _DETECTION_FORMAT
    ),
    "X2_capacitor": (
        "You are an X2 Capacitor Object Detector. "
        "Analyze the provided cropped image and focus on the object. "
        "An X2 capacitor is typically a blue box-shaped cuboid. "
        "Determine if the X2 capacitor with the specified blue color and "
        "box-like shape is present in the image. "
        "Return 'False' if the X2 capacitor object is missing; return 'True' if "
        "it is present. "
        + _DETECTION_FORMAT
    ),
    "Fuse_red": (
        "You are a Red Fuse Object Detector. "
        "Analyze the provided cropped image and focus on the object. "
        "A red fuse is typically a red cuboid box-shaped object. "
        "Determine if the red fuse with the specified color and box-like shape "
        "is present in the image. "
        "Return 'False' if the red fuse object is missing; return 'True' if it "
        "is present. "
        + _DETECTION_FORMAT
    ),
    "varistor_grey": (
        "You are a Varistor Object Detector. "
        "Analyze the provided image and focus on identifying the white/grey "
        "varistor. "
        "A varistor is sort of circular-shaped in side view with a white or "
        "greyish color. "
        "The object may appear in different views (top view, side view, etc.) "
        "and may occupy only 20-30% of the image sometimes. "
        "Determine if a circular object resembling a varistor is present "
        "anywhere in the image. "
        "Return 'False' if the white varistor object is missing; return 'True' "
        "if it is present. "
        + _DETECTION_FORMAT
    ),
    "varistor_red": (
        "You are a Red Varistor Object Detector. "
        "Analyze the provided image and focus on identifying the red varistor. "
        "A varistor is typically almost circular-shaped component with a red "
        "color. "
        "The object may appear in different views (top view, side view, etc.) "
        "and may occupy only 20-30% of the image sometimes. "
        "Determine if a red circular object resembling a varistor is present in "
        "the image. "
        "Return 'False' if the red varistor object is missing; return 'True' if "
        "it is present. "
        + _DETECTION_FORMAT
    ),
    "transformer": (
        "You are a Transformer Object Detector. "
        "Analyze the provided cropped image and focus on the object. "
        "This particular transformer is characterized by its rectangular shape "
        "and metallic contacts. "
        "Determine if this transformer is present in the image. "
        "Return 'False' if the transformer object is missing; return 'True' if "
        "it is present. "
        + _DETECTION_FORMAT
    ),
    "Transistor": (
        "You are a Transistor Object Detector. "
        "Analyze the provided cropped image and focus on the object. "
        "The transistor has a blue cylindrical shape with metal contacts on the "
        "ends. "
        "Determine if this blue cylindrical transistor is present in the image. "
        "Return 'False' if the transistor object is missing; return 'True' if "
        "it is present. "
        + _DETECTION_FORMAT
    ),
    "Fuse_black": (
        "You are a Black Fuse Object Detector. "
        "Analyze the provided cropped image and focus on the object. "
        "A black fuse is typically a black cuboid box-shaped object. "
        "Determine if the black fuse with the specified color and box-like "
        "shape is present in the image. "
        "Return 'False' if the black fuse object is missing; return 'True' if "
        "it is present. "
        + _DETECTION_FORMAT
    ),
}

# method -> component label -> prompt text.
_PROMPTS = {
    "leg_lift": dict.fromkeys(_Y_CAP_AND_VARISTORS, _LEG_LIFT_PROMPT),
    "polarity": {
        "choke_full_rd_gd": _CHOKE_RD_GD_POLARITY_PROMPT,
        "choke_full_gn_ye": _CHOKE_GN_YE_POLARITY_PROMPT,
        **dict.fromkeys(_DC_CAPS, _DC_CAP_POLARITY_PROMPT),
    },
    "angle": {
        **dict.fromkeys(_Y_CAP_AND_VARISTORS, _Y_CAP_ANGLE_PROMPT),
        **dict.fromkeys(_DC_CAPS, _DC_CAP_ANGLE_PROMPT),
    },
    "detection": _DETECTION_PROMPTS,
}


def get_prompt(method, component_label=None):
    """Return the prompt for a (method, component label) pair.

    Args:
        method: One of "detection", "angle", "leg_lift" or "polarity".
        component_label: Component name as listed in ``Component_list``.

    Returns:
        The prompt text, or "Not a valid combination" when the method is
        unknown or has no prompt for that component.
    """
    # Pure lookup: no module-level state is touched, so an unknown or unset
    # method can never return a stale value.
    return _PROMPTS.get(method, {}).get(component_label, _INVALID_COMBINATION)


def gradio_interface(image, prompt):
    """Submit handler: run ``predict`` on the uploaded image with *prompt*.

    Args:
        image: PIL image from the upload widget, or ``None`` if nothing was
            uploaded.
        prompt: Prompt text from the textbox.

    Returns:
        The text to show in the answer box.
    """
    if image is None:
        # Submit was pressed before an image was uploaded.
        return "Error: no image provided"
    if image.mode != "RGB":
        image = image.convert("RGB")
    answer = predict(image, prompt)
    # answer = post_processing(answer, question)
    return answer


def post_processing(decoded_output, question):
    """Return ``decoded_output[question]`` when that key exists.

    Any other input, including non-dict values such as the plain strings
    returned by ``predict``, is passed through unchanged.
    """
    if isinstance(decoded_output, dict) and question in decoded_output:
        return decoded_output[question]
    return decoded_output


Method = ["detection", "angle", "leg_lift", "polarity"]
Component_list = [
    "choke_full_rd_gd",
    "choke_full_gn_ye",
    "Y_capacitor",
    "varistor_red",
    "varistor_grey",
    "DC_cap_blue",
    "DC_cap_black",
    "Inductor",
    "X2_capacitor",
    "Fuse_red",
    "transformer",
    "Transistor",
    "Fuse_black",
]

with gr.Blocks() as demo:
    gr.Markdown("Florence-enphase Leg Lift, Angle shift, Polarity and detection classifier")
    # type="pil" ensures the image is passed to the handler as a PIL object.
    image = gr.Image(type="pil", label="Upload Image", width=500, height=500)
    # Both dropdowns start on the same selection the prompt box is pre-filled
    # for, so the first change event never sees a None value.
    method_choice = gr.Dropdown(choices=Method, value=Method[0], label="Method")
    component_choice = gr.Dropdown(
        choices=Component_list, value=Component_list[0], label="Component label"
    )
    prompt = gr.Textbox(value=get_prompt(Method[0], Component_list[0]), label="Prompt")
    submit_btn = gr.Button("Submit")
    answer_box = gr.Textbox(label="Answer")

    def generate_prompt(method, component_label):
        """Refresh the prompt box when either dropdown changes."""
        return get_prompt(method, component_label)

    method_choice.change(
        fn=generate_prompt, inputs=[method_choice, component_choice], outputs=[prompt]
    )
    component_choice.change(
        fn=generate_prompt, inputs=[method_choice, component_choice], outputs=[prompt]
    )
    submit_btn.click(fn=gradio_interface, inputs=[image, prompt], outputs=answer_box)

demo.launch(debug=True)

# iface = gr.Interface(
#     fn=gradio_interface,
#     inputs=[
#         gr.Image(type="pil", label="Upload Image"),  # Ensures image is passed as a PIL object
#         gr.Dropdown(choices=Method, label="Method"),
#         gr.Dropdown(choices = Component_list, label="Component label")
#         # gr.Textbox(label="Enter your question or edit the default prompt", lines=6, value=DEFAULT_PROMPT)  # Default prompt pre-filled and editable
#     ],
#     outputs=gr.Textbox(label="Answer"),
#     title="Florence-enphase Leg Lift Classifier",
#     description=("Upload an image and ask a question about the leg lift. The model will classify whether "
#                  "the leg is inserted in the hole or not based on the image. You can edit the default prompt if needed.")
# )
# iface.launch(debug=True)

# img_path = '/content/choke_gn_1054.jpg'
# img = Image.open(img_path).convert('RGB')
# gradio_interface(img, "polarity", "choke_full_rd_gd")