import base64
import json
from io import BytesIO
from typing import Dict, List, Optional, Union

import gradio as gr
import numpy as np
import requests
from PIL import Image
from pdf2image import convert_from_path

prompt = """You are an advanced document parsing bot. Given the fixture schedule I provided, you need to parse out:
1. the name of the fixture
2. the company that produces this fixture
3. the part number of this fixture. It is a series of specification codes connected with "-", and you can get the info by reading the texts marked in a different color or by reading the top bar. Include every specification code in the correct order in your answer.
4. the input wattage of this fixture, short answer. Please answer the wattage according to the part number you found in question 3.

Please format your response as JSON:
{
    "fixture_name": ,
    "manufacture_name": ,
    "mfr": ,
    "input wattage":
}
---
For example:
{
    "fixture_name": "SW24/1.5 Led Strips - Static White",
    "manufacture_name": "Q-Tran Inc.",
    "mfr": "SW24-1.5-DRY-30-BW-BW-WH-CL2-535",
    "input wattage": "1.5W"
}"""


def query_openai_api(messages, model, temperature=0, api_key=None, organization_key=None, json_mode=False):
    """POST a chat-completion request to the OpenAI API.

    Returns a (content, raw_response) tuple, or an "API_ERROR: ..." string
    and None if the request fails.
    """
    try:
        url = "https://api.openai.com/v1/chat/completions"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }
        if organization_key is not None:
            headers["OpenAI-Organization"] = organization_key

        data = {"model": model, "messages": messages, "temperature": temperature}
        if json_mode:
            data["response_format"] = {"type": "json_object"}

        # Make the POST request and return the parsed response
        response = requests.post(url, headers=headers, data=json.dumps(data)).json()
        print(response)
        return response["choices"][0]["message"]["content"].lstrip(), response
    except Exception as e:
        print(f"An error occurred: {e}")
        return f"API_ERROR: {e}", None


def image_to_data_url(image: Union[Image.Image, np.ndarray]) -> str:
    """Encode a PIL image or numpy array as a base64 JPEG data URL."""
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    jpeg_buffer = BytesIO()
    # Save the image as JPEG to the buffer
    image.convert("RGB").save(jpeg_buffer, format="JPEG")
    # Base64-encode the JPEG bytes and decode to a string
    jpg_base64_str = base64.b64encode(jpeg_buffer.getvalue()).decode("utf-8")
    return f"data:image/jpeg;base64,{jpg_base64_str}"


class GPT4V_Client:
    def __init__(self, api_key, organization_key=None, model_name="gpt-4-vision-preview", max_tokens=512):
        self.api_key = api_key
        self.organization_key = organization_key
        self.model_name = model_name
        self.max_tokens = max_tokens

    def chat(self, messages, json_mode):
        return query_openai_api(
            messages,
            self.model_name,
            api_key=self.api_key,
            organization_key=self.organization_key,
            json_mode=json_mode,
        )

    def one_step_chat(
        self,
        text,
        image: Union[Image.Image, np.ndarray],
        system_msg: Optional[str] = None,
        json_mode=False,
    ):
        messages = []
        if system_msg is not None:
            messages.append({"role": "system", "content": system_msg})
        messages.append(
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": text},
                    {
                        "type": "image_url",
                        "image_url": {"url": image_to_data_url(image)},
                    },
                ],
            }
        )
        return self.chat(messages, json_mode=json_mode)

    def one_step_multi_image_chat(
        self,
        text,
        images: List[Dict],
        system_msg: Optional[str] = None,
        json_mode=False,
    ):
        """images: [{"image": PIL.Image or np.ndarray, "detail": "high" or "low"}]

        For low-res mode, the API expects a 512px x 512px image.
        For high-res mode, the short side of the image should be less than
        768px and the long side should be less than 2,000px.
        """
        img_sub_msg = [
            {
                "type": "image_url",
                "image_url": {
                    "url": image_to_data_url(img_info["image"]),
                    "detail": img_info["detail"],
                },
            }
            for img_info in images
        ]
        messages = []
        if system_msg is not None:
            messages.append({"role": "system", "content": system_msg})
        messages.append(
            {
                "role": "user",
                "content": [{"type": "text", "text": text}] + img_sub_msg,
            }
        )
        return self.chat(messages, json_mode=json_mode)
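
# A minimal usage sketch for the client without the Gradio UI, assuming a
# cutsheet PDF at "cutsheet.pdf" and a valid OpenAI API key (both are
# hypothetical placeholders). pdf2image returns one PIL image per page;
# here only the first page is sent:
#
#   page = convert_from_path("cutsheet.pdf")[0]
#   client = GPT4V_Client(api_key="sk-...")
#   content, raw_response = client.one_step_chat(prompt, page)
#   print(content)  # expected: a ```json fenced block matching the prompt's schema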

def markdown_json_to_table(markdown_json_string, iteration):
    """Convert a ```json fenced model reply into markdown table text.

    The first cutsheet (iteration 0) emits the header and separator rows;
    later cutsheets append only a data row to the existing table.
    """
    if not markdown_json_string.startswith("`"):
        return ""
    # Strip the ```json fence. Note that str.strip() removes a *set* of
    # characters, so the original strip("```json\n") only worked by accident.
    json_string = markdown_json_string.strip("`").strip()
    if json_string.startswith("json"):
        json_string = json_string[len("json"):].strip()
    json_object = json.loads(json_string)
    row = "| " + " | ".join(map(str, json_object.values())) + " |"
    if iteration == 0:
        header = "| " + " | ".join(json_object.keys()) + " |"
        separator = "|---" * len(json_object) + "|"
        return "\n".join([header, separator, row])
    # Subsequent rows join the existing table: prepend a newline and skip the
    # header/separator (the original emitted a second separator here, which
    # breaks the rendered table).
    return "\n" + row


def gptRead(cutsheets, api_key, organization_key):
    fixtureInfo = ""
    # Treat an empty organization-key textbox as "no organization"
    client = GPT4V_Client(api_key=api_key, organization_key=organization_key or None)
    for iteration, cutsheet in enumerate(cutsheets):
        # Only the first page of each uploaded PDF is parsed
        source = convert_from_path(cutsheet.name)[0]
        content, _ = client.one_step_chat(prompt, source)
        fixtureInfo += markdown_json_to_table(content, iteration)
    return fixtureInfo


if __name__ == "__main__":
    with gr.Blocks() as demo:
        gr.Markdown("# Lighting Manufacturer Cutsheet GPT Tool")
        api_key = gr.Textbox(label="Input your OpenAI API key:")
        organization_key = gr.Textbox(label="Input your OpenAI organization key:", info="(optional)")
        file_uploader = gr.UploadButton("Upload cutsheets", type="filepath", file_count="multiple")
        form = gr.Markdown()
        file_uploader.upload(gptRead, [file_uploader, api_key, organization_key], form)
    demo.launch(share=True)
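
# Setup sketch (package list is an assumption based on the imports above):
# pdf2image requires the poppler utilities to be installed on the system
# (e.g. `apt-get install poppler-utils` on Debian/Ubuntu, `brew install
# poppler` on macOS), and the Python dependencies are roughly:
#
#   pip install gradio requests pillow pdf2image numpy
#   python cutsheet_tool.py   # hypothetical file name; launches the Gradio UI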