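"""Gradio tool that parses lighting-fixture cutsheet PDFs with the OpenAI
GPT-4 Vision API and renders the extracted fields as a Markdown table."""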

import base64
import json
from io import BytesIO
from typing import Dict, List, Optional, Union

import gradio as gr
import numpy as np
import requests
from pdf2image import convert_from_path
from PIL import Image

prompt = """You are an advanced document parsing bot. Given the fixture schedule I provided, you need to parse out |
|
|
|
1. the name of the fixture |
|
2. the company that produces this fixture |
|
3. the part number of this fixture. It is a series of specification codes connected with - , and you can get the info by reading the texts marked in a different color or reading the top bar. Include every specification code in a correct order in your answer. |
|
4. the input wattage of this fixture, short answer. Please answer the wattage according to the part number you found in question 3 |
|
|
|
Please format your response in json format |
|
{ |
|
"fixture_name": <fixture name>, |
|
"manufacture_name": <company name>, |
|
"mfr": <part number>, |
|
"input wattage": <numerical input wattage> |
|
} |
|
|
|
--- |
|
For example |
|
{ |
|
"fixture_name": "SW24/1.5 Led Strips - Static White", |
|
"manufacture_name": "Q-Tran Inc.", |
|
"mfr": "SW24-1.5-DRY-30-BW-BW-WH-CL2-535", |
|
"input wattage": "1.5W" |
|
}""" |
|
|
|
def query_openai_api(messages, model, temperature=0, max_tokens=512,
                     api_key=None, organization_key=None, json_mode=False):
    """POST a chat-completion request to the OpenAI API.

    Returns a (message_text, raw_response) tuple, or ("API_ERROR: ...", None)
    if the request or response parsing fails.
    """
    try:
        url = "https://api.openai.com/v1/chat/completions"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }
        if organization_key:  # optional; Gradio passes "" for a blank textbox
            headers["OpenAI-Organization"] = organization_key
        data = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            # Cap the completion length so long replies are not truncated
            # at the model's (small) default.
            "max_tokens": max_tokens,
        }
        if json_mode:
            data["response_format"] = {"type": "json_object"}

        response = requests.post(url, headers=headers, json=data).json()
        return response["choices"][0]["message"]["content"].lstrip(), response
    except Exception as e:
        print(f"An error occurred: {e}")
        return f"API_ERROR: {e}", None
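
# Example usage (a sketch; "sk-..." stands in for a real API key):
#   text, raw = query_openai_api(
#       [{"role": "user", "content": "Say hello"}],
#       model="gpt-4-vision-preview",
#       api_key="sk-...",
#   )
#   print(text)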

class GPT4V_Client:
    def __init__(self, api_key, organization_key, model_name="gpt-4-vision-preview", max_tokens=512):
        self.api_key = api_key
        self.organization_key = organization_key
        self.model_name = model_name
        self.max_tokens = max_tokens

    def chat(self, messages, json_mode):
        return query_openai_api(
            messages,
            self.model_name,
            max_tokens=self.max_tokens,
            api_key=self.api_key,
            organization_key=self.organization_key,
            json_mode=json_mode,
        )

    @staticmethod
    def _encode_image(image: Union[Image.Image, np.ndarray]) -> str:
        """Encode an image as a base64 JPEG data URL."""
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        jpeg_buffer = BytesIO()
        image.convert("RGB").save(jpeg_buffer, format="JPEG")
        jpg_base64_str = base64.b64encode(jpeg_buffer.getvalue()).decode("utf-8")
        return f"data:image/jpeg;base64,{jpg_base64_str}"

    def one_step_chat(
        self,
        text,
        image: Union[Image.Image, np.ndarray],
        system_msg: Optional[str] = None,
        json_mode=False,
    ):
        """Send a single text-plus-image user message and return the reply."""
        messages = []
        if system_msg is not None:
            messages.append({"role": "system", "content": system_msg})
        messages.append(
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": text},
                    {
                        "type": "image_url",
                        "image_url": {"url": self._encode_image(image)},
                    },
                ],
            }
        )
        return self.chat(messages, json_mode=json_mode)

    def one_step_multi_image_chat(
        self,
        text,
        images: List[Dict],
        system_msg: Optional[str] = None,
        json_mode=False,
    ):
        """Send a single user message containing several images.

        images: [{"image": PIL.Image or np.ndarray, "detail": "high" or "low"}]

        For low-res mode, the API expects a 512px x 512px image. For high-res
        mode, the short side of the image should be less than 768px and the
        long side should be less than 2,000px.
        """
        messages = []
        if system_msg is not None:
            messages.append({"role": "system", "content": system_msg})

        img_sub_msg = [
            {
                "type": "image_url",
                "image_url": {
                    "url": self._encode_image(img_info["image"]),
                    "detail": img_info["detail"],
                },
            }
            for img_info in images
        ]
        messages.append(
            {
                "role": "user",
                "content": [{"type": "text", "text": text}] + img_sub_msg,
            }
        )
        return self.chat(messages, json_mode=json_mode)
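
# A minimal sketch of pre-scaling, assuming the caller resizes before upload:
# clamp an image to the high-detail limits quoted in one_step_multi_image_chat
# (fit_for_high_detail is an illustrative helper, not part of any API).
def fit_for_high_detail(image: Image.Image, short_max=768, long_max=2000) -> Image.Image:
    """Downscale so the short side is <= short_max and the long side <= long_max."""
    w, h = image.size
    scale = min(short_max / min(w, h), long_max / max(w, h), 1.0)
    if scale < 1.0:
        image = image.resize((int(w * scale), int(h * scale)), Image.LANCZOS)
    return image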

def markdown_json_to_table(markdown_json_string, iteration):
    """Convert a (possibly ```json-fenced) JSON reply into Markdown table rows.

    The first reply (iteration == 0) also emits the header and separator rows;
    later replies append data rows only.
    """
    if markdown_json_string.startswith("```"):
        json_string = markdown_json_string.strip("`").removeprefix("json").strip()
    else:
        json_string = markdown_json_string
    try:
        json_object = json.loads(json_string)
    except ValueError:  # not valid JSON, e.g. an API_ERROR string
        return ""
    row = "| " + " | ".join(map(str, json_object.values())) + " |"
    if iteration == 0:
        header = "| " + " | ".join(json_object.keys()) + " |"
        separator = "|---" * len(json_object) + "|"
        return "\n".join([header, separator, row])
    return "\n" + row
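
# For example, a fenced reply like
#   ```json
#   {"fixture_name": "X", "manufacture_name": "Y", "mfr": "Z", "input wattage": "1.5W"}
#   ```
# becomes, on the first iteration:
#   | fixture_name | manufacture_name | mfr | input wattage |
#   |---|---|---|---|
#   | X | Y | Z | 1.5W |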

def gptRead(cutsheets, api_key, organization_key):
    fixtureInfo = ""
    client = GPT4V_Client(api_key=api_key, organization_key=organization_key)
    for iteration, cutsheet in enumerate(cutsheets):
        # Only the first page of each cutsheet PDF is parsed.
        source = convert_from_path(cutsheet.name)[0]
        reply, _ = client.one_step_chat(prompt, source)
        fixtureInfo += markdown_json_to_table(reply, iteration)
    return fixtureInfo

if __name__ == "__main__":
    with gr.Blocks() as demo:
        gr.Markdown("# Lighting Manufacturer Cutsheet GPT Tool")
        api_key = gr.Textbox(label="Input your OpenAI API key:")
        organization_key = gr.Textbox(label="Input your OpenAI organization key:", info="(optional)")

        file_uploader = gr.UploadButton("Upload cutsheets", type="filepath", file_count="multiple")
        form = gr.Markdown()
        file_uploader.upload(gptRead, [file_uploader, api_key, organization_key], form)

    demo.launch(share=True)