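"""Gradio tool that sends lighting-fixture cutsheet PDFs to the OpenAI vision
API and renders the parsed fixture data as a markdown table."""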

import gradio as gr
import requests
from PIL import Image
from pdf2image import convert_from_path
from typing import List, Union, Dict, Optional, Tuple
from io import BytesIO
import base64
import numpy as np
import json
prompt = """You are an advanced document parsing bot. Given the fixture schedule I provided, you need to parse out |
|
|
|
1. the name of the fixture |
|
2. the company that produces this fixture |
|
3. the description of this fixture. This is a 20-word description which summarize the size, function and the mounting method of the fixture and mention any necessary accesories. For example: 1" x 1" recessed downlight. |
|
4. the part number of this fixture. It is a series of specification codes connected with - , and you can get the info by reading the texts marked in a different color or reading the top bar. Include every specification code in a correct order in your answer. |
|
5. the input wattage of this fixture, short answer. Please answer the wattage according to the part number you found in question 3 |
|
|
|
Please format your response in json format |
|
{ |
|
"fixture_name": <fixture name>, |
|
"manufacture_name": <company name>, |
|
"fixture_description": <description>, |
|
"mfr": <part number>, |
|
"input wattage": <numerical input wattage> |
|
} |
|
|
|
--- |
|
For example |
|
{ |
|
"fixture_name": "SW24/1.5 Led Strips - Static White", |
|
"manufacture_name": "Q-Tran Inc.", |
|
"fixture_description": "Surface mounted static white LED strip." |
|
"mfr": "SW24-1.5-DRY-30-BW-BW-WH-CL2-535", |
|
"input wattage": "1.5W" |
|
}""" |
|
|
|
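
# The JSON keys requested above become the column headers that
# markdown_json_to_table() emits further down.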


def query_openai_api(messages, model, temperature=0, api_key=None, organization_key=None, json_mode=False, max_tokens=None):
    try:
        url = "https://api.openai.com/v1/chat/completions"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }
        if organization_key is not None:
            headers["OpenAI-Organization"] = organization_key
        data = {"model": model, "messages": messages, "temperature": temperature}
        if max_tokens is not None:
            data["max_tokens"] = max_tokens
        if json_mode:
            # Constrain the model to emit a valid JSON object.
            data["response_format"] = {"type": "json_object"}

        response = requests.post(url, headers=headers, json=data).json()
        print(response)
        return response["choices"][0]["message"]["content"].lstrip(), response
    except Exception as e:
        print(f"An error occurred: {e}")
        return f"API_ERROR: {e}", None
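
# A successful Chat Completions response has roughly this shape:
#   {"choices": [{"message": {"role": "assistant", "content": "..."}}], "usage": {...}}
# Error responses carry an "error" field and no "choices", which lands in the
# except branch above.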


class GPT4V_Client:
    """Thin client wrapper around the OpenAI Chat Completions API."""

    def __init__(self, api_key, organization_key, model_name="gpt-4o", max_tokens=512):
        self.api_key = api_key
        self.organization_key = organization_key
        self.model_name = model_name
        self.max_tokens = max_tokens

    def chat(self, messages, json_mode):
        return query_openai_api(
            messages,
            self.model_name,
            api_key=self.api_key,
            organization_key=self.organization_key,
            json_mode=json_mode,
            max_tokens=self.max_tokens,
        )

    def one_step_chat(
        self,
        text,
        image: Union[Image.Image, np.ndarray],
        system_msg: Optional[str] = None,
        json_mode=False,
    ):
        # Encode the image as a base64 JPEG data URL for the vision API.
        jpeg_buffer = BytesIO()
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        image = image.convert("RGB")  # JPEG cannot store an alpha channel
        image.save(jpeg_buffer, format="JPEG")
        jpg_base64_str = base64.b64encode(jpeg_buffer.getvalue()).decode("utf-8")

        messages = []
        if system_msg is not None:
            messages.append({"role": "system", "content": system_msg})
        messages += [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": text},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{jpg_base64_str}"
                        },
                    },
                ],
            }
        ]
        return self.chat(messages, json_mode=json_mode)

    def one_step_multi_image_chat(
        self,
        text,
        images: List[Dict[str, Union[Image.Image, np.ndarray, str]]],
        system_msg: Optional[str] = None,
        json_mode=False,
    ):
        # Each entry in `images` is a dict holding an "image" (PIL image or
        # ndarray) and a "detail" level for the vision API ("low"/"high"/"auto").
        details = [i["detail"] for i in images]
        img_strs = []
        for img_info in images:
            image = img_info["image"]
            jpeg_buffer = BytesIO()
            if isinstance(image, np.ndarray):
                image = Image.fromarray(image)
            image = image.convert("RGB")
            image.save(jpeg_buffer, format="JPEG")
            jpg_base64_str = base64.b64encode(jpeg_buffer.getvalue()).decode("utf-8")
            img_strs.append(f"data:image/jpeg;base64,{jpg_base64_str}")

        messages = []
        if system_msg is not None:
            messages.append({"role": "system", "content": system_msg})

        img_sub_msg = [
            {
                "type": "image_url",
                "image_url": {"url": img_str, "detail": detail},
            }
            for img_str, detail in zip(img_strs, details)
        ]
        messages += [
            {
                "role": "user",
                "content": [{"type": "text", "text": text}] + img_sub_msg,
            }
        ]
        return self.chat(messages, json_mode=json_mode)
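
# A minimal usage sketch (hypothetical key and image values):
#   client = GPT4V_Client(api_key="sk-...", organization_key=None)
#   reply, raw = client.one_step_chat("Describe this fixture.", page_image)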


def markdown_json_to_table(markdown_json_string, iteration, thumbnail_md):
    """
    Convert the GPT JSON string into a markdown row with the first column as the PDF thumbnail.

    Args:
        markdown_json_string: the raw markdown (JSON) string from GPT
        iteration: which row # we are on
        thumbnail_md: markdown for the thumbnail image, e.g. ![thumbnail](data:image/jpeg;base64,...)

    Returns:
        A string with either:
        - the header row + first data row, if iteration == 0
        - an additional data row, if iteration > 0
    """
    json_string = markdown_json_string.strip()
    if json_string.startswith("```"):
        # GPT often wraps the JSON in a fenced code block; peel off the fences
        # and an optional "json" language tag without eating real content.
        json_string = json_string.removeprefix("```").removeprefix("json")
        json_string = json_string.removesuffix("```").strip()

    try:
        json_obj = json.loads(json_string)
    except Exception:
        # Unparseable response: contribute no row rather than crash.
        return ""

    keys = list(json_obj.keys())
    values = list(json_obj.values())

    if iteration == 0:
        # First cutsheet: emit the table header and separator rows.
        header = ["Thumbnail"] + keys
        header_row = "| " + " | ".join(header) + " |\n"
        sep_row = "|" + "|".join(["---"] * len(header)) + "|\n"
    else:
        header_row = ""
        sep_row = ""

    str_values = [str(v) for v in values]
    data_row = "| " + thumbnail_md + " | " + " | ".join(str_values) + " |\n"

    return header_row + sep_row + data_row
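
# For iteration == 0 the returned markdown looks like:
#   | Thumbnail | fixture_name | manufacture_name | fixture_description | mfr | input wattage |
#   |---|---|---|---|---|---|
#   | ![thumbnail](...) | SW24/1.5 Led Strips - Static White | Q-Tran Inc. | ... | ... | ... |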


def gptRead(cutsheets, api_key, organization_key):
    fixtureInfo = ""
    iteration = 0
    client = GPT4V_Client(api_key=api_key, organization_key=organization_key)

    for cutsheet in cutsheets:
        # Rasterize only the first page of each cutsheet PDF.
        source = convert_from_path(cutsheet.name)[0]

        # Build a small base64-embedded thumbnail for the table's first column.
        thumbnail_img = source.copy()
        thumbnail_img.thumbnail((100, 100))
        thumb_io = BytesIO()
        thumbnail_img.save(thumb_io, format="JPEG")
        base64_thumb = base64.b64encode(thumb_io.getvalue()).decode("utf-8")
        thumbnail_md = f"![thumbnail](data:image/jpeg;base64,{base64_thumb})"

        # Ask GPT to parse the page and append the result as a table row.
        response_text, _ = client.one_step_chat(prompt, source)
        fixtureInfo += markdown_json_to_table(response_text, iteration, thumbnail_md)
        iteration += 1

    return fixtureInfo


if __name__ == "__main__":
    with gr.Blocks() as demo:
        api_key = gr.Textbox(label="Input your OpenAI API Key: ")
        organization_key = gr.Textbox(label="Input your OpenAI Organization Key: ", info="(optional)")
        gr.Markdown("# Lighting Manufacturer Cutsheet GPT Tool")
        file_uploader = gr.UploadButton("Upload cutsheets", type="filepath", file_count="multiple")
        form = gr.Markdown()

        # Parse the uploaded cutsheets and render the resulting markdown table.
        file_uploader.upload(fn=gptRead, inputs=[file_uploader, api_key, organization_key], outputs=form)

    demo.launch(share=True)  # share=True also exposes a temporary public URL