LitBase / app.py
import gradio as gr
import requests
from PIL import Image
from pdf2image import convert_from_path
from typing import List, Union, Dict, Optional, Tuple
from io import BytesIO
import base64
import numpy as np
import json
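# Gradio app: upload lighting-fixture cutsheet PDFs, render the first page of each to an
# image, send it to the OpenAI vision chat API with the parsing prompt below, and display
# the extracted fields as a markdown table. Note that pdf2image's convert_from_path needs
# the poppler utilities installed on the host system.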
prompt = """You are an advanced document parsing bot. Given the fixture schedule I provided, you need to parse out
1. the name of the fixture
2. the company that produces this fixture
3. the part number of this fixture. It is a series of specification codes connected with - , and you can get the info by reading the texts marked in a different color or reading the top bar. Include every specification code in a correct order in your answer.
4. the input wattage of this fixture, short answer. Please answer the wattage according to the part number you found in question 3
Please format your response in json format
{
"fixture_name": <fixture name>,
"manufacture_name": <company name>,
"mfr": <part number>,
"input wattage": <numerical input wattage>
}
---
For example
{
"fixture_name": "SW24/1.5 Led Strips - Static White",
"manufacture_name": "Q-Tran Inc.",
"mfr": "SW24-1.5-DRY-30-BW-BW-WH-CL2-535",
"input wattage": "1.5W"
}"""
def query_openai_api(messages, model, temperature=0, api_key=None, organization_key=None, json_mode=False):
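    """Send a chat completion request to the OpenAI API and return
    (message text, full response dict), or ("API_ERROR: ...", None) on failure."""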
    try:
        url = "https://api.openai.com/v1/chat/completions"
        if organization_key is not None:
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {api_key}",
                "OpenAI-Organization": f"{organization_key}",
            }
        else:
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {api_key}",
            }
        data = {"model": model, "messages": messages, "temperature": temperature}
        if json_mode:
            data["response_format"] = {"type": "json_object"}
        # Make the POST request and return the parsed response
        response = requests.post(url, headers=headers, data=json.dumps(data)).json()
        print(response)
        return response["choices"][0]["message"]["content"].lstrip(), response
    except Exception as e:
        print(f"An error occurred: {e}")
        return f"API_ERROR: {e}", None
class GPT4V_Client:
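    """Thin client that stores the API key, organization key, and model name and
    forwards chat requests to query_openai_api."""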
    def __init__(self, api_key, organization_key, model_name="gpt-4-vision-preview", max_tokens=512):
        self.api_key = api_key
        self.organization_key = organization_key
        # self.client = OpenAI(api_key=api_key)
        self.model_name = model_name
        self.max_tokens = max_tokens

    def chat(self, messages, json_mode):
        return query_openai_api(messages, self.model_name, api_key=self.api_key, organization_key=self.organization_key, json_mode=json_mode)
    def one_step_chat(
        self,
        text,
        image: Union[Image.Image, np.ndarray],
        system_msg: Optional[str] = None,
        json_mode=False,
    ):
        jpeg_buffer = BytesIO()
        # Save the image as JPEG to the buffer
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        image = image.convert("RGB")
        image.save(jpeg_buffer, format="JPEG")
        # Get the byte data from the buffer
        jpeg_data = jpeg_buffer.getvalue()
        # Encode the JPEG image data in base64
        jpg_base64 = base64.b64encode(jpeg_data)
        # If you need it in string format
        jpg_base64_str = jpg_base64.decode("utf-8")

        messages = []
        if system_msg is not None:
            messages.append({"role": "system", "content": system_msg})
        messages += [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": text},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{jpg_base64_str}"
                        },
                    },
                ],
            }
        ]
        return self.chat(messages, json_mode=json_mode)
    def one_step_multi_image_chat(
        self,
        text,
        images: List[Dict],
        system_msg: Optional[str] = None,
        json_mode=False,
    ):
        """
        images: [{"image": PIL.Image or np.ndarray, "detail": "high" or "low"}]
        For low-res mode, we expect a 512px x 512px image. For high-res mode, the short side of the image should be less than 768px and the long side should be less than 2,000px.
        """
        details = [i["detail"] for i in images]
        img_strs = []
        for img_info in images:
            image = img_info["image"]
            jpeg_buffer = BytesIO()
            # Save the image as JPEG to the buffer
            if isinstance(image, np.ndarray):
                image = Image.fromarray(image)
            image = image.convert("RGB")
            image.save(jpeg_buffer, format="JPEG")
            # Get the byte data from the buffer
            jpeg_data = jpeg_buffer.getvalue()
            # Encode the JPEG image data in base64
            jpg_base64 = base64.b64encode(jpeg_data)
            # If you need it in string format
            jpg_base64_str = jpg_base64.decode("utf-8")
            img_strs.append(f"data:image/jpeg;base64,{jpg_base64_str}")

        messages = []
        if system_msg is not None:
            messages.append({"role": "system", "content": system_msg})
        img_sub_msg = [
            {
                "type": "image_url",
                "image_url": {"url": img_str, "detail": detail},
            }
            for img_str, detail in zip(img_strs, details)
        ]
        messages += [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": text},
                ]
                + img_sub_msg,
            }
        ]
        return self.chat(messages, json_mode=json_mode)
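# A minimal sketch of the multi-image call, assuming two already-loaded PIL pages
# (page1 and page2 are placeholders, not defined in this file):
#   client = GPT4V_Client(api_key="sk-...", organization_key=None)
#   answer, raw = client.one_step_multi_image_chat(
#       prompt,
#       [{"image": page1, "detail": "high"}, {"image": page2, "detail": "low"}],
#       json_mode=True,
#   )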
def markdown_json_to_table(markdown_json_string, iteration):
    if markdown_json_string[0] == '`':
        # Strip the surrounding ```json ... ``` fence before parsing
        json_string = markdown_json_string.strip().strip("`").removeprefix("json")
        json_object = json.loads(json_string)
        values = json_object.values()
        if iteration == 0:
            # First cutsheet: emit the header row, the separator row, and a data row
            headers = json_object.keys()
            markdown_table = "| " + " | ".join(headers) + " |\n" + \
                "|---" * len(json_object) + "|\n" + \
                "| " + " | ".join(map(str, values)) + " |"
        else:
            # Subsequent cutsheets: append only a data row
            markdown_table = "\n| " + " | ".join(map(str, values)) + " |"
    else:
        # The model did not return fenced JSON; nothing to add
        markdown_table = ""
    return markdown_table
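# For example, a reply of '```json\n{"fixture_name": "X", ...}\n```' becomes header,
# separator, and data rows on the first call (iteration 0), and only a single extra
# data row on later calls, so the per-cutsheet results stack into one markdown table.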
def gptRead(cutsheets, api_key, organization_key):
    fixtureInfo = ""
    iteration = 0
    client = GPT4V_Client(api_key=api_key, organization_key=organization_key)
    for cutsheet in cutsheets:
        # Render only the first page of each uploaded cutsheet PDF
        source = convert_from_path(cutsheet.name)[0]
        fixtureInfo += markdown_json_to_table(client.one_step_chat(prompt, source)[0], iteration)
        iteration += 1
    return fixtureInfo
if __name__ == "__main__":
    with gr.Blocks() as demo:
        gr.Markdown("# Lighting Manufacturer Cutsheet GPT Tool")
        api_key = gr.Textbox(label="Input your OpenAI API key:")
        organization_key = gr.Textbox(label="Input your OpenAI organization key:", info="(optional)")
        # image = gr.Image()
        file_uploader = gr.UploadButton("Upload cutsheets", type="filepath", file_count="multiple")
        form = gr.Markdown()
        file_uploader.upload(gptRead, [file_uploader, api_key, organization_key], form)
    demo.launch(share=True)