Update app.py
Browse files
app.py
CHANGED
@@ -12,13 +12,15 @@ prompt = """You are an advanced document parsing bot. Given the fixture schedule
|
|
12 |
|
13 |
1. the name of the fixture
|
14 |
2. the company that produces this fixture
|
15 |
-
3. the
|
16 |
-
4. the
|
|
|
17 |
|
18 |
Please format your response in json format
|
19 |
{
|
20 |
"fixture_name": <fixture name>,
|
21 |
"manufacture_name": <company name>,
|
|
|
22 |
"mfr": <part number>,
|
23 |
"input wattage": <numerical input wattage>
|
24 |
}
|
@@ -28,6 +30,7 @@ For example
|
|
28 |
{
|
29 |
"fixture_name": "SW24/1.5 Led Strips - Static White",
|
30 |
"manufacture_name": "Q-Tran Inc.",
|
|
|
31 |
"mfr": "SW24-1.5-DRY-30-BW-BW-WH-CL2-535",
|
32 |
"input wattage": "1.5W"
|
33 |
}"""
|
@@ -50,7 +53,6 @@ def query_openai_api(messages, model, temperature=0, api_key=None, organization_
|
|
50 |
if json_mode:
|
51 |
data["response_format"] = {"type": "json_object"}
|
52 |
|
53 |
-
# Make the POST request and return the response
|
54 |
response = requests.post(url, headers=headers, data=json.dumps(data)).json()
|
55 |
print(response)
|
56 |
return response["choices"][0]["message"]["content"].lstrip(), response
|
@@ -59,10 +61,9 @@ def query_openai_api(messages, model, temperature=0, api_key=None, organization_
|
|
59 |
return f"API_ERROR: {e}", None
|
60 |
|
61 |
class GPT4V_Client:
|
62 |
-
def __init__(self, api_key, organization_key, model_name="gpt-
|
63 |
self.api_key = api_key
|
64 |
self.organization_key = organization_key
|
65 |
-
# self.client = OpenAI(api_key=api_key)
|
66 |
self.model_name = model_name
|
67 |
self.max_tokens = max_tokens
|
68 |
|
@@ -118,32 +119,21 @@ class GPT4V_Client:
|
|
118 |
system_msg: Optional[str] = None,
|
119 |
json_mode=False,
|
120 |
):
|
121 |
-
"""
|
122 |
-
images: [{"image": PIL.image, "detail": "high" or "low }]
|
123 |
-
|
124 |
-
For low res mode, we expect a 512px x 512px image. For high res mode, the short side of the image should be less than 768px and the long side should be less than 2,000px.
|
125 |
-
"""
|
126 |
details = [i["detail"] for i in images]
|
127 |
img_strs = []
|
128 |
for img_info in images:
|
129 |
image = img_info["image"]
|
130 |
jpeg_buffer = BytesIO()
|
131 |
|
132 |
-
# Save the image as JPEG to the buffer
|
133 |
if isinstance(image, np.ndarray):
|
134 |
image = Image.fromarray(image)
|
135 |
image = image.convert("RGB")
|
136 |
image.save(jpeg_buffer, format="JPEG")
|
137 |
-
|
138 |
-
# Get the byte data from the buffer
|
139 |
jpeg_data = jpeg_buffer.getvalue()
|
140 |
-
|
141 |
-
# Encode the JPEG image data in base64
|
142 |
jpg_base64 = base64.b64encode(jpeg_data)
|
143 |
-
|
144 |
-
# If you need it in string format
|
145 |
jpg_base64_str = jpg_base64.decode("utf-8")
|
146 |
img_strs.append(f"data:image/jpeg;base64,{jpg_base64_str}")
|
|
|
147 |
messages = []
|
148 |
if system_msg is not None:
|
149 |
messages.append({"role": "system", "content": system_msg})
|
@@ -158,50 +148,104 @@ class GPT4V_Client:
|
|
158 |
messages += [
|
159 |
{
|
160 |
"role": "user",
|
161 |
-
"content": [
|
162 |
-
{"type": "text", "text": text},
|
163 |
-
]
|
164 |
-
+ img_sub_msg,
|
165 |
}
|
166 |
]
|
167 |
return self.chat(messages, json_mode=json_mode)
|
168 |
-
|
169 |
-
def markdown_json_to_table(markdown_json_string, iteration):
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
|
186 |
def gptRead(cutsheets, api_key, organization_key):
|
187 |
fixtureInfo = ""
|
188 |
iteration = 0
|
|
|
|
|
189 |
for cutsheet in cutsheets:
|
|
|
190 |
source = (convert_from_path(cutsheet.name))[0]
|
191 |
-
|
192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
iteration += 1
|
|
|
194 |
return fixtureInfo
|
195 |
|
196 |
if __name__ == "__main__":
|
197 |
with gr.Blocks() as demo:
|
|
|
|
|
198 |
gr.Markdown("# Lighting Manufacture Cutsheet GPT Tool")
|
199 |
-
api_key = gr.Textbox(label = "Input your ChatGPT4 API Key: ")
|
200 |
-
organization_key = gr.Textbox(label = "Input your ChatGPT4 API Organization Key: ", info = "(optional)")
|
201 |
-
# image = gr.Image()
|
202 |
file_uploader = gr.UploadButton("Upload cutsheets", type="filepath", file_count="multiple")
|
203 |
form = gr.Markdown()
|
204 |
-
file_uploader.upload(gptRead, [file_uploader, api_key, organization_key], form)
|
205 |
|
206 |
-
|
|
|
207 |
|
|
|
|
12 |
|
13 |
1. the name of the fixture
|
14 |
2. the company that produces this fixture
|
15 |
+
3. the description of this fixture. This is a 20-word description which summarize the size, function and the mounting method of the fixture and mention any necessary accesories. For example: 1" x 1" recessed downlight.
|
16 |
+
4. the part number of this fixture. It is a series of specification codes connected with - , and you can get the info by reading the texts marked in a different color or reading the top bar. Include every specification code in a correct order in your answer.
|
17 |
+
5. the input wattage of this fixture, short answer. Please answer the wattage according to the part number you found in question 4
|
18 |
|
19 |
Please format your response in json format
|
20 |
{
|
21 |
"fixture_name": <fixture name>,
|
22 |
"manufacture_name": <company name>,
|
23 |
+
"fixture_description": <description>,
|
24 |
"mfr": <part number>,
|
25 |
"input wattage": <numerical input wattage>
|
26 |
}
|
|
|
30 |
{
|
31 |
"fixture_name": "SW24/1.5 Led Strips - Static White",
|
32 |
"manufacture_name": "Q-Tran Inc.",
|
33 |
+
"fixture_description": "Surface mounted static white LED strip."
|
34 |
"mfr": "SW24-1.5-DRY-30-BW-BW-WH-CL2-535",
|
35 |
"input wattage": "1.5W"
|
36 |
}"""
|
|
|
53 |
if json_mode:
|
54 |
data["response_format"] = {"type": "json_object"}
|
55 |
|
|
|
56 |
response = requests.post(url, headers=headers, data=json.dumps(data)).json()
|
57 |
print(response)
|
58 |
return response["choices"][0]["message"]["content"].lstrip(), response
|
|
|
61 |
return f"API_ERROR: {e}", None
|
62 |
|
63 |
class GPT4V_Client:
|
64 |
+
    def __init__(self, api_key, organization_key, model_name="gpt-4o", max_tokens=512):
        """Store OpenAI credentials and generation settings for later requests.

        Args:
            api_key: OpenAI API key used to authenticate chat requests.
            organization_key: OpenAI organization id (optional; may be empty).
            model_name: chat model to query; defaults to the vision-capable
                "gpt-4o".
            max_tokens: maximum number of completion tokens to request.
        """
        self.api_key = api_key
        self.organization_key = organization_key
        self.model_name = model_name
        self.max_tokens = max_tokens
|
69 |
|
|
|
119 |
system_msg: Optional[str] = None,
|
120 |
json_mode=False,
|
121 |
):
|
|
|
|
|
|
|
|
|
|
|
122 |
details = [i["detail"] for i in images]
|
123 |
img_strs = []
|
124 |
for img_info in images:
|
125 |
image = img_info["image"]
|
126 |
jpeg_buffer = BytesIO()
|
127 |
|
|
|
128 |
if isinstance(image, np.ndarray):
|
129 |
image = Image.fromarray(image)
|
130 |
image = image.convert("RGB")
|
131 |
image.save(jpeg_buffer, format="JPEG")
|
|
|
|
|
132 |
jpeg_data = jpeg_buffer.getvalue()
|
|
|
|
|
133 |
jpg_base64 = base64.b64encode(jpeg_data)
|
|
|
|
|
134 |
jpg_base64_str = jpg_base64.decode("utf-8")
|
135 |
img_strs.append(f"data:image/jpeg;base64,{jpg_base64_str}")
|
136 |
+
|
137 |
messages = []
|
138 |
if system_msg is not None:
|
139 |
messages.append({"role": "system", "content": system_msg})
|
|
|
148 |
messages += [
|
149 |
{
|
150 |
"role": "user",
|
151 |
+
"content": [{"type": "text", "text": text}] + img_sub_msg,
|
|
|
|
|
|
|
152 |
}
|
153 |
]
|
154 |
return self.chat(messages, json_mode=json_mode)
|
155 |
+
|
156 |
+
def markdown_json_to_table(markdown_json_string, iteration, thumbnail_md):
    """Convert a GPT JSON response into markdown table row(s).

    The table's first column is the PDF thumbnail; the remaining columns are
    the keys of the parsed JSON object.

    Args:
        markdown_json_string: raw model output — either bare JSON or JSON
            wrapped in a ```json ... ``` fenced code block.
        iteration: 0-based row number; the header + separator rows are
            emitted only for the first row (iteration == 0).
        thumbnail_md: markdown image tag for the thumbnail column, e.g.
            ``![thumb](data:image/jpeg;base64,...)``.

    Returns:
        Header row + separator row + data row when iteration == 0, a single
        data row otherwise, or "" if the response cannot be parsed as a
        JSON object.
    """
    cleaned = markdown_json_string.strip()

    # Remove a markdown code fence if present. Explicit slicing is used
    # instead of str.strip("```")/str.strip("json"): strip() treats its
    # argument as a character set, which can eat legitimate leading/trailing
    # backticks or letters j/s/o/n from the payload itself.
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()
        # Drop an optional "json" language tag right after the opening fence.
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:].strip()

    # Safely parse the JSON; unusable model output contributes nothing.
    try:
        json_obj = json.loads(cleaned)
    except (json.JSONDecodeError, TypeError):
        return ""
    # Guard against valid JSON that is not an object (e.g. a bare list),
    # which would previously crash at .keys().
    if not isinstance(json_obj, dict):
        return ""

    keys = list(json_obj.keys())
    values = [str(v) for v in json_obj.values()]

    # Columns are [Thumbnail, key1, key2, ...] — one extra column in front
    # of the JSON keys. Emit the header only for the first row.
    if iteration == 0:
        header = ["Thumbnail"] + keys
        header_row = "| " + " | ".join(header) + " |\n"
        sep_row = "|" + "|".join(["---"] * len(header)) + "|\n"
    else:
        header_row = ""
        sep_row = ""

    data_row = "| " + thumbnail_md + " | " + " | ".join(values) + " |\n"

    return header_row + sep_row + data_row
209 |
+
|
210 |
|
211 |
def gptRead(cutsheets, api_key, organization_key):
    """Run the cutsheet-parsing prompt over each uploaded PDF.

    For every uploaded cutsheet, the first PDF page is rendered to an image,
    sent to the GPT vision model with the module-level ``prompt``, and the
    JSON answer is appended to a markdown table whose first column is a
    small base64-embedded thumbnail of that page.

    Args:
        cutsheets: files from gr.UploadButton; each entry is either a plain
            path string or a tempfile-like object exposing ``.name``.
        api_key: OpenAI API key forwarded to GPT4V_Client.
        organization_key: OpenAI organization id forwarded to GPT4V_Client.

    Returns:
        The accumulated markdown table (header + one row per cutsheet).
    """
    fixtureInfo = ""
    iteration = 0
    client = GPT4V_Client(api_key=api_key, organization_key=organization_key)

    for cutsheet in cutsheets:
        # gr.UploadButton(type="filepath") yields plain path strings in newer
        # Gradio versions; older versions yield tempfile objects with .name.
        pdf_path = getattr(cutsheet, "name", cutsheet)

        # Convert the first page of the PDF into a PIL image.
        source = convert_from_path(pdf_path)[0]

        # Create a small thumbnail for the table's first column.
        thumbnail_img = source.copy()
        thumbnail_img.thumbnail((100, 100))

        # Encode the thumbnail as a base64 JPEG data URI for Markdown embedding.
        thumb_io = BytesIO()
        thumbnail_img.save(thumb_io, format="JPEG")
        base64_thumb = base64.b64encode(thumb_io.getvalue()).decode("utf-8")
        # BUG FIX: thumbnail_md was an empty f-string, so the thumbnail column
        # rendered blank even though the base64 data was computed. Embed the
        # data URI as a markdown image instead.
        thumbnail_md = f"![thumbnail](data:image/jpeg;base64,{base64_thumb})"

        # Chat with GPT about the original (non-thumbnail) image.
        # NOTE(review): assumes one_step_chat(text, image) accepts a bare PIL
        # image as its second argument — confirm against GPT4V_Client.
        response_text, _ = client.one_step_chat(prompt, source)

        # Convert the GPT JSON to a markdown row, thumbnail first.
        fixtureInfo += markdown_json_to_table(response_text, iteration, thumbnail_md)

        iteration += 1

    return fixtureInfo
|
239 |
|
240 |
if __name__ == "__main__":
|
241 |
with gr.Blocks() as demo:
|
242 |
+
api_key = gr.Textbox(label="Input your ChatGPT4 API Key: ")
|
243 |
+
organization_key = gr.Textbox(label="Input your ChatGPT4 API Organization Key: ", info="(optional)")
|
244 |
gr.Markdown("# Lighting Manufacture Cutsheet GPT Tool")
|
|
|
|
|
|
|
245 |
file_uploader = gr.UploadButton("Upload cutsheets", type="filepath", file_count="multiple")
|
246 |
form = gr.Markdown()
|
|
|
247 |
|
248 |
+
# When user uploads, call gptRead -> produce the final Markdown w/ table
|
249 |
+
file_uploader.upload(fn=gptRead, inputs=[file_uploader, api_key, organization_key], outputs=form)
|
250 |
|
251 |
+
demo.launch(share=True)
|