ehaemmma committed on
Commit
8143c4e
·
verified ·
1 Parent(s): 57f4675

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -45
app.py CHANGED
@@ -12,13 +12,15 @@ prompt = """You are an advanced document parsing bot. Given the fixture schedule
12
 
13
  1. the name of the fixture
14
  2. the company that produces this fixture
15
- 3. the part number of this fixture. It is a series of specification codes connected with - , and you can get the info by reading the texts marked in a different color or reading the top bar. Include every specification code in a correct order in your answer.
16
- 4. the input wattage of this fixture, short answer. Please answer the wattage according to the part number you found in question 3
 
17
 
18
  Please format your response in json format
19
  {
20
  "fixture_name": <fixture name>,
21
  "manufacture_name": <company name>,
 
22
  "mfr": <part number>,
23
  "input wattage": <numerical input wattage>
24
  }
@@ -28,6 +30,7 @@ For example
28
  {
29
  "fixture_name": "SW24/1.5 Led Strips - Static White",
30
  "manufacture_name": "Q-Tran Inc.",
 
31
  "mfr": "SW24-1.5-DRY-30-BW-BW-WH-CL2-535",
32
  "input wattage": "1.5W"
33
  }"""
@@ -50,7 +53,6 @@ def query_openai_api(messages, model, temperature=0, api_key=None, organization_
50
  if json_mode:
51
  data["response_format"] = {"type": "json_object"}
52
 
53
- # Make the POST request and return the response
54
  response = requests.post(url, headers=headers, data=json.dumps(data)).json()
55
  print(response)
56
  return response["choices"][0]["message"]["content"].lstrip(), response
@@ -59,10 +61,9 @@ def query_openai_api(messages, model, temperature=0, api_key=None, organization_
59
  return f"API_ERROR: {e}", None
60
 
61
  class GPT4V_Client:
62
- def __init__(self, api_key, organization_key, model_name="gpt-4-vision-preview", max_tokens=512):
63
  self.api_key = api_key
64
  self.organization_key = organization_key
65
- # self.client = OpenAI(api_key=api_key)
66
  self.model_name = model_name
67
  self.max_tokens = max_tokens
68
 
@@ -118,32 +119,21 @@ class GPT4V_Client:
118
  system_msg: Optional[str] = None,
119
  json_mode=False,
120
  ):
121
- """
122
- images: [{"image": PIL.image, "detail": "high" or "low }]
123
-
124
- For low res mode, we expect a 512px x 512px image. For high res mode, the short side of the image should be less than 768px and the long side should be less than 2,000px.
125
- """
126
  details = [i["detail"] for i in images]
127
  img_strs = []
128
  for img_info in images:
129
  image = img_info["image"]
130
  jpeg_buffer = BytesIO()
131
 
132
- # Save the image as JPEG to the buffer
133
  if isinstance(image, np.ndarray):
134
  image = Image.fromarray(image)
135
  image = image.convert("RGB")
136
  image.save(jpeg_buffer, format="JPEG")
137
-
138
- # Get the byte data from the buffer
139
  jpeg_data = jpeg_buffer.getvalue()
140
-
141
- # Encode the JPEG image data in base64
142
  jpg_base64 = base64.b64encode(jpeg_data)
143
-
144
- # If you need it in string format
145
  jpg_base64_str = jpg_base64.decode("utf-8")
146
  img_strs.append(f"data:image/jpeg;base64,{jpg_base64_str}")
 
147
  messages = []
148
  if system_msg is not None:
149
  messages.append({"role": "system", "content": system_msg})
@@ -158,50 +148,104 @@ class GPT4V_Client:
158
  messages += [
159
  {
160
  "role": "user",
161
- "content": [
162
- {"type": "text", "text": text},
163
- ]
164
- + img_sub_msg,
165
  }
166
  ]
167
  return self.chat(messages, json_mode=json_mode)
168
-
169
def markdown_json_to_table(markdown_json_string, iteration):
    """Render one model reply as newline-terminated Markdown table row(s).

    Args:
        markdown_json_string: Raw reply text. Expected to hold a JSON object,
            optionally wrapped in a ``` / ```json code fence.
        iteration: Zero-based row index. The header and separator rows are
            emitted only when this is 0.

    Returns:
        The Markdown row(s), each terminated by a newline so that results of
        successive calls can be concatenated directly. Returns "" when the
        reply is empty, is not valid JSON, or is not a non-empty JSON object.
    """
    text = (markdown_json_string or "").strip()
    if text.startswith("```"):
        # Remove the fence markers, then the optional language tag.
        text = text.strip("`")
        tag, newline, rest = text.partition("\n")
        if newline and tag.strip().isalnum():
            text = rest
        elif text[:4].lower() == "json":
            text = text[4:]
        text = text.strip()

    try:
        json_object = json.loads(text)
    except ValueError:
        # Covers json.JSONDecodeError: e.g. an "API_ERROR: ..." reply.
        return ""
    if not isinstance(json_object, dict) or not json_object:
        return ""

    def as_cell(value):
        # A cell must stay on one line and must not contain a bare pipe,
        # otherwise the Markdown table structure breaks.
        return " ".join(str(value).splitlines()).replace("|", "\\|")

    lines = []
    if iteration == 0:
        lines.append("| " + " | ".join(as_cell(key) for key in json_object) + " |")
        lines.append("|---" * len(json_object) + "|")
    lines.append("| " + " | ".join(as_cell(v) for v in json_object.values()) + " |")
    return "\n".join(lines) + "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
def gptRead(cutsheets, api_key, organization_key):
    """Build a Markdown table from the first page of each uploaded cutsheet.

    Args:
        cutsheets: Iterable of uploaded PDFs. Each item is either a path
            string or an object whose ``name`` attribute is the path.
        api_key: API key passed to ``GPT4V_Client``.
        organization_key: Organization key passed to ``GPT4V_Client``.

    Returns:
        The concatenated Markdown produced by ``markdown_json_to_table`` for
        every cutsheet whose reply could be rendered; "" if there are none.
    """
    if not cutsheets:
        return ""

    # One client is enough for the whole batch.
    client = GPT4V_Client(api_key=api_key, organization_key=organization_key)

    tables = []
    for cutsheet in cutsheets:
        pdf_path = getattr(cutsheet, "name", cutsheet)
        # Only the first page is used, so only the first page is rasterized.
        pages = convert_from_path(pdf_path, first_page=1, last_page=1)
        if not pages:
            continue

        reply = client.one_step_chat(prompt, pages[0])[0]
        # The row index counts rendered rows only, so the header is emitted
        # with the first row that actually renders, even if earlier files failed.
        row = markdown_json_to_table(reply, len(tables))
        if row:
            tables.append(row)

    return "".join(tables)
195
 
196
if __name__ == "__main__":
    # Page layout, top to bottom: title, credential fields, upload button,
    # and the Markdown area that receives the generated table.
    with gr.Blocks() as demo:
        gr.Markdown("# Lighting Manufacture Cutsheet GPT Tool")
        api_key = gr.Textbox(label="Input your ChatGPT4 API Key: ")
        organization_key = gr.Textbox(
            label="Input your ChatGPT4 API Organization Key: ",
            info="(optional)",
        )
        file_uploader = gr.UploadButton(
            "Upload cutsheets",
            type="filepath",
            file_count="multiple",
        )
        form = gr.Markdown()

        # Each upload runs gptRead and renders its return value in `form`.
        file_uploader.upload(
            gptRead,
            [file_uploader, api_key, organization_key],
            form,
        )

    demo.launch(share=True)
 
207
 
 
 
12
 
13
  1. the name of the fixture
14
  2. the company that produces this fixture
15
+ 3. the description of this fixture. This is a 20-word description which summarizes the size, function and the mounting method of the fixture and mentions any necessary accessories. For example: 1" x 1" recessed downlight.
16
+ 4. the part number of this fixture. It is a series of specification codes connected with - , and you can get the info by reading the texts marked in a different color or reading the top bar. Include every specification code in a correct order in your answer.
17
+ 5. the input wattage of this fixture, short answer. Please answer the wattage according to the part number you found in question 4
18
 
19
  Please format your response in json format
20
  {
21
  "fixture_name": <fixture name>,
22
  "manufacture_name": <company name>,
23
+ "fixture_description": <description>,
24
  "mfr": <part number>,
25
  "input wattage": <numerical input wattage>
26
  }
 
30
  {
31
  "fixture_name": "SW24/1.5 Led Strips - Static White",
32
  "manufacture_name": "Q-Tran Inc.",
33
+ "fixture_description": "Surface mounted static white LED strip.",
34
  "mfr": "SW24-1.5-DRY-30-BW-BW-WH-CL2-535",
35
  "input wattage": "1.5W"
36
  }"""
 
53
  if json_mode:
54
  data["response_format"] = {"type": "json_object"}
55
 
 
56
  response = requests.post(url, headers=headers, data=json.dumps(data)).json()
57
  print(response)
58
  return response["choices"][0]["message"]["content"].lstrip(), response
 
61
  return f"API_ERROR: {e}", None
62
 
63
  class GPT4V_Client:
64
+ def __init__(self, api_key, organization_key, model_name="gpt-4o", max_tokens=512):
65
  self.api_key = api_key
66
  self.organization_key = organization_key
 
67
  self.model_name = model_name
68
  self.max_tokens = max_tokens
69
 
 
119
  system_msg: Optional[str] = None,
120
  json_mode=False,
121
  ):
 
 
 
 
 
122
  details = [i["detail"] for i in images]
123
  img_strs = []
124
  for img_info in images:
125
  image = img_info["image"]
126
  jpeg_buffer = BytesIO()
127
 
 
128
  if isinstance(image, np.ndarray):
129
  image = Image.fromarray(image)
130
  image = image.convert("RGB")
131
  image.save(jpeg_buffer, format="JPEG")
 
 
132
  jpeg_data = jpeg_buffer.getvalue()
 
 
133
  jpg_base64 = base64.b64encode(jpeg_data)
 
 
134
  jpg_base64_str = jpg_base64.decode("utf-8")
135
  img_strs.append(f"data:image/jpeg;base64,{jpg_base64_str}")
136
+
137
  messages = []
138
  if system_msg is not None:
139
  messages.append({"role": "system", "content": system_msg})
 
148
  messages += [
149
  {
150
  "role": "user",
151
+ "content": [{"type": "text", "text": text}] + img_sub_msg,
 
 
 
152
  }
153
  ]
154
  return self.chat(messages, json_mode=json_mode)
155
+
156
def _strip_code_fence(text):
    """Return *text* without a surrounding ``` fence and its optional language tag."""
    text = text.strip()
    if not text.startswith("```"):
        return text
    body = text.strip("`")
    tag, newline, rest = body.partition("\n")
    if newline and tag.strip().isalnum():
        # "```json\n{...}": the first line holds only the language tag.
        body = rest
    elif body[:4].lower() == "json":
        # "```json{...}": the tag is glued to the payload.
        body = body[4:]
    return body.strip()


def _table_cell(value):
    """Format *value* as one table cell: a single line with literal pipes escaped."""
    return " ".join(str(value).splitlines()).replace("|", "\\|")


def markdown_json_to_table(markdown_json_string, iteration, thumbnail_md):
    """Convert a model reply into Markdown table row(s) led by a thumbnail cell.

    Args:
        markdown_json_string: Raw reply text. Expected to hold a JSON object,
            optionally wrapped in a ``` / ```json code fence.
        iteration: Zero-based row index. The header and separator rows are
            emitted only when this is 0.
        thumbnail_md: Markdown placed verbatim in the first column, e.g. an
            image reference. It is not escaped.

    Returns:
        Newline-terminated Markdown: header + separator + data row when
        ``iteration == 0``, otherwise just the data row. Returns "" when the
        reply is empty, is not valid JSON, or is not a non-empty JSON object.
    """
    try:
        json_obj = json.loads(_strip_code_fence(markdown_json_string or ""))
    except ValueError:
        # Covers json.JSONDecodeError: e.g. an "API_ERROR: ..." reply.
        return ""
    if not isinstance(json_obj, dict) or not json_obj:
        # A list/number/string parses fine but has no columns to show.
        return ""

    lines = []
    if iteration == 0:
        # Columns are: Thumbnail, then one column per JSON key.
        header = ["Thumbnail"] + [_table_cell(key) for key in json_obj]
        lines.append("| " + " | ".join(header) + " |")
        lines.append("|" + "|".join(["---"] * len(header)) + "|")

    cells = [thumbnail_md] + [_table_cell(value) for value in json_obj.values()]
    lines.append("| " + " | ".join(cells) + " |")
    return "\n".join(lines) + "\n"
209
+
210
 
211
def gptRead(cutsheets, api_key, organization_key):
    """Build a Markdown table, with thumbnails, from uploaded cutsheet PDFs.

    For each cutsheet the first PDF page is rendered to an image, a small
    base64 JPEG thumbnail of it is embedded as Markdown, and the full-size
    page is sent to the model together with ``prompt``.

    Args:
        cutsheets: Iterable of uploaded PDFs. Each item is either a path
            string or an object whose ``name`` attribute is the path.
        api_key: API key passed to ``GPT4V_Client``.
        organization_key: Organization key passed to ``GPT4V_Client``.

    Returns:
        The concatenated Markdown rows for every cutsheet whose reply could
        be rendered by ``markdown_json_to_table``; "" if there are none.
    """
    if not cutsheets:
        return ""

    client = GPT4V_Client(api_key=api_key, organization_key=organization_key)

    rows = []
    for cutsheet in cutsheets:
        pdf_path = getattr(cutsheet, "name", cutsheet)
        # Only the first page is used, so only the first page is rasterized.
        pages = convert_from_path(pdf_path, first_page=1, last_page=1)
        if not pages:
            continue
        source = pages[0]

        # Shrink a copy in place; the original page is still needed for the model.
        thumbnail_img = source.copy()
        thumbnail_img.thumbnail((100, 100))

        # Inline the thumbnail as a data URI so the Markdown is self-contained.
        thumb_io = BytesIO()
        thumbnail_img.save(thumb_io, format="JPEG")
        base64_thumb = base64.b64encode(thumb_io.getvalue()).decode("utf-8")
        thumbnail_md = f"![pdfpage](data:image/jpeg;base64,{base64_thumb})"

        response_text, _ = client.one_step_chat(prompt, source)

        # The row index counts rendered rows only, so the header is emitted
        # with the first row that actually renders, even if earlier files failed.
        row = markdown_json_to_table(response_text, len(rows), thumbnail_md)
        if row:
            rows.append(row)

    return "".join(rows)
239
 
240
if __name__ == "__main__":
    # Page layout, top to bottom: credential fields, title, upload button,
    # and the Markdown area that receives the generated table.
    with gr.Blocks() as demo:
        api_key = gr.Textbox(label="Input your ChatGPT4 API Key: ")
        organization_key = gr.Textbox(
            label="Input your ChatGPT4 API Organization Key: ",
            info="(optional)",
        )
        gr.Markdown("# Lighting Manufacture Cutsheet GPT Tool")
        file_uploader = gr.UploadButton(
            "Upload cutsheets",
            type="filepath",
            file_count="multiple",
        )
        form = gr.Markdown()

        # Each upload runs gptRead and renders its return value in `form`.
        handler_inputs = [file_uploader, api_key, organization_key]
        file_uploader.upload(fn=gptRead, inputs=handler_inputs, outputs=form)

    demo.launch(share=True)