Spaces:

ehaemmma
/

LitBase

Sleeping

App Files Files Community

ehaemmma committed on Mar 7

Commit

8143c4e

verified ·

1 Parent(s): 57f4675

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -45

app.py CHANGED Viewed

@@ -12,13 +12,15 @@ prompt = """You are an advanced document parsing bot. Given the fixture schedule
 1. the name of the fixture
 2. the company that produces this fixture
-3. the part number of this fixture. It is a series of specification codes connected with - , and you can get the info by reading the texts marked in a different color or reading the top bar. Include every specification code in a correct order in your answer.
-4. the input wattage of this fixture, short answer. Please answer the wattage according to the part number you found in question 3
 Please format your response in json format
 {
     "fixture_name": <fixture name>,
     "manufacture_name": <company name>,
     "mfr": <part number>,
     "input wattage": <numerical input wattage>
 }
@@ -28,6 +30,7 @@ For example
 {
     "fixture_name": "SW24/1.5 Led Strips - Static White",
     "manufacture_name": "Q-Tran Inc.",
     "mfr": "SW24-1.5-DRY-30-BW-BW-WH-CL2-535",
     "input wattage": "1.5W"
 }"""
@@ -50,7 +53,6 @@ def query_openai_api(messages, model, temperature=0, api_key=None, organization_
         if json_mode:
             data["response_format"] = {"type": "json_object"}
-        # Make the POST request and return the response
         response = requests.post(url, headers=headers, data=json.dumps(data)).json()
         print(response)
         return response["choices"][0]["message"]["content"].lstrip(), response
@@ -59,10 +61,9 @@ def query_openai_api(messages, model, temperature=0, api_key=None, organization_
         return f"API_ERROR: {e}", None
 class GPT4V_Client:
-    def __init__(self, api_key, organization_key, model_name="gpt-4-vision-preview", max_tokens=512):
         self.api_key = api_key
         self.organization_key = organization_key
-        # self.client = OpenAI(api_key=api_key)
         self.model_name = model_name
         self.max_tokens = max_tokens
@@ -118,32 +119,21 @@ class GPT4V_Client:
         system_msg: Optional[str] = None,
         json_mode=False,
     ):
-        """
-        images: [{"image": PIL.image, "detail": "high" or "low }]
-        For low res mode, we expect a 512px x 512px image. For high res mode, the short side of the image should be less than 768px and the long side should be less than 2,000px.
-        """
         details = [i["detail"] for i in images]
         img_strs = []
         for img_info in images:
             image = img_info["image"]
             jpeg_buffer = BytesIO()
-            # Save the image as JPEG to the buffer
             if isinstance(image, np.ndarray):
                 image = Image.fromarray(image)
             image = image.convert("RGB")
             image.save(jpeg_buffer, format="JPEG")
-            # Get the byte data from the buffer
             jpeg_data = jpeg_buffer.getvalue()
-            # Encode the JPEG image data in base64
             jpg_base64 = base64.b64encode(jpeg_data)
-            # If you need it in string format
             jpg_base64_str = jpg_base64.decode("utf-8")
             img_strs.append(f"data:image/jpeg;base64,{jpg_base64_str}")
         messages = []
         if system_msg is not None:
             messages.append({"role": "system", "content": system_msg})
@@ -158,50 +148,104 @@ class GPT4V_Client:
         messages += [
             {
                 "role": "user",
-                "content": [
-                    {"type": "text", "text": text},
-                ]
-                + img_sub_msg,
             }
         ]
         return self.chat(messages, json_mode=json_mode)
-def markdown_json_to_table(markdown_json_string, iteration):
-    if markdown_json_string[0] == '`':
-        json_string = markdown_json_string.strip("```json\n").rstrip("```")
-        json_object = json.loads(json_string)
-        values = json_object.values()
-        if iteration == 0:
-            headers = json_object.keys()
-            markdown_table = "| " + " | ".join(headers) + " |\n" + \
-                            "|---" * len(json_object) + "|\n" + \
-                            "| " + " | ".join(map(str, values)) + " |"
-        else:
-            markdown_table =  "|---" * len(json_object) + "|\n" + \
-                        "| " + " | ".join(map(str, values)) + " |"
-    else:
-        markdown_table = ""
-    return markdown_table
 def gptRead(cutsheets, api_key, organization_key):
     fixtureInfo = ""
     iteration = 0
     for cutsheet in cutsheets:
         source = (convert_from_path(cutsheet.name))[0]
-        client = GPT4V_Client(api_key=api_key, organization_key=organization_key)
-        fixtureInfo += markdown_json_to_table(client.one_step_chat(prompt, source)[0], iteration)
         iteration += 1
     return fixtureInfo
 if __name__ == "__main__":
     with gr.Blocks() as demo:
         gr.Markdown("# Lighting Manufacture Cutsheet GPT Tool")
-        api_key = gr.Textbox(label = "Input your ChatGPT4 API Key: ")
-        organization_key = gr.Textbox(label = "Input your ChatGPT4 API Organization Key: ", info = "(optional)")
-        # image = gr.Image()
         file_uploader = gr.UploadButton("Upload cutsheets", type="filepath", file_count="multiple")
         form = gr.Markdown()
-        file_uploader.upload(gptRead, [file_uploader, api_key, organization_key], form)
-    demo.launch(share=True)

 1. the name of the fixture
 2. the company that produces this fixture
+3. the description of this fixture. This is a 20-word description which summarize the size, function and the mounting method of the fixture and mention any necessary accesories. For example: 1" x 1" recessed downlight.
+4. the part number of this fixture. It is a series of specification codes connected with - , and you can get the info by reading the texts marked in a different color or reading the top bar. Include every specification code in a correct order in your answer.
+5. the input wattage of this fixture, short answer. Please answer the wattage according to the part number you found in question 3
 Please format your response in json format
 {
     "fixture_name": <fixture name>,
     "manufacture_name": <company name>,
+    "fixture_description": <description>,
     "mfr": <part number>,
     "input wattage": <numerical input wattage>
 }
 {
     "fixture_name": "SW24/1.5 Led Strips - Static White",
     "manufacture_name": "Q-Tran Inc.",
+    "fixture_description": "Surface mounted static white LED strip."
     "mfr": "SW24-1.5-DRY-30-BW-BW-WH-CL2-535",
     "input wattage": "1.5W"
 }"""
         if json_mode:
             data["response_format"] = {"type": "json_object"}
         response = requests.post(url, headers=headers, data=json.dumps(data)).json()
         print(response)
         return response["choices"][0]["message"]["content"].lstrip(), response
         return f"API_ERROR: {e}", None
 class GPT4V_Client:
+    def __init__(self, api_key, organization_key, model_name="gpt-4o", max_tokens=512):
         self.api_key = api_key
         self.organization_key = organization_key
         self.model_name = model_name
         self.max_tokens = max_tokens
         system_msg: Optional[str] = None,
         json_mode=False,
     ):
         details = [i["detail"] for i in images]
         img_strs = []
         for img_info in images:
             image = img_info["image"]
             jpeg_buffer = BytesIO()
             if isinstance(image, np.ndarray):
                 image = Image.fromarray(image)
             image = image.convert("RGB")
             image.save(jpeg_buffer, format="JPEG")
             jpeg_data = jpeg_buffer.getvalue()
             jpg_base64 = base64.b64encode(jpeg_data)
             jpg_base64_str = jpg_base64.decode("utf-8")
             img_strs.append(f"data:image/jpeg;base64,{jpg_base64_str}")
         messages = []
         if system_msg is not None:
             messages.append({"role": "system", "content": system_msg})
         messages += [
             {
                 "role": "user",
+                "content": [{"type": "text", "text": text}] + img_sub_msg,
             }
         ]
         return self.chat(messages, json_mode=json_mode)
def markdown_json_to_table(markdown_json_string, iteration, thumbnail_md):
    """Convert a GPT JSON reply into Markdown table row(s) led by a thumbnail cell.

    The reply may be bare JSON or JSON wrapped in a Markdown code fence
    (with or without a language tag, and with or without prose around it).

    Args:
        markdown_json_string: Raw reply text from the model.
        iteration: Zero-based row index; the header and separator rows are
            emitted only when this is 0.
        thumbnail_md: Markdown placed verbatim in the first column, e.g.
            ``![pdfpage](data:image/jpeg;base64,...)``.

    Returns:
        The header row, separator row and data row when ``iteration == 0``;
        only the data row otherwise.  An empty string when the reply is
        empty, is not valid JSON, or does not decode to a JSON object.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    import re

    if not markdown_json_string:
        return ""

    text = markdown_json_string.strip()

    # Pull the payload out of a ```lang ... ``` fence by pattern rather than
    # with str.strip(chars): strip() removes *character sets*, so trailing
    # prose after the fence used to be mangled and parsing failed.
    fenced = re.search(r"```[A-Za-z0-9_+-]*\s*(.*?)\s*```", text, re.DOTALL)
    if fenced:
        json_string = fenced.group(1)
    elif text.startswith("```"):
        # Opening fence without a closing one: drop the fence and its tag.
        json_string = re.sub(r"^```[A-Za-z0-9_+-]*", "", text).strip()
    else:
        json_string = text

    try:
        json_obj = json.loads(json_string)
    except ValueError:
        # json.JSONDecodeError is a ValueError; covers API error strings too.
        return ""

    # Only a JSON object has keys/values to lay out as columns; a list,
    # number or string reply cannot form a row.
    if not isinstance(json_obj, dict):
        return ""

    if iteration == 0:
        header = ["Thumbnail"] + [_escape_table_cell(k) for k in json_obj]
        header_row = "| " + " | ".join(header) + " |\n"
        sep_row = "|" + "|".join(["---"] * len(header)) + "|\n"
    else:
        header_row = ""
        sep_row = ""

    cells = [_escape_table_cell(v) for v in json_obj.values()]
    data_row = "| " + thumbnail_md + " | " + " | ".join(cells) + " |\n"
    return header_row + sep_row + data_row


def _escape_table_cell(value):
    """Render *value* as text that cannot break out of a Markdown table cell."""
    # Line breaks would end the row and a bare pipe would start a new column.
    single_line = " ".join(str(value).splitlines())
    return single_line.replace("|", "\\|")
 def gptRead(cutsheets, api_key, organization_key):
     """Build one Markdown table with a row per uploaded cutsheet.

     For each cutsheet, the first image returned by ``convert_from_path`` is
     sent to the model together with ``prompt``; a small JPEG thumbnail of
     that image is embedded in the first column of the resulting row.

     Args:
         cutsheets: Uploaded files; ``None`` or an empty list yields "".
         api_key: Key passed to ``GPT4V_Client``.
         organization_key: Organization key passed to ``GPT4V_Client``.

     Returns:
         The concatenated Markdown rows. Cutsheets whose reply produces no
         row are skipped without consuming the header slot.
     """
     client = GPT4V_Client(api_key=api_key, organization_key=organization_key)
     rows = []
     for cutsheet in cutsheets or []:
         # Accept both file-like uploads (with .name) and plain path strings.
         # NOTE(review): which one arrives depends on the Gradio version - confirm.
         pdf_path = getattr(cutsheet, "name", cutsheet)
         # Convert the first page of the PDF into an image
         source = convert_from_path(pdf_path)[0]

         # Work on a copy so the full-size image sent to the model is untouched;
         # convert to RGB (as the client does before its own JPEG encode)
         # because JPEG cannot store an alpha channel.
         thumbnail_img = source.convert("RGB")
         thumbnail_img.thumbnail((100, 100))

         # Encode the thumbnail to base64 for embedding in Markdown
         thumb_io = BytesIO()
         thumbnail_img.save(thumb_io, format="JPEG")
         base64_thumb = base64.b64encode(thumb_io.getvalue()).decode("utf-8")
         thumbnail_md = f"![pdfpage](data:image/jpeg;base64,{base64_thumb})"

         # Chat with GPT about the original (non-thumbnail) image
         response_text, _ = client.one_step_chat(prompt, source)

         # The row index is the number of rows emitted so far, not the number
         # of files seen: if an earlier cutsheet produced nothing, the next
         # successful one must still carry the table header.
         row = markdown_json_to_table(response_text, len(rows), thumbnail_md)
         if row:
             rows.append(row)
     return "".join(rows)
 if __name__ == "__main__":
     # Assemble the Gradio UI: credential inputs, title, upload button and the
     # Markdown area that receives the table returned by gptRead.
     with gr.Blocks() as demo:
         # Mask the API key while typing so the secret is not shown on screen.
         api_key = gr.Textbox(label="Input your ChatGPT4 API Key: ", type="password")
         organization_key = gr.Textbox(label="Input your ChatGPT4 API Organization Key: ", info="(optional)")
         gr.Markdown("# Lighting Manufacture Cutsheet GPT Tool")
         file_uploader = gr.UploadButton("Upload cutsheets", type="filepath", file_count="multiple")
         form = gr.Markdown()
         # When user uploads, call gptRead -> produce the final Markdown w/ table
         file_uploader.upload(fn=gptRead, inputs=[file_uploader, api_key, organization_key], outputs=form)
     demo.launch(share=True)