Spaces:
Sleeping
Sleeping
add quad boxes
Browse files
app.py
CHANGED
@@ -9,14 +9,15 @@ from modeling_florence2 import Florence2ForConditionalGeneration
|
|
9 |
import io
|
10 |
import matplotlib.pyplot as plt
|
11 |
import matplotlib.patches as patches
|
|
|
12 |
import numpy as np
|
13 |
import random
|
14 |
import json
|
15 |
-
|
|
|
16 |
with open("config.json", "r") as f:
|
17 |
config = json.load(f)
|
18 |
|
19 |
-
# Extract necessary variables from the config
|
20 |
d_model = config['text_config']['d_model']
|
21 |
num_layers = config['text_config']['encoder_layers']
|
22 |
attention_heads = config['text_config']['encoder_attention_heads']
|
@@ -29,12 +30,10 @@ no_repeat_ngram_size = config['text_config']['no_repeat_ngram_size']
|
|
29 |
patch_size = config['vision_config']['patch_size'][0]
|
30 |
temporal_embeddings = config['vision_config']['visual_temporal_embedding']['max_temporal_embeddings']
|
31 |
|
32 |
-
|
33 |
title = """# 🙋🏻♂️Welcome to Tonic's PLeIAs/📸📈✍🏻Florence-PDF"""
|
34 |
description = """
|
35 |
---
|
36 |
|
37 |
-
|
38 |
This application showcases the **PLeIAs/📸📈✍🏻Florence-PDF** model, a powerful AI system designed for both **text and image generation tasks**. The model is capable of handling complex tasks such as object detection, image captioning, OCR (Optical Character Recognition), and detailed region-based image analysis.
|
39 |
|
40 |
### **How to Use**:
|
@@ -119,7 +118,6 @@ def plot_bbox(image, data, use_quad_boxes=False):
|
|
119 |
fig, ax = plt.subplots()
|
120 |
ax.imshow(image)
|
121 |
|
122 |
-
# Handle both 'bboxes' and 'quad_boxes'
|
123 |
if use_quad_boxes:
|
124 |
for quad_box, label in zip(data.get('quad_boxes', []), data.get('labels', [])):
|
125 |
quad_box = np.array(quad_box).reshape(-1, 2)
|
@@ -156,19 +154,11 @@ def draw_ocr_bboxes(image, prediction):
|
|
156 |
def draw_bounding_boxes(image, quad_boxes, labels, color=(0, 255, 0), thickness=2):
    """
    Draws quadrilateral bounding boxes with their labels on the image.

    Args:
        image: The original image the boxes are drawn onto (numpy array;
            presumably an OpenCV BGR image — TODO confirm with caller).
        quad_boxes: List of quadrilateral boxes. Each box is a flat sequence
            of four (x, y) corner points: [x1, y1, x2, y2, x3, y3, x4, y4].
        labels: List of label strings, one per bounding box.
        color: Line/text color tuple. Default is green (in BGR order if the
            image is BGR).
        thickness: Thickness of the polygon lines and label text. Default 2.

    Returns:
        The image with all quadrilaterals and labels drawn on it.
    """
    for i, quad in enumerate(quad_boxes):
        # cv2.polylines expects integer points shaped (N, 1, 2); the flat
        # 8-value list reshapes into 4 corner rows.
        points = np.array(quad, dtype=np.int32).reshape((-1, 1, 2))
        image = cv2.polylines(image, [points], isClosed=True, color=color, thickness=thickness)

        # Place the label slightly (10 px) above the first corner of the box.
        label_pos = (int(quad[0]), int(quad[1]) - 10)
        cv2.putText(image, labels[i], label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness)

    return image
|
|
|
9 |
import io
|
10 |
import matplotlib.pyplot as plt
|
11 |
import matplotlib.patches as patches
|
12 |
+
from matplotlib.patches import Polygon
|
13 |
import numpy as np
|
14 |
import random
|
15 |
import json
|
16 |
+
|
17 |
+
|
18 |
with open("config.json", "r") as f:
|
19 |
config = json.load(f)
|
20 |
|
|
|
21 |
d_model = config['text_config']['d_model']
|
22 |
num_layers = config['text_config']['encoder_layers']
|
23 |
attention_heads = config['text_config']['encoder_attention_heads']
|
|
|
30 |
patch_size = config['vision_config']['patch_size'][0]
|
31 |
temporal_embeddings = config['vision_config']['visual_temporal_embedding']['max_temporal_embeddings']
|
32 |
|
|
|
33 |
title = """# 🙋🏻♂️Welcome to Tonic's PLeIAs/📸📈✍🏻Florence-PDF"""
|
34 |
description = """
|
35 |
---
|
36 |
|
|
|
37 |
This application showcases the **PLeIAs/📸📈✍🏻Florence-PDF** model, a powerful AI system designed for both **text and image generation tasks**. The model is capable of handling complex tasks such as object detection, image captioning, OCR (Optical Character Recognition), and detailed region-based image analysis.
|
38 |
|
39 |
### **How to Use**:
|
|
|
118 |
fig, ax = plt.subplots()
|
119 |
ax.imshow(image)
|
120 |
|
|
|
121 |
if use_quad_boxes:
|
122 |
for quad_box, label in zip(data.get('quad_boxes', []), data.get('labels', [])):
|
123 |
quad_box = np.array(quad_box).reshape(-1, 2)
|
|
|
154 |
def draw_bounding_boxes(image, quad_boxes, labels, color=(0, 255, 0), thickness=2):
    """
    Draws quadrilateral bounding boxes with their labels on the image.

    Args:
        image: Image to draw onto (numpy array; presumably an OpenCV BGR
            image — TODO confirm with caller).
        quad_boxes: List of quadrilateral boxes, each a flat sequence of
            four (x, y) corner points: [x1, y1, x2, y2, x3, y3, x4, y4].
        labels: List of label strings, one per bounding box.
        color: Line/text color tuple. Default is green (BGR order if the
            image is BGR).
        thickness: Line and text thickness. Default 2.

    Returns:
        The image with all quadrilaterals and labels drawn on it.
    """
    for i, quad in enumerate(quad_boxes):
        # Reshape the flat 8-value coordinate list into the (N, 1, 2)
        # integer point array that cv2.polylines requires.
        points = np.array(quad, dtype=np.int32).reshape((-1, 1, 2))
        image = cv2.polylines(image, [points], isClosed=True, color=color, thickness=thickness)

        # Anchor the label 10 px above the box's first corner.
        label_pos = (int(quad[0]), int(quad[1]) - 10)
        cv2.putText(image, labels[i], label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness)

    return image
|