Tonic committed on
Commit
19975ad
·
unverified ·
1 Parent(s): 1d6e1d0

add quad boxes

Browse files
Files changed (1) hide show
  1. app.py +4 -14
app.py CHANGED
@@ -9,14 +9,15 @@ from modeling_florence2 import Florence2ForConditionalGeneration
9
  import io
10
  import matplotlib.pyplot as plt
11
  import matplotlib.patches as patches
 
12
  import numpy as np
13
  import random
14
  import json
15
- # Load and parse the config.json file
 
16
  with open("config.json", "r") as f:
17
  config = json.load(f)
18
 
19
- # Extract necessary variables from the config
20
  d_model = config['text_config']['d_model']
21
  num_layers = config['text_config']['encoder_layers']
22
  attention_heads = config['text_config']['encoder_attention_heads']
@@ -29,12 +30,10 @@ no_repeat_ngram_size = config['text_config']['no_repeat_ngram_size']
29
  patch_size = config['vision_config']['patch_size'][0]
30
  temporal_embeddings = config['vision_config']['visual_temporal_embedding']['max_temporal_embeddings']
31
 
32
-
33
  title = """# 🙋🏻‍♂️Welcome to Tonic's PLeIAs/📸📈✍🏻Florence-PDF"""
34
  description = """
35
  ---
36
 
37
-
38
  This application showcases the **PLeIAs/📸📈✍🏻Florence-PDF** model, a powerful AI system designed for both **text and image generation tasks**. The model is capable of handling complex tasks such as object detection, image captioning, OCR (Optical Character Recognition), and detailed region-based image analysis.
39
 
40
  ### **How to Use**:
@@ -119,7 +118,6 @@ def plot_bbox(image, data, use_quad_boxes=False):
119
  fig, ax = plt.subplots()
120
  ax.imshow(image)
121
 
122
- # Handle both 'bboxes' and 'quad_boxes'
123
  if use_quad_boxes:
124
  for quad_box, label in zip(data.get('quad_boxes', []), data.get('labels', [])):
125
  quad_box = np.array(quad_box).reshape(-1, 2)
@@ -156,19 +154,11 @@ def draw_ocr_bboxes(image, prediction):
156
  def draw_bounding_boxes(image, quad_boxes, labels, color=(0, 255, 0), thickness=2):
157
  """
158
  Draws quadrilateral bounding boxes on the image.
159
-
160
- Args:
161
- image: The original image where the bounding boxes will be drawn.
162
- quad_boxes: List of quadrilateral bounding box points. Each bounding box contains four points.
163
- labels: List of labels corresponding to each bounding box.
164
- color: Color of the bounding box. Default is green.
165
- thickness: Thickness of the bounding box lines. Default is 2.
166
  """
167
  for i, quad in enumerate(quad_boxes):
168
  points = np.array(quad, dtype=np.int32).reshape((-1, 1, 2)) # Reshape the quad points for drawing
169
  image = cv2.polylines(image, [points], isClosed=True, color=color, thickness=thickness)
170
- # Add label text near the top-left point of the bounding box
171
- label_pos = (int(quad[0]), int(quad[1]) - 10) # Positioning label slightly above the bounding box
172
  cv2.putText(image, labels[i], label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness)
173
 
174
  return image
 
9
  import io
10
  import matplotlib.pyplot as plt
11
  import matplotlib.patches as patches
12
+ from matplotlib.patches import Polygon
13
  import numpy as np
14
  import random
15
  import json
16
+
17
+
18
  with open("config.json", "r") as f:
19
  config = json.load(f)
20
 
 
21
  d_model = config['text_config']['d_model']
22
  num_layers = config['text_config']['encoder_layers']
23
  attention_heads = config['text_config']['encoder_attention_heads']
 
30
  patch_size = config['vision_config']['patch_size'][0]
31
  temporal_embeddings = config['vision_config']['visual_temporal_embedding']['max_temporal_embeddings']
32
 
 
33
  title = """# 🙋🏻‍♂️Welcome to Tonic's PLeIAs/📸📈✍🏻Florence-PDF"""
34
  description = """
35
  ---
36
 
 
37
  This application showcases the **PLeIAs/📸📈✍🏻Florence-PDF** model, a powerful AI system designed for both **text and image generation tasks**. The model is capable of handling complex tasks such as object detection, image captioning, OCR (Optical Character Recognition), and detailed region-based image analysis.
38
 
39
  ### **How to Use**:
 
118
  fig, ax = plt.subplots()
119
  ax.imshow(image)
120
 
 
121
  if use_quad_boxes:
122
  for quad_box, label in zip(data.get('quad_boxes', []), data.get('labels', [])):
123
  quad_box = np.array(quad_box).reshape(-1, 2)
 
154
  def draw_bounding_boxes(image, quad_boxes, labels, color=(0, 255, 0), thickness=2):
155
  """
156
  Draws quadrilateral bounding boxes on the image.
 
 
 
 
 
 
 
157
  """
158
  for i, quad in enumerate(quad_boxes):
159
  points = np.array(quad, dtype=np.int32).reshape((-1, 1, 2)) # Reshape the quad points for drawing
160
  image = cv2.polylines(image, [points], isClosed=True, color=color, thickness=thickness)
161
+ label_pos = (int(quad[0]), int(quad[1]) - 10)
 
162
  cv2.putText(image, labels[i], label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness)
163
 
164
  return image