Spaces:
Sleeping
Sleeping
add quad boxes
Browse files
app.py
CHANGED
@@ -9,14 +9,15 @@ from modeling_florence2 import Florence2ForConditionalGeneration
|
|
9 |
import io
|
10 |
import matplotlib.pyplot as plt
|
11 |
import matplotlib.patches as patches
|
|
|
12 |
import numpy as np
|
13 |
import random
|
14 |
import json
|
15 |
-
|
|
|
16 |
with open("config.json", "r") as f:
|
17 |
config = json.load(f)
|
18 |
|
19 |
-
# Extract necessary variables from the config
|
20 |
d_model = config['text_config']['d_model']
|
21 |
num_layers = config['text_config']['encoder_layers']
|
22 |
attention_heads = config['text_config']['encoder_attention_heads']
|
@@ -29,12 +30,10 @@ no_repeat_ngram_size = config['text_config']['no_repeat_ngram_size']
|
|
29 |
patch_size = config['vision_config']['patch_size'][0]
|
30 |
temporal_embeddings = config['vision_config']['visual_temporal_embedding']['max_temporal_embeddings']
|
31 |
|
32 |
-
|
33 |
title = """# 🙋🏻♂️Welcome to Tonic's PLeIAs/📸📈✍🏻Florence-PDF"""
|
34 |
description = """
|
35 |
---
|
36 |
|
37 |
-
|
38 |
This application showcases the **PLeIAs/📸📈✍🏻Florence-PDF** model, a powerful AI system designed for both **text and image generation tasks**. The model is capable of handling complex tasks such as object detection, image captioning, OCR (Optical Character Recognition), and detailed region-based image analysis.
|
39 |
|
40 |
### **How to Use**:
|
@@ -119,7 +118,6 @@ def plot_bbox(image, data, use_quad_boxes=False):
|
|
119 |
fig, ax = plt.subplots()
|
120 |
ax.imshow(image)
|
121 |
|
122 |
-
# Handle both 'bboxes' and 'quad_boxes'
|
123 |
if use_quad_boxes:
|
124 |
for quad_box, label in zip(data.get('quad_boxes', []), data.get('labels', [])):
|
125 |
quad_box = np.array(quad_box).reshape(-1, 2)
|
@@ -156,19 +154,11 @@ def draw_ocr_bboxes(image, prediction):
|
|
156 |
def draw_bounding_boxes(image, quad_boxes, labels, color=(0, 255, 0), thickness=2):
    """
    Draws quadrilateral bounding boxes with their labels on the image.

    Args:
        image: The original image the boxes are drawn onto (numpy array;
            presumably an OpenCV BGR image — TODO confirm with caller).
        quad_boxes: List of quadrilateral boxes. Each box is a flat sequence
            of four (x, y) corner points: [x1, y1, x2, y2, x3, y3, x4, y4].
        labels: List of label strings, one per bounding box.
        color: Line/text color tuple. Default is green (in BGR order if the
            image is BGR).
        thickness: Thickness of the polygon lines and label text. Default 2.

    Returns:
        The image with all quadrilaterals and labels drawn on it.
    """
    for i, quad in enumerate(quad_boxes):
        # cv2.polylines expects integer points shaped (N, 1, 2); the flat
        # 8-value list reshapes into 4 corner rows.
        points = np.array(quad, dtype=np.int32).reshape((-1, 1, 2))
        image = cv2.polylines(image, [points], isClosed=True, color=color, thickness=thickness)

        # Place the label slightly (10 px) above the first corner of the box.
        label_pos = (int(quad[0]), int(quad[1]) - 10)
        cv2.putText(image, labels[i], label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness)

    return image
|
|
|
9 |
import io
|
10 |
import matplotlib.pyplot as plt
|
11 |
import matplotlib.patches as patches
|
12 |
+
from matplotlib.patches import Polygon
|
13 |
import numpy as np
|
14 |
import random
|
15 |
import json
|
16 |
+
|
17 |
+
|
18 |
with open("config.json", "r") as f:
|
19 |
config = json.load(f)
|
20 |
|
|
|
21 |
d_model = config['text_config']['d_model']
|
22 |
num_layers = config['text_config']['encoder_layers']
|
23 |
attention_heads = config['text_config']['encoder_attention_heads']
|
|
|
30 |
patch_size = config['vision_config']['patch_size'][0]
|
31 |
temporal_embeddings = config['vision_config']['visual_temporal_embedding']['max_temporal_embeddings']
|
32 |
|
|
|
33 |
title = """# 🙋🏻♂️Welcome to Tonic's PLeIAs/📸📈✍🏻Florence-PDF"""
|
34 |
description = """
|
35 |
---
|
36 |
|
|
|
37 |
This application showcases the **PLeIAs/📸📈✍🏻Florence-PDF** model, a powerful AI system designed for both **text and image generation tasks**. The model is capable of handling complex tasks such as object detection, image captioning, OCR (Optical Character Recognition), and detailed region-based image analysis.
|
38 |
|
39 |
### **How to Use**:
|
|
|
118 |
fig, ax = plt.subplots()
|
119 |
ax.imshow(image)
|
120 |
|
|
|
121 |
if use_quad_boxes:
|
122 |
for quad_box, label in zip(data.get('quad_boxes', []), data.get('labels', [])):
|
123 |
quad_box = np.array(quad_box).reshape(-1, 2)
|
|
|
154 |
def draw_bounding_boxes(image, quad_boxes, labels, color=(0, 255, 0), thickness=2):
    """
    Draws quadrilateral bounding boxes with their labels on the image.

    Args:
        image: Image to draw onto (numpy array; presumably an OpenCV BGR
            image — TODO confirm with caller).
        quad_boxes: List of quadrilateral boxes, each a flat sequence of
            four (x, y) corner points: [x1, y1, x2, y2, x3, y3, x4, y4].
        labels: List of label strings, one per bounding box.
        color: Line/text color tuple. Default is green (BGR order if the
            image is BGR).
        thickness: Line and text thickness. Default 2.

    Returns:
        The image with all quadrilaterals and labels drawn on it.
    """
    for i, quad in enumerate(quad_boxes):
        # Reshape the flat 8-value coordinate list into the (N, 1, 2)
        # integer point array that cv2.polylines requires.
        points = np.array(quad, dtype=np.int32).reshape((-1, 1, 2))
        image = cv2.polylines(image, [points], isClosed=True, color=color, thickness=thickness)

        # Anchor the label 10 px above the box's first corner.
        label_pos = (int(quad[0]), int(quad[1]) - 10)
        cv2.putText(image, labels[i], label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness)

    return image
|