Spaces:
Runtime error
Runtime error
File size: 10,236 Bytes
01bb3bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 |
import numpy as np
# def detect_para(bbox_dict):
# alpha1 = 0.2
# alpha2 = 0.7
# beta1 = 0.4
# data = bbox_dict
# word_crops = list(data.keys())
# for i in word_crops:
# data[i]["x1"], data[i]["y1"], data[i]["x2"], data[i]["y2"] = data[i]["bbox"]
# data[i]["xc"] = (data[i]["x1"] + data[i]["x2"]) / 2
# data[i]["yc"] = (data[i]["y1"] + data[i]["y2"]) / 2
# data[i]["w"] = data[i]["x2"] - data[i]["x1"]
# data[i]["h"] = data[i]["y2"] - data[i]["y1"]
# patch_info = {}
# while word_crops:
# img_name = word_crops[0].split("_")[0]
# word_crop_collection = [
# word_crop for word_crop in word_crops if word_crop.startswith(img_name)
# ]
# centroids = {}
# lines = []
# img_word_crops = word_crop_collection.copy()
# para = []
# while img_word_crops:
# clusters = []
# para_words_group = [
# img_word_crops[0],
# ]
# added = [
# img_word_crops[0],
# ]
# img_word_crops.remove(img_word_crops[0])
# ## determining the paragraph
# while added:
# word_crop = added.pop()
# for i in range(len(img_word_crops)):
# word_crop_ = img_word_crops[i]
# if (
# abs(data[word_crop_]["yc"] - data[word_crop]["yc"])
# < data[word_crop]["h"] * alpha1
# ):
# if data[word_crop]["xc"] > data[word_crop_]["xc"]:
# if (data[word_crop]["x1"] - data[word_crop_]["x2"]) < data[
# word_crop
# ]["h"] * alpha2:
# para_words_group.append(word_crop_)
# added.append(word_crop_)
# else:
# if (data[word_crop_]["x1"] - data[word_crop]["x2"]) < data[
# word_crop
# ]["h"] * alpha2:
# para_words_group.append(word_crop_)
# added.append(word_crop_)
# else:
# if data[word_crop]["yc"] > data[word_crop_]["yc"]:
# if (data[word_crop]["y1"] - data[word_crop_]["y2"]) < data[
# word_crop
# ]["h"] * beta1 and (
# (
# (data[word_crop_]["x1"] < data[word_crop]["x2"])
# and (data[word_crop_]["x1"] > data[word_crop]["x1"])
# )
# or (
# (data[word_crop_]["x2"] < data[word_crop]["x2"])
# and (data[word_crop_]["x2"] > data[word_crop]["x1"])
# )
# or (
# (data[word_crop]["x1"] > data[word_crop_]["x1"])
# and (data[word_crop]["x2"] < data[word_crop_]["x2"])
# )
# ):
# para_words_group.append(word_crop_)
# added.append(word_crop_)
# else:
# if (data[word_crop_]["y1"] - data[word_crop]["y2"]) < data[
# word_crop
# ]["h"] * beta1 and (
# (
# (data[word_crop_]["x1"] < data[word_crop]["x2"])
# and (data[word_crop_]["x1"] > data[word_crop]["x1"])
# )
# or (
# (data[word_crop_]["x2"] < data[word_crop]["x2"])
# and (data[word_crop_]["x2"] > data[word_crop]["x1"])
# )
# or (
# (data[word_crop]["x1"] > data[word_crop_]["x1"])
# and (data[word_crop]["x2"] < data[word_crop_]["x2"])
# )
# ):
# para_words_group.append(word_crop_)
# added.append(word_crop_)
# img_word_crops = [p for p in img_word_crops if p not in para_words_group]
# ## processing for the line
# while para_words_group:
# line_words_group = [
# para_words_group[0],
# ]
# added = [
# para_words_group[0],
# ]
# para_words_group.remove(para_words_group[0])
# ## determining the line
# while added:
# word_crop = added.pop()
# for i in range(len(para_words_group)):
# word_crop_ = para_words_group[i]
# if (
# abs(data[word_crop_]["yc"] - data[word_crop]["yc"])
# < data[word_crop]["h"] * alpha1
# ):
# if data[word_crop]["xc"] > data[word_crop_]["xc"]:
# if (data[word_crop]["x1"] - data[word_crop_]["x2"]) < data[
# word_crop
# ]["h"] * alpha2:
# line_words_group.append(word_crop_)
# added.append(word_crop_)
# else:
# if (data[word_crop_]["x1"] - data[word_crop]["x2"]) < data[
# word_crop
# ]["h"] * alpha2:
# line_words_group.append(word_crop_)
# added.append(word_crop_)
# para_words_group = [
# p for p in para_words_group if p not in line_words_group
# ]
# xc = [data[word_crop]["xc"] for word_crop in line_words_group]
# idxs = np.argsort(xc)
# patch_cluster_ = [line_words_group[i] for i in idxs]
# line_words_group = patch_cluster_
# x1 = [data[word_crop]["x1"] for word_crop in line_words_group]
# x2 = [data[word_crop]["x2"] for word_crop in line_words_group]
# y1 = [data[word_crop]["y1"] for word_crop in line_words_group]
# y2 = [data[word_crop]["y2"] for word_crop in line_words_group]
# txt_line = [data[word_crop]["txt"] for word_crop in line_words_group]
# txt = " ".join(txt_line)
# x = [x1[0]]
# y1_ = [y1[0]]
# y2_ = [y2[0]]
# l = [len(txt_l) for txt_l in txt_line]
# for i in range(1, len(x1)):
# x.append((x1[i] + x2[i - 1]) / 2)
# y1_.append((y1[i] + y1[i - 1]) / 2)
# y2_.append((y2[i] + y2[i - 1]) / 2)
# x.append(x2[-1])
# y1_.append(y1[-1])
# y2_.append(y2[-1])
# line_info = {
# "x": x,
# "y1": y1_,
# "y2": y2_,
# "l": l,
# "txt": txt,
# "word_crops": line_words_group,
# }
# clusters.append(line_info)
# y_ = [clusters[i]["y1"][0] for i in range(len(clusters))]
# idxs = np.argsort(y_)
# clusters_ = [clusters[i] for i in idxs]
# txt = [clusters[i]["txt"] for i in idxs]
# l = [len(t) for t in txt]
# txt = " ".join(txt)
# para_info = {"lines": clusters_, "l": l, "txt": txt}
# para.append(para_info)
# for word_crop in word_crop_collection:
# word_crops.remove(word_crop)
# return "\n".join([para[i]["txt"] for i in range(len(para))])
def detect_para(recognized_texts, *, overlap_threshold=0.4):
    """
    Group recognized words into text lines using vertical bounding-box overlap.

    The first entry that has not been assigned yet seeds a new line. Every
    remaining entry whose box overlaps the seed's box vertically by more than
    ``overlap_threshold`` joins that line; the rest are kept for later lines.
    Candidates are compared against the seed only, never against other words
    that already joined the line.

    Args:
        recognized_texts (dict): Mapping whose values are dicts with a
            ``'txt'`` entry (the recognized text) and a ``'bbox'`` entry
            (``[x1, y1, x2, y2]``). The mapping's keys are not used for
            grouping (presumably word-crop/image identifiers -- confirm
            against the caller).
        overlap_threshold (float): Minimum vertical overlap, as a fraction
            of the shorter of the two boxes, that a word must strictly
            exceed to join the seed's line. Defaults to 0.4.

    Returns:
        list: One inner list of texts per detected line, each sorted by the
        word's ``x1``. Lines appear in the order their seed entries occur in
        ``recognized_texts``; they are not re-sorted top-to-bottom.

    Raises:
        KeyError: If an entry has no ``'txt'`` or ``'bbox'`` key.
    """

    def vertical_overlap_ratio(box_a, box_b):
        """Return the shared vertical extent divided by the shorter box height."""
        _, top_a, _, bottom_a = box_a
        _, top_b, _, bottom_b = box_b
        shared = max(0, min(bottom_a, bottom_b) - max(top_a, top_b))
        shorter = min(bottom_a - top_a, bottom_b - top_b)
        # A degenerate (zero or negative height) box never counts as overlapping.
        return shared / shorter if shorter > 0 else 0

    # Only the text and the box are needed; iteration order of the mapping
    # determines which word seeds each line.
    pending = [(entry["txt"], entry["bbox"]) for entry in recognized_texts.values()]

    lines = []
    while pending:
        seed, rest = pending[0], pending[1:]
        seed_box = seed[1]
        line = [seed]
        leftover = []
        for word in rest:
            if vertical_overlap_ratio(seed_box, word[1]) > overlap_threshold:
                line.append(word)
            else:
                leftover.append(word)
        lines.append(line)
        pending = leftover

    # Order the words of each line left-to-right by x1. sorted() is stable,
    # so words sharing the same x1 keep their encounter order.
    return [
        [text for text, _ in sorted(line, key=lambda word: word[1][0])]
        for line in lines
    ]
|