File size: 3,085 Bytes
240e0a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from magic_pdf.libs.boxbase import _is_in_or_part_overlap, _is_in, _is_part_overlap
from magic_pdf.libs.drop_reason import DropReason

def _remove_overlap_between_bbox(bbox1, bbox2):
    """Shrink two partially overlapping boxes so they no longer overlap.

    Both boxes are unpacked as ``(x0, y0, x1, y1)``. When
    ``_is_part_overlap`` reports no partial overlap, the inputs are returned
    untouched. Otherwise the boxes are cut apart along the axis where the
    overlap is narrower, leaving a 0.5-unit gap centred on the (floored)
    midpoint of the overlapping interval.

    Returns:
        A ``(bbox1, bbox2, drop_reason)`` triple. ``drop_reason`` is ``None``
        on success (the boxes are then fresh lists when they were adjusted),
        or ``DropReason.NEGATIVE_BBOX_AREA`` when the cut would leave either
        box with non-positive width or height; in that case the original,
        unmodified boxes are returned.
    """
    if not _is_part_overlap(bbox1, bbox2):
        return bbox1, bbox2, None

    ax0, ay0, ax1, ay1 = bbox1
    bx0, by0, bx1, by1 = bbox2

    overlap_w = min(bx1, ax1) - max(bx0, ax0)
    overlap_h = min(by1, ay1) - max(by0, ay0)

    if overlap_h > overlap_w:
        # The overlap is taller than it is wide: separate the boxes
        # horizontally.
        if bx1 >= ax1:
            cut = (bx0 + ax1) // 2
            ax1 = min(cut - 0.25, ax1)
            bx0 = max(cut + 0.25, bx0)
        else:
            cut = (ax0 + bx1) // 2
            ax0 = max(cut + 0.25, ax0)
            bx1 = min(cut - 0.25, bx1)
    elif by1 >= ay1:
        # Otherwise separate vertically.
        cut = (by0 + ay1) // 2
        by0 = max(cut + 0.25, by0)
        ay1 = min(ay1, cut - 0.25)
    else:
        cut = (ay0 + by1) // 2
        by1 = min(by1, cut - 0.25)
        ay0 = max(cut + 0.25, ay0)

    if not (ax1 > ax0 and ay1 > ay0 and by1 > by0 and bx1 > bx0):
        # The cut collapsed one of the boxes; hand back the originals and
        # let the caller decide which one to drop.
        return bbox1, bbox2, DropReason.NEGATIVE_BBOX_AREA

    return [ax0, ay0, ax1, ay1], [bx0, by0, bx1, by1], None


def _remove_overlap_between_bboxes(arr):
    """Resolve overlaps among a list of scored boxes.

    Args:
        arr: list of dicts, each with a ``"bbox"`` entry and a ``"score"``
            entry. The ``"bbox"`` values of these dicts may be rebound in
            place while overlaps are trimmed.

    Returns:
        A ``(res, drop_reasons)`` pair. ``res`` has the same length as
        ``arr``; position ``k`` holds ``arr[k]`` if that entry survived and
        ``None`` if it was dropped. ``drop_reasons`` collects the reason
        returned by ``_remove_overlap_between_bbox`` each time a pair could
        not be separated.
    """
    drop_reasons = []
    n = len(arr)
    keeps = [True] * n
    res = [None] * n

    # Pass 1: discard every entry for which `_is_in` reports containment in
    # some other entry (presumably "arr[i] lies inside arr[j]" -- confirm
    # against magic_pdf.libs.boxbase).
    for i in range(n):
        if any(
            _is_in(arr[i]["bbox"], arr[j]["bbox"])
            for j in range(n)
            if j != i
        ):
            keeps[i] = False

    # Pass 2: trim each surviving entry against everything accepted so far.
    for idx, v in enumerate(arr):
        if not keeps[idx]:
            continue
        for i in range(n):
            if res[i] is None:
                continue

            bbox1, bbox2, drop_reason = _remove_overlap_between_bbox(
                v["bbox"], res[i]["bbox"]
            )
            if drop_reason is None:
                v["bbox"] = bbox1
                res[i]["bbox"] = bbox2
                continue

            # The pair cannot be separated: keep the higher-scoring entry.
            if v["score"] > res[i]["score"]:
                keeps[i] = False
                res[i] = None
            else:
                keeps[idx] = False
            # Record the actual reason (the previous code appended the list
            # to itself, producing a self-referential list).
            drop_reasons.append(drop_reason)
        if keeps[idx]:
            res[idx] = v
    return res, drop_reasons


def remove_overlap_between_bbox_for_span(spans):
    """Remove overlaps between span boxes and return the surviving spans.

    Each span is a dict with a ``"bbox"`` entry and an optional ``"score"``
    entry (0.1 is used when the score is absent). Surviving spans have their
    ``"bbox"`` overwritten in place with the resolved box.

    Returns:
        ``(kept_spans, drop_reasons)`` where ``kept_spans`` preserves the
        input order and ``drop_reasons`` is passed through from
        ``_remove_overlap_between_bboxes``.
    """
    candidates = []
    for span in spans:
        candidates.append(
            {"bbox": span["bbox"], "score": span.get("score", 0.1)}
        )

    resolved, drop_reasons = _remove_overlap_between_bboxes(candidates)

    kept = []
    for idx, entry in enumerate(resolved):
        if entry is not None:
            spans[idx]["bbox"] = entry["bbox"]
            kept.append(spans[idx])
    return kept, drop_reasons


def remove_overlap_between_bbox_for_block(all_bboxes):
    """Remove overlaps between block boxes and return the surviving blocks.

    Each element of ``all_bboxes`` is a mutable sequence whose first four
    items are the box coordinates and whose last item is used as the score.
    Surviving elements have their first four items overwritten in place with
    the resolved coordinates.

    Returns:
        ``(kept_blocks, drop_reasons)`` where ``kept_blocks`` preserves the
        input order and ``drop_reasons`` is passed through from
        ``_remove_overlap_between_bboxes``.
    """
    candidates = []
    for block in all_bboxes:
        candidates.append({"bbox": block[:4], "score": block[-1]})

    resolved, drop_reasons = _remove_overlap_between_bboxes(candidates)

    kept = []
    for idx, entry in enumerate(resolved):
        if entry is not None:
            all_bboxes[idx][:4] = entry["bbox"]
            kept.append(all_bboxes[idx])
    return kept, drop_reasons