import numpy as np


class OrderPolygons:
    """Derive a reading order for text lines given as four-value boxes.

    Every line is an indexable of four numbers.  The predicates below pair
    index 0 with index 1 (horizontal extent) and index 2 with index 3
    (vertical extent), and a box only overlaps itself when
    ``box[0] > box[1]`` and ``box[2] > box[3]``.
    NOTE(review): this implies the layout ``(x_max, x_min, y_max, y_min)``
    with y growing downwards -- confirm against the caller.
    """

    def __init__(self, text_direction='lr'):
        """Store the horizontal reading direction.

        Args:
            text_direction: ``'rl'`` makes :meth:`reading_order` use the
                negation of :meth:`_left_of` as the horizontal order; any
                other value (default ``'lr'``) uses :meth:`_left_of` as is.
        """
        self.text_direction = text_direction

    def _y_overlaps(self, u, v):
        """Return True when the index-2/3 (vertical) extents of u and v intersect."""
        return u[3] < v[2] and u[2] > v[3]

    def _x_overlaps(self, u, v):
        """Return True when the index-0/1 (horizontal) extents of u and v intersect."""
        return u[1] < v[0] and u[0] > v[1]

    def _above(self, u, v):
        """Return True when u[3] is strictly smaller than v[3]."""
        return u[3] < v[3]

    def _left_of(self, u, v):
        """Return True when u[0] is strictly smaller than v[1]."""
        return u[0] < v[1]

    @staticmethod
    def _same_line(a, b):
        """Return True when a and b are the same object or compare equal.

        ``a == b`` yields a plain bool for lists/tuples and an element-wise
        array for numpy inputs; ``np.all`` reduces both to a single truth
        value, so numpy rows no longer raise "truth value is ambiguous".
        """
        return a is b or bool(np.all(a == b))

    def _separates(self, w, u, v):
        """Return 1 when line w lies between u and v, else 0.

        w never separates a pair it belongs to.  It is also rejected when it
        ends before both lines start or starts after both lines end on the
        index-2/3 axis.  Otherwise it separates when its index-0/1 extent
        reaches across the gap from u to v.
        """
        if self._same_line(w, u) or self._same_line(w, v):
            return 0
        if w[2] < min(u[3], v[3]):
            return 0
        if w[3] > max(u[2], v[2]):
            return 0
        if w[1] < u[0] and w[0] > v[1]:
            return 1
        return 0

    def reading_order(self, lines):
        """Compute the pairwise "comes before" relation between lines.

        Args:
            lines: Sequence of four-value boxes (lists, tuples or numpy rows).

        Returns:
            An ``(n, n)`` uint8 array where ``order[i, j] == 1`` means line i
            comes before line j.  The relation is partial and is not
            guaranteed to be free of cycles.
        """
        count = len(lines)
        order = np.zeros((count, count), 'B')

        if self.text_direction == 'rl':
            def horizontal_order(u, v):
                return not self._left_of(u, v)
        else:
            horizontal_order = self._left_of

        for i, u in enumerate(lines):
            for j, v in enumerate(lines):
                if self._x_overlaps(u, v):
                    # Horizontally overlapping lines are ordered by _above.
                    if self._above(u, v):
                        order[i, j] = 1
                elif not any(self._separates(w, u, v) for w in lines):
                    # Nothing sits between the two lines: plain horizontal order.
                    if horizontal_order(u, v):
                        order[i, j] = 1
                elif self._y_overlaps(u, v) and horizontal_order(u, v):
                    # A separator exists, but lines that share a vertical
                    # band are still ordered horizontally.
                    order[i, j] = 1

        return order

    def topsort(self, order):
        """Return indices sorted consistently with a "comes before" matrix.

        Args:
            order: Square binary array; ``order[i, j]`` non-zero means i < j.

        Returns:
            List of ints in which every index appears after the indices that
            precede it.  If the relation contains a cycle, an index that has
            already been entered is not entered again, so the call still
            terminates but the result is only a best-effort order.

        The walk is an iterative post-order depth-first search with an
        explicit stack, so long dependency chains cannot exhaust Python's
        recursion limit.
        """
        order = np.asarray(order)
        n = len(order)
        visited = [False] * n
        result = []

        for root in range(n):
            if visited[root]:
                continue
            visited[root] = True
            # Each frame holds a node and an iterator over its predecessors,
            # i.e. the rows with a non-zero entry in the node's column.
            stack = [(root, iter(np.flatnonzero(order[:, root])))]
            while stack:
                node, pending = stack[-1]
                for pred in pending:
                    pred = int(pred)
                    if not visited[pred]:
                        visited[pred] = True
                        stack.append(
                            (pred, iter(np.flatnonzero(order[:, pred])))
                        )
                        break
                else:
                    # All predecessors are emitted; now emit the node itself.
                    stack.pop()
                    result.append(node)

        return result

    def order(self, lines):
        """Return the indices of lines arranged in reading order."""
        precedence = self.reading_order(lines)
        return self.topsort(precedence)