|
import re |
|
|
|
def layout_rm_equation(layout_res):
    """Drop every detection whose ``category_id`` equals 10, in place.

    The list stored under ``layout_res['layout_dets']`` is filtered without
    being replaced, so any other reference to that list sees the removal.

    Args:
        layout_res: Mapping with a ``'layout_dets'`` list; each entry must
            provide a ``'category_id'`` key.
            NOTE(review): going by the function name, id 10 presumably
            marks equation regions -- confirm against the category table.

    Returns:
        The same ``layout_res`` object that was passed in.
    """
    detections = layout_res['layout_dets']
    # Slice assignment rewrites the contents while keeping the list object.
    detections[:] = [det for det in detections if det['category_id'] != 10]
    return layout_res
|
|
|
|
|
def get_croped_image(image_pil, bbox):
    """Return the region of ``image_pil`` delimited by ``bbox``.

    Args:
        image_pil: Object exposing ``crop(box)``.
            NOTE(review): presumably a PIL image, judging by the name.
        bbox: Iterable of exactly four values, in the order
            ``(x_min, y_min, x_max, y_max)``.

    Returns:
        Whatever ``image_pil.crop`` returns for that box.
    """
    # Unpacking checks that there are four coordinates and normalises any
    # iterable into the tuple handed to ``crop``.
    left, top, right, bottom = bbox
    return image_pil.crop((left, top, right, bottom))
|
|
|
|
|
def latex_rm_whitespace(s: str) -> str:
    """Remove unnecessary whitespace from LaTeX code.

    Two steps are applied:

    1. Inside ``\\operatorname``, ``\\mathrm``, ``\\text`` and ``\\mathbf``
       groups written as ``\\cmd {...}``, every space character is removed.
    2. Whitespace between two tokens is dropped unless both neighbours are
       ASCII letters; this is repeated until the string stops changing.
       A match is never started on the backslash of an escaped space
       (``\\ ``), so that control space is kept.

    Args:
        s: LaTeX source string.

    Returns:
        The string with the redundant whitespace removed.
    """
    text_reg = r'(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})'
    letter = r'[a-zA-Z]'
    # Raw string: '\W' and '\d' are invalid escapes in a normal literal.
    noletter = r'[\W_^\d]'

    # Strip spaces inside each text-like command, directly per match.
    s = re.sub(text_reg, lambda match: match.group(0).replace(' ', ''), s)

    # The patterns never change, so build them once outside the loop.
    passes = (
        re.compile(r'(?!\\ )(%s)\s+?(%s)' % (noletter, noletter)),
        re.compile(r'(?!\\ )(%s)\s+?(%s)' % (noletter, letter)),
        re.compile(r'(%s)\s+?(%s)' % (letter, noletter)),
    )

    # One sweep can expose new removable gaps, so iterate to a fixed point.
    while True:
        previous = s
        for pattern in passes:
            s = pattern.sub(r'\1\2', s)
        if s == previous:
            return s