File size: 3,043 Bytes
240e0a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
def construct_page_component(page_id, image_info, table_info, text_blocks_preproc, layout_bboxes, inline_eq_info,
                             interline_eq_info, raw_pymu_blocks,
                             removed_text_blocks, removed_image_blocks, images_backup, droped_table_block, table_backup,
                             layout_tree,
                             page_w, page_h, footnote_bboxes_tmp):
    """Bundle the per-page parsing results into a single dictionary.

    Every argument is stored by reference (no copying) under a fixed key:

    - ``text_blocks_preproc`` -> ``'preproc_blocks'``
    - ``image_info`` -> ``'images'``
    - ``table_info`` -> ``'tables'``
    - ``interline_eq_info`` -> ``'interline_equations'``
    - ``inline_eq_info`` -> ``'inline_equations'``
    - ``layout_bboxes`` -> ``'layout_bboxes'``
    - ``raw_pymu_blocks`` -> ``'pymu_raw_blocks'``
    - ``removed_text_blocks`` -> ``'droped_text_block'``
    - ``removed_image_blocks`` -> ``'droped_image_block'``
    - ``images_backup`` -> ``'image_backup'``
    - ``page_id`` -> ``'page_idx'``
    - ``page_w``, ``page_h`` -> ``'page_size'`` as ``[page_w, page_h]``
    - ``layout_tree`` -> ``'_layout_tree'``
    - ``footnote_bboxes_tmp`` -> ``'footnote_bboxes_tmp'``

    ``'para_blocks'`` and ``'global_statistic'`` start out as fresh empty
    dicts on every call.

    NOTE(review): ``droped_table_block`` and ``table_backup`` are accepted but
    never stored -- the ``'droped_table_block'`` and ``'table_backup'`` keys
    are always fresh empty lists. Confirm against the callers whether this is
    intentional before wiring the arguments through.

    Returns:
        dict: the page component, with keys in the order listed in the body.
    """
    # Key order is kept identical to the historical insertion order so that
    # iteration / serialization output does not change.
    return {
        'para_blocks': {},
        'preproc_blocks': text_blocks_preproc,
        'images': image_info,
        'tables': table_info,
        'interline_equations': interline_eq_info,
        'inline_equations': inline_eq_info,
        'layout_bboxes': layout_bboxes,
        'pymu_raw_blocks': raw_pymu_blocks,
        'global_statistic': {},
        'droped_text_block': removed_text_blocks,
        'droped_image_block': removed_image_blocks,
        'droped_table_block': [],  # argument of the same name is not used
        'image_backup': images_backup,
        'table_backup': [],  # argument of the same name is not used
        'page_idx': page_id,
        'page_size': [page_w, page_h],
        '_layout_tree': layout_tree,  # auxiliary data for analysing the layout
        'footnote_bboxes_tmp': footnote_bboxes_tmp,
    }
def ocr_construct_page_component(blocks, layout_bboxes, page_id, page_w, page_h, layout_tree,
                                 images, tables, interline_equations, inline_equations,
                                 dropped_text_block, dropped_image_block, dropped_table_block, dropped_equation_block,
                                 need_remove_spans_bboxes_dict):
    """Bundle the OCR per-page results into a single dictionary.

    Each argument is stored by reference under a fixed key; ``page_w`` and
    ``page_h`` are combined into the two-element list ``'page_size'``.

    Note that the output keys mix two spellings: ``'droped_text_block'``,
    ``'droped_image_block'``, ``'droped_table_block'`` and ``'droped_bboxes'``
    use a single "p", while ``'dropped_equation_block'`` uses two. These are
    reproduced exactly because consumers look the keys up by name.

    Returns:
        dict: the page component, with keys in the order written below.
    """
    return {
        # Page content and geometry.
        'preproc_blocks': blocks,
        'layout_bboxes': layout_bboxes,
        'page_idx': page_id,
        'page_size': [page_w, page_h],
        '_layout_tree': layout_tree,
        # Extracted elements.
        'images': images,
        'tables': tables,
        'interline_equations': interline_equations,
        'inline_equations': inline_equations,
        # Elements that were dropped.
        'droped_text_block': dropped_text_block,
        'droped_image_block': dropped_image_block,
        'droped_table_block': dropped_table_block,
        'dropped_equation_block': dropped_equation_block,
        'droped_bboxes': need_remove_spans_bboxes_dict,
    }
def ocr_construct_page_component_v2(blocks, layout_bboxes, page_id, page_w, page_h, layout_tree,
                                    images, tables, interline_equations, discarded_blocks, need_drop, drop_reason):
    """Bundle the OCR per-page results (v2 layout) into a single dictionary.

    Each argument is stored by reference under a fixed key; ``page_w`` and
    ``page_h`` are combined into the two-element list ``'page_size'``.
    ``need_drop`` and ``drop_reason`` are passed through unchanged under the
    keys of the same name.

    Returns:
        dict: the page component, with keys in the order written below.
    """
    return {
        # Page content and geometry.
        'preproc_blocks': blocks,
        'layout_bboxes': layout_bboxes,
        'page_idx': page_id,
        'page_size': [page_w, page_h],
        '_layout_tree': layout_tree,
        # Extracted elements.
        'images': images,
        'tables': tables,
        'interline_equations': interline_equations,
        # Discard / drop bookkeeping.
        'discarded_blocks': discarded_blocks,
        'need_drop': need_drop,
        'drop_reason': drop_reason,
    }
|