02alexander committed on
Commit
e347567
·
1 Parent(s): ad31b8f

Fix bug in highlighting of entity when clicking on markdown text

Browse files
Files changed (1) hide show
  1. ocr.py +15 -13
ocr.py CHANGED
@@ -119,10 +119,11 @@ Layout Class:
119
 
120
 
121
  class Layout:
122
- def __init__(self, show_unknown: bool = False):
123
  self.counts = {layout_type: 0 for layout_type in LayoutType}
124
  self.records: dict[LayoutType, Any] = {layout_type: [] for layout_type in LayoutType}
125
  self.recovery = """"""
 
126
  self.show_unknown = show_unknown
127
 
128
  def add(
@@ -145,7 +146,7 @@ class Layout:
145
  "table": table,
146
  })
147
  if layout_type != LayoutType.UNKNOWN or self.show_unknown: # Discards the unknown layout types detections
148
- path = f"recording://Image/{layout_type.type.title()}/{name.title()}"
149
  self.recovery += f"\n\n## [{name.title()}]({path})\n\n" # Log Type as Heading
150
  # Enhancement - Logged image for Figure type TODO(#6517)
151
  if layout_type == LayoutType.TABLE:
@@ -153,7 +154,7 @@ class Layout:
153
  self.recovery += table # Log details (table)
154
  elif detections:
155
  for index, detection in enumerate(detections):
156
- path_text = f"recording://Image/{layout_type.type.title()}/{name.title()}/Detections/{index}"
157
  self.recovery += f' [{detection["text"]}]({path_text})' # Log details (text)
158
  else:
159
  logging.warning(f"Invalid layout type detected: {layout_type}")
@@ -221,13 +222,14 @@ class Layout:
221
  return f"Error processing the table: {str(e)}"
222
 
223
 
224
- def process_layout_records(log_queue: SimpleQueue[Any], layout: Layout, page_path: str) -> LayoutStructure:
225
  paths, detections_paths = [], []
226
  zoom_paths: list[rrb.Spatial2DView] = []
227
  zoom_paths_figures: list[rrb.Spatial2DView] = []
228
  zoom_paths_tables: list[rrb.Spatial2DView] = []
229
  zoom_paths_texts: list[rrb.Spatial2DView] = []
230
 
 
231
  for layout_type in LayoutType:
232
  for record in layout.records[layout_type]:
233
  record_name = record["name"].title()
@@ -327,11 +329,11 @@ def update_zoom_paths(
327
 
328
  def generate_blueprint(
329
  layouts: list[Layout],
330
- page_paths: list[str],
331
  processed_layouts: list[LayoutStructure],
332
  ) -> rrb.Blueprint:
333
  page_tabs = []
334
- for layout, (page_path, processed_layout) in zip(layouts, zip(page_paths, processed_layouts)):
 
335
  paths, detections_paths, zoom_paths_figures, zoom_paths_tables, zoom_paths_texts = processed_layout
336
 
337
  section_tabs = []
@@ -399,28 +401,28 @@ def detect_and_log_layouts(log_queue: SimpleQueue[Any], file_path: str, start_pa
399
 
400
  # Extract the layout from each image
401
  layouts: list[Layout] = []
402
- page_paths = [f"page_{i + start_page}" for i in range(len(images))]
403
  processed_layouts: list[LayoutStructure] = []
404
- for i, (image, page_path) in enumerate(zip(images, page_paths)):
405
- layouts.append(detect_and_log_layout(log_queue, image, page_path))
406
 
407
  # Generate and send a blueprint based on the detected layouts
408
  processed_layouts.append(
409
  process_layout_records(
410
  log_queue,
411
  layouts[-1],
412
- page_path,
413
  )
414
  )
415
  logging.info("Sending blueprint...")
416
- blueprint = generate_blueprint(layouts, page_paths, processed_layouts)
417
  log_queue.put(["blueprint", blueprint])
418
  logging.info("Blueprint sent...")
419
 
420
 
421
- def detect_and_log_layout(log_queue: SimpleQueue, coloured_image: npt.NDArray[np.uint8], page_path: str = "") -> Layout:
422
  # Layout Object - This will contain the detected layouts and their detections
423
- layout = Layout()
 
424
 
425
  # Log Image and add Annotation Context
426
  log_queue.put([
 
119
 
120
 
121
  class Layout:
122
+ def __init__(self, page_number: int, show_unknown: bool = False):
123
  self.counts = {layout_type: 0 for layout_type in LayoutType}
124
  self.records: dict[LayoutType, Any] = {layout_type: [] for layout_type in LayoutType}
125
  self.recovery = """"""
126
+ self.page_number = page_number
127
  self.show_unknown = show_unknown
128
 
129
  def add(
 
146
  "table": table,
147
  })
148
  if layout_type != LayoutType.UNKNOWN or self.show_unknown: # Discards the unknown layout types detections
149
+ path = f"recording://page_{self.page_number}/Image/{layout_type.type.title()}/{name.title()}"
150
  self.recovery += f"\n\n## [{name.title()}]({path})\n\n" # Log Type as Heading
151
  # Enhancement - Logged image for Figure type TODO(#6517)
152
  if layout_type == LayoutType.TABLE:
 
154
  self.recovery += table # Log details (table)
155
  elif detections:
156
  for index, detection in enumerate(detections):
157
+ path_text = f"recording://page_{self.page_number}/Image/{layout_type.type.title()}/{name.title()}/Detections/{index}"
158
  self.recovery += f' [{detection["text"]}]({path_text})' # Log details (text)
159
  else:
160
  logging.warning(f"Invalid layout type detected: {layout_type}")
 
222
  return f"Error processing the table: {str(e)}"
223
 
224
 
225
+ def process_layout_records(log_queue: SimpleQueue[Any], layout: Layout) -> LayoutStructure:
226
  paths, detections_paths = [], []
227
  zoom_paths: list[rrb.Spatial2DView] = []
228
  zoom_paths_figures: list[rrb.Spatial2DView] = []
229
  zoom_paths_tables: list[rrb.Spatial2DView] = []
230
  zoom_paths_texts: list[rrb.Spatial2DView] = []
231
 
232
+ page_path = f'page_{layout.page_number}'
233
  for layout_type in LayoutType:
234
  for record in layout.records[layout_type]:
235
  record_name = record["name"].title()
 
329
 
330
  def generate_blueprint(
331
  layouts: list[Layout],
 
332
  processed_layouts: list[LayoutStructure],
333
  ) -> rrb.Blueprint:
334
  page_tabs = []
335
+ for layout, processed_layout in zip(layouts, processed_layouts):
336
+ page_path = f'page_{layout.page_number}'
337
  paths, detections_paths, zoom_paths_figures, zoom_paths_tables, zoom_paths_texts = processed_layout
338
 
339
  section_tabs = []
 
401
 
402
  # Extract the layout from each image
403
  layouts: list[Layout] = []
404
+ page_numbers = [i + start_page for i in range(len(images))]
405
  processed_layouts: list[LayoutStructure] = []
406
+ for i, (image, page_number) in enumerate(zip(images, page_numbers)):
407
+ layouts.append(detect_and_log_layout(log_queue, image, page_number))
408
 
409
  # Generate and send a blueprint based on the detected layouts
410
  processed_layouts.append(
411
  process_layout_records(
412
  log_queue,
413
  layouts[-1],
 
414
  )
415
  )
416
  logging.info("Sending blueprint...")
417
+ blueprint = generate_blueprint(layouts, processed_layouts)
418
  log_queue.put(["blueprint", blueprint])
419
  logging.info("Blueprint sent...")
420
 
421
 
422
+ def detect_and_log_layout(log_queue: SimpleQueue, coloured_image: npt.NDArray[np.uint8], page_number: int) -> Layout:
423
  # Layout Object - This will contain the detected layouts and their detections
424
+ layout = Layout(page_number)
425
+ page_path = f'page_{page_number}'
426
 
427
  # Log Image and add Annotation Context
428
  log_queue.put([