02alexander committed on
Commit
ad31b8f
·
1 Parent(s): a4bd933

behave reasonably

Browse files
Files changed (1) hide show
  1. ocr.py +4 -2
ocr.py CHANGED
@@ -374,21 +374,23 @@ def generate_blueprint(
374
 
375
  def detect_and_log_layouts(log_queue: SimpleQueue[Any], file_path: str, start_page: int = 1, end_page: int | None = -1) -> None:
376
  if end_page == -1:
377
- end_page = start_page + PAGE_LIMIT
378
  if end_page < start_page:
379
  end_page = start_page
 
380
 
381
  images: list[npt.NDArray[np.uint8]] = []
382
  if file_path.endswith(".pdf"):
383
  # convert pdf to images
384
  images.extend(np.array(img, dtype=np.uint8) for img in pdf2image.convert_from_path(file_path, first_page=start_page, last_page=end_page))
 
385
  if len(images) > PAGE_LIMIT:
386
  log_queue.put([
387
  "log",
388
  "progress",
389
  [rr.TextDocument(f"Too many pages requsted: {len(images)} requested but the limit is {PAGE_LIMIT}")],
390
  ])
391
- return
392
  else:
393
  # read image
394
  img = cv2.imread(file_path)
 
374
 
375
  def detect_and_log_layouts(log_queue: SimpleQueue[Any], file_path: str, start_page: int = 1, end_page: int | None = -1) -> None:
376
  if end_page == -1:
377
+ end_page = start_page + PAGE_LIMIT-1
378
  if end_page < start_page:
379
  end_page = start_page
380
+ print(start_page, end_page)
381
 
382
  images: list[npt.NDArray[np.uint8]] = []
383
  if file_path.endswith(".pdf"):
384
  # convert pdf to images
385
  images.extend(np.array(img, dtype=np.uint8) for img in pdf2image.convert_from_path(file_path, first_page=start_page, last_page=end_page))
386
+ print(len(images))
387
  if len(images) > PAGE_LIMIT:
388
  log_queue.put([
389
  "log",
390
  "progress",
391
  [rr.TextDocument(f"Too many pages requsted: {len(images)} requested but the limit is {PAGE_LIMIT}")],
392
  ])
393
+ return
394
  else:
395
  # read image
396
  img = cv2.imread(file_path)