02alexander
committed on
Commit
·
ad31b8f
1
Parent(s):
a4bd933
behave reasonably
Browse files
ocr.py
CHANGED
@@ -374,21 +374,23 @@ def generate_blueprint(
|
|
374 |
|
375 |
def detect_and_log_layouts(log_queue: SimpleQueue[Any], file_path: str, start_page: int = 1, end_page: int | None = -1) -> None:
|
376 |
if end_page == -1:
|
377 |
-
end_page = start_page + PAGE_LIMIT
|
378 |
if end_page < start_page:
|
379 |
end_page = start_page
|
|
|
380 |
|
381 |
images: list[npt.NDArray[np.uint8]] = []
|
382 |
if file_path.endswith(".pdf"):
|
383 |
# convert pdf to images
|
384 |
images.extend(np.array(img, dtype=np.uint8) for img in pdf2image.convert_from_path(file_path, first_page=start_page, last_page=end_page))
|
|
|
385 |
if len(images) > PAGE_LIMIT:
|
386 |
log_queue.put([
|
387 |
"log",
|
388 |
"progress",
|
389 |
[rr.TextDocument(f"Too many pages requsted: {len(images)} requested but the limit is {PAGE_LIMIT}")],
|
390 |
])
|
391 |
-
|
392 |
else:
|
393 |
# read image
|
394 |
img = cv2.imread(file_path)
|
|
|
374 |
|
375 |
def detect_and_log_layouts(log_queue: SimpleQueue[Any], file_path: str, start_page: int = 1, end_page: int | None = -1) -> None:
|
376 |
if end_page == -1:
|
377 |
+
end_page = start_page + PAGE_LIMIT-1
|
378 |
if end_page < start_page:
|
379 |
end_page = start_page
|
380 |
+
print(start_page, end_page)
|
381 |
|
382 |
images: list[npt.NDArray[np.uint8]] = []
|
383 |
if file_path.endswith(".pdf"):
|
384 |
# convert pdf to images
|
385 |
images.extend(np.array(img, dtype=np.uint8) for img in pdf2image.convert_from_path(file_path, first_page=start_page, last_page=end_page))
|
386 |
+
print(len(images))
|
387 |
if len(images) > PAGE_LIMIT:
|
388 |
log_queue.put([
|
389 |
"log",
|
390 |
"progress",
|
391 |
[rr.TextDocument(f"Too many pages requsted: {len(images)} requested but the limit is {PAGE_LIMIT}")],
|
392 |
])
|
393 |
+
return
|
394 |
else:
|
395 |
# read image
|
396 |
img = cv2.imread(file_path)
|