parser-leaderboard / utils /pdf_utils.py
jojortz's picture
add content from extract-leaderboard-gradio
f745baf
raw
history blame
836 Bytes
import fitz
from PIL import Image
def update_page(file_path, page_num, direction):
    """Move to a neighbouring page of a PDF and render it.

    Args:
        file_path: Location of the PDF; passed through to ``get_pdf_page``.
        page_num: Zero-based index of the page currently shown. ``None`` is
            treated as page 0.
        direction: Offset added to ``page_num`` to obtain the requested page
            (presumably -1 / +1 from previous/next controls; verify against
            the caller).

    Returns:
        A 3-tuple ``(image, label, actual_page_num)`` where ``image`` is the
        rendered page, ``label`` is the text ``"Page X of Y"`` using a
        one-based page number, and ``actual_page_num`` is the zero-based
        index reported back by ``get_pdf_page``.
    """
    current = 0 if page_num is None else page_num
    # get_pdf_page reports the index it actually rendered, which may differ
    # from the requested one, so the label is built from its return value.
    image, shown_index, n_pages = get_pdf_page(file_path, current + direction)
    label = f"Page {shown_index + 1} of {n_pages}"
    return image, label, shown_index
def get_pdf_page(file_path, page_num):
    """Render one page of a PDF as a PIL image.

    The requested index is clamped into ``[0, page_count - 1]`` so that
    stepping past either end of the document re-renders the first or last
    page instead of failing.

    Args:
        file_path: Location of the PDF, handed to ``fitz.open``.
        page_num: Zero-based index of the page to render.

    Returns:
        A 3-tuple ``(img, page_num, page_count)``: the rendered page as an
        RGB ``PIL.Image.Image``, the zero-based index that was actually
        rendered after clamping, and the total number of pages.

    Note:
        For a document with zero pages the clamped index is 0, so whatever
        error ``load_page`` raises for a missing page propagates to the
        caller.
    """
    # Open the document as a context manager so the underlying file handle
    # and native resources are released even if rendering raises.
    with fitz.open(file_path) as doc:
        page_count = len(doc)
        # Keep the index within bounds.
        page_num = max(0, min(page_num, page_count - 1))
        pix = doc.load_page(page_num).get_pixmap()
        # Image.frombytes is called with mode "RGB", which assumes the default
        # pixmap has three channels and no alpha -- NOTE(review): confirm
        # against the PyMuPDF version in use. The image is built while the
        # document is still open.
        img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    return img, page_num, page_count
def load_pdf(file_path):
    """Render the first page of a PDF.

    Args:
        file_path: Location of the PDF; passed through to ``get_pdf_page``.

    Returns:
        A 3-tuple ``(image, label, page_num)`` where ``image`` is the
        rendered page, ``label`` is the text ``"Page X of Y"`` using a
        one-based page number, and ``page_num`` is the zero-based index
        reported back by ``get_pdf_page``.
    """
    first_image, index, n_pages = get_pdf_page(file_path, 0)
    caption = f"Page {index + 1} of {n_pages}"
    return first_image, caption, index