pierreguillou
committed on
Commit
·
6ec863b
1
Parent(s):
f768a86
Update files/functions.py
Browse files - files/functions.py (+14 -3)
files/functions.py
CHANGED
@@ -11,8 +11,7 @@ import pypdf
|
|
11 |
from pypdf import PdfReader
|
12 |
from pypdf.errors import PdfReadError
|
13 |
|
14 |
-
import
|
15 |
-
from pdf2image import convert_from_path
|
16 |
import langdetect
|
17 |
from langdetect import detect_langs
|
18 |
|
@@ -363,7 +362,19 @@ def pdf_to_images(uploaded_pdf):
|
|
363 |
images = [Image.open(image_blank)]
|
364 |
else:
|
365 |
try:
|
366 |
-
images = convert_from_path(path_to_file, last_page=max_imgboxes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
367 |
num_imgs = len(images)
|
368 |
msg = f'The PDF "{filename}" was converted into {num_imgs} images.'
|
369 |
except:
|
|
|
11 |
from pypdf import PdfReader
|
12 |
from pypdf.errors import PdfReadError
|
13 |
|
14 |
+
import pypdfium2 as pdfium
|
|
|
15 |
import langdetect
|
16 |
from langdetect import detect_langs
|
17 |
|
|
|
362 |
images = [Image.open(image_blank)]
|
363 |
else:
|
364 |
try:
|
365 |
+
# images = convert_from_path(path_to_file, last_page=max_imgboxes)
|
366 |
+
|
367 |
+
pdf = pdfium.PdfDocument(str(filename))
|
368 |
+
version = pdf.get_version() # get the PDF standard version
|
369 |
+
n_pages = len(pdf) # get the number of pages in the document
|
370 |
+
last_page = max_imgboxes
|
371 |
+
page_indices = [i for i in range(last_page)] # pages until last_page
|
372 |
+
images = list(pdf.render(
|
373 |
+
pdfium.PdfBitmap.to_pil,
|
374 |
+
page_indices = page_indices,
|
375 |
+
scale = 300/72, # 300dpi resolution
|
376 |
+
))
|
377 |
+
|
378 |
num_imgs = len(images)
|
379 |
msg = f'The PDF "{filename}" was converted into {num_imgs} images.'
|
380 |
except:
|