File size: 1,296 Bytes
f094d88
391e0c9
f094d88
 
 
 
 
391e0c9
f094d88
caa8617
f094d88
 
d19d055
f094d88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
caa8617
f094d88
 
 
caa8617
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os
import tempfile
import zipfile

import gradio as gr
import pymupdf

def pdf_to_images(pdf_file):
    """Render every page of a PDF to PNG and bundle the images into a ZIP.

    Each page is rasterized at 200 DPI and stored in the archive as
    ``<page number>.png`` (1-based), in page order.

    Args:
        pdf_file: Value handed straight to ``pymupdf.open``; presumably the
            path of the uploaded file supplied by the ``gr.File`` input
            (verify against the Gradio version in use).

    Returns:
        Path of the generated ``images.zip``, located in a freshly created
        temporary directory.
    """
    doc = pymupdf.open(pdf_file)
    try:
        # Use a private directory per call: a fixed "temp_images" folder and
        # "images.zip" in the working directory would be shared by
        # overlapping requests, which could then overwrite or delete each
        # other's files (or pick up leftovers from a failed earlier run).
        out_dir = tempfile.mkdtemp(prefix="pdf_to_images_")
        zip_path = os.path.join(out_dir, "images.zip")
        try:
            with zipfile.ZipFile(zip_path, "w") as zipf:
                for page_id in range(doc.page_count):
                    pix = doc[page_id].get_pixmap(dpi=200)
                    # Stream each page directly into the archive; no
                    # intermediate PNG files and only one page in memory.
                    zipf.writestr(f"{page_id + 1}.png", pix.tobytes("png"))
        except BaseException:
            # Do not leave a partial archive behind when rendering fails.
            if os.path.exists(zip_path):
                os.remove(zip_path)
            os.rmdir(out_dir)
            raise
    finally:
        # Release the document even if rendering or zipping raised.
        doc.close()

    return zip_path


# Build the web UI: one file upload in, one downloadable ZIP out.
_pdf_upload = gr.File(label="Upload PDF File")
_zip_download = gr.File(label="Download ZIP File")

iface = gr.Interface(
    title="PDF to Images Converter",
    description="Upload a PDF file and download a ZIP file containing all the pages as images. Host it on huggingface for convenience.",
    fn=pdf_to_images,
    inputs=_pdf_upload,
    outputs=_zip_download,
)

# Start serving the interface.
iface.launch()

# Source code
# https://juejin.cn/post/7382480523846467595