trysem pierreguillou committed on
Commit
ec45fc1
·
0 Parent(s):

Duplicate from pierreguillou/pdf-firstpage-to-img

Browse files

Co-authored-by: Pierre Guillou <[email protected]>

Files changed (6) hide show
  1. .gitattributes +27 -0
  2. README.md +13 -0
  3. app.py +48 -0
  4. example.pdf +0 -0
  5. packages.txt +1 -0
  6. requirements.txt +2 -0
.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.onnx filter=lfs diff=lfs merge=lfs -text
13
+ *.ot filter=lfs diff=lfs merge=lfs -text
14
+ *.parquet filter=lfs diff=lfs merge=lfs -text
15
+ *.pb filter=lfs diff=lfs merge=lfs -text
16
+ *.pt filter=lfs diff=lfs merge=lfs -text
17
+ *.pth filter=lfs diff=lfs merge=lfs -text
18
+ *.rar filter=lfs diff=lfs merge=lfs -text
19
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
21
+ *.tflite filter=lfs diff=lfs merge=lfs -text
22
+ *.tgz filter=lfs diff=lfs merge=lfs -text
23
+ *.wasm filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: First page of a PDF >> image
3
+ emoji: 💻
4
+ colorFrom: pink
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 3.0.2
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: pierreguillou/pdf-firstpage-to-img
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pdf2image
3
+ import numpy as np
4
+
5
+ # Convert a PDF to images
6
def pdf_to_imgs(pdf):
    """Convert the first page of a PDF into an image array.

    Args:
        pdf: The PDF to convert, given either as an object exposing the
            path of the file through a ``name`` attribute or as the path
            itself.

    Returns:
        A numpy array built from the PIL image of the first page.

    Raises:
        ValueError: If no PDF was provided or if the conversion produced
            no page.
    """
    if pdf is None:
        raise ValueError("No PDF file was provided.")

    # Accept both a file wrapper (path stored in `.name`) and a plain path.
    path_to_pdf = getattr(pdf, "name", pdf)

    # Only the first page is returned, so stop the conversion after page 1
    # instead of rendering the whole document.
    imgs = pdf2image.convert_from_path(path_to_pdf, last_page=1)
    if not imgs:
        raise ValueError("The PDF does not contain any page to convert.")

    return np.array(imgs[0])
26
+
27
# Texts and options of the demo page.
title = "First page of a PDF >> image"
description = "Drop a PDF (WARNING: only the first page will be converted into an image)."
examples = [["example.pdf"]]
css = ".output-image, .input-image, .image-preview {height: 600px !important}"
allow_flagging = "never"
live = False

# Components written for sdk_version 3.0.2.
# NOTE: with sdk_version 2.9.4 the input was gr.inputs.File(type="file", label="PDF");
# that version was needed to use gr.Interface.load("spaces/pierreguillou/pdf2imgs")
# in other Spaces.
pdf_input = gr.File(label="PDF")
page_output = gr.Image(type="numpy", label="image of the first page")

# Build the interface around pdf_to_imgs, then start the app.
iface = gr.Interface(
    fn=pdf_to_imgs,
    inputs=pdf_input,
    outputs=page_output,
    title=title,
    description=description,
    examples=examples,
    css=css,
    allow_flagging=allow_flagging,
    live=live,
)
iface.launch(debug=True, enable_queue=True)
example.pdf ADDED
Binary file (51.1 kB). View file
 
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ poppler-utils
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pdf2image
2
+ numpy