HawkeyeHS committed on
Commit
dff2b65
·
1 Parent(s): 503cd37
Files changed (2) hide show
  1. app.py +157 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from PyPDF2 import PdfWriter, PdfReader
4
+ import zipfile
5
+ import tempfile
6
+ import fitz # PyMuPDF
7
+
8
def merge_pdfs(pdf_files):
    """Merge the uploaded PDFs into one document made of A4-sized pages.

    Each page of each input, in upload order, is drawn onto a new A4 page
    of the output document with its proportions kept.

    Args:
        pdf_files: Sequence of uploaded files. Each entry may be a path
            string or an object whose ``name`` attribute holds the path.
            ``None`` or an empty sequence is reported as an error status.

    Returns:
        A ``(status_message, output_path)`` tuple; ``output_path`` is
        ``None`` when no files were supplied.
    """
    if not pdf_files:
        return "❌ No PDF files uploaded.", None

    output_dir = tempfile.mkdtemp()
    output_file = os.path.join(output_dir, "merged.pdf")
    a4_rect = fitz.paper_rect("a4")

    # Context managers guarantee both documents are closed even when a
    # source file is corrupt and opening or rendering raises midway.
    with fitz.open() as doc_out:
        for file in pdf_files:
            # Accept both plain path strings and file-like upload objects.
            src_path = getattr(file, "name", file)
            with fitz.open(src_path) as src:
                for page in src:
                    page_out = doc_out.new_page(
                        width=a4_rect.width, height=a4_rect.height
                    )
                    page_out.show_pdf_page(
                        a4_rect, src, page.number, keep_proportion=True
                    )
        doc_out.save(output_file)

    return "✅ PDFs merged successfully.", output_file
28
+
29
+
30
def compress_pdf(file, dpi_threshold, dpi_target, quality):
    """Shrink a PDF by recompressing its images and subsetting its fonts.

    Args:
        file: Uploaded file, either a path string or an object whose
            ``name`` attribute holds the path. ``None`` yields an error
            status instead of raising.
        dpi_threshold: Passed to ``rewrite_images`` as ``dpi_threshold``.
        dpi_target: Passed to ``rewrite_images`` as ``dpi_target``.
        quality: Passed to ``rewrite_images`` as ``quality`` (the UI labels
            it "JPEG Quality (1-100)").

    Returns:
        A ``(status_message, output_path)`` tuple; ``output_path`` is
        ``None`` when no file was supplied.
    """
    # Mirror the guard used by the split and merge handlers so an empty
    # upload produces a status message rather than an AttributeError.
    if file is None:
        return "❌ No file uploaded.", None

    input_path = getattr(file, "name", file)

    # Write the result into its own temporary directory.
    temp_dir = tempfile.mkdtemp()
    output_path = os.path.join(temp_dir, "compressed_output.pdf")

    # The context manager closes the document even if rewriting fails.
    with fitz.open(input_path) as doc:
        doc.rewrite_images(
            dpi_threshold=dpi_threshold,
            dpi_target=dpi_target,
            quality=quality,
            lossy=True,
            lossless=True,
            bitonal=True,
            color=True,
            gray=True,
            set_to_gray=False,
        )

        # Subset fonts, then save with garbage collection level 3,
        # deflated streams and object streams.
        doc.subset_fonts()
        doc.save(
            output_path,
            garbage=3,
            deflate=True,
            use_objstms=True,
        )

    return "✅ PDF compressed successfully!", output_path
61
+
62
+
63
def split_pdf(file_path, start_page, end_page):
    """Write each page in a range to its own PDF and bundle them in a ZIP.

    Args:
        file_path: Path of the PDF to split.
        start_page: 0-based index of the first page to extract; clamped
            into the document's page range.
        end_page: 0-based index of the last page to extract (inclusive);
            clamped to be no smaller than ``start_page`` and no larger
            than the last page.

    Returns:
        A ``(zip_path, first_page, last_page)`` tuple, where the page
        numbers are the 1-based bounds that were actually extracted.

    Raises:
        ValueError: If the PDF contains no pages.
    """
    # splitext copes with any extension length, unlike slicing off 4 chars.
    base_name, _ = os.path.splitext(os.path.basename(file_path))
    output_dir = tempfile.mkdtemp()
    zip_path = os.path.join(output_dir, f"{base_name}_split_pages.zip")

    # Keep the source open only while pages are read, and always close it.
    with open(file_path, "rb") as pdf_stream:
        input_pdf = PdfReader(pdf_stream)
        total_pages = len(input_pdf.pages)
        if total_pages == 0:
            raise ValueError("PDF contains no pages.")

        # Clamp values within range
        start_page = max(0, min(start_page, total_pages - 1))
        end_page = max(start_page, min(end_page, total_pages - 1))

        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
            for i in range(start_page, end_page + 1):
                writer = PdfWriter()
                writer.add_page(input_pdf.pages[i])
                split_pdf_path = os.path.join(
                    output_dir, f"{base_name}-page{i+1}.pdf"
                )
                with open(split_pdf_path, "wb") as f_out:
                    writer.write(f_out)
                zipf.write(
                    split_pdf_path, arcname=os.path.basename(split_pdf_path)
                )

    return zip_path, start_page + 1, end_page + 1
88
+
89
def process_pdf(file, start_page, end_page):
    """Gradio handler for the split tab: validate input and split the PDF.

    Args:
        file: Uploaded file, either a path string or an object whose
            ``name`` attribute holds the path; ``None`` when nothing was
            uploaded.
        start_page: 0-based first page from the UI, or ``None`` if the
            field was cleared.
        end_page: 0-based last page from the UI, or ``None`` if the field
            was cleared.

    Returns:
        A ``(status_message, zip_path)`` tuple; ``zip_path`` is ``None``
        when the input was rejected.
    """
    if file is None:
        return "❌ No file uploaded.", None
    # A cleared number field arrives as None, which would otherwise fail
    # inside split_pdf with an opaque TypeError.
    if start_page is None or end_page is None:
        return "❌ Start and end page are required.", None

    file_path = getattr(file, "name", file)
    zip_file_path, actual_start, actual_end = split_pdf(
        file_path, int(start_page), int(end_page)
    )
    status = (
        f"✅ File '{file_path}' split from page {actual_start} to {actual_end}."
    )
    return status, zip_file_path
95
+
96
# Gradio UI: one tab per operation, each wiring its inputs to the matching
# handler and showing a status message plus a downloadable result file.
with gr.Blocks(title="PDF Utility") as demo:
    gr.Markdown("# 📄 PDF Utility App")

    with gr.Tabs():
        # --- Split: page range in, ZIP of single-page PDFs out ---
        with gr.TabItem("Split PDF"):
            gr.Markdown("Upload a PDF, select page range, and click **Split PDF** to download a ZIP of split pages.")
            with gr.Row():
                file_input = gr.File(label="Upload PDF", file_types=[".pdf"])

            with gr.Row():
                start_page = gr.Number(label="Start Page (0-based)", value=0, precision=0)
                end_page = gr.Number(label="End Page (0-based)", value=0, precision=0)

            split_button = gr.Button("🚀 Split PDF")

            status_text = gr.Textbox(label="Status", lines=2)
            download_link = gr.File(label="Download ZIP")

            split_button.click(
                fn=process_pdf,
                inputs=[file_input, start_page, end_page],
                outputs=[status_text, download_link]
            )

        # --- Compress: image DPI/quality settings in, smaller PDF out ---
        with gr.TabItem("Compress PDF"):
            gr.Markdown("Upload a PDF and click **Compress PDF** to download the compressed version.")
            with gr.Row():
                file_input_compress = gr.File(label="Upload PDF", file_types=[".pdf"])

            with gr.Row():
                dpi_threshold = gr.Number(label="DPI Threshold", value=100, precision=0)
                dpi_target = gr.Number(label="Target DPI", value=72, precision=0)
                quality = gr.Number(label="JPEG Quality (1-100)", value=60, precision=0)

            compress_button = gr.Button("🚀 Compress PDF")

            status_text_compress = gr.Textbox(label="Status", lines=2)
            download_link_compress = gr.File(label="Download compressed PDF")

            compress_button.click(
                fn=compress_pdf,
                inputs=[file_input_compress, dpi_threshold, dpi_target, quality],
                outputs=[status_text_compress, download_link_compress]
            )

        # --- Merge: several PDFs in, one combined PDF out ---
        with gr.TabItem("Merge PDFs"):
            gr.Markdown("Upload multiple PDFs and click **Merge PDFs** to download the merged version.")
            pdf_uploads = gr.File(label="Upload PDFs", file_types=[".pdf"], file_count="multiple")

            merge_button = gr.Button("📎 Merge PDF Files")

            merge_status = gr.Textbox(label="Status", lines=2)
            merged_file = gr.File(label="Download Merged PDF")

            merge_button.click(
                fn=merge_pdfs,
                inputs=[pdf_uploads],
                outputs=[merge_status, merged_file]
            )

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ PyMuPDF
3
+ PyPDF2