|
import os |
|
from PyPDF2 import PdfReader, PdfWriter |
|
from io import BytesIO |
|
import gradio as gr |
|
|
|
def get_pdf_size(pdf_writer):
    """Return the serialized size, in bytes, of a writer's current content.

    The writer is serialized into a temporary in-memory buffer; nothing is
    written to disk.

    Args:
        pdf_writer: Object exposing ``write(stream)`` that serializes itself
            into a binary stream (a ``PdfWriter`` in this file).

    Returns:
        int: Total number of bytes the writer produced.
    """
    with BytesIO() as buffer:
        pdf_writer.write(buffer)
        # Seek to the end instead of trusting ``tell()``: the result then does
        # not depend on where the writer happened to leave the stream position.
        return buffer.seek(0, os.SEEK_END)
|
|
|
def split_pdf_by_pages(input_pdf_path, output_folder, split_pages):
    """Split a PDF into consecutive parts ending at the given page numbers.

    Every value in ``split_pages`` is a 1-based page number that becomes the
    last page of a part. Pages after the largest split point, if any, form one
    final part. Parts are written to ``output_folder`` as ``part_<n>.pdf``,
    numbered from 1 in page order.

    Args:
        input_pdf_path: Path of the PDF to split.
        output_folder: Directory receiving the parts; created if missing.
        split_pages: Iterable of 1-based page numbers; duplicates and ordering
            are irrelevant.

    Returns:
        list[str] | str: Paths of the written files in page order, or an error
        message string when a split point is outside ``1..total pages``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    reader = PdfReader(input_pdf_path)
    total_pages = len(reader.pages)

    if any(page < 1 or page > total_pages for page in split_pages):
        return f"错误:分页数超出PDF总页数范围(总页数:{total_pages})。"

    # Each boundary is the exclusive 0-based end index of one part.
    boundaries = sorted(set(split_pages))
    last_boundary = boundaries[-1] if boundaries else 0
    if last_boundary < total_pages:
        # Remaining pages after the last split point form a trailing part.
        boundaries.append(total_pages)

    result_files = []
    start_page = 0
    for part_number, end_page in enumerate(boundaries, start=1):
        writer = PdfWriter()
        for page_num in range(start_page, end_page):
            writer.add_page(reader.pages[page_num])

        output_pdf_path = os.path.join(output_folder, f"part_{part_number}.pdf")
        with open(output_pdf_path, "wb") as output_pdf:
            writer.write(output_pdf)

        result_files.append(output_pdf_path)
        start_page = end_page

    return result_files
|
|
|
def split_pdf_by_size(input_pdf_path, output_folder, max_size_mb):
    """Split a PDF into parts whose serialized size stays within a limit.

    Pages are accumulated in order. When adding a page pushes the current part
    above the limit, the part is written *without* that page and the page
    starts the next part, so parts do not overshoot the limit. A single page
    that is larger than the limit on its own is still written, as a one-page
    part. Parts are saved to ``output_folder`` as ``part_<n>.pdf``, numbered
    from 1.

    Args:
        input_pdf_path: Path of the PDF to split.
        output_folder: Directory receiving the parts; created if missing.
        max_size_mb: Size limit per part, in mebibytes (1024 * 1024 bytes).

    Returns:
        list[str]: Paths of the written files in page order.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    reader = PdfReader(input_pdf_path)
    total_pages = len(reader.pages)
    max_size_bytes = max_size_mb * 1024 * 1024
    result_files = []

    def _write_part(part_writer):
        """Save one finished part under the next sequential file name."""
        output_pdf_path = os.path.join(
            output_folder, f"part_{len(result_files) + 1}.pdf"
        )
        with open(output_pdf_path, "wb") as output_pdf:
            part_writer.write(output_pdf)
        result_files.append(output_pdf_path)

    writer = PdfWriter()
    part_pages = []  # indices of the pages currently held by ``writer``

    # NOTE: the part is re-serialized after every added page to measure it, so
    # the cost grows quadratically with the number of pages per part.
    for page_num in range(total_pages):
        writer.add_page(reader.pages[page_num])
        part_pages.append(page_num)

        if len(part_pages) > 1 and get_pdf_size(writer) > max_size_bytes:
            # The newest page overflowed the part: rebuild the part without it,
            # save that, and let the page open the next part instead.
            finished = PdfWriter()
            for kept_page in part_pages[:-1]:
                finished.add_page(reader.pages[kept_page])
            _write_part(finished)

            writer = PdfWriter()
            writer.add_page(reader.pages[page_num])
            part_pages = [page_num]

    if part_pages:
        _write_part(writer)

    return result_files
|
|
|
def process_pdf(input_pdf, mode, split_pages=None, max_size_mb=None):
    """Validate the form inputs and run the selected split mode.

    Args:
        input_pdf: Path of the uploaded PDF.
        mode: Either "按分页数分割" (split at page numbers) or
            "按文件大小分割" (split by maximum part size).
        split_pages: Comma-separated 1-based page numbers, used in page mode.
            Surrounding spaces, empty items and full-width commas are accepted.
        max_size_mb: Maximum size of each part in MB, used in size mode.

    Returns:
        list[str] | str: Paths of the generated files, or an error message
        string when the inputs are invalid. Invalid input never raises.
    """
    output_folder = "output_parts"
    pages = None
    size_limit = None

    if mode == "按分页数分割":
        if not split_pages:
            return "错误:请输入分页数。"
        # Normalize full-width commas so text typed with a CJK input method
        # is split the same way as ASCII input.
        tokens = [tok.strip() for tok in split_pages.replace(",", ",").split(",")]
        try:
            pages = [int(tok) for tok in tokens if tok]
        except ValueError:
            return "错误:分页数必须是以逗号分隔的整数。"
        if not pages:
            return "错误:请输入分页数。"
    elif mode == "按文件大小分割":
        if not max_size_mb:
            return "错误:请输入最大文件大小。"
        try:
            size_limit = float(max_size_mb)
        except (TypeError, ValueError):
            return "错误:最大文件大小必须是数字。"
        # Written as ``not (x > 0)`` so that NaN is rejected as well.
        if not (size_limit > 0):
            return "错误:最大文件大小必须大于0。"
    else:
        return "错误:无效的模式。"

    if not input_pdf:
        return "错误:请上传PDF文件。"

    # Create the folder only once the request is known to be valid.
    os.makedirs(output_folder, exist_ok=True)

    if pages is not None:
        # May itself return an error string (page numbers out of range).
        return split_pdf_by_pages(input_pdf, output_folder, pages)
    return split_pdf_by_size(input_pdf, output_folder, size_limit)
|
|
|
|
|
# Gradio UI: upload a PDF, choose a split mode, and download the parts.
# The theme is passed to gr.Blocks; as a bare assignment it had no effect.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# PDF 分割工具")
    with gr.Row():
        input_pdf = gr.File(label="上传PDF文件", type="filepath")
        # Default to page mode so the selection matches the input field that
        # is visible when the page first loads.
        mode = gr.Radio(
            choices=["按分页数分割", "按文件大小分割"],
            value="按分页数分割",
            label="选择分割模式",
        )
    with gr.Row():
        split_pages = gr.Textbox(label="分页数(例如:3,5,10)", visible=True)
        max_size_mb = gr.Number(label="每部分的最大大小(MB)", visible=False)
    with gr.Row():
        output_files = gr.Files(label="分割后的文件")
    with gr.Row():
        submit_btn = gr.Button("开始分割")

    def toggle_inputs(mode):
        """Show only the input field that belongs to the selected mode."""
        by_pages = mode == "按分页数分割"
        return gr.Textbox(visible=by_pages), gr.Number(visible=not by_pages)

    mode.change(toggle_inputs, inputs=mode, outputs=[split_pages, max_size_mb])

    submit_btn.click(
        process_pdf,
        inputs=[input_pdf, mode, split_pages, max_size_mb],
        outputs=output_files,
    )

# Launch only when run as a script, so importing this module does not start
# a server.
if __name__ == "__main__":
    demo.launch()