# File size: 5,538 Bytes (revision c1288e1)
import os
from PyPDF2 import PdfReader, PdfWriter
from io import BytesIO
import gradio as gr
def get_pdf_size(pdf_writer):
    """Return the size, in bytes, of the writer's current serialized content.

    The writer is rendered into a throwaway in-memory stream; the stream
    position after the write is reported as the size.
    """
    with BytesIO() as scratch:
        pdf_writer.write(scratch)
        size_in_bytes = scratch.tell()
    return size_in_bytes
def split_pdf_by_pages(input_pdf_path, output_folder, split_pages):
    """Split a PDF into consecutive parts that end at the given page numbers.

    Args:
        input_pdf_path: Path of the PDF to split.
        output_folder: Directory that receives the ``part_<n>.pdf`` files;
            it is created when missing.
        split_pages: Collection of 1-based page numbers. Each number is the
            last page of one part. Duplicates are ignored and the order does
            not matter. Pages after the largest number form a final part.

    Returns:
        A list of the written file paths in page order, or an error message
        string when any page number lies outside ``1..total_pages``.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_folder, exist_ok=True)

    reader = PdfReader(input_pdf_path)
    total_pages = len(reader.pages)

    # Reject page numbers outside the document.
    if any(page > total_pages or page < 1 for page in split_pages):
        return f"错误:分页数超出PDF总页数范围(总页数:{total_pages})。"

    # Each boundary is the exclusive 0-based end index of a part (equal to
    # the 1-based number of its last page).
    boundaries = sorted(set(split_pages))
    last_boundary = boundaries[-1] if boundaries else 0
    if last_boundary < total_pages:
        # The remaining pages become one trailing part.
        boundaries.append(total_pages)

    result_files = []
    start_page = 0
    for part_number, end_page in enumerate(boundaries, start=1):
        writer = PdfWriter()
        for page_num in range(start_page, end_page):
            writer.add_page(reader.pages[page_num])

        output_pdf_path = os.path.join(output_folder, f"part_{part_number}.pdf")
        with open(output_pdf_path, "wb") as output_pdf:
            writer.write(output_pdf)
        result_files.append(output_pdf_path)

        start_page = end_page

    return result_files
def split_pdf_by_size(input_pdf_path, output_folder, max_size_mb):
    """Split a PDF into consecutive parts no larger than ``max_size_mb``.

    Pages are appended to the current part one at a time. When appending a
    page would push the serialized part over the limit, the part is saved
    WITHOUT that page and the page starts the next part. Saving only after
    the limit had been crossed would make every non-final part too large.

    A single page that is larger than the limit on its own cannot be split
    further and is written as a one-page part.

    Args:
        input_pdf_path: Path of the PDF to split.
        output_folder: Directory that receives the ``part_<n>.pdf`` files;
            it is created when missing.
        max_size_mb: Size limit per part, in megabytes (1 MB = 1024 * 1024
            bytes).

    Returns:
        A list of the written file paths in page order.
    """
    os.makedirs(output_folder, exist_ok=True)

    reader = PdfReader(input_pdf_path)
    max_size_bytes = max_size_mb * 1024 * 1024  # MB -> bytes
    result_files = []

    def _serialize(pdf_writer):
        # Render the writer to memory so its size can be measured.
        buffer = BytesIO()
        pdf_writer.write(buffer)
        return buffer.getvalue()

    def _save_part(data):
        # Write one finished part to disk and record its path.
        output_pdf_path = os.path.join(
            output_folder, f"part_{len(result_files) + 1}.pdf"
        )
        with open(output_pdf_path, "wb") as output_pdf:
            output_pdf.write(data)
        result_files.append(output_pdf_path)

    writer = PdfWriter()
    pages_in_part = 0
    part_bytes = b""  # last serialization of the current part that was kept

    for page in reader.pages:
        writer.add_page(page)
        pages_in_part += 1
        candidate = _serialize(writer)

        if pages_in_part > 1 and len(candidate) > max_size_bytes:
            # The new page overflows the part: save the previous state and
            # start a new part with this page.
            _save_part(part_bytes)
            writer = PdfWriter()
            writer.add_page(page)
            pages_in_part = 1
            candidate = _serialize(writer)

        part_bytes = candidate

    # Save whatever is left in the last part.
    if pages_in_part:
        _save_part(part_bytes)

    return result_files
def process_pdf(input_pdf, mode, split_pages=None, max_size_mb=None):
    """Validate the user input and run the selected split mode.

    Args:
        input_pdf: Path of the uploaded PDF (falsy when nothing was uploaded).
        mode: ``"按分页数分割"`` (split by page numbers) or
            ``"按文件大小分割"`` (split by file size).
        split_pages: Comma-separated 1-based page numbers, e.g. ``"3,5,10"``.
            Used only in page mode. Full-width commas, surrounding
            whitespace and empty fields are tolerated.
        max_size_mb: Maximum size of each part in MB. Used only in size mode.

    Returns:
        The list of generated file paths, or an error message string when
        the input is invalid or the splitter reports an error.
    """
    output_folder = "output_parts"
    page_numbers = None
    size_limit = None

    if mode == "按分页数分割":
        if not split_pages:
            return "错误:请输入分页数。"
        # Normalize full-width commas so "3，5" parses like "3,5".
        fields = split_pages.replace("，", ",").split(",")
        try:
            page_numbers = [int(field) for field in fields if field.strip()]
        except ValueError:
            return "错误:分页数必须是用逗号分隔的整数。"
        if not page_numbers:
            return "错误:请输入分页数。"
    elif mode == "按文件大小分割":
        if not max_size_mb:
            return "错误:请输入最大文件大小。"
        try:
            size_limit = float(max_size_mb)
        except (TypeError, ValueError):
            return "错误:最大文件大小必须是数字。"
        # "not > 0" also rejects NaN, which "<= 0" would let through.
        if not size_limit > 0:
            return "错误:最大文件大小必须大于0。"
    else:
        return "错误:无效的模式。"

    if not input_pdf:
        return "错误:请上传PDF文件。"

    os.makedirs(output_folder, exist_ok=True)

    # The splitters return either a list of paths or an error string; both
    # are passed through unchanged.
    if page_numbers is not None:
        return split_pdf_by_pages(input_pdf, output_folder, page_numbers)
    return split_pdf_by_size(input_pdf, output_folder, size_limit)
# Gradio UI
# The theme is passed to the Blocks constructor; assigning it to a plain
# variable after the layout is built has no effect on the app.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# PDF 分割工具")
    with gr.Row():
        input_pdf = gr.File(label="上传PDF文件", type="filepath")
        mode = gr.Radio(choices=["按分页数分割", "按文件大小分割"], label="选择分割模式")
    with gr.Row():
        split_pages = gr.Textbox(label="分页数(例如:3,5,10)", visible=True)
        max_size_mb = gr.Number(label="每部分的最大大小(MB)", visible=False)
    with gr.Row():
        output_files = gr.Files(label="分割后的文件")
    with gr.Row():
        submit_btn = gr.Button("开始分割")

    # Show only the input that belongs to the selected mode.
    def toggle_inputs(mode):
        if mode == "按分页数分割":
            return gr.Textbox(visible=True), gr.Number(visible=False)
        else:
            return gr.Textbox(visible=False), gr.Number(visible=True)

    mode.change(toggle_inputs, inputs=mode, outputs=[split_pages, max_size_mb])

    # process_pdf reports problems as plain strings, but the output component
    # expects file paths. Raise such messages as gr.Error so they are shown
    # to the user instead of being treated as a file.
    def run_split(input_pdf, mode, split_pages, max_size_mb):
        result = process_pdf(input_pdf, mode, split_pages, max_size_mb)
        if isinstance(result, str):
            raise gr.Error(result)
        return result

    # Wire the button to the processing function.
    submit_btn.click(
        run_split,
        inputs=[input_pdf, mode, split_pages, max_size_mb],
        outputs=output_files,
    )

# Start the app.
demo.launch()