File size: 6,409 Bytes
c1288e1 d3c24c9 c1288e1 d3c24c9 c1288e1 d3c24c9 c1288e1 d3c24c9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
import os
from PyPDF2 import PdfReader, PdfWriter
from io import BytesIO
import gradio as gr
def get_pdf_size(pdf_writer):
    """Return the serialized size, in bytes, of *pdf_writer*'s content.

    The writer is rendered into a throwaway in-memory buffer and the
    buffer's stream position after the write is reported as the size.
    """
    with BytesIO() as scratch:
        pdf_writer.write(scratch)
        size_in_bytes = scratch.tell()
    return size_in_bytes
def split_pdf_by_pages(input_pdf_path, output_folder, split_pages):
    """Split a PDF into consecutive parts that end at the given page numbers.

    Each entry of *split_pages* is the 1-based number of the last page of a
    part. Pages after the largest split page, if any, form one final part.
    Parts are written as ``part_1.pdf``, ``part_2.pdf``, ... in
    *output_folder*.

    Args:
        input_pdf_path: Path of the PDF file to split.
        output_folder: Directory receiving the parts; created if missing.
        split_pages: Iterable of 1-based page numbers; duplicates are
            ignored and order does not matter.

    Returns:
        The list of written file paths in part order, or an error message
        string when any split page is outside ``1..total_pages``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    reader = PdfReader(input_pdf_path)
    total_pages = len(reader.pages)

    # Reject split points outside the document.
    if any(page > total_pages or page < 1 for page in split_pages):
        return f"错误:分页数超出PDF总页数范围(总页数:{total_pages})。"

    # Exclusive end index of every part; a 1-based "last page" number equals
    # the 0-based exclusive end, so the sorted split pages can be used as-is.
    boundaries = sorted(set(split_pages))
    last_split = boundaries[-1] if boundaries else 0
    if last_split < total_pages:
        # Remaining pages after the last split point become the final part.
        boundaries.append(total_pages)

    result_files = []
    start_page = 0
    for part_number, end_page in enumerate(boundaries, start=1):
        writer = PdfWriter()
        for page_num in range(start_page, end_page):
            writer.add_page(reader.pages[page_num])

        output_pdf_path = os.path.join(output_folder, f"part_{part_number}.pdf")
        with open(output_pdf_path, "wb") as output_pdf:
            writer.write(output_pdf)
        result_files.append(output_pdf_path)

        start_page = end_page

    return result_files
def split_pdf_by_size(input_pdf_path, output_folder, max_size_mb):
    """Split a PDF into parts whose serialized size stays within a limit.

    Pages are accumulated in order. When adding a page would push the
    current part above the limit, the part is closed *before* that page and
    the page starts the next part, so parts do not exceed the limit. The one
    exception is a single page that is larger than the limit on its own: it
    cannot be split further and is written as its own part.

    Args:
        input_pdf_path: Path of the PDF file to split.
        output_folder: Directory receiving ``part_<n>.pdf``; created if
            missing.
        max_size_mb: Maximum size of one part, in megabytes.

    Returns:
        The list of written file paths in part order.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    reader = PdfReader(input_pdf_path)
    total_pages = len(reader.pages)
    max_size_bytes = max_size_mb * 1024 * 1024  # MB -> bytes
    result_files = []

    def save_part(part_writer):
        """Write *part_writer* as the next numbered part and record its path."""
        output_pdf_path = os.path.join(
            output_folder, f"part_{len(result_files) + 1}.pdf"
        )
        with open(output_pdf_path, "wb") as output_pdf:
            part_writer.write(output_pdf)
        result_files.append(output_pdf_path)

    writer = PdfWriter()
    part_page_nums = []  # indices of the pages accepted into the current part
    for page_num in range(total_pages):
        writer.add_page(reader.pages[page_num])

        # A part always keeps its first page, whatever its size; only a
        # later page can overflow the part.
        if part_page_nums and get_pdf_size(writer) > max_size_bytes:
            # The page just added does not fit. A writer cannot drop a page,
            # so rebuild the part from the pages accepted so far.
            fitted = PdfWriter()
            for kept_num in part_page_nums:
                fitted.add_page(reader.pages[kept_num])
            save_part(fitted)

            # The overflowing page opens the next part.
            writer = PdfWriter()
            writer.add_page(reader.pages[page_num])
            part_page_nums = [page_num]
        else:
            part_page_nums.append(page_num)

    # Flush whatever is left in the last, still-open part.
    if part_page_nums:
        save_part(writer)

    return result_files
def process_pdf(input_pdf, mode, split_pages=None, max_size_mb=None):
    """Validate the UI inputs and dispatch to the matching split routine.

    Args:
        input_pdf: Path of the uploaded PDF file.
        mode: ``"按分页数分割"`` (split by page numbers) or
            ``"按文件大小分割"`` (split by file size).
        split_pages: Comma-separated 1-based page numbers, e.g. ``"3,5,10"``;
            used only in page mode. Both ``,`` and the full-width ``,`` are
            accepted, and empty items (such as a trailing comma) are ignored.
        max_size_mb: Maximum part size in megabytes; used only in size mode.

    Returns:
        The list of generated file paths, or an error message string when
        the inputs are invalid or the split routine reports an error.
    """
    output_folder = "output_parts"

    by_pages = mode == "按分页数分割"
    by_size = mode == "按文件大小分割"
    if not (by_pages or by_size):
        return "错误:无效的模式。"

    if by_pages:
        if not split_pages:
            return "错误:请输入分页数。"
        # Normalize the full-width comma so either separator works.
        tokens = [item.strip() for item in split_pages.replace(",", ",").split(",")]
        try:
            page_numbers = [int(item) for item in tokens if item]
        except ValueError:
            # Report bad input as a message instead of an unhandled exception.
            return "错误:分页数必须是用逗号分隔的整数。"
        if not page_numbers:
            return "错误:请输入分页数。"
    elif not max_size_mb:
        return "错误:请输入最大文件大小。"

    # Without an upload there is nothing to read; fail with a message rather
    # than an exception from the PDF reader.
    if not input_pdf:
        return "错误:请上传PDF文件。"

    os.makedirs(output_folder, exist_ok=True)

    # The split routines return either a list of paths or an error string;
    # both are passed through unchanged.
    if by_pages:
        return split_pdf_by_pages(input_pdf, output_folder, page_numbers)
    return split_pdf_by_size(input_pdf, output_folder, float(max_size_mb))
# Gradio UI
# The Soft theme must be passed to gr.Blocks to take effect; assigning it to
# a variable alone does not style the app.
theme = gr.themes.Soft()

with gr.Blocks(theme=theme) as demo:
    gr.Markdown("# PDF 分割工具")

    with gr.Row():
        input_pdf = gr.File(label="上传PDF文件", type="filepath")
        mode = gr.Radio(choices=["按分页数分割", "按文件大小分割"], label="选择分割模式")

    with gr.Row():
        split_pages = gr.Textbox(label="分页数(例如:3,5,10)", visible=True)
        max_size_mb = gr.Number(label="每部分的最大大小(MB)", visible=False)

    with gr.Row():
        output_files = gr.Files(label="分割后的文件")

    with gr.Row():
        submit_btn = gr.Button("开始分割")
        download_all_btn = gr.Button("批量下载")

    def toggle_inputs(selected_mode):
        """Show the input that belongs to the selected mode and hide the other.

        Returns updates for the page-number textbox and the size number box,
        in that order.
        """
        if selected_mode == "按分页数分割":
            return gr.Textbox(visible=True), gr.Number(visible=False)
        return gr.Textbox(visible=False), gr.Number(visible=True)

    def download_all_files(file_list):
        """Bundle the listed files into one zip archive and return its path.

        Returns ``None`` when *file_list* is empty or missing.
        """
        if not file_list:
            return None

        import tempfile
        import zipfile

        # Reserve a unique archive path, then close the handle so zipfile is
        # the only writer of the file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as temp_zip:
            zip_path = temp_zip.name

        with zipfile.ZipFile(zip_path, "w") as zf:
            for file_path in file_list:
                # Store each file under its bare name, without directories.
                zf.write(file_path, os.path.basename(file_path))
        return zip_path

    # Switch the visible input whenever the mode changes.
    mode.change(toggle_inputs, inputs=mode, outputs=[split_pages, max_size_mb])

    # Run the split when the submit button is clicked.
    submit_btn.click(
        process_pdf,
        inputs=[input_pdf, mode, split_pages, max_size_mb],
        outputs=output_files,
    )

    # Offer all generated parts as a single zip download.
    download_all_btn.click(
        download_all_files,
        inputs=[output_files],
        outputs=gr.File(label="下载所有文件"),
    )

# Start the app
demo.launch()
|