File size: 6,409 Bytes
c1288e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d3c24c9
c1288e1
 
 
 
 
 
 
 
d3c24c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1288e1
 
 
 
 
 
 
 
d3c24c9
 
 
 
 
 
 
 
c1288e1
 
 
d3c24c9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import os
from PyPDF2 import PdfReader, PdfWriter
from io import BytesIO
import gradio as gr

def get_pdf_size(pdf_writer):
    """获取当前PdfWriter对象的内容大小(以字节为单位)"""
    buffer = BytesIO()
    pdf_writer.write(buffer)
    return buffer.tell()

def split_pdf_by_pages(input_pdf_path, output_folder, split_pages):
    """按分页数分割PDF"""
    # 创建输出文件夹
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # 读取PDF文件
    reader = PdfReader(input_pdf_path)
    total_pages = len(reader.pages)

    # 检查用户输入的分页数是否有效
    if any(page > total_pages or page < 1 for page in split_pages):
        return f"错误:分页数超出PDF总页数范围(总页数:{total_pages})。"

    # 对分页数进行排序并去重
    split_pages = sorted(set(split_pages))

    # 切割PDF
    start_page = 0
    result_files = []
    for i, split_page in enumerate(split_pages):
        writer = PdfWriter()
        end_page = split_page - 1  # PyPDF2的页码从0开始

        # 添加从 start_page 到 end_page 的页面
        for page_num in range(start_page, end_page + 1):
            writer.add_page(reader.pages[page_num])

        # 保存切割后的PDF文件
        output_pdf_path = os.path.join(output_folder, f"part_{i+1}.pdf")
        with open(output_pdf_path, "wb") as output_pdf:
            writer.write(output_pdf)

        result_files.append(output_pdf_path)
        start_page = end_page + 1

    # 处理最后一组页面
    if start_page < total_pages:
        writer = PdfWriter()
        for page_num in range(start_page, total_pages):
            writer.add_page(reader.pages[page_num])

        output_pdf_path = os.path.join(output_folder, f"part_{len(split_pages) + 1}.pdf")
        with open(output_pdf_path, "wb") as output_pdf:
            writer.write(output_pdf)

        result_files.append(output_pdf_path)

    return result_files

def split_pdf_by_size(input_pdf_path, output_folder, max_size_mb):
    """按文件大小分割PDF"""
    # 创建输出文件夹
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # 读取PDF文件
    reader = PdfReader(input_pdf_path)
    total_pages = len(reader.pages)

    # 初始化变量
    writer = PdfWriter()
    current_part = 1
    max_size_bytes = max_size_mb * 1024 * 1024  # 将MB转换为字节
    result_files = []

    # 逐页处理
    for page_num in range(total_pages):
        # 添加当前页
        writer.add_page(reader.pages[page_num])

        # 检查当前文件大小
        current_size = get_pdf_size(writer)
        if current_size >= max_size_bytes:
            # 如果超过最大大小,保存当前部分
            output_pdf_path = os.path.join(output_folder, f"part_{current_part}.pdf")
            with open(output_pdf_path, "wb") as output_pdf:
                writer.write(output_pdf)
            result_files.append(output_pdf_path)

            # 重置writer并增加部分计数
            writer = PdfWriter()
            current_part += 1

    # 保存最后一部分(如果有剩余页面)
    if len(writer.pages) > 0:
        output_pdf_path = os.path.join(output_folder, f"part_{current_part}.pdf")
        with open(output_pdf_path, "wb") as output_pdf:
            writer.write(output_pdf)
        result_files.append(output_pdf_path)

    return result_files

def process_pdf(input_pdf, mode, split_pages=None, max_size_mb=None):
    """处理PDF文件"""
    output_folder = "output_parts"
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    if mode == "按分页数分割":
        if not split_pages:
            return "错误:请输入分页数。"
        split_pages = [int(page) for page in split_pages.split(",")]
        result_files = split_pdf_by_pages(input_pdf, output_folder, split_pages)
    elif mode == "按文件大小分割":
        if not max_size_mb:
            return "错误:请输入最大文件大小。"
        result_files = split_pdf_by_size(input_pdf, output_folder, float(max_size_mb))
    else:
        return "错误:无效的模式。"

    if isinstance(result_files, str):  # 如果返回的是错误信息
        return result_files

    # 返回所有生成的文件
    return result_files

# Gradio 界面
with gr.Blocks() as demo:
    gr.Markdown("# PDF 分割工具")
    with gr.Row():
        input_pdf = gr.File(label="上传PDF文件", type="filepath")
        mode = gr.Radio(choices=["按分页数分割", "按文件大小分割"], label="选择分割模式")
    with gr.Row():
        split_pages = gr.Textbox(label="分页数(例如:3,5,10)", visible=True)
        max_size_mb = gr.Number(label="每部分的最大大小(MB)", visible=False)
    with gr.Row():
        output_files = gr.Files(label="分割后的文件")
    with gr.Row():
        submit_btn = gr.Button("开始分割")
        download_all_btn = gr.Button("批量下载")

    # 动态显示/隐藏输入框
    def toggle_inputs(mode):
        if mode == "按分页数分割":
            return gr.Textbox(visible=True), gr.Number(visible=False)
        else:
            return gr.Textbox(visible=False), gr.Number(visible=True)

    # 批量下载功能
    def download_all_files(file_list):
        if not file_list:
            return None
        # 创建一个临时zip文件
        import tempfile
        import zipfile
        import shutil

        with tempfile.NamedTemporaryFile(delete=False, suffix='.zip') as temp_zip:
            with zipfile.ZipFile(temp_zip.name, 'w') as zf:
                for file_path in file_list:
                    # 获取文件名
                    file_name = os.path.basename(file_path)
                    # 将文件添加到zip中
                    zf.write(file_path, file_name)
            return temp_zip.name

    mode.change(toggle_inputs, inputs=mode, outputs=[split_pages, max_size_mb])

    # 绑定处理函数
    submit_btn.click(
        process_pdf,
        inputs=[input_pdf, mode, split_pages, max_size_mb],
        outputs=output_files
    )

    # 绑定批量下载函数
    download_all_btn.click(
        download_all_files,
        inputs=[output_files],
        outputs=gr.File(label="下载所有文件")
    )

    theme=gr.themes.Soft()

# 启动应用
demo.launch()