zjrwtxtechstudio committed on
Commit
c1288e1
·
verified ·
1 Parent(s): fdb120d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +161 -0
app.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from PyPDF2 import PdfReader, PdfWriter
3
+ from io import BytesIO
4
+ import gradio as gr
5
+
6
def get_pdf_size(pdf_writer):
    """Return the size, in bytes, of the content held by a PdfWriter.

    The writer is serialized into a throwaway in-memory buffer and the
    buffer's final position is reported, so nothing is written to disk.

    Args:
        pdf_writer: Object exposing ``write(stream)``.

    Returns:
        The stream position after the write completes.
    """
    with BytesIO() as scratch:
        pdf_writer.write(scratch)
        size_in_bytes = scratch.tell()
    return size_in_bytes
11
+
12
def split_pdf_by_pages(input_pdf_path, output_folder, split_pages):
    """Split a PDF into consecutive parts ending at the given page numbers.

    Args:
        input_pdf_path: Path of the PDF to split.
        output_folder: Directory receiving the ``part_<n>.pdf`` files; it is
            created when missing.
        split_pages: 1-based page numbers. Each number is the last page of a
            part. Order and duplicates do not matter.

    Returns:
        A list of the written file paths in part order, or an error message
        string when any page number lies outside ``1..total_pages``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    reader = PdfReader(input_pdf_path)
    total_pages = len(reader.pages)

    # Materialize once so a one-shot iterable survives both the validation
    # pass and the sort below.
    split_pages = list(split_pages)
    if any(page > total_pages or page < 1 for page in split_pages):
        return f"错误:分页数超出PDF总页数范围(总页数:{total_pages})。"

    # Sorted, de-duplicated split points; each is an exclusive 0-based end.
    boundaries = sorted(set(split_pages))

    # Pages after the last split point form one final part.
    last_boundary = boundaries[-1] if boundaries else 0
    if last_boundary < total_pages:
        boundaries.append(total_pages)

    result_files = []
    start_page = 0
    for part_number, end_page in enumerate(boundaries, start=1):
        writer = PdfWriter()
        for page_num in range(start_page, end_page):
            writer.add_page(reader.pages[page_num])

        output_pdf_path = os.path.join(output_folder, f"part_{part_number}.pdf")
        with open(output_pdf_path, "wb") as output_pdf:
            writer.write(output_pdf)

        result_files.append(output_pdf_path)
        start_page = end_page

    return result_files
61
+
62
def split_pdf_by_size(input_pdf_path, output_folder, max_size_mb):
    """Split a PDF into consecutive parts that each stay within a size limit.

    Pages are accumulated until adding the next page would push the
    serialized part above the limit; the part is then saved WITHOUT that
    page, which starts the next part. A single page that is larger than the
    limit on its own is still emitted as a one-page part, since it cannot be
    split further.

    Args:
        input_pdf_path: Path of the PDF to split.
        output_folder: Directory receiving the ``part_<n>.pdf`` files; it is
            created when missing.
        max_size_mb: Maximum size of each part, in megabytes.

    Returns:
        A list of the written file paths in part order.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    reader = PdfReader(input_pdf_path)
    total_pages = len(reader.pages)
    max_size_bytes = max_size_mb * 1024 * 1024  # MB -> bytes
    result_files = []

    def save_part(part_writer):
        # Parts are numbered in the order they are written.
        output_pdf_path = os.path.join(
            output_folder, f"part_{len(result_files) + 1}.pdf"
        )
        with open(output_pdf_path, "wb") as output_pdf:
            part_writer.write(output_pdf)
        result_files.append(output_pdf_path)

    writer = PdfWriter()
    part_start = 0  # index of the first page held by `writer`
    for page_num in range(total_pages):
        writer.add_page(reader.pages[page_num])

        # The first page of a part is always accepted, otherwise an
        # oversized page could never be placed anywhere.
        if page_num == part_start or get_pdf_size(writer) <= max_size_bytes:
            continue

        # Adding this page overflowed the limit: save the pages before it
        # (rebuilt, because the current writer already contains the
        # overflowing page) and start the next part with this page.
        finished = PdfWriter()
        for kept_page in range(part_start, page_num):
            finished.add_page(reader.pages[kept_page])
        save_part(finished)

        writer = PdfWriter()
        writer.add_page(reader.pages[page_num])
        part_start = page_num

    # Whatever is still buffered becomes the last part.
    if part_start < total_pages:
        save_part(writer)

    return result_files
104
+
105
+ def process_pdf(input_pdf, mode, split_pages=None, max_size_mb=None):
106
+ """处理PDF文件"""
107
+ output_folder = "output_parts"
108
+ if not os.path.exists(output_folder):
109
+ os.makedirs(output_folder)
110
+
111
+ if mode == "按分页数分割":
112
+ if not split_pages:
113
+ return "错误:请输入分页数。"
114
+ split_pages = [int(page) for page in split_pages.split(",")]
115
+ result_files = split_pdf_by_pages(input_pdf, output_folder, split_pages)
116
+ elif mode == "按文件大小分割":
117
+ if not max_size_mb:
118
+ return "错误:请输入最大文件大小。"
119
+ result_files = split_pdf_by_size(input_pdf, output_folder, float(max_size_mb))
120
+ else:
121
+ return "错误:无效的模式。"
122
+
123
+ if isinstance(result_files, str): # 如果返回的是错误信息
124
+ return result_files
125
+
126
+ # 返回所有生成的文件
127
+ return result_files
128
+
129
# Gradio interface. The theme must be passed to gr.Blocks to take effect;
# a bare `theme = ...` assignment is never read by Gradio.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# PDF 分割工具")
    with gr.Row():
        input_pdf = gr.File(label="上传PDF文件", type="filepath")
        mode = gr.Radio(choices=["按分页数分割", "按文件大小分割"], label="选择分割模式")
    with gr.Row():
        split_pages = gr.Textbox(label="分页数(例如:3,5,10)", visible=True)
        max_size_mb = gr.Number(label="每部分的最大大小(MB)", visible=False)
    with gr.Row():
        output_files = gr.Files(label="分割后的文件")
    with gr.Row():
        submit_btn = gr.Button("开始分割")

    # Show only the input that belongs to the selected split mode.
    def toggle_inputs(selected_mode):
        by_pages = selected_mode == "按分页数分割"
        return gr.Textbox(visible=by_pages), gr.Number(visible=not by_pages)

    mode.change(toggle_inputs, inputs=mode, outputs=[split_pages, max_size_mb])

    # Wire the button to the processing function.
    submit_btn.click(
        process_pdf,
        inputs=[input_pdf, mode, split_pages, max_size_mb],
        outputs=output_files,
    )

# Start the app.
demo.launch()