XinyueZhou committed on
Commit
d72fd49
·
verified ·
1 Parent(s): 6e73024

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +427 -366
app.py CHANGED
@@ -1,366 +1,427 @@
1
- import base64
2
- import io
3
- import json
4
- import os
5
- import re
6
- import tempfile
7
- import zipfile
8
- from datetime import datetime
9
- from pathlib import Path
10
-
11
- import gradio as gr
12
- import requests
13
- from PIL import Image
14
- import pdf2image
15
-
16
- API_URL = "https://t707h6d9q6oftbx3.aistudio-app.com/layout-parsing"
17
- TOKEN = os.getenv("API_TOKEN", "c9e4aaf9634724e215690ba66a66dbdbdf3222a2")
18
-
19
- CSS = """
20
- :root {
21
- --sand-color: #FAF9F6;
22
- --white: #ffffff;
23
- --shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
24
- --text-color: #FAF9F6;
25
- --black:#000000;
26
- }
27
-
28
- body {
29
- display: flex;
30
- justify-content: center;
31
- background-color: var(--sand-color);
32
- color: var(--text-color);
33
- }
34
-
35
- .gradio-container {
36
- max-width: 1200px;
37
- width: 100%;
38
- margin: 20px auto;
39
- padding: 20px;
40
- background-color: var(--white);
41
- border-radius: 8px;
42
- box-shadow: var(--shadow);
43
- }
44
-
45
-
46
- #component-0,
47
- #tabs,
48
- #settings {
49
- background-color: var(--white) !important;
50
- border-radius: 8px;
51
- padding: 15px;
52
- }
53
-
54
- .upload-section {
55
- width: 100%;
56
- max-width: 600px;
57
- margin: 0 auto 30px;
58
- padding: 20px;
59
- background-color: var(--sand-color) !important;
60
- border-radius: 8px;
61
- box-shadow: var(--shadow);
62
- }
63
-
64
- .center-content {
65
- display: flex;
66
- flex-direction: column;
67
- align-items: center;
68
- text-align: center;
69
- margin-bottom: 20px;
70
- }
71
-
72
- .header {
73
- margin-bottom: 30px;
74
- }
75
-
76
- .result-container,
77
- .pdf-preview,
78
- .markdown-result,
79
- .download-section {
80
- background-color: var(--white);
81
- border-radius: 8px;
82
- box-shadow: var(--shadow);
83
- padding: 20px;
84
- }
85
-
86
- .result-container {
87
- display: flex;
88
- gap: 20px;
89
- margin-bottom: 30px;
90
- }
91
-
92
- .pdf-preview, .markdown-result {
93
- flex: 1;
94
- }
95
-
96
- button {
97
- background-color: var(--text-color) !important;
98
- color: var(--black) !important;
99
- border: none !important;
100
- border-radius: 4px;
101
- padding: 8px 16px;
102
- }
103
-
104
- button:hover {
105
- opacity: 0.8 !important;
106
- }
107
-
108
- .radio-group {
109
- margin-bottom: 15px !important;
110
- }
111
-
112
- .file-download {
113
- margin-top: 15px !important;
114
- }
115
- .loader {
116
- border: 5px solid #f3f3f3;
117
- border-top: 5px solid #3498db;
118
- border-radius: 50%;
119
- width: 50px;
120
- height: 50px;
121
- animation: spin 1s linear infinite;
122
- margin: 20px auto;
123
- }
124
-
125
- @keyframes spin {
126
- 0% { transform: rotate(0deg); }
127
- 100% { transform: rotate(360deg); }
128
- }
129
-
130
- .loader-container {
131
- text-align: center;
132
- margin: 20px 0;
133
- }
134
- """
135
-
136
-
137
- def clean_markdown_text(text):
138
- if not text:
139
- return ""
140
- text = re.sub(r'<[^>]+>', '', text)
141
- text = re.sub(r'\n{3,}', '\n\n', text)
142
- return text.strip()
143
-
144
-
145
- def pdf_to_images(pdf_path):
146
- try:
147
- images = pdf2image.convert_from_path(pdf_path)
148
- return [image for image in images]
149
- except:
150
- return None
151
-
152
-
153
- def process_file(file_path, file_type):
154
- try:
155
- with open(file_path, "rb") as f:
156
- file_bytes = f.read()
157
-
158
- file_data = base64.b64encode(file_bytes).decode("ascii")
159
- headers = {
160
- "Authorization": f"token {TOKEN}",
161
- "Content-Type": "application/json"
162
- }
163
-
164
- response = requests.post(
165
- API_URL,
166
- json={"file": file_data, "fileType": 0 if file_type == "pdf" else 1},
167
- headers=headers,
168
- timeout=60
169
- )
170
- response.raise_for_status()
171
-
172
- result = response.json()
173
- layout_results = result.get("result", {}).get("layoutParsingResults", [])
174
-
175
- markdown_contents = []
176
- clean_markdown_contents = []
177
- for res in layout_results:
178
- markdown = res.get("markdown", {})
179
- if isinstance(markdown, str):
180
- original = markdown
181
- elif isinstance(markdown, dict):
182
- original = markdown.get("text", "")
183
-
184
- markdown_contents.append(original)
185
- clean_markdown_contents.append(clean_markdown_text(original))
186
-
187
- if file_type == "pdf":
188
- images = pdf_to_images(file_path)
189
- else:
190
- images = [Image.open(file_path)]
191
-
192
- return {
193
- "original_file": file_path,
194
- "markdown_contents": markdown_contents,
195
- "clean_markdown_contents": clean_markdown_contents,
196
- "pdf_images": images,
197
- "api_response": result
198
- }
199
-
200
- except Exception as e:
201
- raise gr.Error(f"Error processing file: {str(e)}")
202
-
203
-
204
- def create_zip_file(results):
205
- try:
206
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
207
- zip_filename = f"analysis_results_{timestamp}.zip"
208
-
209
- temp_dir = tempfile.mkdtemp()
210
- zip_path = os.path.join(temp_dir, zip_filename)
211
-
212
- with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
213
- original_path = results.get("original_file", "")
214
- if original_path and Path(original_path).exists():
215
- zipf.write(original_path, f"original/{Path(original_path).name}")
216
-
217
- markdowns = results.get("markdown_contents", [])
218
- for i, md_content in enumerate(markdowns):
219
- if md_content:
220
- zipf.writestr(f"markdown/original/markdown_{i + 1}.md", md_content)
221
-
222
- api_response = results.get("api_response", {})
223
- zipf.writestr("api_response.json", json.dumps(api_response, indent=2, ensure_ascii=False))
224
-
225
- return zip_path
226
-
227
- except Exception as e:
228
- raise gr.Error(f"Error creating ZIP file: {str(e)}")
229
-
230
-
231
- def export_markdown(results):
232
- try:
233
- markdowns = results.get("markdown_contents", [])
234
- if not markdowns:
235
- raise gr.Error("No markdown content to export")
236
-
237
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
238
- filename = f"original_markdown_{timestamp}.md"
239
- content = "\n\n".join(markdowns)
240
-
241
- temp_dir = tempfile.mkdtemp()
242
- file_path = os.path.join(temp_dir, filename)
243
-
244
- with open(file_path, 'w', encoding='utf-8') as f:
245
- f.write(content)
246
-
247
- return file_path
248
-
249
- except Exception as e:
250
- raise gr.Error(f"Error exporting markdown: {str(e)}")
251
-
252
-
253
- with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
254
- results_state = gr.State()
255
-
256
- with gr.Column(elem_classes=["center-content", "header"]):
257
- gr.Markdown("# Document Parsing System")
258
- gr.Markdown("### Upload PDF or image files for analysis")
259
-
260
- with gr.Column(elem_classes=["center-content", "upload-section"]):
261
- file_type = gr.Radio(
262
- ["pdf", "image"],
263
- label="File type",
264
- value="pdf",
265
- interactive=True
266
- )
267
- file_input = gr.File(
268
- label="Upload document",
269
- file_types=[".pdf", ".jpg", ".jpeg", ".png"],
270
- type="filepath"
271
- )
272
- process_btn = gr.Button("Analyze document", variant="primary")
273
-
274
- # Loading spinner container
275
- loading_spinner = gr.Column(
276
- visible=False,
277
- elem_classes=["loader-container"]
278
- )
279
- with loading_spinner:
280
- gr.HTML("""
281
- <div class="loader"></div>
282
- <p>Wait...</p>
283
- """)
284
-
285
- with gr.Row(elem_classes=["result-container"]):
286
- with gr.Column(elem_classes=["pdf-preview"]):
287
- gr.Markdown("### Original document preview")
288
- pdf_display = gr.Gallery(label="PDF page", show_label=False)
289
-
290
- with gr.Column(elem_classes=["markdown-result"]):
291
- with gr.Row(elem_classes=["radio-group"]):
292
- display_mode = gr.Radio(
293
- ["Original Markdown", "Cleaned Text"],
294
- label="Display Mode",
295
- value="Original Markdown",
296
- interactive=True
297
- )
298
- markdown_display = gr.HTML(label="Analysis Results")
299
-
300
- with gr.Column(elem_classes=["download-section"]):
301
- gr.Markdown("### Result Export")
302
- with gr.Row():
303
- download_md_btn = gr.Button("Download Original Markdown", variant="secondary")
304
- download_all_btn = gr.Button("Download Complete Analysis Results (ZIP)", variant="primary")
305
- download_file = gr.File(visible=False, label="Download file", elem_classes=["file-download"])
306
-
307
- # Define a function to toggle the loading spinner
308
- def toggle_spinner():
309
- return gr.update(visible=True)
310
-
311
-
312
- def hide_spinner():
313
- return gr.update(visible=False)
314
-
315
-
316
- process_btn.click(
317
- toggle_spinner,
318
- outputs=[loading_spinner]
319
- ).then(
320
- process_file,
321
- inputs=[file_input, file_type],
322
- outputs=[results_state]
323
- ).then(
324
- hide_spinner,
325
- outputs=[loading_spinner]
326
- ).success(
327
- lambda res: res["pdf_images"] if res and res.get("pdf_images") else [],
328
- inputs=[results_state],
329
- outputs=[pdf_display]
330
- ).success(
331
- lambda res: res["markdown_contents"][0] if res and res.get("markdown_contents") else "",
332
- inputs=[results_state],
333
- outputs=[markdown_display]
334
- )
335
-
336
- display_mode.change(
337
- lambda mode, res: (
338
- res["markdown_contents"][0] if mode == "原始Markdown"
339
- else res["clean_markdown_contents"][0]
340
- ) if res else "",
341
- inputs=[display_mode, results_state],
342
- outputs=[markdown_display]
343
- )
344
-
345
- download_md_btn.click(
346
- export_markdown,
347
- inputs=[results_state],
348
- outputs=[download_file]
349
- ).then(
350
- lambda x: gr.update(visible=True),
351
- inputs=[download_file],
352
- outputs=[download_file]
353
- )
354
-
355
- download_all_btn.click(
356
- create_zip_file,
357
- inputs=[results_state],
358
- outputs=[download_file]
359
- ).then(
360
- lambda x: gr.update(visible=True),
361
- inputs=[download_file],
362
- outputs=[download_file]
363
- )
364
-
365
- if __name__ == "__main__":
366
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import io
3
+ import json
4
+ import os
5
+ import re
6
+ import tempfile
7
+ import zipfile
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+
11
+ import gradio as gr
12
+ import requests
13
+ from PIL import Image
14
+ import pdf2image
15
+
16
# Endpoint of the remote layout-parsing service; process_file POSTs the
# base64-encoded document to this URL.
API_URL = "https://t707h6d9q6oftbx3.aistudio-app.com/layout-parsing"
# Token sent in the "Authorization" header, read from the API_TOKEN
# environment variable.
# NOTE(review): the fallback below is a literal token committed to source
# control. It looks like a live credential -- consider rotating it and
# requiring API_TOKEN to be set instead of shipping a default.
TOKEN = os.getenv("API_TOKEN", "c9e4aaf9634724e215690ba66a66dbdbdf3222a2")
18
+
19
+
20
# Custom stylesheet handed to gr.Blocks(css=CSS). It defines the colour
# variables and the styles for the header/logo, navigation buttons, upload
# section, result columns, gallery, buttons and the loading spinner.
#
# The /* ... */ comments inside the string are part of the runtime value and
# are therefore kept verbatim. English gloss of the non-English ones, in order:
#   - "newly added button colour variable"
#   - "Change 1: refine the header container style"
#   - "reduce bottom padding"
#   - "reduce the gap between logo and buttons"
#   - "Change 2: refine the navigation button layout"
#   - "reduce button gap" / "reduce top margin" / "limit button area width"
#   - "changed to FAF9F6" (NOTE(review): stale -- the rule uses
#     --button-color, which is #cbbdff)
#   - "force the footer links to a white background" ("... section")
#   - "targets Use via API" / "targets Settings"
#   - "remaining styles unchanged"
# NOTE(review): the #component-16/17/18 selectors look like auto-generated
# element ids; confirm they still match after layout changes.
CSS = """
:root {
--sand-color: #D7B4F8;
--white: #ffffff;
--shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
--text-color: #F5ECFD;
--black:#000000;
--link-hover: #F5ECFD;
--content-width: 1200px;
--button-color: #cbbdff; /* 新增按钮颜色变量 */
}

body {
display: flex;
justify-content: center;
background-color: var(--sand-color);
color: var(--black);
font-family: Arial, sans-serif;
}

.gradio-container {
max-width: var(--content-width) !important;
width: 100% !important;
margin: 20px auto;
padding: 20px;
background-color: var(--white) !important;
}

/* 修改1: 优化header容器样式 */
.header-container {
width: 100%;
background-color: var(--text-color) !important;
padding: 20px 0 10px 0; /* 减少底部padding */
margin-bottom: 20px;
border-radius: 8px;
}

.logo-container {
width: 100%;
margin-bottom: 15px; /* 减少logo与按钮间距 */
text-align: center;
}

.logo-img {
width: 100%;
max-width: var(--content-width);
margin: 0 auto;
display: block;
}

/* 修改2: 优化导航按钮布局 */
.nav-buttons {
display: flex;
justify-content: center;
gap: 20px; /* 减少按钮间距 */
margin-top: 10px; /* 减少顶部间距 */
width: 80%; /* 控制按钮区域宽度 */
margin-left: auto;
margin-right: auto;
}

.nav-button {
background-color: var(--button-color) !important; /* 修改为FAF9F6 */
color: var(--black) !important;
text-decoration: none;
font-weight: bold;
font-size: 20px;
padding: 6px 20px !important;
border-radius: 24px !important;
border: none !important;
transition: opacity 0.2s;
flex: 1;
max-width: 200px;
text-align: center;
}

.nav-button:hover {
opacity: 0.8 !important;
}

.upload-section {
width: 100%;
margin: 0 auto 30px;
padding: 20px;
background-color: var(--text-color) !important;
border-radius: 8px;
box-shadow: var(--shadow);
}
/* 强制底部链接为白色背景 */
#component-16, /* Use via API 部分 */
#component-17, /* Settings 部分 */
#component-18 { /* Built with Gradio 部分 */
background-color: var(--white) !important;
}

footer .gr-panel,
.gr-footer,
.gr-panel:has(a[href*="api"]), /* 针对Use via API */
.gr-panel:has(a[href*="settings"]) /* 针对Settings */ {
background-color: var(--white) !important;
color: var(--black) !important;
}


/* 其他样式保持不变 */
.result-container {
display: flex;
gap: 20px;
margin-bottom: 30px;
width: 100%;
}

.pdf-preview {
flex: 1;
min-width: 0;
}

.markdown-result {
flex: 1;
min-width: 0;
}

.gallery-container {
width: 100% !important;
}

.gallery-item {
width: 100% !important;
height: auto !important;
aspect-ratio: auto !important;
}

button {
background-color: var(--text-color) !important;
color: var(--black) !important;
border: none !important;
border-radius: 4px;
padding: 8px 16px;
}
button:hover {
opacity: 0.8 !important;
}

.radio-group {
margin-bottom: 15px !important;
}

.file-download {
margin-top: 15px !important;
}
.loader {
border: 5px solid #f3f3f3;
border-top: 5px solid #3498db;
border-radius: 50%;
width: 50px;
height: 50px;
animation: spin 1s linear infinite;
margin: 20px auto;
}

@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}

.loader-container {
text-align: center;
margin: 20px 0;
}
"""
190
+
191
def clean_markdown_text(text):
    """Return *text* with tag-like markup removed and blank runs collapsed.

    Anything matching ``<...>`` is deleted, three or more consecutive
    newlines are reduced to exactly two, and surrounding whitespace is
    stripped. Falsy input (``None`` or ``""``) yields ``""``.
    """
    if text:
        without_tags = re.sub(r'<[^>]+>', '', text)
        collapsed = re.sub(r'\n{3,}', '\n\n', without_tags)
        return collapsed.strip()
    return ""
197
+
198
+
199
def pdf_to_images(pdf_path):
    """Render the PDF at *pdf_path* to a list of page images.

    Conversion is best-effort: the result of
    ``pdf2image.convert_from_path`` is returned as a list, and ``None`` is
    returned if the conversion fails for any reason, so callers can still
    show the parsing results without a preview.
    """
    try:
        return list(pdf2image.convert_from_path(pdf_path))
    except Exception:
        # Deliberately swallow conversion failures (preview is optional), but
        # no longer intercept KeyboardInterrupt/SystemExit as the previous
        # bare ``except:`` did.
        return None
205
+
206
+
207
def process_file(file_path, file_type):
    """Send an uploaded document to the layout-parsing API and collect results.

    Args:
        file_path: Path of the uploaded file on disk.
        file_type: ``"pdf"`` for PDF input; any other value is sent as an
            image (``fileType`` 1 instead of 0).

    Returns:
        A dict with the keys ``"original_file"`` (the input path),
        ``"markdown_contents"`` (one string per layout result),
        ``"clean_markdown_contents"`` (the same strings passed through
        ``clean_markdown_text``), ``"pdf_images"`` (preview images, or
        ``None`` if PDF rendering failed) and ``"api_response"`` (the decoded
        JSON response).

    Raises:
        gr.Error: If no file was supplied, or if reading the file, calling
            the API, or decoding its response fails.
    """
    if not file_path:
        # Previously surfaced as an opaque "expected str, bytes or
        # os.PathLike object" message from open(None).
        raise gr.Error("Error processing file: no file was uploaded")

    try:
        with open(file_path, "rb") as f:
            file_data = base64.b64encode(f.read()).decode("ascii")

        headers = {
            "Authorization": f"token {TOKEN}",
            "Content-Type": "application/json"
        }

        response = requests.post(
            API_URL,
            json={"file": file_data, "fileType": 0 if file_type == "pdf" else 1},
            headers=headers,
            timeout=60
        )
        response.raise_for_status()

        result = response.json()
        # Tolerate an explicit null for "result" / "layoutParsingResults".
        layout_results = (result.get("result") or {}).get("layoutParsingResults") or []

        markdown_contents = []
        clean_markdown_contents = []
        for res in layout_results:
            markdown = res.get("markdown", {})
            if isinstance(markdown, str):
                original = markdown
            elif isinstance(markdown, dict):
                original = markdown.get("text") or ""
            else:
                # Any other shape (e.g. null) used to leave ``original``
                # unbound on the first page, or silently reuse the previous
                # page's text on later ones.
                original = ""

            markdown_contents.append(original)
            clean_markdown_contents.append(clean_markdown_text(original))

        if file_type == "pdf":
            images = pdf_to_images(file_path)
        else:
            # Copy the pixels out so the file handle opened by Image.open is
            # closed immediately instead of lingering until garbage collection.
            with Image.open(file_path) as img:
                images = [img.copy()]

        return {
            "original_file": file_path,
            "markdown_contents": markdown_contents,
            "clean_markdown_contents": clean_markdown_contents,
            "pdf_images": images,
            "api_response": result
        }

    except Exception as e:
        raise gr.Error(f"Error processing file: {str(e)}") from e
256
+
257
+
258
def create_zip_file(results):
    """Bundle the analysis results into a ZIP archive and return its path.

    The archive is written to a freshly created temporary directory and is
    named ``analysis_results_<timestamp>.zip``. It contains:

    * ``original/<file name>`` -- the uploaded file, if it still exists;
    * ``markdown/original/markdown_<n>.md`` -- one file per non-empty entry
      of ``results["markdown_contents"]`` (``n`` is the 1-based position, so
      numbering keeps gaps for empty entries);
    * ``api_response.json`` -- ``results["api_response"]`` as indented JSON.

    Args:
        results: The dict produced by ``process_file``.

    Returns:
        Filesystem path of the created ZIP file.

    Raises:
        gr.Error: If there are no results yet or the archive cannot be
            written.
    """
    if not results:
        # The state is empty until a document has been analyzed; report that
        # directly instead of an AttributeError on ``None``.
        raise gr.Error("Error creating ZIP file: no analysis results available")

    try:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        zip_path = os.path.join(tempfile.mkdtemp(), f"analysis_results_{timestamp}.zip")

        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            original_path = results.get("original_file", "")
            if original_path and Path(original_path).exists():
                zipf.write(original_path, f"original/{Path(original_path).name}")

            markdowns = results.get("markdown_contents") or []
            for page_number, md_content in enumerate(markdowns, start=1):
                if md_content:
                    zipf.writestr(f"markdown/original/markdown_{page_number}.md", md_content)

            api_response = results.get("api_response", {})
            zipf.writestr("api_response.json", json.dumps(api_response, indent=2, ensure_ascii=False))

        return zip_path

    except Exception as e:
        raise gr.Error(f"Error creating ZIP file: {str(e)}") from e
283
+
284
+
285
def export_markdown(results):
    """Write all original markdown pages to one ``.md`` file and return its path.

    The entries of ``results["markdown_contents"]`` are joined with a blank
    line between them and saved as ``original_markdown_<timestamp>.md`` in a
    freshly created temporary directory.

    Args:
        results: The dict produced by ``process_file``.

    Returns:
        Filesystem path of the written markdown file.

    Raises:
        gr.Error: If there is no markdown to export or the file cannot be
            written.
    """
    # Validate outside the try-block so this deliberate error is not
    # re-wrapped as "Error exporting markdown: ..." by the catch-all below,
    # and so an empty state (None) is reported the same way.
    markdowns = (results or {}).get("markdown_contents") or []
    if not markdowns:
        raise gr.Error("No markdown content to export")

    try:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"original_markdown_{timestamp}.md"
        # Treat a missing page (None) as empty text rather than failing join().
        content = "\n\n".join(md or "" for md in markdowns)

        file_path = os.path.join(tempfile.mkdtemp(), filename)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)

        return file_path

    except Exception as e:
        raise gr.Error(f"Error exporting markdown: {str(e)}") from e
305
+
306
+
307
# Gradio UI definition and event wiring.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from the statement order; confirm it against the deployed layout.
with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
    # Holds the dict returned by process_file for the latest analysis.
    results_state = gr.State()

    # Change 1: the logo and the navigation buttons share one container.
    with gr.Column(elem_classes=["header-container"]):
        with gr.Column(elem_classes=["logo-container"]):
            gr.Image("pp-structurev3_altered.png", elem_classes=["logo-img"], show_label=False)

        # Change 2: buttons are used instead of plain links.
        with gr.Row(elem_classes=["nav-buttons"]):
            gr.Button("GitHub", link="https://github.com/PaddlePaddle/PaddleOCR", elem_classes=["nav-button"])
            gr.Button("Homepage", link="https://paddleocr.ai", elem_classes=["nav-button"])

    with gr.Column(elem_classes=["upload-section"]):
        file_type = gr.Radio(
            ["pdf", "image"],
            label="File type",
            value="pdf",
            interactive=True
        )
        file_input = gr.File(
            label="Upload document",
            file_types=[".pdf", ".jpg", ".jpeg", ".png"],
            type="filepath"
        )
        process_btn = gr.Button("Analyze document", variant="primary")

    # Hidden until an analysis is running.
    loading_spinner = gr.Column(
        visible=False,
        elem_classes=["loader-container"]
    )
    with loading_spinner:
        gr.HTML("""
        <div class="loader"></div>
        <p>Wait...</p>
        """)

    with gr.Row(elem_classes=["result-container"]):
        with gr.Column(elem_classes=["pdf-preview"]):
            gr.Markdown("### Original document preview")
            pdf_display = gr.Gallery(
                label="PDF page",
                show_label=False,
                elem_classes=["gallery-container"]
            )

        with gr.Column(elem_classes=["markdown-result"]):
            with gr.Row(elem_classes=["radio-group"]):
                display_mode = gr.Radio(
                    ["Original Markdown", "Cleaned Text"],
                    label="Display Mode",
                    value="Original Markdown",
                    interactive=True
                )
            markdown_display = gr.HTML(label="Analysis Results")

    with gr.Column(elem_classes=["download-section"]):
        gr.Markdown("### Result Export")
        with gr.Row():
            download_md_btn = gr.Button("Download Original Markdown", variant="secondary")
            download_all_btn = gr.Button("Download Complete Analysis Results (ZIP)", variant="primary")
        download_file = gr.File(visible=False, label="Download file", elem_classes=["file-download"])

    def toggle_spinner():
        """Show the loading spinner."""
        return gr.update(visible=True)

    def hide_spinner():
        """Hide the loading spinner."""
        return gr.update(visible=False)

    def _first_page(pages):
        """Return the first entry of *pages*, or "" when there is none."""
        return pages[0] if pages else ""

    def preview_images(res):
        """Return the preview images stored in *res*, or [] if unavailable."""
        return res["pdf_images"] if res and res.get("pdf_images") else []

    def original_markdown(res):
        """Return the first page of original markdown, or "" if unavailable."""
        return _first_page(res.get("markdown_contents")) if res else ""

    def switch_display(mode, res):
        """Return the first page in the representation selected by *mode*.

        The comparison uses the Radio's actual choice label. The previous
        code compared against "原始Markdown", which is not one of the
        choices, so the cleaned text was shown for both modes. Empty page
        lists now yield "" instead of raising IndexError.
        """
        if not res:
            return ""
        key = "markdown_contents" if mode == "Original Markdown" else "clean_markdown_contents"
        return _first_page(res.get(key))

    def reveal_download(_current_file):
        """Make the download component visible."""
        return gr.update(visible=True)

    # Show spinner -> analyze -> hide spinner (runs even if analysis failed)
    # -> refresh the preview gallery and the markdown pane.
    process_btn.click(
        toggle_spinner,
        outputs=[loading_spinner]
    ).then(
        process_file,
        inputs=[file_input, file_type],
        outputs=[results_state]
    ).then(
        hide_spinner,
        outputs=[loading_spinner]
    ).success(
        preview_images,
        inputs=[results_state],
        outputs=[pdf_display]
    ).success(
        original_markdown,
        inputs=[results_state],
        outputs=[markdown_display]
    )

    display_mode.change(
        switch_display,
        inputs=[display_mode, results_state],
        outputs=[markdown_display]
    )

    # Reveal the download component only when the export actually produced a
    # file (.success); with .then it was also revealed after a failed export.
    download_md_btn.click(
        export_markdown,
        inputs=[results_state],
        outputs=[download_file]
    ).success(
        reveal_download,
        inputs=[download_file],
        outputs=[download_file]
    )

    download_all_btn.click(
        create_zip_file,
        inputs=[results_state],
        outputs=[download_file]
    ).success(
        reveal_download,
        inputs=[download_file],
        outputs=[download_file]
    )
425
+
426
# Start the Gradio app only when this file is executed as a script.
# share=True is passed through to demo.launch(); per Gradio's launch()
# documentation this requests a publicly reachable share link.
if __name__ == "__main__":
    demo.launch(share=True)