zjrwtxtechstudio commited on
Commit
2a08f3d
·
verified ·
1 Parent(s): 6888b5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +471 -329
app.py CHANGED
@@ -1,329 +1,471 @@
1
- import os
2
- import time
3
- import cv2
4
- import imutils
5
- import shutil
6
- import img2pdf
7
- import glob
8
- from skimage.metrics import structural_similarity
9
- import gradio as gr
10
- import tempfile
11
-
12
- ############# Define constants
13
-
14
- OUTPUT_SLIDES_DIR = f"./output"
15
-
16
- FRAME_RATE = 3 # no.of frames per second that needs to be processed, fewer the count faster the speed
17
- WARMUP = FRAME_RATE # initial number of frames to be skipped
18
- FGBG_HISTORY = FRAME_RATE * 15 # no.of frames in background object
19
- VAR_THRESHOLD = 16 # Threshold on the squared Mahalanobis distance between the pixel and the model to decide whether a pixel is well described by the background model.
20
- DETECT_SHADOWS = False # If true, the algorithm will detect shadows and mark them.
21
- MIN_PERCENT = 0.1 # min % of diff between foreground and background to detect if motion has stopped
22
- MAX_PERCENT = 3 # max % of diff between foreground and background to detect if frame is still in motion
23
- SSIM_THRESHOLD = 0.9 # SSIM threshold of two consecutive frame
24
-
25
-
26
- def get_frames(video_path):
27
- '''A fucntion to return the frames from a video located at video_path
28
- this function skips frames as defined in FRAME_RATE'''
29
-
30
-
31
- # open a pointer to the video file initialize the width and height of the frame
32
- vs = cv2.VideoCapture(video_path)
33
- if not vs.isOpened():
34
- raise Exception(f'unable to open file {video_path}')
35
-
36
-
37
- total_frames = vs.get(cv2.CAP_PROP_FRAME_COUNT)
38
- frame_time = 0
39
- frame_count = 0
40
-
41
- # loop over the frames of the video
42
- while True:
43
- vs.set(cv2.CAP_PROP_POS_MSEC, frame_time * 1000) # move frame to a timestamp
44
- frame_time += 1/FRAME_RATE
45
-
46
- (_, frame) = vs.read()
47
- # if the frame is None, then we have reached the end of the video file
48
- if frame is None:
49
- break
50
-
51
- frame_count += 1
52
- yield frame_count, frame_time, frame
53
-
54
- vs.release()
55
-
56
-
57
-
58
- def detect_unique_screenshots(video_path, output_folder_screenshot_path, progress=gr.Progress()):
59
- '''Extract unique screenshots from video'''
60
- fgbg = cv2.createBackgroundSubtractorMOG2(history=FGBG_HISTORY, varThreshold=VAR_THRESHOLD,detectShadows=DETECT_SHADOWS)
61
-
62
- captured = False
63
- start_time = time.time()
64
- (W, H) = (None, None)
65
-
66
- # Get total frames for progress calculation
67
- cap = cv2.VideoCapture(video_path)
68
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
69
- cap.release()
70
-
71
- screenshoots_count = 0
72
- last_screenshot = None
73
- saved_files = []
74
-
75
- progress(0, desc="初始化视频处理...")
76
-
77
- for frame_count, frame_time, frame in get_frames(video_path):
78
- # Update progress
79
- progress((frame_count / total_frames) * 0.7, desc=f"处理视频帧 {frame_count}/{total_frames}")
80
-
81
- orig = frame.copy()
82
- frame = imutils.resize(frame, width=600)
83
- mask = fgbg.apply(frame)
84
-
85
- if W is None or H is None:
86
- (H, W) = mask.shape[:2]
87
-
88
- p_diff = (cv2.countNonZero(mask) / float(W * H)) * 100
89
-
90
- if p_diff < MIN_PERCENT and not captured and frame_count > WARMUP:
91
- captured = True
92
- filename = f"{screenshoots_count:03}_{round(frame_time/60, 2)}.png"
93
- path = os.path.join(output_folder_screenshot_path, filename)
94
-
95
- image_ssim = 0.0
96
- if last_screenshot is not None:
97
- image_ssim = structural_similarity(last_screenshot, orig, channel_axis=2, data_range=255)
98
-
99
- if image_ssim < SSIM_THRESHOLD:
100
- try:
101
- progress(0.7 + (screenshoots_count * 0.1), desc=f"保存截图 {screenshoots_count + 1}")
102
- print("saving {}".format(path))
103
- cv2.imwrite(str(path), orig)
104
- last_screenshot = orig
105
- saved_files.append(path)
106
- screenshoots_count += 1
107
- except Exception as e:
108
- print(f"Error saving image: {str(e)}")
109
- continue
110
-
111
- elif captured and p_diff >= MAX_PERCENT:
112
- captured = False
113
-
114
- progress(0.8, desc="截图提取完成")
115
- print(f'{screenshoots_count} screenshots Captured!')
116
- print(f'Time taken {time.time()-start_time}s')
117
- return saved_files
118
-
119
-
120
- def initialize_output_folder(video_path):
121
- '''Clean the output folder if already exists'''
122
- # Create a safe folder name from video filename
123
- video_filename = os.path.splitext(os.path.basename(video_path))[0]
124
- # Replace potentially problematic characters
125
- safe_filename = "".join(x for x in video_filename if x.isalnum() or x in (' ', '-', '_'))
126
- output_folder_screenshot_path = os.path.join(OUTPUT_SLIDES_DIR, safe_filename)
127
-
128
- if os.path.exists(output_folder_screenshot_path):
129
- shutil.rmtree(output_folder_screenshot_path)
130
-
131
- os.makedirs(output_folder_screenshot_path, exist_ok=True)
132
- print('initialized output folder', output_folder_screenshot_path)
133
- return output_folder_screenshot_path
134
-
135
-
136
- def convert_screenshots_to_pdf(video_path, output_folder_screenshot_path):
137
- # Create a safe filename
138
- video_filename = os.path.splitext(os.path.basename(video_path))[0]
139
- safe_filename = "".join(x for x in video_filename if x.isalnum() or x in (' ', '-', '_'))
140
- output_pdf_path = os.path.join(OUTPUT_SLIDES_DIR, f"{safe_filename}.pdf")
141
-
142
- try:
143
- print('output_folder_screenshot_path', output_folder_screenshot_path)
144
- print('output_pdf_path', output_pdf_path)
145
- print('converting images to pdf..')
146
-
147
- # Get all PNG files and ensure they exist
148
- png_files = sorted(glob.glob(os.path.join(output_folder_screenshot_path, "*.png")))
149
- if not png_files:
150
- raise Exception("No PNG files found to convert to PDF")
151
-
152
- with open(output_pdf_path, "wb") as f:
153
- f.write(img2pdf.convert(png_files))
154
-
155
- print('Pdf Created!')
156
- print('pdf saved at', output_pdf_path)
157
- return output_pdf_path
158
- except Exception as e:
159
- print(f"Error creating PDF: {str(e)}")
160
- raise
161
-
162
-
163
- def video_to_slides(video_path, progress=gr.Progress()):
164
- progress(0.1, desc="准备处理视频...")
165
- output_folder_screenshot_path = initialize_output_folder(video_path)
166
- saved_files = detect_unique_screenshots(video_path, output_folder_screenshot_path, progress)
167
- return output_folder_screenshot_path, saved_files
168
-
169
-
170
- def slides_to_pdf(video_path, output_folder_screenshot_path, saved_files, progress=gr.Progress()):
171
- video_filename = os.path.splitext(os.path.basename(video_path))[0]
172
- safe_filename = "".join(x for x in video_filename if x.isalnum() or x in (' ', '-', '_'))
173
- output_pdf_path = os.path.join(OUTPUT_SLIDES_DIR, f"{safe_filename}.pdf")
174
-
175
- try:
176
- progress(0.9, desc="正在生成PDF...")
177
- print('output_folder_screenshot_path', output_folder_screenshot_path)
178
- print('output_pdf_path', output_pdf_path)
179
-
180
- if not saved_files:
181
- raise Exception("未从视频中捕获到截图")
182
-
183
- existing_files = [f for f in saved_files if os.path.exists(f)]
184
- if not existing_files:
185
- raise Exception("未找到保存的截图文件")
186
-
187
- with open(output_pdf_path, "wb") as f:
188
- f.write(img2pdf.convert(existing_files))
189
-
190
- progress(1.0, desc="处理完成!")
191
- print('PDF创建成功!')
192
- print('PDF保存位置:', output_pdf_path)
193
- return output_pdf_path
194
- except Exception as e:
195
- print(f"创建PDF时出错: {str(e)}")
196
- raise
197
-
198
-
199
- def run_app(video_path, progress=gr.Progress()):
200
- try:
201
- if not video_path:
202
- raise gr.Error("请选择要处理的视频文件")
203
-
204
- progress(0, desc="开始处理...")
205
- output_folder_screenshot_path, saved_files = video_to_slides(video_path, progress)
206
- return slides_to_pdf(video_path, output_folder_screenshot_path, saved_files, progress)
207
- except Exception as e:
208
- raise gr.Error(f"处理失败: {str(e)}")
209
-
210
-
211
- def process_video_file(video_file):
212
- """Handle uploaded video file and return PDF"""
213
- try:
214
- # If video_file is a string (path), use it directly
215
- if isinstance(video_file, str):
216
- if video_file.strip() == "":
217
- return None
218
- return run_app(video_file)
219
-
220
- # If it's an uploaded file, create a temporary file
221
- if video_file is not None:
222
- # Generate a unique filename for the temporary video
223
- temp_filename = f"temp_video_{int(time.time())}.mp4"
224
- temp_path = os.path.join(tempfile.gettempdir(), temp_filename)
225
-
226
- try:
227
- if hasattr(video_file, 'name'): # If it's already a file path
228
- shutil.copyfile(video_file, temp_path)
229
- else: # If it's file content
230
- with open(temp_path, 'wb') as f:
231
- f.write(video_file)
232
-
233
- # Process the video
234
- output_folder_screenshot_path, saved_files = video_to_slides(temp_path)
235
- pdf_path = slides_to_pdf(temp_path, output_folder_screenshot_path, saved_files)
236
-
237
- # Cleanup
238
- if os.path.exists(temp_path):
239
- os.unlink(temp_path)
240
- return pdf_path
241
-
242
- except Exception as e:
243
- if os.path.exists(temp_path):
244
- os.unlink(temp_path)
245
- raise gr.Error(f"处理视频时出错: {str(e)}")
246
- return None
247
- except Exception as e:
248
- raise gr.Error(f"处理视频时出错: {str(e)}")
249
-
250
- # Create a modern interface with custom CSS
251
- css = """
252
- .gradio-container {
253
- font-family: 'SF Pro Display', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
254
- }
255
- .container {
256
- max-width: 900px;
257
- margin: auto;
258
- padding: 20px;
259
- }
260
- .gr-button {
261
- background: linear-gradient(90deg, #2563eb, #3b82f6);
262
- border: none;
263
- color: white;
264
- }
265
- .gr-button:hover {
266
- background: linear-gradient(90deg, #1d4ed8, #2563eb);
267
- transform: translateY(-1px);
268
- box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
269
- }
270
- .status-info {
271
- margin-top: 10px;
272
- padding: 10px;
273
- border-radius: 4px;
274
- background-color: #f3f4f6;
275
- }
276
- """
277
-
278
- with gr.Blocks(css=css) as iface:
279
- gr.Markdown(
280
- """
281
- # 🎥 视频转PDF智能助手
282
-
283
- ### 轻松将视频转换为高质量PDF文档
284
- 公众号:正经人王同学 | 全网同名
285
- """
286
- )
287
-
288
- with gr.Row():
289
- with gr.Column():
290
- video_input = gr.Video(label="上传视频")
291
- video_path = gr.Textbox(label="或输入视频路径", placeholder="例如: ./input/video.mp4")
292
- convert_btn = gr.Button("开始转换", variant="primary")
293
-
294
- with gr.Row():
295
- output_file = gr.File(label="下载PDF")
296
-
297
- with gr.Row():
298
- status = gr.Markdown(value="", elem_classes=["status-info"])
299
-
300
- gr.Markdown(
301
- """
302
- ### 使用说明
303
- 1. 上传视频文件 或 输入视频文件路径
304
- 2. 点击"开始转换"按钮
305
- 3. 等待处理完成后下载生成的PDF文件
306
-
307
- ### 特点
308
- - 智能检测视频关键帧
309
- - 高质量PDF输出
310
- - 支持多种视频格式
311
- """
312
- )
313
-
314
- def process_video(video, path):
315
- if video:
316
- return run_app(video)
317
- elif path:
318
- return run_app(path)
319
- else:
320
- raise gr.Error("请上传视频或输入视频路径")
321
-
322
- convert_btn.click(
323
- fn=process_video,
324
- inputs=[video_input, video_path],
325
- outputs=[output_file],
326
- )
327
-
328
- if __name__ == "__main__":
329
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import cv2
4
+ import imutils
5
+ import shutil
6
+ import img2pdf
7
+ import glob
8
+ from skimage.metrics import structural_similarity
9
+ import gradio as gr
10
+ import tempfile
11
+ import whisper
12
+ from moviepy.editor import VideoFileClip
13
+ from PIL import Image, ImageDraw, ImageFont
14
+
15
############# Define constants

# Root directory that receives the per-video screenshot folders and the PDFs.
OUTPUT_SLIDES_DIR = "./output"

FRAME_RATE = 3  # number of frames sampled per second of video; fewer means faster processing
WARMUP = FRAME_RATE  # number of initial sampled frames to skip before any capture
FGBG_HISTORY = FRAME_RATE * 15  # number of frames kept in the background model
VAR_THRESHOLD = 16  # threshold on the squared Mahalanobis distance between a pixel and the model, used to decide whether the pixel is well described by the background model
DETECT_SHADOWS = False  # if True, the background subtractor detects shadows and marks them
MIN_PERCENT = 0.1  # foreground percentage below which motion is considered to have stopped
MAX_PERCENT = 3  # foreground percentage at or above which the frame is considered in motion again
SSIM_THRESHOLD = 0.9  # a candidate frame is saved only if its SSIM against the last saved frame is below this
27
+
28
+
29
def get_frames(video_path):
    """Yield sampled frames from the video located at ``video_path``.

    The capture is repositioned by timestamp before every read, so only
    ``FRAME_RATE`` frames per second of video are decoded.

    Yields:
        Tuples ``(frame_count, frame_time, frame)``. ``frame_count`` is the
        1-based index of the sampled frame. ``frame_time`` is the timestamp
        in seconds that will be requested for the *next* sample, i.e. the
        position of this frame plus ``1 / FRAME_RATE``. ``frame`` is the image
        returned by ``cv2.VideoCapture.read``.

    Raises:
        Exception: if the video file cannot be opened.
    """
    # open a pointer to the video file
    vs = cv2.VideoCapture(video_path)
    if not vs.isOpened():
        raise Exception(f'unable to open file {video_path}')

    frame_time = 0
    frame_count = 0

    try:
        # loop over the frames of the video
        while True:
            vs.set(cv2.CAP_PROP_POS_MSEC, frame_time * 1000)  # move to a timestamp
            frame_time += 1/FRAME_RATE

            (_, frame) = vs.read()
            # if the frame is None, then we have reached the end of the video file
            if frame is None:
                break

            frame_count += 1
            yield frame_count, frame_time, frame
    finally:
        # Release the capture on normal exhaustion, when the consumer raises,
        # and when the generator is closed before reaching the end.
        vs.release()
58
+
59
+
60
+
61
def detect_unique_screenshots(video_path, output_folder_screenshot_path, progress=gr.Progress()):
    """Extract unique screenshots from a video and save them as PNG files.

    Each sampled frame is resized to a width of 600 and fed to a MOG2
    background subtractor. When the foreground percentage drops below
    ``MIN_PERCENT`` (motion has stopped) the full-size frame becomes a
    candidate; it is written only if its SSIM against the previously saved
    screenshot is below ``SSIM_THRESHOLD``. Capturing is re-armed once the
    foreground percentage reaches ``MAX_PERCENT`` again.

    Args:
        video_path: path of the video to analyse.
        output_folder_screenshot_path: existing folder that receives the PNGs.
        progress: Gradio progress callback.

    Returns:
        List of paths of the screenshots that were written successfully.
    """
    fgbg = cv2.createBackgroundSubtractorMOG2(history=FGBG_HISTORY, varThreshold=VAR_THRESHOLD, detectShadows=DETECT_SHADOWS)

    captured = False
    start_time = time.time()
    (W, H) = (None, None)

    # Get frame count and fps for progress calculation
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    cap.release()

    # get_frames() only yields FRAME_RATE frames per second, so progress must
    # be measured against the number of samples, not the raw frame count.
    # Either value may be reported as 0, in which case no estimate is possible.
    if total_frames > 0 and fps and fps > 0:
        expected_samples = total_frames / fps * FRAME_RATE
    else:
        expected_samples = 0

    screenshoots_count = 0
    last_screenshot = None
    saved_files = []

    progress(0, desc="初始化视频处理...")

    for frame_count, frame_time, frame in get_frames(video_path):
        # Update progress (frame analysis occupies the 0 - 0.7 range)
        if expected_samples > 0:
            fraction = min(frame_count / expected_samples, 1.0) * 0.7
        else:
            fraction = 0
        progress(fraction, desc=f"处理视频帧 {frame_count}/{total_frames}")

        orig = frame.copy()
        frame = imutils.resize(frame, width=600)
        mask = fgbg.apply(frame)

        if W is None or H is None:
            (H, W) = mask.shape[:2]

        # percentage of pixels classified as foreground
        p_diff = (cv2.countNonZero(mask) / float(W * H)) * 100

        if p_diff < MIN_PERCENT and not captured and frame_count > WARMUP:
            captured = True
            filename = f"{screenshoots_count:03}_{round(frame_time/60, 2)}.png"
            path = os.path.join(output_folder_screenshot_path, filename)

            image_ssim = 0.0
            if last_screenshot is not None:
                image_ssim = structural_similarity(last_screenshot, orig, channel_axis=2, data_range=255)

            if image_ssim < SSIM_THRESHOLD:
                try:
                    # Clamp so the bar never passes the 0.8 reported after the loop.
                    progress(min(0.7 + (screenshoots_count * 0.1), 0.8), desc=f"保存截图 {screenshoots_count + 1}")
                    print("saving {}".format(path))
                    # imwrite signals most failures by returning False, not by raising.
                    if cv2.imwrite(str(path), orig):
                        last_screenshot = orig
                        saved_files.append(path)
                        screenshoots_count += 1
                    else:
                        print(f"Error saving image: could not write {path}")
                except Exception as e:
                    # Best effort: a failed screenshot must not abort the whole run.
                    print(f"Error saving image: {str(e)}")

        elif captured and p_diff >= MAX_PERCENT:
            captured = False

    progress(0.8, desc="截图提取完成")
    print(f'{screenshoots_count} screenshots Captured!')
    print(f'Time taken {time.time()-start_time}s')
    return saved_files
121
+
122
+
123
def initialize_output_folder(video_path):
    """Create an empty screenshot folder for ``video_path`` and return its path.

    The folder lives under ``OUTPUT_SLIDES_DIR`` and is named after the video
    file with every character other than alphanumerics, space, ``-`` and ``_``
    removed. An existing folder of that name is deleted first.

    Args:
        video_path: path of the video being processed.

    Returns:
        Path of the freshly created folder.
    """
    # Create a safe folder name from video filename
    video_filename = os.path.splitext(os.path.basename(video_path))[0]
    # Drop potentially problematic characters
    safe_filename = "".join(x for x in video_filename if x.isalnum() or x in (' ', '-', '_'))
    if not safe_filename.strip():
        # Without a name the target would be OUTPUT_SLIDES_DIR itself and the
        # rmtree below would wipe the output of every earlier run.
        safe_filename = "video"
    output_folder_screenshot_path = os.path.join(OUTPUT_SLIDES_DIR, safe_filename)

    if os.path.exists(output_folder_screenshot_path):
        shutil.rmtree(output_folder_screenshot_path)

    os.makedirs(output_folder_screenshot_path, exist_ok=True)
    print('initialized output folder', output_folder_screenshot_path)
    return output_folder_screenshot_path
137
+
138
+
139
def convert_screenshots_to_pdf(video_path, output_folder_screenshot_path):
    """Bundle every PNG found in the screenshot folder into one PDF.

    The PDF is written to ``OUTPUT_SLIDES_DIR`` and named after the video
    file (restricted to alphanumerics, space, ``-`` and ``_``).

    Args:
        video_path: path of the source video; only its file name is used.
        output_folder_screenshot_path: folder that is searched for ``*.png``.

    Returns:
        Path of the PDF that was written.

    Raises:
        Exception: if the folder holds no PNG files; any error raised while
            writing the PDF is logged and re-raised.
    """
    stem, _extension = os.path.splitext(os.path.basename(video_path))
    allowed_punctuation = (' ', '-', '_')
    kept_chars = [ch for ch in stem if ch.isalnum() or ch in allowed_punctuation]
    output_pdf_path = os.path.join(OUTPUT_SLIDES_DIR, "".join(kept_chars) + ".pdf")

    try:
        print('output_folder_screenshot_path', output_folder_screenshot_path)
        print('output_pdf_path', output_pdf_path)
        print('converting images to pdf..')

        # Collect the screenshots in name order
        pattern = os.path.join(output_folder_screenshot_path, "*.png")
        png_files = glob.glob(pattern)
        png_files.sort()
        if len(png_files) == 0:
            raise Exception("No PNG files found to convert to PDF")

        with open(output_pdf_path, "wb") as pdf_file:
            pdf_bytes = img2pdf.convert(png_files)
            pdf_file.write(pdf_bytes)

        print('Pdf Created!')
        print('pdf saved at', output_pdf_path)
    except Exception as e:
        print(f"Error creating PDF: {str(e)}")
        raise
    return output_pdf_path
164
+
165
+
166
def video_to_slides(video_path, progress=gr.Progress()):
    """Prepare the output folder for a video and extract its screenshots.

    Returns:
        Tuple ``(screenshot_folder, saved_files)``.
    """
    progress(0.1, desc="准备处理视频...")
    slides_dir = initialize_output_folder(video_path)
    screenshots = detect_unique_screenshots(video_path, slides_dir, progress)
    return slides_dir, screenshots
171
+
172
+
173
def slides_to_pdf(video_path, output_folder_screenshot_path, saved_files, progress=gr.Progress()):
    """Combine the saved screenshots into a single PDF named after the video.

    Args:
        video_path: path of the source video; only its file name is used to
            derive the PDF name.
        output_folder_screenshot_path: screenshot folder (logged only).
        saved_files: screenshot paths, in the order they should appear.
        progress: Gradio progress callback.

    Returns:
        Path of the PDF written under ``OUTPUT_SLIDES_DIR``.

    Raises:
        Exception: if ``saved_files`` is empty or none of its files exist;
            any error raised while writing the PDF is logged and re-raised.
    """
    video_filename = os.path.splitext(os.path.basename(video_path))[0]
    safe_filename = "".join(x for x in video_filename if x.isalnum() or x in (' ', '-', '_'))
    output_pdf_path = os.path.join(OUTPUT_SLIDES_DIR, f"{safe_filename}.pdf")

    try:
        progress(0.9, desc="正在生成PDF...")
        print('output_folder_screenshot_path', output_folder_screenshot_path)
        print('output_pdf_path', output_pdf_path)

        if not saved_files:
            raise Exception("未从视频中捕获到截图")

        # Skip screenshots that were recorded but are no longer on disk
        existing_files = [f for f in saved_files if os.path.exists(f)]
        if not existing_files:
            raise Exception("未找到保存的截图文件")

        with open(output_pdf_path, "wb") as f:
            f.write(img2pdf.convert(existing_files))

        progress(1.0, desc="处理完成!")
        print('PDF创建成功!')
        print('PDF保存位置:', output_pdf_path)
        return output_pdf_path
    except Exception as e:
        print(f"创建PDF时出错: {str(e)}")
        raise
200
+
201
+
202
def run_app(video_path, progress=gr.Progress()):
    """Convert the video at ``video_path`` into a PDF of its unique slides.

    Args:
        video_path: path of the video to process.
        progress: Gradio progress callback.

    Returns:
        Path of the generated PDF.

    Raises:
        gr.Error: if no path is given or any processing step fails.
    """
    # Validate outside the try block so this message is not caught below and
    # re-wrapped with the "处理失败" prefix.
    if not video_path:
        raise gr.Error("请选择要处理的视频文件")

    try:
        progress(0, desc="开始处理...")
        output_folder_screenshot_path, saved_files = video_to_slides(video_path, progress)
        return slides_to_pdf(video_path, output_folder_screenshot_path, saved_files, progress)
    except gr.Error:
        # Already a user-facing error; pass it through unchanged.
        raise
    except Exception as e:
        raise gr.Error(f"处理失败: {str(e)}") from e
212
+
213
+
214
def process_video_file(video_file):
    """Handle an uploaded video (or a path to one) and return the PDF path.

    Args:
        video_file: a path string, an object exposing the path of the upload
            as ``.name``, raw video bytes, or ``None``.

    Returns:
        Path of the generated PDF, or ``None`` when ``video_file`` is ``None``
        or a blank string.

    Raises:
        gr.Error: if processing fails.
    """
    if video_file is None:
        return None

    # If video_file is a string (path), use it directly
    if isinstance(video_file, str):
        if video_file.strip() == "":
            return None
        return run_app(video_file)

    # mkstemp guarantees a unique name; a second-resolution timestamp collides
    # when two requests arrive within the same second.
    fd, temp_path = tempfile.mkstemp(prefix="temp_video_", suffix=".mp4")
    os.close(fd)
    try:
        if hasattr(video_file, 'name'):
            # File-like upload: copy from the path it exposes. The object
            # itself is not a valid copyfile source.
            shutil.copyfile(video_file.name, temp_path)
        else:
            # Raw file content
            with open(temp_path, 'wb') as f:
                f.write(video_file)

        # Process the video
        output_folder_screenshot_path, saved_files = video_to_slides(temp_path)
        return slides_to_pdf(temp_path, output_folder_screenshot_path, saved_files)
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"处理视频时出错: {str(e)}") from e
    finally:
        # Cleanup on both the success and the failure path
        if os.path.exists(temp_path):
            os.unlink(temp_path)
252
+
253
+
254
def extract_audio_and_transcribe(video_path, progress=gr.Progress()):
    """Extract the audio track of a video and transcribe it with Whisper.

    Args:
        video_path: path of the video to transcribe.
        progress: Gradio progress callback.

    Returns:
        List of dicts with keys ``"start"``, ``"end"`` and ``"text"``, one per
        transcription segment. The list is empty when the video has no audio
        track.
    """
    progress(0, desc="正在提取音频...")

    # Load the video and extract audio
    video = VideoFileClip(video_path)
    temp_audio = None
    try:
        audio = video.audio
        if audio is None:
            # Nothing to transcribe; callers simply get no captions.
            return []

        # mkstemp creates the file atomically, unlike the race-prone mktemp.
        fd, temp_audio = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
        audio.write_audiofile(temp_audio)

        progress(0.3, desc="正在转录音频...")

        # Load Whisper model and transcribe
        model = whisper.load_model("base")
        result = model.transcribe(temp_audio)
        # result is a dict; concatenating it to a str raised TypeError.
        print("完成的转录文本结果如下:" + str(result.get("text", "")))
    finally:
        # Clean up even when extraction or transcription fails
        video.close()
        if temp_audio is not None and os.path.exists(temp_audio):
            os.remove(temp_audio)

    # Keep only the timing and text of each segment
    return [
        {
            "start": segment["start"],
            "end": segment["end"],
            "text": segment["text"].strip(),
        }
        for segment in result["segments"]
    ]
287
+
288
def add_text_to_image(image_path, text):
    """Append a white caption strip with ``text`` below an image, in place.

    Args:
        image_path: path of the image; the file is overwritten.
        text: caption to draw; lines are separated by ``\\n``.
    """
    font_size = 30
    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        # Arial is not installed on every host (typically missing on Linux);
        # fall back to Pillow's built-in font instead of failing the run.
        font = ImageFont.load_default()
    text_height = font_size * (text.count('\n') + 2)  # one extra line each for top/bottom padding

    # Close the source file before its path is overwritten below
    with Image.open(image_path) as img:
        width, height = img.size

        # Create new image with space for text
        new_img = Image.new('RGB', (width, height + text_height), 'white')
        new_img.paste(img, (0, 0))

    # Add text
    draw = ImageDraw.Draw(new_img)
    draw.text((10, height + 10), text, font=font, fill='black')

    # Save the modified image
    new_img.save(image_path)
308
+
309
def process_video_with_transcription(video_path, output_folder_screenshot_path, progress=gr.Progress()):
    """Extract screenshots from a video and caption them with its transcript.

    A screenshot receives the text of every transcription segment whose
    ``[start, end]`` interval contains the screenshot's timestamp.

    Returns:
        List of screenshot paths, as returned by ``detect_unique_screenshots``.
    """
    # Transcribe first, then extract the screenshots
    transcript = extract_audio_and_transcribe(video_path, progress)
    screenshot_paths = detect_unique_screenshots(video_path, output_folder_screenshot_path, progress)

    progress(0.8, desc="正在添加字幕...")

    for screenshot in screenshot_paths:
        # File names look like 000_1.23.png; the second field is the time in minutes
        file_name = os.path.basename(screenshot)
        minutes_field = file_name.split('_')[1]
        minutes = float(minutes_field.split('.png')[0])
        seconds = minutes * 60

        captions = [
            piece["text"]
            for piece in transcript
            if piece["start"] <= seconds <= piece["end"]
        ]
        if not captions:
            continue

        add_text_to_image(screenshot, "\n".join(captions))

    progress(0.9, desc="处理完成...")
    return screenshot_paths
337
+
338
def run_app_with_transcription(video_path, progress=gr.Progress()):
    """Convert the video at ``video_path`` into a PDF of captioned slides.

    Args:
        video_path: path of the video to process.
        progress: Gradio progress callback.

    Returns:
        Path of the generated PDF.

    Raises:
        gr.Error: if no path is given or any processing step fails.
    """
    # Validate outside the try block so this message is not caught below and
    # re-wrapped with the "处理失败" prefix.
    if not video_path:
        raise gr.Error("请选择要处理的视频文件")

    try:
        progress(0, desc="开始处理...")
        output_folder_screenshot_path = initialize_output_folder(video_path)
        saved_files = process_video_with_transcription(video_path, output_folder_screenshot_path, progress)
        return slides_to_pdf(video_path, output_folder_screenshot_path, saved_files, progress)
    except gr.Error:
        # Already a user-facing error; pass it through unchanged.
        raise
    except Exception as e:
        raise gr.Error(f"处理失败: {str(e)}") from e
349
+
350
def process_video_file_with_transcription(video_file):
    """Handle an uploaded video (or a path to one) and return a captioned PDF.

    Args:
        video_file: a path string, an object exposing the path of the upload
            as ``.name``, raw video bytes, or ``None``.

    Returns:
        Path of the generated PDF, or ``None`` when ``video_file`` is ``None``
        or a blank string.

    Raises:
        gr.Error: if processing fails.
    """
    if video_file is None:
        return None

    # If video_file is a string (path), use it directly
    if isinstance(video_file, str):
        if video_file.strip() == "":
            return None
        return run_app_with_transcription(video_file)

    # mkstemp guarantees a unique name; a second-resolution timestamp collides
    # when two requests arrive within the same second.
    fd, temp_path = tempfile.mkstemp(prefix="temp_video_", suffix=".mp4")
    os.close(fd)
    try:
        if hasattr(video_file, 'name'):
            # File-like upload: copy from the path it exposes. The object
            # itself is not a valid copyfile source.
            shutil.copyfile(video_file.name, temp_path)
        else:
            # Raw file content
            with open(temp_path, 'wb') as f:
                f.write(video_file)

        # Only prepare the folder here: process_video_with_transcription runs
        # the screenshot detection itself, so calling video_to_slides first
        # would analyse the whole video twice.
        output_folder_screenshot_path = initialize_output_folder(temp_path)
        saved_files = process_video_with_transcription(temp_path, output_folder_screenshot_path)
        return slides_to_pdf(temp_path, output_folder_screenshot_path, saved_files)
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"处理视频时出错: {str(e)}") from e
    finally:
        # Cleanup on both the success and the failure path
        if os.path.exists(temp_path):
            os.unlink(temp_path)
389
+
390
+
391
def process_video(video, path):
    """Run the basic conversion on the uploaded video, else on the typed path.

    Raises:
        gr.Error: if neither an upload nor a path was supplied.
    """
    source = video or path  # the upload takes precedence over the text box
    if not source:
        raise gr.Error("请上传视频或输入视频路径")
    return run_app(source)
398
+
399
def handle_video_with_transcription(video, path):
    """Run the captioned conversion on the uploaded video, else on the typed path.

    Raises:
        gr.Error: if neither an upload nor a path was supplied.
    """
    source = video or path  # the upload takes precedence over the text box
    if not source:
        raise gr.Error("请上传视频或输入视频路径")
    return run_app_with_transcription(source)
406
+
407
# Create a modern interface with custom CSS
# The stylesheet is handed to gr.Blocks(css=...) below.
css = """
.gradio-container {
    font-family: 'SF Pro Display', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
}
.container {
    max-width: 900px;
    margin: auto;
    padding: 20px;
}
.gr-button {
    background: linear-gradient(90deg, #2563eb, #3b82f6);
    border: none;
    color: white;
}
.gr-button:hover {
    background: linear-gradient(90deg, #1d4ed8, #2563eb);
    transform: translateY(-1px);
    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
}
.status-info {
    margin-top: 10px;
    padding: 10px;
    border-radius: 4px;
    background-color: #f3f4f6;
}
"""

# The UI is built and launched only when the file is executed as a script.
if __name__ == "__main__":
    with gr.Blocks(css=css) as iface:
        gr.Markdown("# 视频转PDF工具")

        # Tab 1: screenshots only
        with gr.Tab("基础转换"):
            with gr.Row():
                with gr.Column():
                    video_input = gr.Video(label="上传视频")
                    video_path = gr.Textbox(label="或输入视频路径", placeholder="例如: ./input/video.mp4")
                    convert_btn = gr.Button("开始转换", variant="primary")

            with gr.Row():
                output_file = gr.File(label="下载PDF")

        # Tab 2: screenshots captioned with the transcribed audio
        with gr.Tab("带语音转文字"):
            with gr.Row():
                with gr.Column():
                    video_input_with_transcription = gr.Video(label="上传视频")
                    video_path_with_transcription = gr.Textbox(label="或输入视频路径", placeholder="例如: ./input/video.mp4")
                    convert_btn_with_transcription = gr.Button("开始转换(带字幕)", variant="primary")

            with gr.Row():
                output_file_with_transcription = gr.File(label="下载PDF(带字幕)")

        # Each button passes its tab's upload and path inputs to its handler
        # and shows the returned PDF path in that tab's file component.
        convert_btn.click(
            fn=process_video,
            inputs=[video_input, video_path],
            outputs=[output_file],
        )

        convert_btn_with_transcription.click(
            fn=handle_video_with_transcription,
            inputs=[video_input_with_transcription, video_path_with_transcription],
            outputs=[output_file_with_transcription],
        )

    iface.launch()