adamchanadam committed on
Commit
5b80b81
·
verified ·
1 Parent(s): df5e96d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +564 -525
app.py CHANGED
@@ -1,526 +1,565 @@
1
- import os
2
- import json
3
- import datetime
4
- import subprocess
5
- from queue import Queue
6
- from threading import Thread
7
-
8
- import torch
9
- import yt_dlp
10
- from faster_whisper import WhisperModel
11
- from flask import Flask, render_template, request, Response, jsonify
12
- from openai import OpenAI
13
- import spacy
14
- from collections import Counter
15
-
16
- import time
17
- import uuid
18
-
19
- import logging
20
- from logging.handlers import RotatingFileHandler
21
- from werkzeug.utils import secure_filename
22
- from collections import deque
23
-
24
- # 設置基本日誌配置
25
- logging.basicConfig(level=logging.INFO)
26
- logger = logging.getLogger(__name__)
27
-
28
- # 創建一個文件處理器,使用 RotatingFileHandler 來限制日誌文件大小
29
- log_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'app.log')
30
- file_handler = RotatingFileHandler(log_file_path, maxBytes=10*1024*1024, backupCount=5, encoding='utf-8')
31
- file_handler.setLevel(logging.DEBUG)
32
-
33
- # 創建一個控制台處理器
34
- console_handler = logging.StreamHandler()
35
- console_handler.setLevel(logging.INFO)
36
-
37
- # 創建一個格式器
38
- formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
39
- file_handler.setFormatter(formatter)
40
- console_handler.setFormatter(formatter)
41
-
42
- # 將處理器添加到日誌器
43
- logger.addHandler(file_handler)
44
- logger.addHandler(console_handler)
45
-
46
- # 設置其他模塊的日誌級別
47
- logging.getLogger("faster_whisper").setLevel(logging.INFO)
48
-
49
- os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
50
-
51
- app = Flask(__name__, static_folder='static', static_url_path='/static')
52
-
53
- # 讀取設定檔
54
- current_directory = os.path.dirname(os.path.realpath(__file__))
55
- config_file_path = os.path.join(current_directory, 'config.json')
56
- try:
57
- with open(config_file_path, 'r', encoding='utf-8') as f:
58
- config = json.load(f)
59
- logger.info("成功加載配置文件")
60
- except Exception as e:
61
- logger.exception("加載配置文件時發生錯誤")
62
- raise
63
-
64
- # 設置 OpenAI API 金鑰
65
- client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
66
-
67
- # 初始化 SpaCy
68
- nlp = spacy.load(config['spacy_model'])
69
-
70
- # 初始化 Whisper 模型
71
- model = WhisperModel(config['whisper_model'], device="auto", compute_type=config['whisper_compute_type'])
72
-
73
- # 設置 FFmpeg 路徑
74
- ffmpeg_path = config['ffmpeg_path']
75
- if ffmpeg_path not in os.environ["PATH"]:
76
- os.environ["PATH"] += os.pathsep + ffmpeg_path
77
-
78
- def send_sse_message(q, data):
79
- q.put_nowait(data)
80
-
81
- def clean_filename(filename):
82
- return ''.join(c for c in filename if c.isalnum() or c in (' ', '.', '_')).rstrip()
83
-
84
- def download_audio(youtube_url, save_directory, q):
85
- send_sse_message(q, {"status": "開始下載 YouTube 音頻..."})
86
- unique_id = str(uuid.uuid4())[:8] # 生成一個唯一的識別碼
87
- output_filename = f"audio_{unique_id}"
88
- output_path = os.path.join(save_directory, output_filename)
89
-
90
- ydl_opts = {
91
- 'format': 'bestaudio/best',
92
- 'outtmpl': output_path + ".%(ext)s",
93
- 'postprocessors': [{
94
- 'key': 'FFmpegExtractAudio',
95
- 'preferredcodec': 'mp3',
96
- 'preferredquality': '192',
97
- }],
98
- 'ffmpeg_location': ffmpeg_path,
99
- 'quiet': True
100
- }
101
-
102
- try:
103
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
104
- info = ydl.extract_info(youtube_url, download=True)
105
- video_title = clean_filename(info.get('title', 'Untitled'))
106
-
107
- # 等待一小段時間,確保文件已經完全寫入
108
- time.sleep(2)
109
-
110
- # 檢查生成的文件
111
- for file in os.listdir(save_directory):
112
- if file.startswith(output_filename) and file.endswith('.mp3'):
113
- converted_output_path = os.path.join(save_directory, file)
114
- break
115
- else:
116
- raise FileNotFoundError("無法找到下載的音頻文件")
117
-
118
- send_sse_message(q, {"status": f"音頻下載完成: {video_title}"})
119
- return converted_output_path, video_title
120
- except Exception as e:
121
- send_sse_message(q, {"status": f"下載音頻時發生錯誤: {str(e)}"})
122
- raise
123
-
124
- def process_local_video(video_path, save_directory, q):
125
- send_sse_message(q, {"status": "正在處理本地視頻..."})
126
- video_title = os.path.splitext(os.path.basename(video_path))[0]
127
- output_path = os.path.join(save_directory, f"{video_title}_audio.mp3")
128
-
129
- ffmpeg_command = [
130
- os.path.join(ffmpeg_path, 'ffmpeg'), # 使用完整路徑
131
- '-i', video_path,
132
- '-vn', # 禁用視頻
133
- '-acodec', 'libmp3lame', # 使用 MP3 編碼器
134
- '-q:a', '2', # 音頻質量,2 是很好的質量
135
- output_path
136
- ]
137
-
138
- logger.info(f"FFmpeg 命令: {' '.join(ffmpeg_command)}")
139
- logger.info(f"輸入視頻路徑: {video_path}")
140
- logger.info(f"輸出音頻路徑: {output_path}")
141
-
142
- try:
143
- # 檢查輸入文件是否存在
144
- if not os.path.exists(video_path):
145
- raise FileNotFoundError(f"輸入視頻文件不存在: {video_path}")
146
-
147
- # 檢查輸出目錄是否可寫
148
- if not os.access(os.path.dirname(output_path), os.W_OK):
149
- raise PermissionError(f"沒有寫入權限: {os.path.dirname(output_path)}")
150
-
151
- result = subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
152
- logger.info(f"FFmpeg 輸出: {result.stdout}")
153
- send_sse_message(q, {"status": f"本地視頻處理完成: {video_title}"})
154
- return output_path, video_title
155
- except subprocess.CalledProcessError as e:
156
- error_message = f"處理本地視頻時出錯: {e}\n\nFFmpeg 輸出:\n{e.stdout}\n\nFFmpeg 錯誤:\n{e.stderr}"
157
- logger.error(error_message)
158
- send_sse_message(q, {"status": "錯誤", "error": error_message})
159
- raise
160
- except Exception as e:
161
- error_message = f"處理本地視頻時出現意外錯誤: {str(e)}"
162
- logger.error(error_message)
163
- send_sse_message(q, {"status": "錯誤", "error": error_message})
164
- raise
165
-
166
- def generate_transcript(audio_path, video_title, q):
167
- send_sse_message(q, {"status": "開始音頻轉錄..."})
168
- segments, info = model.transcribe(
169
- audio_path,
170
- beam_size=config['whisper_beam_size'],
171
- language=config['whisper_language'],
172
- temperature=config['whisper_temperature'],
173
- initial_prompt=video_title,
174
- repetition_penalty=2,
175
- condition_on_previous_text=False
176
- )
177
- transcript = "\n".join([segment.text for segment in segments])
178
- send_sse_message(q, {"status": f"音頻轉錄完成,檢測到的語言: {info.language}", "transcript": transcript})
179
- return transcript
180
-
181
- def smart_split_transcript(transcript, q):
182
- send_sse_message(q, {"status": "開始智能分割轉錄文本..."})
183
- doc = nlp(transcript)
184
- segments = []
185
- current_segment = ""
186
- max_length = 1024
187
-
188
- for sent in doc.sents:
189
- if len(current_segment) + len(sent.text) <= max_length:
190
- current_segment += " " + sent.text
191
- else:
192
- if current_segment:
193
- segments.append(current_segment.strip())
194
- current_segment = sent.text
195
-
196
- if current_segment:
197
- segments.append(current_segment.strip())
198
-
199
- send_sse_message(q, {"status": f"轉錄文本分割完成,共 {len(segments)} 個段落"})
200
- return segments
201
-
202
- def extract_keywords_and_entities(text):
203
- doc = nlp(text)
204
- keywords = [token.lemma_ for token in doc if not token.is_stop and not token.is_punct]
205
- keyword_freq = Counter(keywords).most_common(5)
206
- entities = [(ent.text, ent.label_) for ent in doc.ents]
207
- return [keyword for keyword, _ in keyword_freq], entities
208
-
209
- def process_youtube_description(description):
210
- prompt = f"""請處理以下 YouTube 影片描述,移除所有渠道宣傳內容後,保留原文。
211
-
212
- 描述內容:
213
- {description}"""
214
-
215
- response = client.chat.completions.create(
216
- model=config['openai_model'],
217
- messages=[{"role": "system", "content": prompt}],
218
- temperature=0.1,
219
- max_tokens=500
220
- )
221
-
222
- processed_description = response.choices[0].message.content.strip()
223
-
224
- # 在終端機打印處理後的描述
225
- print("處理後的 YouTube 描述:")
226
- print(processed_description)
227
- print("------------------------")
228
-
229
- return processed_description
230
-
231
- def get_openai_summary(segment, video_title, is_final_summary, keywords, entities, processed_description, q):
232
- if is_final_summary:
233
- prompt = f"""以下是YouTube視頻'{video_title}'的多個段落摘要。請生成一個深入且全面的最終摘要,盡力保留主要內容、資訊細節、關鍵點和結論。摘要應該是連貫的、有條理的、詳細的,並且避免重複信息。在內容結尾,加入能夠方便搜尋器和 SEO 找到的 3 個 Hash Tag。請用繁體中文(香港)回應。
234
-
235
- 影片描述提供的可靠資訊 (請特別使用來補充和糾正摘要中的信息,尤其是戈人名或專有名詞):
236
- {processed_description}
237
-
238
-
239
-
240
- 以下是待處理的摘要內容:
241
- {segment}"""
242
- else:
243
- keywords_str = ", ".join(keywords)
244
- entities_str = ", ".join([f"{text}({label})" for text, label in entities])
245
- prompt = f"""以下內容是YouTube視頻的部份字幕文本,每行以短句顯示,閱讀時需要將多行組合一起才是一句完整的句子,偶爾會出現音譯的錯別字,請修正。內容主題是關於:'{video_title}',其中包含的關鍵詞有:{keywords_str},和以下的NER實體:{entities_str}。
246
-
247
- 影片描述提供的可靠資訊 (請特別使用來補充和糾正摘要中的信息,尤其是戈人名或專有名詞):
248
- {processed_description}
249
-
250
- 請根據每個NER實體的意思,以及上述描述資訊,以不少於 200 字的繁體中文(香港) 重組文章段落。目標是盡量抽取與主題有關的所有觀點、事件、案例、學問、步驟、方法、時間、人物、數據、名詞的基礎資料,建構成一篇連貫的、全面的、詳細的紀錄。請特別注意使用描述資訊來糾正可能的錯誤,尤其是人名和地名。忽略重複的、單純抒發個人情緒的訊息、與 Youtuber 個人宣傳的訊息。
251
-
252
- 你要處理的內容如下:
253
- {segment}"""
254
-
255
- response = client.chat.completions.create(
256
- model=config['openai_model'],
257
- messages=[{"role": "system", "content": prompt}],
258
- temperature=0.6,
259
- max_tokens=1000
260
- )
261
-
262
- summary = response.choices[0].message.content.strip()
263
- return summary
264
-
265
- def save_summary(text, video_title, url_or_path, save_directory):
266
- current_time = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
267
- cleaned_title = clean_filename(video_title)[:20]
268
- summary_file_name = f"GPT_Summary_{cleaned_title}_{current_time}.txt"
269
- summary_file_path = os.path.join(save_directory, summary_file_name)
270
-
271
- # 移除文本開頭可能存在的影片名稱和 URL/路徑信息
272
- lines = text.split('\n')
273
- if lines[0].startswith("影片名稱:") and lines[1].startswith("網址或路徑:"):
274
- text = '\n'.join(lines[2:])
275
-
276
- summary_text = f"影片名稱:\"{video_title}\"\n網址或路徑:\"{url_or_path}\"\n\n{text}"
277
-
278
- with open(summary_file_path, "w", encoding="utf-8") as file:
279
- file.write(summary_text)
280
-
281
- def save_transcript(transcript, video_title, url_or_path, save_directory):
282
- current_time = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
283
- cleaned_title = clean_filename(video_title)[:20]
284
- transcript_file_name = f"Transcript_{cleaned_title}_{current_time}.txt"
285
- transcript_file_path = os.path.join(save_directory, transcript_file_name)
286
-
287
- with open(transcript_file_path, "w", encoding="utf-8") as file:
288
- file.write(f"影片名稱:\"{video_title}\"\n網址或路徑:\"{url_or_path}\"\n\n{transcript}")
289
-
290
- logger.info(f"轉錄文本已保存至 {transcript_file_path}")
291
-
292
- def save_segment_summary(summary_text, segment_index, video_title, save_directory):
293
- current_time = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
294
- cleaned_title = clean_filename(video_title)[:20]
295
- summary_file_name = f"Segment_Summary_{cleaned_title}_{segment_index}_{current_time}.txt"
296
- summary_file_path = os.path.join(save_directory, summary_file_name)
297
-
298
- with open(summary_file_path, "w", encoding="utf-8") as file:
299
- file.write(summary_text)
300
-
301
- logger.info(f"段落摘要已保存至 {summary_file_path}")
302
-
303
- def process_video(url_or_path, q, local_video_description=''):
304
- try:
305
- logger.info(f"開始處理視頻: {url_or_path}")
306
- save_directory = config['save_directory']
307
-
308
- processed_description = ""
309
- if url_or_path.startswith('http'):
310
- # YouTube URL 處理邏輯保持不變
311
- logger.info("檢測到 YouTube URL,開始獲取視頻信息")
312
- ydl_opts = {'quiet': True}
313
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
314
- video_info = ydl.extract_info(url_or_path, download=False)
315
-
316
- video_data = {
317
- 'title': video_info['title'],
318
- 'duration': str(datetime.timedelta(seconds=video_info['duration'])),
319
- 'view_count': video_info['view_count'],
320
- 'like_count': video_info.get('like_count', 'N/A'),
321
- 'description': video_info['description']
322
- }
323
- send_sse_message(q, {"status": "獲取到視頻信息", "video_info": video_data})
324
-
325
- # 處理 YouTube 描述
326
- raw_description = video_info['description']
327
- processed_description = process_youtube_description(raw_description)
328
-
329
- logger.info("開始下載 YouTube 音頻")
330
- audio_path, video_title = download_audio(url_or_path, save_directory, q)
331
- else:
332
- logger.info("檢測到本地文件路徑,開始處理本地視頻")
333
- audio_path, video_title = process_local_video(url_or_path, save_directory, q)
334
- processed_description = local_video_description if local_video_description else "這是一個本地視頻文件,用戶沒有提供視頻描述。"
335
-
336
- if not audio_path or not os.path.exists(audio_path):
337
- raise FileNotFoundError(f"音頻文件不存在: {audio_path}")
338
-
339
- logger.info("開始生成轉錄文本")
340
- transcript = generate_transcript(audio_path, video_title, q)
341
-
342
- # 保存轉錄文本
343
- save_transcript(transcript, video_title, url_or_path, save_directory)
344
-
345
- logger.info("開始分割轉錄文本")
346
- segments = smart_split_transcript(transcript, q)
347
-
348
- all_summaries = []
349
- for i, segment in enumerate(segments, start=1):
350
- logger.info(f"開始為文本段 {i}/{len(segments)} 生成摘要")
351
- send_sse_message(q, {"status": f"正在為文本段 {i}/{len(segments)} 生成摘要..."})
352
- keywords, entities = extract_keywords_and_entities(segment)
353
- segment_summary = get_openai_summary(segment, video_title, False, keywords, entities, processed_description, q)
354
- if segment_summary:
355
- all_summaries.append(segment_summary)
356
- save_segment_summary(segment_summary, i, video_title, save_directory)
357
- send_sse_message(q, {"status": f"段落 {i} 摘要完成", "summary": segment_summary})
358
-
359
- logger.info("開始生成最終摘要")
360
- send_sse_message(q, {"status": "正在生成最終摘要..."})
361
- all_summaries_text = "\n\n".join(all_summaries)
362
- final_summary = get_openai_summary(all_summaries_text, video_title, True, [], [], processed_description, q)
363
- # 將最終摘要添加到 summary_versions
364
- summary_versions.append(final_summary)
365
- # 修改這裡:發送包含版本信息的最終摘要
366
- send_sse_message(q, {
367
- "status": "處理完成",
368
- "final_summary": final_summary,
369
- "version": 0,
370
- "total_versions": len(summary_versions)
371
- })
372
-
373
- # 添加影片名稱和 URL/路徑到最終摘要
374
- final_summary_with_info = f'影片名稱:"{video_title}"\n網址或路徑:"{url_or_path}"\n\n{final_summary}'
375
-
376
- send_sse_message(q, {"status": "處理完成", "final_summary": final_summary_with_info})
377
-
378
- # 保存最終摘要
379
- logger.info("保存最終摘要")
380
- save_summary(final_summary_with_info, video_title, url_or_path, save_directory)
381
-
382
- # 刪除臨時音頻文件
383
- if os.path.exists(audio_path):
384
- try:
385
- os.remove(audio_path)
386
- logger.info("臨時音頻文件已刪除")
387
- send_sse_message(q, {"status": "臨時音頻文件已刪除"})
388
- except Exception as e:
389
- logger.error(f"無法刪除臨時音頻文件: {str(e)}")
390
- send_sse_message(q, {"status": f"無法刪除臨時音頻文件: {str(e)}"})
391
-
392
- # 如果是本地上傳的 .mp4 文件,刪除臨時文件
393
- if not url_or_path.startswith('http') and url_or_path.lower().endswith('.mp4'):
394
- try:
395
- os.remove(url_or_path)
396
- logger.info("臨時上傳的 .mp4 文件已刪除")
397
- send_sse_message(q, {"status": "臨時上傳的 .mp4 文件已刪除"})
398
- except Exception as e:
399
- logger.error(f"無法刪除臨時上傳的 .mp4 文件: {str(e)}")
400
- send_sse_message(q, {"status": f"無法刪除臨時上傳的 .mp4 文件: {str(e)}"})
401
-
402
- logger.info("視頻處理完成")
403
-
404
- except Exception as e:
405
- logger.exception("處理視頻時發生錯誤")
406
- send_sse_message(q, {"status": f"錯誤: {str(e)}"})
407
-
408
-
409
- # 在全局變量部分添加:
410
- refinement_count = 0
411
- max_refinement_count = config.get('max_refinement_count', 5) # 使用 get 方法,如果 config.json 配置中沒有,則使用默認值 5
412
- summary_versions = deque(maxlen=max_refinement_count + 1)
413
-
414
- # 添加新的函數:
415
- def refine_final_summary(original_summary, user_feedback, video_title, processed_description):
416
- prompt = f"""你是一個專業的廣東話視頻內容摘要編輯。請根據用戶的反饋,改進以下內容摘要。標題是"{video_title}"。
417
-
418
- 原始摘要:
419
- {original_summary}
420
-
421
- 用戶反饋:
422
- {user_feedback}
423
-
424
-
425
- 請遵循以下指引:
426
- 1. 仔細閱讀原始摘要和用戶反饋,以用戶反饋的指示作為優先原則。
427
- 2. 根據用戶反饋,補充、修正在原始摘要內,任何錯誤或不準確的資訊,確保摘要全面涵蓋主題內容。
428
- 3. 保留原始摘要中準確和重要的部分。
429
- 4. 確保摘要邏輯清晰,結構完整,易於閱讀理解。
430
- 5. 如有必要,重新組織摘要結構以提高清晰度和連貫性。
431
- 6. 保留原有的 Hash Tag(如果有的話),或根據更新後的內容調整 Hash Tag。
432
-
433
- 請生成最終摘要,確保其準確、全面、連貫,並符合用戶的反饋意見。"""
434
-
435
- response = client.chat.completions.create(
436
- model=config['openai_model'],
437
- messages=[{"role": "system", "content": prompt}],
438
- temperature=0.8,
439
- max_tokens=1000
440
- )
441
-
442
- refined_summary = response.choices[0].message.content.strip()
443
- return refined_summary
444
-
445
- # 添加新的路由:
446
- @app.route('/refine_summary', methods=['POST'])
447
- def refine_summary():
448
- global refinement_count
449
- data = request.json
450
- #logger.info(f"Received refinement request: {data}") #{'original_summary': .... 'user_feedback': .... 'video_title':...'video_url'...'processed_description'...
451
- original_summary = data['original_summary']
452
- user_feedback = data['user_feedback']
453
- video_title = data['video_title']
454
- video_url = data['video_url']
455
- processed_description = data['processed_description']
456
-
457
- if refinement_count >= config['max_refinement_count']:
458
- return jsonify({"error": "已達到最大重新生成次數"}), 400
459
-
460
- refined_summary = refine_final_summary(original_summary, user_feedback, video_title, processed_description)
461
- refinement_count += 1
462
-
463
- # 添加視頻信息到摘要
464
- refined_summary_with_info = f"影片名稱:{video_title}\n網址或路徑:{video_url}\n\n{refined_summary}"
465
-
466
- logger.info(f"Sending refined summary: {refined_summary_with_info}")
467
- return jsonify({
468
- "refined_summary": refined_summary_with_info,
469
- "version": refinement_count,
470
- "total_versions": refinement_count + 1
471
- })
472
-
473
-
474
-
475
- @app.route('/')
476
- def index():
477
- return render_template('index.html')
478
-
479
- @app.route('/process', methods=['POST'])
480
- def process():
481
- try:
482
- url_or_path = request.form.get('url_or_path')
483
-
484
- if not url_or_path:
485
- return jsonify({"error": "No URL or path provided"}), 400
486
-
487
- if url_or_path.startswith('http'):
488
- # YouTube URL 處理邏輯保持不變
489
- pass
490
- else:
491
- # 本地文件處理
492
- if 'file' not in request.files:
493
- return jsonify({"error": "No file uploaded"}), 400
494
- file = request.files['file']
495
- if file.filename == '':
496
- return jsonify({"error": "No file selected"}), 400
497
- if file:
498
- filename = secure_filename(file.filename)
499
- file_path = os.path.join(config['save_directory'], filename)
500
- file.save(file_path)
501
- url_or_path = file_path
502
-
503
- # 獲取本地視頻描述
504
- local_video_description = request.form.get('localVideoDescription', '')
505
-
506
- logger.info(f"處理文件: {url_or_path}")
507
-
508
- q = Queue()
509
- thread = Thread(target=process_video, args=(url_or_path, q, local_video_description))
510
- thread.start()
511
- return Response(event_stream(q), content_type='text/event-stream')
512
- except Exception as e:
513
- error_message = f"處理請求時出現錯誤: {str(e)}"
514
- logger.error(error_message)
515
- return jsonify({"error": error_message}), 500
516
-
517
- def event_stream(q):
518
- while True:
519
- message = q.get()
520
- yield f"data: {json.dumps(message)}\n\n"
521
- if message.get('status') == '處理完成' or message.get('status').startswith('錯誤'):
522
- break
523
-
524
- if __name__ == '__main__':
525
- port = int(os.environ.get('PORT', 5000))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
  app.run(host='0.0.0.0', port=port)
 
1
+ import os
2
+ import json
3
+ import datetime
4
+ import subprocess
5
+ from queue import Queue
6
+ from threading import Thread
7
+
8
+ import torch
9
+ import yt_dlp
10
+ from faster_whisper import WhisperModel
11
+ from flask import Flask, render_template, request, Response, jsonify
12
+ from openai import OpenAI
13
+ import spacy
14
+ from collections import Counter
15
+
16
+ import time
17
+ import uuid
18
+
19
+ import logging
20
+ from logging.handlers import RotatingFileHandler
21
+ from werkzeug.utils import secure_filename
22
+ from collections import deque
23
+
24
# Module-level initialisation. Everything below runs once at import time, in
# order: logging, Flask app, config.json, OpenAI client, spaCy pipeline,
# Whisper model and finally the FFmpeg PATH entry.

# Basic logging configuration (root logger at INFO).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# File handler: RotatingFileHandler limits the log file size
# (10 * 1024 * 1024 bytes per file, 5 backups), stored next to this script.
log_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'app.log')
file_handler = RotatingFileHandler(log_file_path, maxBytes=10*1024*1024, backupCount=5, encoding='utf-8')
# NOTE(review): `logger` itself never gets a level, so it presumably inherits
# INFO from basicConfig above and DEBUG records will not reach this handler.
file_handler.setLevel(logging.DEBUG)

# Console handler.
# NOTE(review): basicConfig() normally installs its own root stream handler,
# so console lines may be emitted twice — confirm.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)

# One formatter shared by both handlers.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)
console_handler.setFormatter(formatter)

# Attach both handlers to this module's logger.
logger.addHandler(file_handler)
logger.addHandler(console_handler)

# Log level for other modules.
logging.getLogger("faster_whisper").setLevel(logging.INFO)

# Presumably a workaround for duplicate OpenMP runtime errors — TODO confirm.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

app = Flask(__name__, static_folder='static', static_url_path='/static')

# Load config.json from the directory containing this script. Any failure is
# logged with its traceback and re-raised, aborting the import.
current_directory = os.path.dirname(os.path.realpath(__file__))
config_file_path = os.path.join(current_directory, 'config.json')
try:
    with open(config_file_path, 'r', encoding='utf-8') as f:
        config = json.load(f)
    logger.info("成功加載配置文件")
except Exception as e:
    logger.exception("加載配置文件時發生錯誤")
    raise

# OpenAI client; the API key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# spaCy pipeline named by config['spacy_model'].
nlp = spacy.load(config['spacy_model'])

# Whisper model named by config['whisper_model'].
model = WhisperModel(config['whisper_model'], device="auto", compute_type=config['whisper_compute_type'])

# Make the configured FFmpeg directory reachable through PATH.
# NOTE(review): this is a substring test on the whole PATH string, not a
# per-entry comparison, and it raises KeyError when PATH is unset.
ffmpeg_path = config['ffmpeg_path']
if ffmpeg_path not in os.environ["PATH"]:
    os.environ["PATH"] += os.pathsep + ffmpeg_path
77
+
78
def send_sse_message(q, data):
    """Put *data* on the queue *q* without blocking.

    Args:
        q: Queue that carries messages to the event stream.
        data: The message payload to enqueue.
    """
    q.put(data, block=False)
80
+
81
def clean_filename(filename):
    """Return *filename* reduced to characters that are safe in a file name.

    Alphanumeric characters (as reported by ``str.isalnum``) plus space,
    dot and underscore are kept; everything else is dropped. Trailing
    whitespace is removed from the result.
    """
    permitted_extras = {' ', '.', '_'}
    kept = [ch for ch in filename if ch.isalnum() or ch in permitted_extras]
    return ''.join(kept).rstrip()
83
+
84
def download_audio(youtube_url, save_directory, q):
    """Download the audio of *youtube_url* and convert it to MP3.

    The file is written to *save_directory* as ``audio_<8-char id>.mp3``;
    the random id keeps separate downloads from sharing a file name.

    Args:
        youtube_url: URL passed to yt-dlp.
        save_directory: Directory that receives the MP3 file.
        q: Queue used to report progress messages.

    Returns:
        A ``(mp3_path, video_title)`` tuple; the title is passed through
        ``clean_filename``.

    Raises:
        yt_dlp.utils.DownloadError: yt-dlp returned no video information.
        FileNotFoundError: No matching ``.mp3`` file exists after the download.
        Exception: Any other failure; a status message is queued first and
            the exception is re-raised.
    """
    send_sse_message(q, {"status": "開始下載 YouTube 音頻..."})
    unique_id = str(uuid.uuid4())[:8]  # short unique identifier
    output_filename = f"audio_{unique_id}"
    output_path = os.path.join(save_directory, output_filename)

    # NOTE(review): 'quiet' and 'verbose' are both enabled, and
    # 'no_check_certificate' looks like a duplicate spelling of
    # 'nocheckcertificate' — confirm against the yt-dlp option list.
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'ffmpeg_location': ffmpeg_path,
        'outtmpl': output_path + ".%(ext)s",
        'quiet': True,
        'no_warnings': True,
        'no_check_certificate': True,
        'ignoreerrors': True,
        'nocheckcertificate': True,
        'logtostderr': False,
        'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
        'socket_timeout': 30,  # longer timeout
        'retries': 5,  # retry count on failure
        'verbose': True,  # detailed log output to help debugging
        'extract_flat': 'in_playlist',
        'youtube_include_dash_manifest': False,
        'source_address': '0.0.0.0',  # bind to all interfaces
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(youtube_url, download=True)

        # With 'ignoreerrors' enabled yt-dlp reports a failed extraction by
        # returning None instead of raising, so fail here with a clear error
        # rather than an AttributeError on `info.get`.
        if info is None:
            raise yt_dlp.utils.DownloadError("無法獲取視頻信息,下載失敗")

        # `or` also covers a title that is present but None.
        video_title = clean_filename(info.get('title') or 'Untitled')

        # Wait briefly so the file is fully written.
        time.sleep(2)

        # Locate the generated MP3 by its unique prefix.
        for name in os.listdir(save_directory):
            if name.startswith(output_filename) and name.endswith('.mp3'):
                converted_output_path = os.path.join(save_directory, name)
                break
        else:
            raise FileNotFoundError("無法找到下載的音頻文件")

        send_sse_message(q, {"status": f"音頻下載完成: {video_title}"})
        return converted_output_path, video_title
    except Exception as e:
        send_sse_message(q, {"status": f"下載音頻時發生錯誤: {str(e)}"})
        raise
135
+
136
def process_local_video(video_path, save_directory, q):
    """Extract the audio track of a local video into an MP3 file with FFmpeg.

    The output is ``<save_directory>/<video file stem>_audio.mp3``.

    Args:
        video_path: Path of the input video file.
        save_directory: Directory that receives the MP3 file.
        q: Queue used to report progress and error messages.

    Returns:
        An ``(output_path, video_title)`` tuple, where ``video_title`` is the
        input file name without its extension.

    Raises:
        FileNotFoundError: *video_path* does not exist.
        PermissionError: The output directory is not writable.
        subprocess.CalledProcessError: FFmpeg exited with a non-zero status.
    """
    send_sse_message(q, {"status": "正在處理本地視頻..."})
    video_title = os.path.splitext(os.path.basename(video_path))[0]
    output_path = os.path.join(save_directory, f"{video_title}_audio.mp3")

    ffmpeg_command = [
        os.path.join(ffmpeg_path, 'ffmpeg'),  # use the full path
        # Overwrite a leftover output file from an earlier run instead of
        # letting ffmpeg prompt for confirmation or refuse to write.
        '-y',
        '-i', video_path,
        '-vn',  # disable video
        '-acodec', 'libmp3lame',  # MP3 encoder
        '-q:a', '2',  # audio quality setting
        output_path
    ]

    logger.info(f"FFmpeg 命令: {' '.join(ffmpeg_command)}")
    logger.info(f"輸入視頻路徑: {video_path}")
    logger.info(f"輸出音頻路徑: {output_path}")

    try:
        # Check that the input file exists.
        if not os.path.exists(video_path):
            raise FileNotFoundError(f"輸入視頻文件不存在: {video_path}")

        # Check that the output directory is writable.
        if not os.access(os.path.dirname(output_path), os.W_OK):
            raise PermissionError(f"沒有寫入權限: {os.path.dirname(output_path)}")

        result = subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
        logger.info(f"FFmpeg 輸出: {result.stdout}")
        send_sse_message(q, {"status": f"本地視頻處理完成: {video_title}"})
        return output_path, video_title
    except subprocess.CalledProcessError as e:
        error_message = f"處理本地視頻時出錯: {e}\n\nFFmpeg 輸出:\n{e.stdout}\n\nFFmpeg 錯誤:\n{e.stderr}"
        logger.error(error_message)
        send_sse_message(q, {"status": "錯誤", "error": error_message})
        raise
    except Exception as e:
        error_message = f"處理本地視頻時出現意外錯誤: {str(e)}"
        logger.error(error_message)
        send_sse_message(q, {"status": "錯誤", "error": error_message})
        raise
177
+
178
def generate_transcript(audio_path, video_title, q):
    """Transcribe *audio_path* with the module-level Whisper model.

    Beam size, language and temperature come from ``config``; the video
    title is passed as the initial prompt.

    Args:
        audio_path: Path of the audio file to transcribe.
        video_title: Title passed to the model as ``initial_prompt``.
        q: Queue used to report progress and the finished transcript.

    Returns:
        The transcript: the text of each segment joined with newlines.
    """
    send_sse_message(q, {"status": "開始音頻轉錄..."})

    decode_options = {
        "beam_size": config['whisper_beam_size'],
        "language": config['whisper_language'],
        "temperature": config['whisper_temperature'],
        "initial_prompt": video_title,
        "repetition_penalty": 2,
        "condition_on_previous_text": False,
    }
    segments, info = model.transcribe(audio_path, **decode_options)

    segment_texts = [seg.text for seg in segments]
    transcript = "\n".join(segment_texts)

    send_sse_message(q, {"status": f"音頻轉錄完成,檢測到的語言: {info.language}", "transcript": transcript})
    return transcript
192
+
193
def smart_split_transcript(transcript, q):
    """Group the sentences of *transcript* into segments of about 1024 characters.

    Sentences come from the module-level spaCy pipeline. A sentence is added
    to the current segment while the combined length stays within the limit;
    otherwise the current segment is closed and the sentence starts a new
    one. A sentence is never split, so a single long sentence can exceed the
    limit.

    Args:
        transcript: Full transcript text.
        q: Queue used to report progress messages.

    Returns:
        A list of whitespace-stripped segment strings.
    """
    send_sse_message(q, {"status": "開始智能分割轉錄文本..."})

    limit = 1024
    chunks = []
    buffer = ""

    for sentence in nlp(transcript).sents:
        piece = sentence.text
        if len(buffer) + len(piece) <= limit:
            buffer = f"{buffer} {piece}"
            continue
        # The sentence does not fit: flush what has accumulated and start over.
        if buffer:
            chunks.append(buffer.strip())
        buffer = piece

    if buffer:
        chunks.append(buffer.strip())

    send_sse_message(q, {"status": f"轉錄文本分割完成,共 {len(chunks)} 個段落"})
    return chunks
213
+
214
def extract_keywords_and_entities(text):
    """Return the five most frequent lemmas and the named entities of *text*.

    Stop words and punctuation tokens are left out of the lemma count.

    Returns:
        A ``(keywords, entities)`` tuple: ``keywords`` is a list of up to
        five lemma strings, ``entities`` is a list of ``(text, label)`` pairs.
    """
    doc = nlp(text)
    lemma_counts = Counter(
        token.lemma_ for token in doc if not (token.is_stop or token.is_punct)
    )
    top_keywords = [lemma for lemma, _count in lemma_counts.most_common(5)]
    named_entities = [(ent.text, ent.label_) for ent in doc.ents]
    return top_keywords, named_entities
220
+
221
def process_youtube_description(description):
    """Ask the OpenAI model to remove channel promotion from a video description.

    Args:
        description: Raw description text; may be ``None`` or empty.

    Returns:
        The cleaned description, or ``""`` when there is no description or
        the model returns no content.
    """
    # Nothing to clean: skip the API call instead of sending "None" or an
    # empty description to the model.
    if not description or not description.strip():
        return ""

    prompt = f"""請處理以下 YouTube 影片描述,移除所有渠道宣傳內容後,保留原文。

描述內容:
{description}"""

    response = client.chat.completions.create(
        model=config['openai_model'],
        messages=[{"role": "system", "content": prompt}],
        temperature=0.1,
        max_tokens=500
    )

    # `content` can be None; treat that as an empty result.
    content = response.choices[0].message.content
    processed_description = (content or "").strip()

    # Print the processed description to the terminal.
    print("處理後的 YouTube 描述:")
    print(processed_description)
    print("------------------------")

    return processed_description
242
+
243
def get_openai_summary(segment, video_title, is_final_summary, keywords, entities, processed_description, q):
    """Request a summary of *segment* from the OpenAI chat model.

    Args:
        segment: Transcript segment, or the joined segment summaries when
            *is_final_summary* is true.
        video_title: Title interpolated into the prompt.
        is_final_summary: Selects the final-summary prompt over the
            per-segment prompt.
        keywords: Keyword strings (used by the per-segment prompt only).
        entities: ``(text, label)`` pairs (used by the per-segment prompt only).
        processed_description: Description text interpolated into the prompt.
        q: Not used by this function.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    if not is_final_summary:
        joined_keywords = ", ".join(keywords)
        joined_entities = ", ".join(f"{name}({label})" for name, label in entities)
        prompt = f"""以下內容是YouTube視頻的部份字幕文本,每行以短句顯示,閱讀時需要將多行組合一起才是一句完整的句子,偶爾會出現音譯的錯別字,請修正。內容主題是關於:'{video_title}',其中包含的關鍵詞有:{joined_keywords},和以下的NER實體:{joined_entities}。

影片描述提供的可靠資訊 (請特別使用來補充和糾正摘要中的信息,尤其是戈人名或專有名詞):
{processed_description}

請根據每個NER實體的意思,以及上述描述資訊,以不少於 200 字的繁體中文(香港) 重組文章段落。目標是盡量抽取與主題有關的所有觀點、事件、案例、學問、步驟、方法、時間、人物、數據、名詞的基礎資料,建構成一篇連貫的、全面的、詳細的紀錄。請特別注意使用描述資訊來糾正可能的錯誤,尤其是人名和地名。忽略重複的、單純抒發個人情緒的訊息、與 Youtuber 個人宣傳的訊息。

你要處理的內容如下:
{segment}"""
    else:
        prompt = f"""以下是YouTube視頻'{video_title}'的多個段落摘要。請生成一個深入且全面的最終摘要,盡力保留主要內容、資訊細節、關鍵點和結論。摘要應該是連貫的、有條理的、詳細的,並且避免重複信息。在內容結尾,加入能夠方便搜尋器和 SEO 找到的 3 個 Hash Tag。請用繁體中文(香港)回應。

影片描述提供的可靠資訊 (請特別使用來補充和糾正摘要中的信息,尤其是戈人名或專有名詞):
{processed_description}



以下是待處理的摘要內容:
{segment}"""

    request_args = {
        "model": config['openai_model'],
        "messages": [{"role": "system", "content": prompt}],
        "temperature": 0.6,
        "max_tokens": 1000,
    }
    response = client.chat.completions.create(**request_args)
    return response.choices[0].message.content.strip()
276
+
277
def save_summary(text, video_title, url_or_path, save_directory):
    """Write the final summary to ``GPT_Summary_<title>_<timestamp>.txt``.

    If *text* already starts with the title and URL/path header lines, they
    are removed first, so the header written here appears only once.

    Args:
        text: Summary text, with or without the two header lines.
        video_title: Title used in the header and, cleaned and cut to 20
            characters, in the file name.
        url_or_path: Source URL or file path recorded in the header.
        save_directory: Directory that receives the file.
    """
    current_time = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    cleaned_title = clean_filename(video_title)[:20]
    summary_file_name = f"GPT_Summary_{cleaned_title}_{current_time}.txt"
    summary_file_path = os.path.join(save_directory, summary_file_name)

    # Remove an existing title / URL header from the start of the text.
    # The length check keeps single-line input from raising IndexError.
    lines = text.split('\n')
    if (
        len(lines) >= 2
        and lines[0].startswith("影片名稱:")
        and lines[1].startswith("網址或路徑:")
    ):
        # The header written below already ends with a blank line, so drop
        # the blank separator that followed the old header.
        text = '\n'.join(lines[2:]).lstrip('\n')

    summary_text = f"影片名稱:\"{video_title}\"\n網址或路徑:\"{url_or_path}\"\n\n{text}"

    with open(summary_file_path, "w", encoding="utf-8") as file:
        file.write(summary_text)
292
+
293
def save_transcript(transcript, video_title, url_or_path, save_directory):
    """Write the transcript to ``Transcript_<title>_<timestamp>.txt``.

    The file starts with the same title and URL/path header that
    ``save_summary`` writes, followed by a blank line and the transcript.

    Args:
        transcript: Transcript text to store.
        video_title: Title used in the header and, cleaned and cut to 20
            characters, in the file name.
        url_or_path: Source URL or file path recorded in the header.
        save_directory: Directory that receives the file.
    """
    current_time = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    cleaned_title = clean_filename(video_title)[:20]
    transcript_file_name = f"Transcript_{cleaned_title}_{current_time}.txt"
    transcript_file_path = os.path.join(save_directory, transcript_file_name)

    with open(transcript_file_path, "w", encoding="utf-8") as file:
        # The header label was corrupted by mis-encoded characters; restored
        # to "網址或路徑", the label save_summary uses.
        file.write(f"影片名稱:\"{video_title}\"\n網址或路徑:\"{url_or_path}\"\n\n{transcript}")

    logger.info(f"轉錄文本已保存至 {transcript_file_path}")
303
+
304
def save_segment_summary(summary_text, segment_index, video_title, save_directory):
    """Write one segment summary to its own text file and log the path.

    The file is named
    ``Segment_Summary_<title>_<segment_index>_<timestamp>.txt``, where the
    title is cleaned and cut to 20 characters.

    Args:
        summary_text: Summary text to store.
        segment_index: Index of the segment, used in the file name.
        video_title: Title used in the file name.
        save_directory: Directory that receives the file.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    short_title = clean_filename(video_title)[:20]
    target_path = os.path.join(
        save_directory,
        f"Segment_Summary_{short_title}_{segment_index}_{timestamp}.txt",
    )

    with open(target_path, "w", encoding="utf-8") as out:
        out.write(summary_text)

    logger.info(f"段落摘要已保存至 {target_path}")
314
+
315
def _fetch_youtube_audio(url, save_directory, q):
    """Fetch a video's metadata with yt-dlp, then download its audio.

    Args:
        url: Video URL passed to yt-dlp and to ``download_audio``.
        save_directory: Directory used for the downloaded audio file.
        q: Queue that receives status messages via ``send_sse_message``.

    Returns:
        A ``(audio_path, video_title, processed_description)`` tuple.

    Raises:
        ValueError: If yt-dlp returns no metadata for the URL.
        yt_dlp.utils.DownloadError: Re-raised after a status message has
            been queued for the client.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'ffmpeg_location': config['ffmpeg_path'],
        'outtmpl': os.path.join(save_directory, 'audio_%(id)s.%(ext)s'),
        'quiet': True,
        'no_warnings': True,
        'no_check_certificate': True,
        'ignoreerrors': True,
        'nocheckcertificate': True,
        'logtostderr': False,
        'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
        'socket_timeout': 30,
        'retries': 5,
        'verbose': True,
        'extract_flat': 'in_playlist',
        'youtube_include_dash_manifest': False,
        'source_address': '0.0.0.0',
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            video_info = ydl.extract_info(url, download=False)

        # With 'ignoreerrors' enabled, a failed extraction yields None
        # instead of raising, so fail here with a readable message rather
        # than a TypeError on the first subscript.
        if not video_info:
            raise ValueError("無法獲取視頻信息,請確認網址是否正確或稍後再試。")

        # Not every field is guaranteed to be present (or non-None), so
        # read them defensively.
        duration_seconds = video_info.get('duration')
        raw_description = video_info.get('description') or ''
        video_data = {
            'title': video_info.get('title', 'N/A'),
            'duration': (
                str(datetime.timedelta(seconds=duration_seconds))
                if duration_seconds is not None else 'N/A'
            ),
            'view_count': video_info.get('view_count', 'N/A'),
            'like_count': video_info.get('like_count', 'N/A'),
            'description': raw_description,
        }
        send_sse_message(q, {"status": "獲取到視頻信息", "video_info": video_data})

        # Condense the YouTube description for use in the summary prompts.
        processed_description = process_youtube_description(raw_description)

        logger.info("開始下載 YouTube 音頻")
        audio_path, video_title = download_audio(url, save_directory, q)
    except yt_dlp.utils.DownloadError as e:
        if "Sign in to confirm you're not a bot" in str(e):
            send_sse_message(q, {"status": "錯誤:YouTube 要求人機驗證。請稍後再試或使用其他視頻。"})
        else:
            send_sse_message(q, {"status": f"下載音頻時發生錯誤: {str(e)}"})
        raise

    return audio_path, video_title, processed_description


def _remove_temp_file(path, label, q):
    """Delete ``path`` if it exists and report the outcome (log + queue)."""
    if not path or not os.path.exists(path):
        return
    try:
        os.remove(path)
    except OSError as e:
        logger.error(f"無法刪除{label}: {str(e)}")
        send_sse_message(q, {"status": f"無法刪除{label}: {str(e)}"})
    else:
        logger.info(f"{label}已刪除")
        send_sse_message(q, {"status": f"{label}已刪除"})


def _cleanup_temp_files(audio_path, url_or_path, q):
    """Remove the temporary audio file and, for local .mp4 uploads, the upload."""
    _remove_temp_file(audio_path, "臨時音頻文件", q)

    is_local_mp4 = (
        isinstance(url_or_path, str)
        and not url_or_path.startswith('http')
        and url_or_path.lower().endswith('.mp4')
    )
    if is_local_mp4:
        _remove_temp_file(url_or_path, "臨時上傳的 .mp4 文件", q)


def process_video(url_or_path, q, local_video_description=''):
    """Transcribe a video, summarise it, and stream progress through ``q``.

    Steps: obtain the audio (download for ``http`` URLs, otherwise process
    the local file), generate and save the transcript, split it into
    segments, summarise each segment, build and save a final summary, and
    finally delete the temporary files. All exceptions are caught, logged,
    and reported to the client as a status message starting with "錯誤".

    Args:
        url_or_path: Video URL (anything starting with ``http``) or a local
            file path.
        q: Queue that receives status dictionaries via ``send_sse_message``.
        local_video_description: Optional user-supplied description used
            for local files; a default placeholder is used when empty.
    """
    global refinement_count

    audio_path = None
    try:
        logger.info(f"開始處理視頻: {url_or_path}")
        save_directory = config['save_directory']

        if url_or_path.startswith('http'):
            logger.info("檢測到 YouTube URL,開始獲取視頻信息")
            audio_path, video_title, processed_description = _fetch_youtube_audio(
                url_or_path, save_directory, q
            )
        else:
            logger.info("檢測到本地文件路徑,開始處理本地視頻")
            audio_path, video_title = process_local_video(url_or_path, save_directory, q)
            processed_description = local_video_description if local_video_description else "這是一個本地視頻文件,用戶沒有提供視頻描述。"

        if not audio_path or not os.path.exists(audio_path):
            raise FileNotFoundError(f"音頻文件不存在: {audio_path}")

        logger.info("開始生成轉錄文本")
        transcript = generate_transcript(audio_path, video_title, q)

        # Persist the transcript before summarising.
        save_transcript(transcript, video_title, url_or_path, save_directory)

        logger.info("開始分割轉錄文本")
        segments = smart_split_transcript(transcript, q)

        all_summaries = []
        for i, segment in enumerate(segments, start=1):
            logger.info(f"開始為文本段 {i}/{len(segments)} 生成摘要")
            send_sse_message(q, {"status": f"正在為文本段 {i}/{len(segments)} 生成摘要..."})
            keywords, entities = extract_keywords_and_entities(segment)
            segment_summary = get_openai_summary(segment, video_title, False, keywords, entities, processed_description, q)
            if segment_summary:
                all_summaries.append(segment_summary)
                save_segment_summary(segment_summary, i, video_title, save_directory)
                send_sse_message(q, {"status": f"段落 {i} 摘要完成", "summary": segment_summary})

        logger.info("開始生成最終摘要")
        send_sse_message(q, {"status": "正在生成最終摘要..."})
        all_summaries_text = "\n\n".join(all_summaries)
        final_summary = get_openai_summary(all_summaries_text, video_title, True, [], [], processed_description, q)

        # A new video starts a fresh version history. Without this reset the
        # module-level refinement counter keeps growing across videos and
        # the refinement limit is eventually hit for good.
        summary_versions.clear()
        refinement_count = 0
        summary_versions.append(final_summary)
        send_sse_message(q, {
            "status": "處理完成",
            "final_summary": final_summary,
            "version": 0,
            "total_versions": len(summary_versions)
        })

        # Prepend the video title and URL/path to the final summary.
        final_summary_with_info = f'影片名稱:"{video_title}"\n網址或路徑:"{url_or_path}"\n\n{final_summary}'

        # NOTE(review): event_stream stops at the first message whose status
        # is "處理完成", so as the code stands this second message is queued
        # but not delivered to the client. Confirm which payload the
        # front end is meant to receive.
        send_sse_message(q, {"status": "處理完成", "final_summary": final_summary_with_info})

        # Save the final summary.
        logger.info("保存最終摘要")
        save_summary(final_summary_with_info, video_title, url_or_path, save_directory)

        logger.info("視頻處理完成")

    except Exception as e:
        logger.exception("處理視頻時發生錯誤")
        send_sse_message(q, {"status": f"錯誤: {str(e)}"})
    finally:
        # Clean up on failure as well as on success so temporary audio and
        # uploaded files do not accumulate in the save directory.
        _cleanup_temp_files(audio_path, url_or_path, q)
446
+
447
+
448
# Module-level state used by the summary-refinement feature.
refinement_count = 0
# Use the value from config.json when present; otherwise default to 5.
max_refinement_count = config.get('max_refinement_count', 5)
# Bounded history of summary versions (initial summary plus refinements).
summary_versions = deque(maxlen=max_refinement_count + 1)
452
+
453
# Helper that rewrites a final summary according to user feedback.
454
def refine_final_summary(original_summary, user_feedback, video_title, processed_description):
    """Ask the configured OpenAI chat model to revise a summary.

    The prompt embeds the video title, the original summary and the user's
    feedback, and is sent as a single system message.

    Args:
        original_summary: Summary text to be improved.
        user_feedback: The user's instructions for the revision.
        video_title: Title of the video, quoted in the prompt.
        processed_description: Accepted for interface compatibility; it is
            not included in the prompt.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    instructions = f"""你是一個專業的廣東話視頻內容摘要編輯。請根據用戶的反饋,改進以下內容摘要。標題是"{video_title}"。

原始摘要:
{original_summary}

用戶反饋:
{user_feedback}


請遵循以下指引:
1. 仔細閱讀原始摘要和用戶反饋,以用戶反饋的指示作為優先原則。
2. 根據用戶反饋,補充、修正在原始摘要內,任何錯誤或不準確的資訊,確保摘要全面涵蓋主題內容。
3. 保留原始摘要中準確和重要的部分。
4. 確保摘要邏輯清晰,結構完整,易於閱讀理解。
5. 如有必要,重新組織摘要結構以提高清晰度和連貫性。
6. 保留原有的 Hash Tag(如果有的話),或根據更新後的內容調整 Hash Tag。

請生成最終摘要,確保其準確、全面、連貫,並符合用戶的反饋意見。"""

    chat_messages = [{"role": "system", "content": instructions}]
    completion = client.chat.completions.create(
        model=config['openai_model'],
        messages=chat_messages,
        temperature=0.8,
        max_tokens=1000,
    )

    return completion.choices[0].message.content.strip()
483
+
484
# Route that handles summary-refinement requests.
485
@app.route('/refine_summary', methods=['POST'])
def refine_summary():
    """Refine the final summary according to user feedback.

    Expects a JSON object with ``original_summary``, ``user_feedback``,
    ``video_title`` and ``video_url``; ``processed_description`` is
    optional. Each successful call increments the module-level
    ``refinement_count``.

    Returns:
        JSON with ``refined_summary`` (prefixed with the video title and
        URL), ``version`` and ``total_versions`` on success; a JSON
        ``error`` with status 400 for an invalid request or when the
        refinement limit is reached; a JSON ``error`` with status 500 when
        generating the refined summary fails.
    """
    global refinement_count

    # silent=True yields None for a missing/invalid JSON body instead of
    # raising, so the client gets the JSON error below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "請求內容必須是 JSON 物件"}), 400

    required_fields = ('original_summary', 'user_feedback', 'video_title', 'video_url')
    missing_fields = [name for name in required_fields if name not in data]
    if missing_fields:
        return jsonify({"error": f"缺少必要欄位: {', '.join(missing_fields)}"}), 400

    original_summary = data['original_summary']
    user_feedback = data['user_feedback']
    video_title = data['video_title']
    video_url = data['video_url']
    processed_description = data.get('processed_description', '')

    # Use the module-level limit, which already falls back to a default
    # when config.json has no 'max_refinement_count' entry.
    if refinement_count >= max_refinement_count:
        return jsonify({"error": "已達到最大重新生成次數"}), 400

    try:
        refined_summary = refine_final_summary(original_summary, user_feedback, video_title, processed_description)
    except Exception as e:
        # Boundary handler: log the traceback and answer in JSON so the
        # client can display the failure.
        logger.exception("重新生成摘要時發生錯誤")
        return jsonify({"error": f"重新生成摘要時發生錯誤: {str(e)}"}), 500

    # Count only refinements that actually succeeded.
    refinement_count += 1

    # Prefix the refined summary with the video information.
    refined_summary_with_info = f"影片名稱:{video_title}\n網址或路徑:{video_url}\n\n{refined_summary}"

    logger.info(f"Sending refined summary: {refined_summary_with_info}")
    return jsonify({
        "refined_summary": refined_summary_with_info,
        "version": refinement_count,
        "total_versions": refinement_count + 1
    })
511
+
512
+
513
+
514
@app.route('/')
def index():
    """Render the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page
517
+
518
def _build_upload_filename(original_name):
    """Return a safe on-disk name for an upload, keeping its extension.

    ``secure_filename`` drops non-ASCII characters, so a name made only of
    such characters would collapse to an empty string or to just the
    extension text. The stem and extension are therefore sanitised
    separately, and a random stem is generated when nothing usable remains.
    """
    import uuid  # local import keeps this block self-contained

    stem, ext = os.path.splitext(original_name)
    safe_stem = secure_filename(stem) or f"upload_{uuid.uuid4().hex[:8]}"
    safe_ext = secure_filename(ext)  # e.g. ".mp4" -> "mp4"
    return f"{safe_stem}.{safe_ext}" if safe_ext else safe_stem


@app.route('/process', methods=['POST'])
def process():
    """Start processing a video and stream progress as server-sent events.

    Reads ``url_or_path`` from the form. Values starting with ``http`` are
    processed as URLs; anything else requires an uploaded ``file``, which
    is saved into the configured save directory and processed from there.
    The optional ``localVideoDescription`` form field is forwarded to
    ``process_video``.

    Returns:
        A ``text/event-stream`` response fed by the worker thread, or a
        JSON ``error`` with status 400 (bad request) or 500 (unexpected
        failure).
    """
    try:
        url_or_path = request.form.get('url_or_path')

        if not url_or_path:
            return jsonify({"error": "No URL or path provided"}), 400

        if not url_or_path.startswith('http'):
            # Local file: the content must arrive as an upload. The form
            # value itself is never used as a path on the server.
            uploaded = request.files.get('file')
            if uploaded is None:
                return jsonify({"error": "No file uploaded"}), 400
            if not uploaded.filename:
                return jsonify({"error": "No file selected"}), 400

            file_path = os.path.join(
                config['save_directory'],
                _build_upload_filename(uploaded.filename),
            )
            uploaded.save(file_path)
            url_or_path = file_path

        # Optional user-supplied description for local videos.
        local_video_description = request.form.get('localVideoDescription', '')

        logger.info(f"處理文件: {url_or_path}")

        q = Queue()
        thread = Thread(target=process_video, args=(url_or_path, q, local_video_description))
        thread.start()
        return Response(event_stream(q), content_type='text/event-stream')
    except Exception as e:
        error_message = f"處理請求時出現錯誤: {str(e)}"
        # logger.exception records the traceback along with the message.
        logger.exception(error_message)
        return jsonify({"error": error_message}), 500
555
+
556
def event_stream(q):
    """Yield queued messages as server-sent-event frames until a terminal one.

    Each message taken from ``q`` is serialised with ``json.dumps`` and
    yielded as ``data: <json>`` followed by a blank line. The stream ends
    after a message whose ``status`` is exactly "處理完成" or starts with
    "錯誤". Messages without a string ``status`` are forwarded and never
    treated as terminal.

    Args:
        q: Queue of JSON-serialisable dictionaries; ``get()`` blocks until
            the next message is available.
    """
    while True:
        message = q.get()
        yield f"data: {json.dumps(message)}\n\n"
        status = message.get('status')
        # Guard against a missing or non-string status, which would
        # otherwise raise on .startswith and abort the stream.
        if isinstance(status, str) and (status == '處理完成' or status.startswith('錯誤')):
            break
562
+
563
if __name__ == '__main__':
    # Listen on all interfaces; the port comes from the PORT environment
    # variable and defaults to 5000.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 5000)))