Spaces:
Sleeping
Sleeping
print("逐字稿已上传到GCS")
Browse files
app.py
CHANGED
@@ -423,17 +423,18 @@ def process_transcript_and_screenshots_on_gcs(video_id):
|
|
423 |
gcs_client = init_gcs_client(GCS_KEY)
|
424 |
bucket_name = 'video_ai_assistant'
|
425 |
# 检查 folder 是否存在
|
426 |
-
is_gcs_exists = gcs_check_folder_exists(gcs_client, bucket_name, video_id)
|
427 |
-
if not is_gcs_exists:
|
428 |
-
|
429 |
-
|
430 |
-
else:
|
431 |
-
|
432 |
|
433 |
# 逐字稿文件名
|
434 |
-
|
|
|
435 |
# 检查逐字稿是否存在
|
436 |
-
exists = gcs_check_file_exists(gcs_client, bucket_name,
|
437 |
if not exists:
|
438 |
# 从YouTube获取逐字稿并上传
|
439 |
transcript = get_transcript(video_id)
|
@@ -442,22 +443,21 @@ def process_transcript_and_screenshots_on_gcs(video_id):
|
|
442 |
else:
|
443 |
print("沒有找到字幕")
|
444 |
transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
# print(f"截图已上传到GCS: {img_file_id}")
|
456 |
|
457 |
-
#
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
|
462 |
return transcript
|
463 |
|
|
|
423 |
gcs_client = init_gcs_client(GCS_KEY)
|
424 |
bucket_name = 'video_ai_assistant'
|
425 |
# 检查 folder 是否存在
|
426 |
+
# is_gcs_exists = gcs_check_folder_exists(gcs_client, bucket_name, video_id)
|
427 |
+
# if not is_gcs_exists:
|
428 |
+
# gcs_create_bucket_folder_if_not_exists(gcs_client, bucket_name, video_id)
|
429 |
+
# print("GCS folder:{video_id} 已创建")
|
430 |
+
# else:
|
431 |
+
# print("GCS folder:{video_id} 已存在")
|
432 |
|
433 |
# 逐字稿文件名
|
434 |
+
transcript_file_name = f'{video_id}_transcript.json'
|
435 |
+
transcript_blob_name = f"{video_id}/{transcript_file_name}"
|
436 |
# 检查逐字稿是否存在
|
437 |
+
exists = gcs_check_file_exists(gcs_client, bucket_name, transcript_blob_name)
|
438 |
if not exists:
|
439 |
# 从YouTube获取逐字稿并上传
|
440 |
transcript = get_transcript(video_id)
|
|
|
443 |
else:
|
444 |
print("沒有找到字幕")
|
445 |
transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
446 |
+
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, transcript_text)
|
447 |
+
print("逐字稿已上传到GCS")
|
448 |
+
|
449 |
+
for entry in transcript:
|
450 |
+
if 'img_file_id' not in entry:
|
451 |
+
screenshot_path = screenshot_youtube_video(video_id, entry['start'])
|
452 |
+
transcript_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
|
453 |
+
img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, transcript_blob_name, screenshot_path)
|
454 |
+
entry['img_file_id'] = img_file_id
|
455 |
+
print(f"截图已上传到GCS: {img_file_id}")
|
|
|
456 |
|
457 |
+
# 更新逐字稿文件
|
458 |
+
updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
459 |
+
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, updated_transcript_text)
|
460 |
+
print("逐字稿已更新,包括截图链接")
|
461 |
|
462 |
return transcript
|
463 |
|