Spaces:

JunyiAcademy
/

vaitor2

Sleeping

App Files Files Community

youngtsai committed on Apr 6, 2024

Commit

44c77f0

1 Parent(s): 62dae33

if is_new_transcript:

Browse files

Files changed (1) hide show

app.py +15 -9

app.py CHANGED Viewed

@@ -489,6 +489,7 @@ def process_transcript_and_screenshots_on_gcs(video_id):
     transcript_file_name = f'{video_id}_transcript.json'
     transcript_blob_name = f"{video_id}/{transcript_file_name}"
     # 检查逐字稿是否存在
     is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
     if not is_transcript_exists:
         # 从YouTube获取逐字稿并上传
@@ -507,6 +508,7 @@ def process_transcript_and_screenshots_on_gcs(video_id):
         transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
         upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, transcript_text)
     else:
         # 逐字稿已存在，下载逐字稿内容
         print("逐字稿已存在于GCS中")
@@ -542,15 +544,19 @@ def process_transcript_and_screenshots_on_gcs(video_id):
             img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
             entry['img_file_id'] = img_file_id
             print(f"截图已上传到GCS: {img_file_id}")
-    # 更新逐字稿文件
-    print("===更新逐字稿文件===")
-    print(transcript)
-    print("===更新逐字稿文件===")
-    updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
-    upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, updated_transcript_text)
-    print("逐字稿已更新，包括截图链接")
-    updated_transcript_json = json.loads(updated_transcript_text)
     return updated_transcript_json

     transcript_file_name = f'{video_id}_transcript.json'
     transcript_blob_name = f"{video_id}/{transcript_file_name}"
     # 检查逐字稿是否存在
+    is_new_transcript = False
     is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
     if not is_transcript_exists:
         # 从YouTube获取逐字稿并上传
         transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
         upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, transcript_text)
+        is_new_transcript = True
     else:
         # 逐字稿已存在，下载逐字稿内容
         print("逐字稿已存在于GCS中")
             img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
             entry['img_file_id'] = img_file_id
             print(f"截图已上传到GCS: {img_file_id}")
+            is_new_transcript = True
+    if is_new_transcript:
+        # 更新逐字稿文件
+        print("===更新逐字稿文件===")
+        print(transcript)
+        print("===更新逐字稿文件===")
+        updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
+        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, updated_transcript_text)
+        print("逐字稿已更新，包括截图链接")
+        updated_transcript_json = json.loads(updated_transcript_text)
+    else:
+        updated_transcript_json = transcript
     return updated_transcript_json