Spaces:

JunyiAcademy
/

vaitor2

Sleeping

App Files Community

youngtsai committed on Feb 28, 2024

Commit

2e1a0a6

1 Parent(s): 345a1ff

no 截圖

Browse files

Files changed (1) hide show

app.py +16 -8

app.py CHANGED Viewed

@@ -442,19 +442,26 @@ def process_transcript_and_screenshots_on_gcs(video_id):
         else:
             print("沒有找到字幕")
         transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
     else:
         # 逐字稿已存在，下载逐字稿内容
         print("逐字稿已存在于GCS中")
         transcript_text = download_blob_to_string(gcs_client, bucket_name, transcript_blob_name)
         transcript = json.loads(transcript_text)
-    for entry in transcript:
-        if 'img_file_id' not in entry:
-            screenshot_path = screenshot_youtube_video(video_id, entry['start'])
-            screenshot_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
-            img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
-            entry['img_file_id'] = img_file_id
-            print(f"截图已上传到GCS: {img_file_id}")
     # 更新逐字稿文件
     print("===更新逐字稿文件===")
@@ -491,7 +498,8 @@ def process_youtube_link(link):
         start_time = format_seconds_to_time(entry['start'])
         end_time = format_seconds_to_time(entry['start'] + entry['duration'])
         embed_url = get_embedded_youtube_link(video_id, entry['start'])
-        img_file_id = entry['img_file_id']
         # 先取消 Google Drive 的图片
         # screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
         screenshot_path = img_file_id

         else:
             print("沒有找到字幕")
         transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
+        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, transcript_text)
     else:
         # 逐字稿已存在，下载逐字稿内容
         print("逐字稿已存在于GCS中")
         transcript_text = download_blob_to_string(gcs_client, bucket_name, transcript_blob_name)
         transcript = json.loads(transcript_text)
+    source = "gcs"
+    get_questions(video_id, transcript_text, source)
+    get_video_id_summary(video_id, transcript_text, source)
+    get_mind_map(video_id, transcript_text, source)
+    # 處理截圖
+    # for entry in transcript:
+    #     if 'img_file_id' not in entry:
+    #         screenshot_path = screenshot_youtube_video(video_id, entry['start'])
+    #         screenshot_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
+    #         img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
+    #         entry['img_file_id'] = img_file_id
+    #         print(f"截图已上传到GCS: {img_file_id}")
     # 更新逐字稿文件
     print("===更新逐字稿文件===")
         start_time = format_seconds_to_time(entry['start'])
         end_time = format_seconds_to_time(entry['start'] + entry['duration'])
         embed_url = get_embedded_youtube_link(video_id, entry['start'])
+        # img_file_id = entry['img_file_id']
+        img_file_id =""
         # 先取消 Google Drive 的图片
         # screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
         screenshot_path = img_file_id