Spaces:

JunyiAcademy
/

vaitor2

Sleeping

App Files Files Community

youngtsai committed on Feb 24, 2024

Commit

b872b89

1 Parent(s): f28acdd

def get_video_id_summary(video_id, df_string, source):

Browse files

Files changed (1) hide show

app.py +53 -35

app.py CHANGED Viewed

@@ -521,7 +521,7 @@ def process_youtube_link(link):
     source = "gcs"
     questions = get_questions(video_id, formatted_simple_transcript, source)
     formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
-    summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
     summary = summary_json["summary"]
     html_content = format_transcript_to_html(formatted_transcript)
     simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
@@ -702,46 +702,64 @@ def processed_video_summary_to_json(summary):
     return summary_json
 # get video_id_summary.json content
-def get_video_id_summary(video_id, df_string):
-    print("===get_video_id_summary===")
-    service = init_drive_service()
-    parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
-    folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
-    file_name = f'{video_id}_summary.json'
-    # 检查逐字稿是否存在
-    exists, file_id = check_file_exists(service, folder_id, file_name)
-    if not exists:
-        summary = generate_summarise(df_string)
-        # processed_summary = processed_video_summary_to_json(summary)
-        summary_json = {"summary": str(summary)}
-        summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
-        try:
-            upload_content_directly(service, file_name, folder_id, summary_text)
-            print("summary已上傳到Google Drive")
-        except Exception as e:
-            error_msg = f" {video_id} 摘要錯誤: {str(e)}"
-            print("===get_video_id_summary error===")
-            print(error_msg)
-            print("===get_video_id_summary error===")
-        # 存在 local at OUTPUT_PATH as {video_id}_summary.json
-        # with open(f'{OUTPUT_PATH}/{video_id}_summary.json', 'w') as f:
-        #     f.write(summary_text)
-        #     print(f"summary已存在 local at {OUTPUT_PATH}/{video_id}_summary.json")
-        # file_id = upload_file_directly(service, file_name, folder_id, f'{OUTPUT_PATH}/{video_id}_summary.json')
-    else:
-        # 逐字稿已存在，下载逐字稿内容
-        print("summary已存在Google Drive中")
-        summary_text = download_file_as_string(service, file_id)
-        summary_json = json.loads(summary_text)
     return summary_json

     source = "gcs"
     questions = get_questions(video_id, formatted_simple_transcript, source)
     formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
+    summary_json = get_video_id_summary(video_id, formatted_simple_transcript, source)
     summary = summary_json["summary"]
     html_content = format_transcript_to_html(formatted_transcript)
     simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
     return summary_json
 # get video_id_summary.json content
def get_video_id_summary(video_id, df_string, source):
    """Return the summary JSON for a video, generating and caching it if absent.

    The summary is stored as ``{video_id}_summary.json`` in the backend chosen
    by ``source``. If the file already exists it is downloaded and parsed;
    otherwise a summary is generated from ``df_string``, uploaded, and returned.

    Args:
        video_id: Identifier used to build the storage folder and file name.
        df_string: Transcript text handed to ``generate_summarise``.
        source: Storage backend, either ``"gcs"`` or ``"drive"``.

    Returns:
        A dict: either the parsed content of the stored JSON file or a freshly
        built ``{"summary": <str>}``.

    Raises:
        ValueError: If ``source`` is neither ``"gcs"`` nor ``"drive"``.
    """
    # Validate up front: previously an unknown source skipped both branches
    # and failed at the final return with an UnboundLocalError.
    if source == "gcs":
        return _get_video_id_summary_from_gcs(video_id, df_string)
    if source == "drive":
        return _get_video_id_summary_from_drive(video_id, df_string)
    raise ValueError(
        f"Unsupported source {source!r}; expected 'gcs' or 'drive'"
    )


def _get_video_id_summary_from_gcs(video_id, df_string):
    """Load the summary JSON from GCS, generating and uploading it on a miss."""
    print("===get_video_id_summary on gcs===")
    gcs_client = init_gcs_client(GCS_KEY)
    bucket_name = 'video_ai_assistant'
    file_name = f'{video_id}_summary.json'
    summary_file_blob_name = f"{video_id}/{file_name}"

    # Summary already cached: download and parse it.
    if gcs_check_file_exists(gcs_client, bucket_name, summary_file_blob_name):
        print("summary已存在于GCS中")
        summary_text = download_blob_to_string(gcs_client, bucket_name, summary_file_blob_name)
        return json.loads(summary_text)

    # Cache miss: generate the summary from the transcript first. The previous
    # code passed the raw transcript to processed_video_summary_to_json (which
    # takes a summary) and never called generate_summarise; this now mirrors
    # the Drive path and yields the "summary" key the caller reads.
    summary = generate_summarise(df_string)
    summary_json = {"summary": str(summary)}
    summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
    upload_file_to_gcs_with_json_string(gcs_client, bucket_name, summary_file_blob_name, summary_text)
    print("summary已上传到GCS")
    return summary_json


def _get_video_id_summary_from_drive(video_id, df_string):
    """Load the summary JSON from Google Drive, generating and uploading it on a miss."""
    print("===get_video_id_summary===")
    service = init_drive_service()
    parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
    folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
    file_name = f'{video_id}_summary.json'

    # Check whether the summary file already exists in the video's folder.
    exists, file_id = check_file_exists(service, folder_id, file_name)
    if exists:
        print("summary已存在Google Drive中")
        summary_text = download_file_as_string(service, file_id)
        return json.loads(summary_text)

    summary = generate_summarise(df_string)
    summary_json = {"summary": str(summary)}
    summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
    # Upload is best-effort: a failure is logged and the freshly generated
    # summary is still returned to the caller.
    try:
        upload_content_directly(service, file_name, folder_id, summary_text)
        print("summary已上傳到Google Drive")
    except Exception as e:
        error_msg = f" {video_id} 摘要錯誤: {str(e)}"
        print("===get_video_id_summary error===")
        print(error_msg)
        print("===get_video_id_summary error===")
    return summary_json