Spaces:
Sleeping
Sleeping
def get_video_id_summary(video_id, df_string, source):
Browse files
app.py
CHANGED
@@ -521,7 +521,7 @@ def process_youtube_link(link):
|
|
521 |
source = "gcs"
|
522 |
questions = get_questions(video_id, formatted_simple_transcript, source)
|
523 |
formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
|
524 |
-
summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
|
525 |
summary = summary_json["summary"]
|
526 |
html_content = format_transcript_to_html(formatted_transcript)
|
527 |
simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
|
@@ -702,46 +702,64 @@ def processed_video_summary_to_json(summary):
|
|
702 |
return summary_json
|
703 |
|
704 |
# get video_id_summary.json content
|
705 |
-
def get_video_id_summary(video_id, df_string):
|
706 |
-
|
707 |
-
|
708 |
-
|
709 |
-
|
710 |
-
|
711 |
-
|
712 |
-
|
713 |
-
|
714 |
-
|
715 |
-
|
716 |
-
|
717 |
-
|
718 |
-
|
719 |
-
|
720 |
-
|
721 |
-
|
722 |
-
|
|
|
723 |
|
724 |
-
|
725 |
-
|
726 |
-
|
727 |
-
|
728 |
-
|
|
|
729 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
730 |
|
|
|
|
|
|
|
|
|
|
|
731 |
|
732 |
-
# 存在 local at OUTPUT_PATH as {video_id}_summary.json
|
733 |
-
# with open(f'{OUTPUT_PATH}/{video_id}_summary.json', 'w') as f:
|
734 |
-
# f.write(summary_text)
|
735 |
-
# print(f"summary已存在 local at {OUTPUT_PATH}/{video_id}_summary.json")
|
736 |
-
# file_id = upload_file_directly(service, file_name, folder_id, f'{OUTPUT_PATH}/{video_id}_summary.json')
|
737 |
|
738 |
|
739 |
-
|
740 |
-
|
741 |
-
|
742 |
-
|
743 |
-
|
744 |
-
|
|
|
|
|
|
|
|
|
|
|
745 |
return summary_json
|
746 |
|
747 |
|
|
|
521 |
source = "gcs"
|
522 |
questions = get_questions(video_id, formatted_simple_transcript, source)
|
523 |
formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
|
524 |
+
summary_json = get_video_id_summary(video_id, formatted_simple_transcript, source)
|
525 |
summary = summary_json["summary"]
|
526 |
html_content = format_transcript_to_html(formatted_transcript)
|
527 |
simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
|
|
|
702 |
return summary_json
|
703 |
|
704 |
# get video_id_summary.json content
|
705 |
+
def get_video_id_summary(video_id, df_string, source):
    """Return the summary JSON for a video, creating and storing it if missing.

    The summary is cached as ``{video_id}_summary.json`` in the backend
    selected by ``source``. If the file already exists there, it is
    downloaded and parsed; otherwise a new summary is generated from
    ``df_string`` and uploaded.

    Args:
        video_id: Identifier of the video; used to build the file name and
            the per-video folder / blob prefix.
        df_string: Text the summary is generated from (the caller passes the
            simplified transcript).
        source: Storage backend, either ``"gcs"`` or ``"drive"``.

    Returns:
        The parsed summary object. The Drive branch builds
        ``{"summary": <text>}`` for new summaries; the GCS branch returns
        whatever ``processed_video_summary_to_json`` produces (callers read
        its ``"summary"`` key).

    Raises:
        ValueError: If ``source`` is not ``"gcs"`` or ``"drive"``.
    """
    # Fail fast with a clear message; previously an unknown source fell
    # through both branches and crashed on the unbound `summary_json`.
    if source not in ("gcs", "drive"):
        raise ValueError(
            f"Unsupported source {source!r}; expected 'gcs' or 'drive'"
        )

    if source == "gcs":
        print("===get_video_id_summary on gcs===")
        gcs_client = init_gcs_client(GCS_KEY)
        bucket_name = 'video_ai_assistant'
        file_name = f'{video_id}_summary.json'
        summary_file_blob_name = f"{video_id}/{file_name}"
        # Check whether the summary file already exists in the bucket.
        is_summary_file_exists = gcs_check_file_exists(gcs_client, bucket_name, summary_file_blob_name)
        if not is_summary_file_exists:
            summary_json = processed_video_summary_to_json(df_string)
            summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, summary_file_blob_name, summary_text)
            print("summary已上传到GCS")
        else:
            # Summary already stored: download and parse it.
            print("summary已存在于GCS中")
            summary_text = download_blob_to_string(gcs_client, bucket_name, summary_file_blob_name)
            summary_json = json.loads(summary_text)

    elif source == "drive":
        print("===get_video_id_summary===")
        service = init_drive_service()
        parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
        folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
        file_name = f'{video_id}_summary.json'

        # Check whether the summary file already exists in the video's folder.
        exists, file_id = check_file_exists(service, folder_id, file_name)
        if not exists:
            summary = generate_summarise(df_string)
            summary_json = {"summary": str(summary)}
            summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)

            # Upload is best-effort: a failure is reported but the freshly
            # generated summary is still returned to the caller.
            try:
                upload_content_directly(service, file_name, folder_id, summary_text)
                print("summary已上傳到Google Drive")

            except Exception as e:
                error_msg = f" {video_id} 摘要錯誤: {str(e)}"
                print("===get_video_id_summary error===")
                print(error_msg)
                print("===get_video_id_summary error===")

        else:
            # Summary already stored: download and parse it.
            print("summary已存在Google Drive中")
            summary_text = download_file_as_string(service, file_id)
            summary_json = json.loads(summary_text)

    return summary_json
|
764 |
|
765 |
|