Spaces:

JunyiAcademy
/

vaitor2

Sleeping

App Files Files Community

youngtsai committed on Feb 24, 2024

Commit

2df824b

1 Parent(s): 1491bd4

upload_file_to_gcs_with_json_string

Browse files

Files changed (1) hide show

app.py +25 -15

app.py CHANGED Viewed

@@ -93,12 +93,20 @@ def gcs_check_file_exists(gcs_client, bucket_name, file_name):
     blob = bucket.blob(file_name)
     return blob.exists()
-def upload_file_to_gcs(gcs_client, bucket_name, destination_blob_name, source_file_name):
     """上传文件到指定的 GCS 存储桶"""
     bucket = gcs_client.bucket(bucket_name)
     blob = bucket.blob(destination_blob_name)
-    blob.upload_from_filename(source_file_name)
-    print(f"File {source_file_name} uploaded to {destination_blob_name}.")
 def download_blob_to_string(gcs_client, bucket_name, source_blob_name):
     """从 GCS 下载文件内容到字符串"""
@@ -433,20 +441,22 @@ def process_transcript_and_screenshots_on_gcs(video_id):
             print("成功獲取字幕")
         else:
             print("沒有找到字幕")
-        transcript
-    # 处理逐字稿中的每个条目，检查并上传截图 到 GCS，然後設定 GCS 權限
-    for entry in transcript:
-        if 'img_file_id' not in entry:
-            screenshot_path = screenshot_youtube_video(video_id, entry['start'])
-            img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, f"{video_id}_{entry['start']}.jpg", screenshot_path)
-            entry['img_file_id'] = img_file_id
-            print(f"截图已上传到GCS: {img_file_id}")
-    # 更新逐字稿文件
-    updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
-    upload_file_to_gcs(gcs_client, bucket_name, file_name, updated_transcript_text)
-    print("逐字稿已更新，包括截图链接")
     return transcript

     blob = bucket.blob(file_name)
     return blob.exists()
+def upload_file_to_gcs(gcs_client, bucket_name, destination_blob_name, file_path):
     """上传文件到指定的 GCS 存储桶"""
     bucket = gcs_client.bucket(bucket_name)
     blob = bucket.blob(destination_blob_name)
+    blob.upload_from_filename(file_path)
+    print(f"File {file_path} uploaded to {destination_blob_name} in GCS.")
+def upload_file_to_gcs_with_json_string(gcs_client, bucket_name, destination_blob_name, json_string):
+    """上传字符串到指定的 GCS 存储桶"""
+    bucket = gcs_client.bucket(bucket_name)
+    blob = bucket.blob(destination_blob_name)
+    blob.upload_from_string(json_string)
+    print(f"JSON string uploaded to {destination_blob_name} in GCS.")
 def download_blob_to_string(gcs_client, bucket_name, source_blob_name):
     """从 GCS 下载文件内容到字符串"""
             print("成功獲取字幕")
         else:
             print("沒有找到字幕")
+        transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
+        destination_blob_name = f"{video_id}/{file_name}"
+        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, destination_blob_name, transcript_text)
+    # # 处理逐字稿中的每个条目，检查并上传截图 到 GCS，然後設定 GCS 權限
+    # for entry in transcript:
+    #     if 'img_file_id' not in entry:
+    #         screenshot_path = screenshot_youtube_video(video_id, entry['start'])
+    #         img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, f"{video_id}_{entry['start']}.jpg", screenshot_path)
+    #         entry['img_file_id'] = img_file_id
+    #         print(f"截图已上传到GCS: {img_file_id}")
+    # # 更新逐字稿文件
+    # updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
+    # upload_file_to_gcs(gcs_client, bucket_name, file_name, updated_transcript_text)
+    # print("逐字稿已更新，包括截图链接")
     return transcript