Spaces:
Sleeping
Sleeping
upload_file_to_gcs_with_json_string
Browse files
app.py
CHANGED
@@ -93,12 +93,20 @@ def gcs_check_file_exists(gcs_client, bucket_name, file_name):
|
|
93 |
blob = bucket.blob(file_name)
|
94 |
return blob.exists()
|
95 |
|
96 |
-
def upload_file_to_gcs(gcs_client, bucket_name, destination_blob_name,
|
97 |
"""上传文件到指定的 GCS 存储桶"""
|
98 |
bucket = gcs_client.bucket(bucket_name)
|
99 |
blob = bucket.blob(destination_blob_name)
|
100 |
-
blob.upload_from_filename(
|
101 |
-
print(f"File {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
def download_blob_to_string(gcs_client, bucket_name, source_blob_name):
|
104 |
"""从 GCS 下载文件内容到字符串"""
|
@@ -433,20 +441,22 @@ def process_transcript_and_screenshots_on_gcs(video_id):
|
|
433 |
print("成功獲取字幕")
|
434 |
else:
|
435 |
print("沒有找到字幕")
|
436 |
-
transcript
|
|
|
|
|
437 |
|
438 |
-
# 处理逐字稿中的每个条目,检查并上传截图 到 GCS,然後設定 GCS 權限
|
439 |
-
for entry in transcript:
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
|
446 |
-
# 更新逐字稿文件
|
447 |
-
updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
448 |
-
upload_file_to_gcs(gcs_client, bucket_name, file_name, updated_transcript_text)
|
449 |
-
print("逐字稿已更新,包括截图链接")
|
450 |
|
451 |
return transcript
|
452 |
|
|
|
93 |
blob = bucket.blob(file_name)
|
94 |
return blob.exists()
|
95 |
|
96 |
+
def upload_file_to_gcs(gcs_client, bucket_name, destination_blob_name, file_path):
    """Upload a local file to the given GCS bucket.

    Args:
        gcs_client: Client object exposing ``bucket(name)``; presumably a
            ``google.cloud.storage.Client`` -- confirm against the caller.
        bucket_name: Name of the bucket to upload into.
        destination_blob_name: Object name to create inside the bucket.
        file_path: Path of the local file handed to ``upload_from_filename``.
    """
    # Resolve the target object in one chained lookup, then push the file.
    target_blob = gcs_client.bucket(bucket_name).blob(destination_blob_name)
    target_blob.upload_from_filename(file_path)
    print(f"File {file_path} uploaded to {destination_blob_name} in GCS.")
|
102 |
+
|
103 |
+
def upload_file_to_gcs_with_json_string(gcs_client, bucket_name, destination_blob_name, json_string):
    """Upload a JSON string as an object in the given GCS bucket.

    Args:
        gcs_client: Client object exposing ``bucket(name)``; presumably a
            ``google.cloud.storage.Client`` -- confirm against the caller.
        bucket_name: Name of the bucket to upload into.
        destination_blob_name: Object name to create inside the bucket.
        json_string: Already-serialized JSON text to store as the object body.
    """
    bucket = gcs_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    # Label the payload as JSON explicitly; without content_type the upload
    # falls back to the library default ("text/plain").
    blob.upload_from_string(json_string, content_type="application/json")
    print(f"JSON string uploaded to {destination_blob_name} in GCS.")
|
109 |
+
|
110 |
|
111 |
def download_blob_to_string(gcs_client, bucket_name, source_blob_name):
|
112 |
"""从 GCS 下载文件内容到字符串"""
|
|
|
441 |
print("成功獲取字幕")
|
442 |
else:
|
443 |
print("沒有找到字幕")
|
444 |
+
transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
445 |
+
destination_blob_name = f"{video_id}/{file_name}"
|
446 |
+
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, destination_blob_name, transcript_text)
|
447 |
|
448 |
+
# # 处理逐字稿中的每个条目,检查并上传截图 到 GCS,然後設定 GCS 權限
|
449 |
+
# for entry in transcript:
|
450 |
+
# if 'img_file_id' not in entry:
|
451 |
+
# screenshot_path = screenshot_youtube_video(video_id, entry['start'])
|
452 |
+
# img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, f"{video_id}_{entry['start']}.jpg", screenshot_path)
|
453 |
+
# entry['img_file_id'] = img_file_id
|
454 |
+
# print(f"截图已上传到GCS: {img_file_id}")
|
455 |
|
456 |
+
# # 更新逐字稿文件
|
457 |
+
# updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
458 |
+
# upload_file_to_gcs(gcs_client, bucket_name, file_name, updated_transcript_text)
|
459 |
+
# print("逐字稿已更新,包括截图链接")
|
460 |
|
461 |
return transcript
|
462 |
|