Spaces:
Sleeping
Sleeping
print("逐字稿已存在于Google Drive中")
Browse files
app.py
CHANGED
@@ -108,6 +108,19 @@ def upload_content_directly(service, file_name, folder_id, content):
|
|
108 |
# 执行上传
|
109 |
service.files().create(body=file_metadata, media_body=media, fields='id').execute()
|
110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
|
112 |
def process_file(file):
|
113 |
# 读取文件
|
@@ -178,6 +191,7 @@ def process_youtube_link(link):
|
|
178 |
file_name = f"{video_id}_transcript.txt"
|
179 |
|
180 |
# 检查逐字稿是否存在
|
|
|
181 |
exists, file_id = check_file_exists(service, folder_id, file_name)
|
182 |
if not exists:
|
183 |
# 获取逐字稿
|
@@ -188,9 +202,9 @@ def process_youtube_link(link):
|
|
188 |
print("逐字稿已上传到Google Drive")
|
189 |
else:
|
190 |
print("逐字稿已存在于Google Drive中")
|
|
|
|
|
191 |
|
192 |
-
# 再取得 transcript
|
193 |
-
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['zh-TW'])
|
194 |
# 基于逐字稿生成其他所需的输出
|
195 |
questions = generate_questions(transcript)
|
196 |
df_summarise = generate_df_summarise(transcript)
|
|
|
108 |
# 执行上传
|
109 |
service.files().create(body=file_metadata, media_body=media, fields='id').execute()
|
110 |
|
111 |
+
def download_file_as_string(service, file_id):
    """Download a Google Drive file and return its contents as a string.

    Args:
        service: Drive API client; must expose ``files().get_media``.
        file_id: Identifier of the Drive file to download.

    Returns:
        The file's bytes decoded as UTF-8 text.
    """
    buffer = io.BytesIO()
    media_request = service.files().get_media(fileId=file_id)
    chunk_reader = MediaIoBaseDownload(buffer, media_request)

    # Pull chunks until the downloader reports that the transfer is complete.
    while True:
        _progress, finished = chunk_reader.next_chunk()
        if finished:
            break

    # getvalue() yields the entire buffer regardless of the stream position.
    return buffer.getvalue().decode('utf-8')
|
124 |
|
125 |
def process_file(file):
|
126 |
# 读取文件
|
|
|
191 |
file_name = f"{video_id}_transcript.txt"
|
192 |
|
193 |
# 检查逐字稿是否存在
|
194 |
+
transcript = None
|
195 |
exists, file_id = check_file_exists(service, folder_id, file_name)
|
196 |
if not exists:
|
197 |
# 获取逐字稿
|
|
|
202 |
print("逐字稿已上传到Google Drive")
|
203 |
else:
|
204 |
print("逐字稿已存在于Google Drive中")
|
205 |
+
transcript_text = download_file_as_string(service, file_id)
|
206 |
+
transcript = json.loads(transcript_text)
|
207 |
|
|
|
|
|
208 |
# 基于逐字稿生成其他所需的输出
|
209 |
questions = generate_questions(transcript)
|
210 |
df_summarise = generate_df_summarise(transcript)
|