Spaces:
Sleeping
Sleeping
extract_youtube_id
Browse files
app.py
CHANGED
@@ -15,6 +15,9 @@ import os
|
|
15 |
from google.oauth2 import service_account
|
16 |
from googleapiclient.discovery import build
|
17 |
|
|
|
|
|
|
|
18 |
# 假设您的环境变量或Secret的名称是GOOGLE_APPLICATION_CREDENTIALS_JSON
|
19 |
credentials_json_string = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
|
20 |
credentials_dict = json.loads(credentials_json_string)
|
@@ -86,10 +89,24 @@ def format_seconds_to_time(seconds):
|
|
86 |
seconds = int(seconds % 60)
|
87 |
return f"{hours:02}:{minutes:02}:{seconds:02}"
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
def process_youtube_link(link):
|
90 |
# 使用 YouTube API 获取逐字稿
|
91 |
# 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
|
92 |
-
video_id = link
|
|
|
93 |
# 先下載 video
|
94 |
download_youtube_video(video_id, output_path=OUTPUT_PATH)
|
95 |
# 再取得 transcript
|
|
|
15 |
from google.oauth2 import service_account
|
16 |
from googleapiclient.discovery import build
|
17 |
|
18 |
+
from urllib.parse import urlparse, parse_qs
|
19 |
+
|
20 |
+
|
21 |
# 假设您的环境变量或Secret的名称是GOOGLE_APPLICATION_CREDENTIALS_JSON
|
22 |
credentials_json_string = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
|
23 |
credentials_dict = json.loads(credentials_json_string)
|
|
|
89 |
seconds = int(seconds % 60)
|
90 |
return f"{hours:02}:{minutes:02}:{seconds:02}"
|
91 |
|
92 |
+
def extract_youtube_id(url):
    """Return the video ID contained in a YouTube URL, or None if there is none.

    Recognised forms:
      * ``youtube.com`` (and any subdomain) with a ``v`` query parameter,
        e.g. ``https://www.youtube.com/watch?v=<id>``
      * ``youtube.com`` path-based links: ``/embed/<id>``, ``/shorts/<id>``,
        ``/live/<id>`` and ``/v/<id>``
      * ``youtu.be/<id>`` short links

    Args:
        url: The link to inspect. A missing scheme is tolerated
            (``youtube.com/watch?v=<id>``).

    Returns:
        The video ID as a string, or None when ``url`` is not a string, is
        empty, cannot be parsed, is not hosted on a recognised YouTube
        domain, or carries no video ID.
    """
    if not isinstance(url, str):
        return None
    candidate = url.strip()
    if not candidate:
        return None

    # urlparse only populates the host when a "//" authority marker is
    # present, so scheme-less links get a default scheme first. Only the part
    # before the query string is checked, so a "://" inside a parameter value
    # does not count as a scheme.
    before_query = candidate.split("?", 1)[0]
    if "://" not in before_query and not candidate.startswith("//"):
        candidate = "https://" + candidate

    try:
        parsed_url = urlparse(candidate)
        # .hostname is lower-cased and has any port / credentials removed.
        host = parsed_url.hostname or ""
    except ValueError:
        # Malformed authority section (e.g. unbalanced IPv6 brackets).
        return None

    path_parts = [part for part in parsed_url.path.split("/") if part]

    # Compare the host exactly (or as a parent domain) instead of by substring
    # so look-alike hosts such as "notyoutube.com" are rejected.
    if host == "youtu.be" or host.endswith(".youtu.be"):
        # Short link: the video ID is the first path segment.
        return path_parts[0] if path_parts else None

    if host == "youtube.com" or host.endswith(".youtube.com"):
        # Standard link: the video ID is in the 'v' query parameter.
        # parse_qs drops blank values, so "v=" yields no entry.
        ids = parse_qs(parsed_url.query).get("v")
        if ids and ids[0]:
            return ids[0]
        # Path-based link: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>.
        id_prefixes = ("embed", "shorts", "live", "v")
        if len(path_parts) >= 2 and path_parts[0] in id_prefixes:
            return path_parts[1]
        return None

    return None
|
104 |
+
|
105 |
def process_youtube_link(link):
|
106 |
# 使用 YouTube API 获取逐字稿
|
107 |
# 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
|
108 |
+
video_id = extract_youtube_id(link)
|
109 |
+
|
110 |
# 先下載 video
|
111 |
download_youtube_video(video_id, output_path=OUTPUT_PATH)
|
112 |
# 再取得 transcript
|