Update app.py
Browse files
app.py
CHANGED
@@ -129,42 +129,56 @@ async def get_video_url(youtube_url: str):
|
|
129 |
@app.get("/script")
|
130 |
async def get_transcript(youtube_url: str):
|
131 |
try:
|
132 |
-
# 定义 ydl_opts
|
133 |
ydl_opts = {
|
134 |
'skip_download': True,
|
135 |
'writesubtitles': True,
|
136 |
'writeautomaticsub': True,
|
137 |
-
'subtitleslangs': ['en'], #
|
138 |
-
'
|
139 |
-
'outtmpl': '%(id)s
|
140 |
-
'noplaylist': True
|
|
|
141 |
}
|
142 |
-
|
|
|
143 |
env_to_cookies_from_env("firefox-cookies.txt")
|
144 |
-
ydl_opts["cookiefile"] = cookiefile
|
145 |
|
146 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
147 |
info = ydl.extract_info(youtube_url, download=False)
|
|
|
|
|
|
|
148 |
subtitles = info.get('subtitles', {})
|
|
|
149 |
auto_subtitles = info.get('automatic_captions', {})
|
150 |
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
if not
|
159 |
-
# 如果没有手动字幕,使用自动生成的字幕
|
160 |
for lang in ydl_opts['subtitleslangs']:
|
161 |
if lang in auto_subtitles:
|
162 |
-
|
163 |
break
|
164 |
|
165 |
-
if
|
166 |
-
|
167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
else:
|
169 |
return jsonify({'transcript': 'No transcript available'})
|
170 |
|
|
|
129 |
@app.get("/script")
|
130 |
async def get_transcript(youtube_url: str):
|
131 |
try:
|
|
|
132 |
ydl_opts = {
|
133 |
'skip_download': True,
|
134 |
'writesubtitles': True,
|
135 |
'writeautomaticsub': True,
|
136 |
+
'subtitleslangs': ['en'], # Adjust regex if needed, e.g., 'en.*'
|
137 |
+
'subtitlesformat': 'srt', # Use 'srt' for easier parsing
|
138 |
+
'outtmpl': '%(id)s', # Output template without ext to avoid conflicts
|
139 |
+
'noplaylist': True,
|
140 |
+
'cookiefile': "firefox-cookies.txt",
|
141 |
}
|
142 |
+
|
143 |
+
# Load cookies (ensure this function correctly sets up the cookie file)
|
144 |
env_to_cookies_from_env("firefox-cookies.txt")
|
|
|
145 |
|
146 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
147 |
info = ydl.extract_info(youtube_url, download=False)
|
148 |
+
video_id = info['id']
|
149 |
+
|
150 |
+
# Check for manually created subtitles
|
151 |
subtitles = info.get('subtitles', {})
|
152 |
+
# Check for auto-generated captions
|
153 |
auto_subtitles = info.get('automatic_captions', {})
|
154 |
|
155 |
+
transcript_lang = None
|
156 |
+
# Prioritize manual subs
|
157 |
+
for lang in ydl_opts['subtitleslangs']:
|
158 |
+
if lang in subtitles:
|
159 |
+
transcript_lang = lang
|
160 |
+
break
|
161 |
+
# Fallback to auto subs
|
162 |
+
if not transcript_lang:
|
|
|
163 |
for lang in ydl_opts['subtitleslangs']:
|
164 |
if lang in auto_subtitles:
|
165 |
+
transcript_lang = lang
|
166 |
break
|
167 |
|
168 |
+
if transcript_lang:
|
169 |
+
# Look for the downloaded subtitle file
|
170 |
+
subtitle_file = f"{video_id}.{transcript_lang}.srt"
|
171 |
+
if os.path.exists(subtitle_file):
|
172 |
+
with open(subtitle_file, 'r', encoding='utf-8') as f:
|
173 |
+
srt_content = f.read()
|
174 |
+
# Simple parsing to extract text (consider using a library like pysrt)
|
175 |
+
text = ' '.join(line.strip() for line in srt_content.split('\n')
|
176 |
+
if not line.strip().isdigit()
|
177 |
+
and '-->' not in line
|
178 |
+
and line.strip())
|
179 |
+
return jsonify({'transcript': text})
|
180 |
+
else:
|
181 |
+
return jsonify({'transcript': 'Subtitle file not found'})
|
182 |
else:
|
183 |
return jsonify({'transcript': 'No transcript available'})
|
184 |
|