Update app.py
Browse files
app.py
CHANGED
@@ -133,54 +133,51 @@ async def get_transcript(youtube_url: str):
|
|
133 |
'skip_download': True,
|
134 |
'writesubtitles': True,
|
135 |
'writeautomaticsub': True,
|
136 |
-
'subtitleslangs': ['en'], #
|
137 |
-
'subtitlesformat': '
|
138 |
-
'outtmpl': '%(id)s',
|
139 |
'noplaylist': True,
|
140 |
'cookiefile': "firefox-cookies.txt",
|
141 |
}
|
142 |
|
143 |
-
# Load cookies (ensure this function correctly sets up the cookie file)
|
144 |
env_to_cookies_from_env("firefox-cookies.txt")
|
145 |
|
146 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
147 |
info = ydl.extract_info(youtube_url, download=False)
|
148 |
video_id = info['id']
|
149 |
|
150 |
-
#
|
151 |
-
|
152 |
-
# Check for auto-generated captions
|
153 |
-
auto_subtitles = info.get('automatic_captions', {})
|
154 |
-
|
155 |
-
transcript_lang = None
|
156 |
-
# Prioritize manual subs
|
157 |
for lang in ydl_opts['subtitleslangs']:
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
if not transcript_lang:
|
163 |
-
for lang in ydl_opts['subtitleslangs']:
|
164 |
-
if lang in auto_subtitles:
|
165 |
-
transcript_lang = lang
|
166 |
break
|
|
|
|
|
167 |
|
168 |
-
if
|
169 |
-
|
170 |
-
subtitle_file = f"{video_id}.{transcript_lang}.srt"
|
171 |
if os.path.exists(subtitle_file):
|
172 |
with open(subtitle_file, 'r', encoding='utf-8') as f:
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
else:
|
181 |
-
return {
|
182 |
else:
|
183 |
-
return {
|
184 |
|
185 |
except Exception as e:
|
186 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
133 |
'skip_download': True,
|
134 |
'writesubtitles': True,
|
135 |
'writeautomaticsub': True,
|
136 |
+
'subtitleslangs': ['en'], # Try 'en.*' for all English variants
|
137 |
+
'subtitlesformat': 'best', # Let yt-dlp choose best available format
|
138 |
+
'outtmpl': '%(id)s', # Output template without extension
|
139 |
'noplaylist': True,
|
140 |
'cookiefile': "firefox-cookies.txt",
|
141 |
}
|
142 |
|
|
|
143 |
env_to_cookies_from_env("firefox-cookies.txt")
|
144 |
|
145 |
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
146 |
info = ydl.extract_info(youtube_url, download=False)
|
147 |
video_id = info['id']
|
148 |
|
149 |
+
# Find available subtitle format
|
150 |
+
sub_ext = None
|
|
|
|
|
|
|
|
|
|
|
151 |
for lang in ydl_opts['subtitleslangs']:
|
152 |
+
for sub_type in ['subtitles', 'automatic_captions']:
|
153 |
+
subs = info.get(sub_type, {}).get(lang, [])
|
154 |
+
if subs:
|
155 |
+
sub_ext = subs[0].get('ext', 'vtt')
|
|
|
|
|
|
|
|
|
156 |
break
|
157 |
+
if sub_ext:
|
158 |
+
break
|
159 |
|
160 |
+
if sub_ext:
|
161 |
+
subtitle_file = f"{video_id}.{lang}.{sub_ext}"
|
|
|
162 |
if os.path.exists(subtitle_file):
|
163 |
with open(subtitle_file, 'r', encoding='utf-8') as f:
|
164 |
+
content = f.read()
|
165 |
+
|
166 |
+
# Parse both VTT and SRT formats
|
167 |
+
lines = content.split('\n')
|
168 |
+
text_lines = []
|
169 |
+
for line in lines:
|
170 |
+
# Skip timestamps and metadata
|
171 |
+
if '-->' in line or line.strip().isdigit() or line.startswith('WEBVTT'):
|
172 |
+
continue
|
173 |
+
if line.strip():
|
174 |
+
text_lines.append(line.strip())
|
175 |
+
|
176 |
+
return {"transcript": ' '.join(text_lines)}
|
177 |
else:
|
178 |
+
return {"transcript": f"Found subtitles but file {subtitle_file} missing"}
|
179 |
else:
|
180 |
+
return {"transcript": "No subtitles available in requested languages"}
|
181 |
|
182 |
except Exception as e:
|
183 |
raise HTTPException(status_code=500, detail=str(e))
|