Chrunos committed on
Commit
8fe48f4
·
verified ·
1 Parent(s): 40fdd4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -20
app.py CHANGED
@@ -129,42 +129,56 @@ async def get_video_url(youtube_url: str):
129
  @app.get("/script")
130
  async def get_transcript(youtube_url: str):
131
  try:
132
- # 定义 ydl_opts
133
  ydl_opts = {
134
  'skip_download': True,
135
  'writesubtitles': True,
136
  'writeautomaticsub': True,
137
- 'subtitleslangs': ['en'], # 可以根据需要修改语言
138
- 'format': 'bestaudio/best',
139
- 'outtmpl': '%(id)s.%(ext)s',
140
- 'noplaylist': True
 
141
  }
142
- cookiefile = "firefox-cookies.txt"
 
143
  env_to_cookies_from_env("firefox-cookies.txt")
144
- ydl_opts["cookiefile"] = cookiefile
145
 
146
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
147
  info = ydl.extract_info(youtube_url, download=False)
 
 
 
148
  subtitles = info.get('subtitles', {})
 
149
  auto_subtitles = info.get('automatic_captions', {})
150
 
151
- transcript = None
152
- if subtitles:
153
- # 优先使用手动字幕
154
- for lang in ydl_opts['subtitleslangs']:
155
- if lang in subtitles:
156
- transcript = subtitles[lang]
157
- break
158
- if not transcript and auto_subtitles:
159
- # 如果没有手动字幕,使用自动生成的字幕
160
  for lang in ydl_opts['subtitleslangs']:
161
  if lang in auto_subtitles:
162
- transcript = auto_subtitles[lang]
163
  break
164
 
165
- if transcript:
166
- text = ' '.join([entry['text'] for entry in transcript])
167
- return jsonify({'transcript': text})
 
 
 
 
 
 
 
 
 
 
 
168
  else:
169
  return jsonify({'transcript': 'No transcript available'})
170
 
 
129
  @app.get("/script")
130
  async def get_transcript(youtube_url: str):
131
  try:
 
132
  ydl_opts = {
133
  'skip_download': True,
134
  'writesubtitles': True,
135
  'writeautomaticsub': True,
136
+ 'subtitleslangs': ['en'], # Adjust regex if needed, e.g., 'en.*'
137
+ 'subtitlesformat': 'srt', # Use 'srt' for easier parsing
138
+ 'outtmpl': '%(id)s', # Output template without ext to avoid conflicts
139
+ 'noplaylist': True,
140
+ 'cookiefile': "firefox-cookies.txt",
141
  }
142
+
143
+ # Load cookies (ensure this function correctly sets up the cookie file)
144
  env_to_cookies_from_env("firefox-cookies.txt")
 
145
 
146
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
147
  info = ydl.extract_info(youtube_url, download=False)
148
+ video_id = info['id']
149
+
150
+ # Check for manually created subtitles
151
  subtitles = info.get('subtitles', {})
152
+ # Check for auto-generated captions
153
  auto_subtitles = info.get('automatic_captions', {})
154
 
155
+ transcript_lang = None
156
+ # Prioritize manual subs
157
+ for lang in ydl_opts['subtitleslangs']:
158
+ if lang in subtitles:
159
+ transcript_lang = lang
160
+ break
161
+ # Fallback to auto subs
162
+ if not transcript_lang:
 
163
  for lang in ydl_opts['subtitleslangs']:
164
  if lang in auto_subtitles:
165
+ transcript_lang = lang
166
  break
167
 
168
+ if transcript_lang:
169
+ # Look for the downloaded subtitle file
170
+ subtitle_file = f"{video_id}.{transcript_lang}.srt"
171
+ if os.path.exists(subtitle_file):
172
+ with open(subtitle_file, 'r', encoding='utf-8') as f:
173
+ srt_content = f.read()
174
+ # Simple parsing to extract text (consider using a library like pysrt)
175
+ text = ' '.join(line.strip() for line in srt_content.split('\n')
176
+ if not line.strip().isdigit()
177
+ and '-->' not in line
178
+ and line.strip())
179
+ return jsonify({'transcript': text})
180
+ else:
181
+ return jsonify({'transcript': 'Subtitle file not found'})
182
  else:
183
  return jsonify({'transcript': 'No transcript available'})
184