cstr committed on
Commit
57968e0
1 Parent(s): 26eb097
Files changed (1) hide show
  1. app.py +26 -4
app.py CHANGED
@@ -31,6 +31,7 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
31
 
32
  def download_audio(url, method_choice):
33
  parsed_url = urlparse(url)
 
34
  if parsed_url.netloc in ['www.youtube.com', 'youtu.be', 'youtube.com']:
35
  return download_youtube_audio(url, method_choice)
36
  else:
@@ -47,12 +48,14 @@ def download_youtube_audio(url, method_choice):
47
  }
48
  method = methods.get(method_choice, youtube_dl_method)
49
  try:
 
50
  return method(url)
51
  except Exception as e:
52
  logging.error(f"Error downloading using {method_choice}: {str(e)}")
53
  return None
54
 
55
  def youtube_dl_method(url):
 
56
  ydl_opts = {
57
  'format': 'bestaudio/best',
58
  'postprocessors': [{
@@ -64,9 +67,11 @@ def youtube_dl_method(url):
64
  }
65
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
66
  info = ydl.extract_info(url, download=True)
 
67
  return f"{info['id']}.mp3"
68
 
69
  def pytube_method(url):
 
70
  from pytube import YouTube
71
  yt = YouTube(url)
72
  audio_stream = yt.streams.filter(only_audio=True).first()
@@ -74,9 +79,11 @@ def pytube_method(url):
74
  base, ext = os.path.splitext(out_file)
75
  new_file = base + '.mp3'
76
  os.rename(out_file, new_file)
 
77
  return new_file
78
 
79
  def youtube_dl_classic_method(url):
 
80
  ydl_opts = {
81
  'format': 'bestaudio/best',
82
  'postprocessors': [{
@@ -88,9 +95,11 @@ def youtube_dl_classic_method(url):
88
  }
89
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
90
  info = ydl.extract_info(url, download=True)
 
91
  return f"{info['id']}.mp3"
92
 
93
  def youtube_dl_alternative_method(url):
 
94
  ydl_opts = {
95
  'format': 'bestaudio/best',
96
  'postprocessors': [{
@@ -106,21 +115,27 @@ def youtube_dl_alternative_method(url):
106
  }
107
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
108
  info = ydl.extract_info(url, download=True)
 
109
  return f"{info['id']}.mp3"
110
 
111
  def ffmpeg_method(url):
 
112
  output_file = tempfile.mktemp(suffix='.mp3')
113
  command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
114
  subprocess.run(command, check=True, capture_output=True)
 
115
  return output_file
116
 
117
  def aria2_method(url):
 
118
  output_file = tempfile.mktemp(suffix='.mp3')
119
  command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
120
  subprocess.run(command, check=True, capture_output=True)
 
121
  return output_file
122
 
123
  def download_direct_audio(url, method_choice):
 
124
  if method_choice == 'wget':
125
  return wget_method(url)
126
  else:
@@ -129,6 +144,7 @@ def download_direct_audio(url, method_choice):
129
  if response.status_code == 200:
130
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
131
  temp_file.write(response.content)
 
132
  return temp_file.name
133
  else:
134
  raise Exception(f"Failed to download audio from {url}")
@@ -137,32 +153,38 @@ def download_direct_audio(url, method_choice):
137
  return None
138
 
139
  def wget_method(url):
 
140
  output_file = tempfile.mktemp(suffix='.mp3')
141
  command = ['wget', '-O', output_file, url]
142
  subprocess.run(command, check=True, capture_output=True)
 
143
  return output_file
144
 
145
  def trim_audio(audio_path, start_time, end_time):
 
146
  audio = AudioSegment.from_file(audio_path)
147
  trimmed_audio = audio[start_time*1000:end_time*1000] if end_time else audio[start_time*1000:]
148
  trimmed_audio_path = tempfile.mktemp(suffix='.wav')
149
  trimmed_audio.export(trimmed_audio_path, format="wav")
 
150
  return trimmed_audio_path
151
 
152
  def save_transcription(transcription):
153
  file_path = tempfile.mktemp(suffix='.txt')
154
  with open(file_path, 'w') as f:
155
  f.write(transcription)
 
156
  return file_path
157
 
158
  def get_model_options(pipeline_type):
159
  if pipeline_type == "faster-batched":
160
- return ["cstr/whisper-large-v3-turbo-int8_float32"]
161
  elif pipeline_type == "faster-sequenced":
162
- return ["deepdml/faster-whisper-large-v3-turbo-ct2"]
163
  elif pipeline_type == "transformers":
164
- return ["openai/whisper-large-v3"]
165
- return []
 
166
 
167
  def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
168
  try:
 
31
 
32
  def download_audio(url, method_choice):
33
  parsed_url = urlparse(url)
34
+ logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
35
  if parsed_url.netloc in ['www.youtube.com', 'youtu.be', 'youtube.com']:
36
  return download_youtube_audio(url, method_choice)
37
  else:
 
48
  }
49
  method = methods.get(method_choice, youtube_dl_method)
50
  try:
51
+ logging.info(f"Attempting to download YouTube audio using {method_choice}")
52
  return method(url)
53
  except Exception as e:
54
  logging.error(f"Error downloading using {method_choice}: {str(e)}")
55
  return None
56
 
57
  def youtube_dl_method(url):
58
+ logging.info("Using yt-dlp method")
59
  ydl_opts = {
60
  'format': 'bestaudio/best',
61
  'postprocessors': [{
 
67
  }
68
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
69
  info = ydl.extract_info(url, download=True)
70
+ logging.info(f"Downloaded YouTube audio: {info['id']}.mp3")
71
  return f"{info['id']}.mp3"
72
 
73
  def pytube_method(url):
74
+ logging.info("Using pytube method")
75
  from pytube import YouTube
76
  yt = YouTube(url)
77
  audio_stream = yt.streams.filter(only_audio=True).first()
 
79
  base, ext = os.path.splitext(out_file)
80
  new_file = base + '.mp3'
81
  os.rename(out_file, new_file)
82
+ logging.info(f"Downloaded and converted audio to: {new_file}")
83
  return new_file
84
 
85
  def youtube_dl_classic_method(url):
86
+ logging.info("Using youtube-dl classic method")
87
  ydl_opts = {
88
  'format': 'bestaudio/best',
89
  'postprocessors': [{
 
95
  }
96
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
97
  info = ydl.extract_info(url, download=True)
98
+ logging.info(f"Downloaded YouTube audio: {info['id']}.mp3")
99
  return f"{info['id']}.mp3"
100
 
101
  def youtube_dl_alternative_method(url):
102
+ logging.info("Using yt-dlp alternative method")
103
  ydl_opts = {
104
  'format': 'bestaudio/best',
105
  'postprocessors': [{
 
115
  }
116
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
117
  info = ydl.extract_info(url, download=True)
118
+ logging.info(f"Downloaded YouTube audio: {info['id']}.mp3")
119
  return f"{info['id']}.mp3"
120
 
121
  def ffmpeg_method(url):
122
+ logging.info("Using ffmpeg method")
123
  output_file = tempfile.mktemp(suffix='.mp3')
124
  command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
125
  subprocess.run(command, check=True, capture_output=True)
126
+ logging.info(f"Downloaded and converted audio to: {output_file}")
127
  return output_file
128
 
129
  def aria2_method(url):
130
+ logging.info("Using aria2 method")
131
  output_file = tempfile.mktemp(suffix='.mp3')
132
  command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
133
  subprocess.run(command, check=True, capture_output=True)
134
+ logging.info(f"Downloaded audio to: {output_file}")
135
  return output_file
136
 
137
  def download_direct_audio(url, method_choice):
138
+ logging.info(f"Downloading direct audio from: {url} using method: {method_choice}")
139
  if method_choice == 'wget':
140
  return wget_method(url)
141
  else:
 
144
  if response.status_code == 200:
145
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
146
  temp_file.write(response.content)
147
+ logging.info(f"Downloaded direct audio to: {temp_file.name}")
148
  return temp_file.name
149
  else:
150
  raise Exception(f"Failed to download audio from {url}")
 
153
  return None
154
 
155
  def wget_method(url):
156
+ logging.info("Using wget method")
157
  output_file = tempfile.mktemp(suffix='.mp3')
158
  command = ['wget', '-O', output_file, url]
159
  subprocess.run(command, check=True, capture_output=True)
160
+ logging.info(f"Downloaded audio to: {output_file}")
161
  return output_file
162
 
163
  def trim_audio(audio_path, start_time, end_time):
164
+ logging.info(f"Trimming audio from {start_time} to {end_time}")
165
  audio = AudioSegment.from_file(audio_path)
166
  trimmed_audio = audio[start_time*1000:end_time*1000] if end_time else audio[start_time*1000:]
167
  trimmed_audio_path = tempfile.mktemp(suffix='.wav')
168
  trimmed_audio.export(trimmed_audio_path, format="wav")
169
+ logging.info(f"Trimmed audio saved to: {trimmed_audio_path}")
170
  return trimmed_audio_path
171
 
172
  def save_transcription(transcription):
173
  file_path = tempfile.mktemp(suffix='.txt')
174
  with open(file_path, 'w') as f:
175
  f.write(transcription)
176
+ logging.info(f"Transcription saved to: {file_path}")
177
  return file_path
178
 
179
  def get_model_options(pipeline_type):
180
  if pipeline_type == "faster-batched":
181
+ return ["cstr/whisper-large-v3-turbo-int8_float32", "deepdml/faster-whisper-large-v3-turbo-ct2", "Systran/faster-whisper-large-v3", "GalaktischeGurke/primeline-whisper-large-v3-german-ct2"]
182
  elif pipeline_type == "faster-sequenced":
183
+ return ["cstr/whisper-large-v3-turbo-int8_float32", "deepdml/faster-whisper-large-v3-turbo-ct2", "Systran/faster-whisper-large-v3", "GalaktischeGurke/primeline-whisper-large-v3-german-ct2"]
184
  elif pipeline_type == "transformers":
185
+ return ["openai/whisper-large-v3", "openai/whisper-large-v3-turbo", "primeline/whisper-large-v3-german"]
186
+ else:
187
+ return []
188
 
189
  def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
190
  try: