+verbosity
app.py CHANGED
@@ -15,15 +15,13 @@ import yt_dlp
 logging.basicConfig(level=logging.INFO)
 
 # Clone and install faster-whisper from GitHub
-# (we should be able to do this in build.sh in a hf space)
 try:
     subprocess.run(["git", "clone", "https://github.com/SYSTRAN/faster-whisper.git"], check=True)
     subprocess.run(["pip", "install", "-e", "./faster-whisper"], check=True)
 except subprocess.CalledProcessError as e:
-
+    logging.error(f"Error during faster-whisper installation: {e}")
     sys.exit(1)
 
-# Add the faster-whisper directory to the Python path
 sys.path.append("./faster-whisper")
 
 from faster_whisper import WhisperModel
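The hunk above installs faster-whisper by cloning it at process start, and the commit upgrades the except branch from a silent sys.exit(1) to one that logs the failure first. A minimal sketch (my assumption, not part of the commit) of how the same startup install can be made idempotent, so a restarted Space does not fail because ./faster-whisper already exists:

import logging
import os
import subprocess
import sys

# Sketch: skip the clone when the checkout is already present, then install
# into the interpreter that is actually running the app via sys.executable.
def install_faster_whisper(repo_dir: str = "./faster-whisper") -> None:
    if not os.path.isdir(repo_dir):
        subprocess.run(
            ["git", "clone", "https://github.com/SYSTRAN/faster-whisper.git", repo_dir],
            check=True,
        )
    subprocess.run([sys.executable, "-m", "pip", "install", "-e", repo_dir], check=True)

try:
    install_faster_whisper()
except subprocess.CalledProcessError as e:
    logging.error(f"Error during faster-whisper installation: {e}")
    sys.exit(1)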
@@ -159,16 +157,18 @@ def save_transcription(transcription):
 
 def get_model_options(pipeline_type):
     if pipeline_type == "faster-batched":
-        return ["cstr/whisper-large-v3-turbo-int8_float32"
+        return ["cstr/whisper-large-v3-turbo-int8_float32"]
     elif pipeline_type == "faster-sequenced":
-        return ["
+        return ["deepdml/faster-whisper-large-v3-turbo-ct2"]
     elif pipeline_type == "transformers":
-        return ["openai/whisper-large-v3"
-
-
-
+        return ["openai/whisper-large-v3"]
+    return []
+
 def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
     try:
+        logging.info(f"Transcription parameters: pipeline_type={pipeline_type}, model_id={model_id}, dtype={dtype}, batch_size={batch_size}, download_method={download_method}")
+        verbose_messages = f"Starting transcription with parameters:\nPipeline Type: {pipeline_type}\nModel ID: {model_id}\nData Type: {dtype}\nBatch Size: {batch_size}\nDownload Method: {download_method}\n"
+
         if pipeline_type == "faster-batched":
            model = WhisperModel(model_id, device="auto", compute_type=dtype)
            pipeline = BatchedInferencePipeline(model=model)
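For the "faster-batched" branch shown above, here is a short sketch of how faster-whisper's batched pipeline is typically driven. The model ID, audio filename, and batch size are illustrative; the transcribe call follows faster-whisper's documented API:

from faster_whisper import WhisperModel, BatchedInferencePipeline

# Load the CTranslate2 model, wrap it for batched inference, and iterate
# over the lazily decoded segments. "audio.mp3" and batch_size=16 are
# placeholder values.
model = WhisperModel(
    "cstr/whisper-large-v3-turbo-int8_float32",
    device="auto",
    compute_type="int8_float32",
)
pipeline = BatchedInferencePipeline(model=model)
segments, info = pipeline.transcribe("audio.mp3", batch_size=16)
for segment in segments:
    print(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")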
@@ -198,6 +198,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
 
         if isinstance(input_source, str) and (input_source.startswith('http://') or input_source.startswith('https://')):
             audio_path = download_audio(input_source, download_method)
+            verbose_messages += f"Audio file downloaded: {audio_path}\n"
             if audio_path.startswith("Error"):
                 yield f"Error: {audio_path}", "", None
                 return
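download_audio itself is not part of this diff, but the hunk reveals its contract: it returns a local file path on success and a string starting with "Error" on failure, which is why the caller checks audio_path.startswith("Error"). A hypothetical sketch of such a helper built on yt_dlp (the option names and output template are my assumptions, not the app's actual code):

import yt_dlp

# Hypothetical downloader matching the contract above: a path on success,
# an "Error..." string on failure.
def download_audio(url: str, download_method: str = "yt-dlp") -> str:
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": "downloaded_audio.%(ext)s",
        "quiet": True,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            return ydl.prepare_filename(info)
    except Exception as e:
        return f"Error downloading audio: {e}"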
@@ -207,6 +208,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
         if start_time is not None or end_time is not None:
             trimmed_audio_path = trim_audio(audio_path, start_time or 0, end_time)
             audio_path = trimmed_audio_path
+            verbose_messages += f"Audio trimmed from {start_time} to {end_time}\n"
 
         start_time_perf = time.time()
         if pipeline_type in ["faster-batched", "faster-sequenced"]:
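trim_audio is likewise defined elsewhere in app.py. A hypothetical sketch of what it plausibly does, assuming pydub (the real helper may shell out to ffmpeg instead); per the call site above, start_time defaults to 0 and end_time=None means "trim to the end":

from typing import Optional

from pydub import AudioSegment

# Hypothetical trim helper: times are in seconds, pydub slices in
# milliseconds, and the trimmed audio is written out as a new wav file.
def trim_audio(audio_path: str, start_time: float, end_time: Optional[float]) -> str:
    audio = AudioSegment.from_file(audio_path)
    start_ms = int(start_time * 1000)
    end_ms = int(end_time * 1000) if end_time is not None else len(audio)
    trimmed_path = "trimmed_audio.wav"
    audio[start_ms:end_ms].export(trimmed_path, format="wav")
    return trimmed_path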
@@ -225,7 +227,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
         )
 
         if verbose:
-            yield metrics_output, "", None
+            yield verbose_messages + metrics_output, "", None
 
         transcription = ""
 
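transcribe_audio is a generator: every yield hands Gradio a (metrics, transcription, file) tuple, so the UI streams progress while segments arrive, and the verbosity added by this commit simply prepends the accumulated verbose_messages to each yield. A minimal self-contained sketch of that streaming pattern (the component names and fake work loop are illustrative; in Gradio 3.x, generator outputs require the queue):

import time

import gradio as gr

# A generator wired to multiple outputs: Gradio refreshes all three
# components on every yield, giving the streaming-progress effect.
def fake_transcribe(verbose):
    messages = ""
    for i in range(3):
        time.sleep(1)  # stand-in for real transcription work
        messages += f"segment {i}\n"
        yield "metrics...", messages, None
    yield "metrics...", messages, None  # final yield could carry a file path

with gr.Blocks() as demo:
    verbose = gr.Checkbox(label="Verbose")
    btn = gr.Button("Run")
    metrics = gr.Textbox(label="Metrics")
    text = gr.Textbox(label="Transcription")
    out_file = gr.File(label="Download")
    btn.click(fake_transcribe, inputs=verbose, outputs=[metrics, text, out_file])

demo.queue().launch()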
@@ -237,12 +239,13 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
             )
             transcription += transcription_segment
             if verbose:
-                yield metrics_output, transcription, None
+                yield verbose_messages + metrics_output, transcription, None
 
         transcription_file = save_transcription(transcription)
-        yield metrics_output, transcription, transcription_file
+        yield verbose_messages + metrics_output, transcription, transcription_file
 
     except Exception as e:
+        logging.error(f"An error occurred during transcription: {str(e)}")
         yield f"An error occurred: {str(e)}", "", None
 
     finally:
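save_transcription (visible in the hunk header context) only needs to produce a path that the final yield's file output can serve. A hypothetical sketch, assuming a temporary file is acceptable on a Space; the real implementation is not shown in this diff:

import tempfile

# Hypothetical save helper: write the text to a temp .txt file and return
# its path for the gr.File download component.
def save_transcription(transcription: str) -> str:
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    ) as f:
        f.write(transcription)
        return f.name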
@@ -261,15 +264,6 @@ def update_model_dropdown(pipeline_type):
     model_choices = get_model_options(pipeline_type)
     return gr.Dropdown.update(choices=model_choices, value=model_choices[0])
 
-def get_model_options(pipeline_type):
-    if pipeline_type == "faster-batched":
-        return ["cstr/whisper-large-v3-turbo-int8_float32"]
-    elif pipeline_type == "faster-sequenced":
-        return ["deepdml/faster-whisper-large-v3-turbo-ct2"]
-    elif pipeline_type == "transformers":
-        return ["openai/whisper-large-v3"]
-    return []
-
 with gr.Blocks() as iface:
     gr.Markdown("# Multi-Pipeline Transcription")
     gr.Markdown("Transcribe audio using multiple pipelines and models.")
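The last hunk removes a second, duplicate get_model_options definition that shadowed the corrected one earlier in the file. What remains is update_model_dropdown feeding the model dropdown whenever the pipeline type changes; a self-contained sketch of that wiring (component names are illustrative, and gr.Dropdown.update() is the Gradio 3.x idiom; on Gradio 4.x one would return gr.update(choices=..., value=...) instead):

import gradio as gr

# Model lists mirror get_model_options in the diff above.
MODEL_OPTIONS = {
    "faster-batched": ["cstr/whisper-large-v3-turbo-int8_float32"],
    "faster-sequenced": ["deepdml/faster-whisper-large-v3-turbo-ct2"],
    "transformers": ["openai/whisper-large-v3"],
}

def update_model_dropdown(pipeline_type):
    choices = MODEL_OPTIONS.get(pipeline_type, [])
    return gr.Dropdown.update(choices=choices, value=choices[0] if choices else None)

with gr.Blocks() as demo:
    pipeline_type = gr.Dropdown(
        choices=list(MODEL_OPTIONS), value="faster-batched", label="Pipeline Type"
    )
    model_id = gr.Dropdown(choices=MODEL_OPTIONS["faster-batched"], label="Model")
    # Re-populate the model dropdown whenever the pipeline type changes.
    pipeline_type.change(update_model_dropdown, inputs=pipeline_type, outputs=model_id)

demo.launch()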