Commit
·
da83059
1
Parent(s):
7ed745d
Update app.py
Browse files
app.py
CHANGED
@@ -319,10 +319,11 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
|
|
319 |
*Processing time: {time_diff:.5} seconds.*
|
320 |
*GPU Utilization: {gpu_utilization}%, GPU Memory: {gpu_memory}MiB.*
|
321 |
"""
|
|
|
322 |
filename, _ = os.path.splitext(video_file_path)
|
323 |
filename = filename.replace(" ", "_")
|
324 |
filename = filename.replace("(", "_").replace(")", "_")
|
325 |
-
output_filename = f"{filename}_{
|
326 |
save_path = os.path.join("output", output_filename)
|
327 |
df_results = pd.DataFrame(objects)
|
328 |
df_results.to_csv(save_path)
|
@@ -339,7 +340,7 @@ youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
|
|
339 |
df_init = pd.DataFrame(columns=['Start', 'End', 'Speaker', 'Text'])
|
340 |
memory = psutil.virtual_memory()
|
341 |
selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", value="en", label="Spoken language in video", interactive=True)
|
342 |
-
selected_whisper_model = gr.Dropdown(choices=whisper_models, type="value", value="
|
343 |
number_speakers = gr.Number(precision=0, value=0, label="Input number of speakers for better results. If value=0, model will automatic find the best number of speakers", interactive=True)
|
344 |
system_info = gr.Markdown(f"*Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB*")
|
345 |
download_transcript = gr.File(label="Download transcript")
|
|
|
319 |
*Processing time: {time_diff:.5} seconds.*
|
320 |
*GPU Utilization: {gpu_utilization}%, GPU Memory: {gpu_memory}MiB.*
|
321 |
"""
|
322 |
+
selected_whisper_model_name = selected_whisper_model.value
|
323 |
filename, _ = os.path.splitext(video_file_path)
|
324 |
filename = filename.replace(" ", "_")
|
325 |
filename = filename.replace("(", "_").replace(")", "_")
|
326 |
+
output_filename = f"{filename}_{selected_whisper_model_name}.csv"
|
327 |
save_path = os.path.join("output", output_filename)
|
328 |
df_results = pd.DataFrame(objects)
|
329 |
df_results.to_csv(save_path)
|
|
|
340 |
df_init = pd.DataFrame(columns=['Start', 'End', 'Speaker', 'Text'])
|
341 |
memory = psutil.virtual_memory()
|
342 |
selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", value="en", label="Spoken language in video", interactive=True)
|
343 |
+
selected_whisper_model = gr.Dropdown(choices=whisper_models, type="value", value="large-v2", label="Selected Whisper model", interactive=True)
|
344 |
number_speakers = gr.Number(precision=0, value=0, label="Input number of speakers for better results. If value=0, model will automatic find the best number of speakers", interactive=True)
|
345 |
system_info = gr.Markdown(f"*Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB*")
|
346 |
download_transcript = gr.File(label="Download transcript")
|