Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# import whisper
|
2 |
from faster_whisper import WhisperModel
|
3 |
import datetime
|
@@ -345,70 +350,33 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
|
|
345 |
raise RuntimeError("Error Running inference with local model", e)
|
346 |
|
347 |
|
348 |
-
#
|
349 |
-
|
350 |
-
|
351 |
-
youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
|
352 |
-
df_init = pd.DataFrame(columns=['Start', 'End', 'Speaker', 'Text'])
|
353 |
-
memory = psutil.virtual_memory()
|
354 |
-
selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", value="en", label="Spoken language in video", interactive=True)
|
355 |
-
selected_whisper_model = gr.Dropdown(choices=whisper_models, type="value", value="base", label="Selected Whisper model", interactive=True)
|
356 |
-
number_speakers = gr.Number(precision=0, value=0, label="Input number of speakers for better results. If value=0, model will automatic find the best number of speakers", interactive=True)
|
357 |
-
system_info = gr.Markdown(f"*Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB*")
|
358 |
-
download_transcript = gr.File(label="Download transcript")
|
359 |
-
transcription_df = gr.DataFrame(value=df_init,label="Transcription dataframe", row_count=(0, "dynamic"), max_rows = 10, wrap=True, overflow_row_behaviour='paginate')
|
360 |
-
title = "Whisper speaker diarization"
|
361 |
-
demo = gr.Blocks(title=title)
|
362 |
-
demo.encrypt = False
|
363 |
-
|
364 |
-
|
365 |
-
with demo:
|
366 |
-
with gr.Tab("Med Speech Pro"):
|
367 |
-
gr.Markdown('''
|
368 |
-
<div>
|
369 |
-
<h1 style='text-align: center'>Med Speech Pro : Lightning-Fast</h1>
|
370 |
-
Description: Experience Rapid Speech Recognition and Seamless Speaker identification With SpeechPro, a cutting-edge solution for accurate Medical Transcription
|
371 |
-
</div>
|
372 |
-
''')
|
373 |
-
with gr.Row():
|
374 |
-
with gr.Column():
|
375 |
-
youtube_url_in.render()
|
376 |
-
download_youtube_btn = gr.Button("Download Youtube video")
|
377 |
-
download_youtube_btn.click(get_youtube, [youtube_url_in], [
|
378 |
-
video_in])
|
379 |
-
print(video_in)
|
380 |
-
|
381 |
-
|
382 |
-
with gr.Row():
|
383 |
-
with gr.Column():
|
384 |
-
video_in.render()
|
385 |
-
with gr.Column():
|
386 |
-
gr.Markdown('''.
|
387 |
-
''')
|
388 |
-
selected_source_lang.render()
|
389 |
-
selected_whisper_model.render()
|
390 |
-
number_speakers.render()
|
391 |
-
transcribe_btn = gr.Button("Transcribe Now")
|
392 |
-
transcribe_btn.click(speech_to_text,
|
393 |
-
[video_in, selected_source_lang, selected_whisper_model, number_speakers],
|
394 |
-
[transcription_df, system_info, download_transcript]
|
395 |
-
)
|
396 |
-
|
397 |
-
with gr.Row():
|
398 |
-
gr.Markdown('''
|
399 |
-
##### Results
|
400 |
-
##### ''')
|
401 |
-
|
402 |
|
403 |
-
|
404 |
-
|
405 |
-
download_transcript.render()
|
406 |
-
transcription_df.render()
|
407 |
-
system_info.render()
|
408 |
-
gr.Markdown('''<center><img src='https://visitor-badge.glitch.me/badge?page_id=WhisperDiarizationSpeakers' alt='visitor badge'><a href="https://opensource.org/licenses/Apache-2.0"><img src='https://img.shields.io/badge/License-Apache_2.0-blue.svg' alt='License: Apache 2.0'></center>''')
|
409 |
|
410 |
-
|
411 |
-
|
412 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
413 |
|
414 |
-
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import os
|
4 |
+
import datetime
|
5 |
+
import subprocess
|
6 |
# import whisper
|
7 |
from faster_whisper import WhisperModel
|
8 |
import datetime
|
|
|
350 |
raise RuntimeError("Error Running inference with local model", e)
|
351 |
|
352 |
|
353 |
+
# Streamlit app layout
#
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so anything that must survive between button clicks (such as
# the path of a downloaded YouTube video) is kept in ``st.session_state``.
st.title('Med Speech Pro : Lightning-Fast')
st.markdown('Experience Rapid Speech Recognition and Seamless Speaker identification With SpeechPro, a cutting-edge solution for accurate Medical Transcription')


def _save_upload_to_disk(uploaded_file):
    """Copy a Streamlit upload to a temporary file and return its path.

    ``speech_to_text`` takes a ``video_file_path``, while ``st.file_uploader``
    hands back an in-memory file-like object, so the bytes have to be written
    to disk first. The original extension is kept on the temporary file.

    The caller is responsible for deleting the returned file.
    """
    # Local import so this block does not depend on a file-level import.
    import tempfile

    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(uploaded_file.getbuffer())
    return tmp.name


# Handling YouTube URL input
youtube_url = st.text_input("Enter YouTube URL")

if st.button('Download YouTube Video'):
    if youtube_url.strip():
        # NOTE(review): get_youtube is assumed to return the local path of the
        # downloaded video (its result was previously fed to st.video) - confirm.
        st.session_state["youtube_video_path"] = get_youtube(youtube_url.strip())
    else:
        st.error("Please enter a YouTube URL first.")

# Read the path back from session state so the video stays visible, and
# usable for transcription, after later reruns of the script.
youtube_video_path = st.session_state.get("youtube_video_path")
if youtube_video_path:
    st.video(youtube_video_path)

# File Uploader for videos
video_file = st.file_uploader("Upload a video file", type=["mp4", "avi", "mov"])

selected_source_lang = st.selectbox("Select Spoken Language in Video", source_language_list)
selected_whisper_model = st.selectbox("Select Whisper Model", whisper_models)
number_of_speakers = st.number_input("Number of Speakers (0 for automatic detection)", min_value=0, value=0)

if st.button('Transcribe Video'):
    # An explicit upload takes precedence over a previously downloaded video.
    uploaded_copy = _save_upload_to_disk(video_file) if video_file is not None else None
    video_file_path = uploaded_copy or youtube_video_path

    if video_file_path:
        try:
            # Process the video file
            df_results, system_info, save_path = speech_to_text(
                video_file_path,
                selected_source_lang,
                selected_whisper_model,
                number_of_speakers,
            )
        finally:
            # Only the temporary copy of an upload is removed; a downloaded
            # YouTube file is left in place for reuse on later reruns.
            if uploaded_copy is not None and os.path.exists(uploaded_copy):
                os.remove(uploaded_copy)

        st.dataframe(df_results)
        st.markdown(system_info)
        # Serve the saved transcript byte-for-byte; round-tripping it through
        # pandas would add an extra unnamed index column to the CSV.
        with open(save_path, "rb") as transcript_file:
            st.download_button('Download Transcript', data=transcript_file.read(), file_name='transcript.csv')
    else:
        st.error("Please upload a video file or download one from YouTube.")
|
381 |
|
382 |
+
# Additional components and functionalities can be added here as needed.
|