0mid committed on
Commit
45edaec
·
1 Parent(s): 216adac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -32
app.py CHANGED
@@ -1,8 +1,3 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import os
4
- import datetime
5
- import subprocess
6
  # import whisper
7
  from faster_whisper import WhisperModel
8
  import datetime
@@ -350,33 +345,70 @@ def speech_to_text(video_file_path, selected_source_lang, whisper_model, num_spe
350
  raise RuntimeError("Error Running inference with local model", e)
351
 
352
 
353
- # Streamlit app layout
354
- st.title('Med Speech Pro : Lightning-Fast')
355
- st.markdown('Experience Rapid Speech Recognition and Seamless Speaker identification With SpeechPro, a cutting-edge solution for accurate Medical Transcription')
356
-
357
- # Handling YouTube URL input
358
- youtube_url = st.text_input("Enter YouTube URL")
359
-
360
- if st.button('Download YouTube Video'):
361
- # Call your function to handle YouTube video downloading
362
- video_file_path = get_youtube(youtube_url)
363
- st.video(video_file_path)
364
-
365
- # File Uploader for videos
366
- video_file = st.file_uploader("Upload a video file", type=["mp4", "avi", "mov"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
368
- selected_source_lang = st.selectbox("Select Spoken Language in Video", source_language_list)
369
- selected_whisper_model = st.selectbox("Select Whisper Model", whisper_models)
370
- number_of_speakers = st.number_input("Number of Speakers (0 for automatic detection)", min_value=0, value=0)
 
 
 
371
 
372
- if st.button('Transcribe Video'):
373
- if video_file is not None:
374
- # Process the video file
375
- df_results, system_info, save_path = speech_to_text(video_file, selected_source_lang, selected_whisper_model, number_of_speakers)
376
- st.dataframe(df_results)
377
- st.markdown(system_info)
378
- st.download_button('Download Transcript', data=pd.read_csv(save_path).to_csv(), file_name='transcript.csv')
379
- else:
380
- st.error("Please upload a video file or download one from YouTube.")
381
 
382
- # Additional components and functionalities can be added here as needed.
 
 
 
 
 
 
1
  # import whisper
2
  from faster_whisper import WhisperModel
3
  import datetime
 
345
  raise RuntimeError("Error Running inference with local model", e)
346
 
347
 
348
+ # ---- Gradio Layout -----
349
+ # Inspiration from https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles
350
+ video_in = gr.Video(label="Video file", mirror_webcam=False)
351
+ youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
352
+ df_init = pd.DataFrame(columns=['Start', 'End', 'Speaker', 'Text'])
353
+ memory = psutil.virtual_memory()
354
+ selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", value="en", label="Spoken language in video", interactive=True)
355
+ selected_whisper_model = gr.Dropdown(choices=whisper_models, type="value", value="base", label="Selected Whisper model", interactive=True)
356
+ number_speakers = gr.Number(precision=0, value=0, label="Input number of speakers for better results. If value=0, model will automatic find the best number of speakers", interactive=True)
357
+ system_info = gr.Markdown(f"*Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB*")
358
+ download_transcript = gr.File(label="Download transcript")
359
+ transcription_df = gr.DataFrame(value=df_init,label="Transcription dataframe", row_count=(0, "dynamic"), max_rows = 10, wrap=True, overflow_row_behaviour='paginate')
360
+ title = "Whisper speaker diarization"
361
+ demo = gr.Blocks(title=title)
362
+ demo.encrypt = False
363
+
364
+
365
+ with demo:
366
+ with gr.Tab("Med Speech Pro"):
367
+ gr.Markdown('''
368
+ <div>
369
+ <h1 style='text-align: center'>Med Speech Pro : Lightning-Fast</h1>
370
+ Description: Experience Rapid Speech Recognition and Seamless Speaker identification With SpeechPro, a cutting-edge solution for accurate Medical Transcription
371
+ </div>
372
+ ''')
373
+ with gr.Row():
374
+ with gr.Column():
375
+ youtube_url_in.render()
376
+ download_youtube_btn = gr.Button("Download Youtube video")
377
+ download_youtube_btn.click(get_youtube, [youtube_url_in], [
378
+ video_in])
379
+ print(video_in)
380
+
381
+
382
+ with gr.Row():
383
+ with gr.Column():
384
+ video_in.render()
385
+ with gr.Column():
386
+ gr.Markdown('''.
387
+ ''')
388
+ selected_source_lang.render()
389
+ selected_whisper_model.render()
390
+ number_speakers.render()
391
+ transcribe_btn = gr.Button("Transcribe Now")
392
+ transcribe_btn.click(speech_to_text,
393
+ [video_in, selected_source_lang, selected_whisper_model, number_speakers],
394
+ [transcription_df, system_info, download_transcript]
395
+ )
396
+
397
+ with gr.Row():
398
+ gr.Markdown('''
399
+ ##### Results
400
+ ##### ''')
401
+
402
 
403
+ with gr.Row():
404
+ with gr.Column():
405
+ download_transcript.render()
406
+ transcription_df.render()
407
+ system_info.render()
408
+ gr.Markdown('''<center><img src='https://visitor-badge.glitch.me/badge?page_id=WhisperDiarizationSpeakers' alt='visitor badge'><a href="https://opensource.org/licenses/Apache-2.0"><img src='https://img.shields.io/badge/License-Apache_2.0-blue.svg' alt='License: Apache 2.0'></center>''')
409
 
410
+
411
+
412
+
 
 
 
 
 
 
413
 
414
+ demo.launch(debug=True,share=True)