Whisper_speaker_diarization2

Runtime error

App Files Files Community

raihanrifaldi committed on Dec 21, 2023

Commit

f82a6ed

1 Parent(s): 5e779c8

update title

Browse files

Files changed (1) hide show

app.py +7 -6

app.py CHANGED Viewed

@@ -351,9 +351,9 @@ video_in = gr.Video(label="Video file", mirror_webcam=False)
 youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
 df_init = pd.DataFrame(columns=['Start', 'End', 'Speaker', 'Text'])
 memory = psutil.virtual_memory()
-selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", value="Indonesia", label="Spoken language in video", interactive=True)
-selected_whisper_model = gr.Dropdown(choices=whisper_models, type="value", value="base", label="Selected Whisper model", interactive=True)
-number_speakers = gr.Number(precision=0, value=0, label="Input number of speakers for better results. If value=0, model will automatic find the best number of speakers", interactive=True)
 system_info = gr.Markdown(f"*Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB*")
 download_transcript = gr.File(label="Download transcript")
 transcription_df = gr.DataFrame(value=df_init,label="Transcription dataframe", row_count=(0, "dynamic"), max_rows = 10, wrap=True, overflow_row_behaviour='paginate')
@@ -366,9 +366,10 @@ with demo:
     with gr.Tab("Whisper speaker diarization"):
         gr.Markdown('''
             <div>
-            <h1 style='text-align: center'>Whisper speaker diarization</h1>
-            This space uses Whisper models from <a href='https://github.com/openai/whisper' target='_blank'><b>OpenAI</b></a> with <a href='https://github.com/guillaumekln/faster-whisper' target='_blank'><b>CTranslate2</b></a> which is a fast inference engine for Transformer models to recognize the speech (4 times faster than original openai model with same accuracy)
-            and ECAPA-TDNN model from <a href='https://github.com/speechbrain/speechbrain' target='_blank'><b>SpeechBrain</b></a> to encode and clasify speakers
             </div>
         ''')

 youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
 df_init = pd.DataFrame(columns=['Start', 'End', 'Speaker', 'Text'])
 memory = psutil.virtual_memory()
+selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", value="Indonesia", label="Bahasa", interactive=True)
+selected_whisper_model = gr.Dropdown(choices=whisper_models, type="value", value="base", label="Whisper model", interactive=True)
+number_speakers = gr.Number(precision=0, value=0, label="Masukkan jumlah pembicara untuk hasil yang lebih baik. Jika nilai=0, model akan secara otomatis menemukan jumlah pembicara terbaik.", interactive=True)
 system_info = gr.Markdown(f"*Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB*")
 download_transcript = gr.File(label="Download transcript")
 transcription_df = gr.DataFrame(value=df_init,label="Transcription dataframe", row_count=(0, "dynamic"), max_rows = 10, wrap=True, overflow_row_behaviour='paginate')
     with gr.Tab("Whisper speaker diarization"):
         gr.Markdown('''
             <div>
+            <h1 style='text-align: center'>SPERCO</h1>
+            Space ini menggunakan Whisper models dari <a href='https://github.com/openai/whisper' target='_blank'><b>OpenAI</b></a> dengan <a href='https://github.com/guillaumekln/faster-whisper' target='_blank'><b>CTranslate2</b></a> yang merupakan mesin inferensi cepat untuk model Transformer untuk mengenali ucapan (4 kali lebih cepat dari model openai asli dengan akurasi yang sama)
+            dan model ECAPA-TDNN dari <a href='https://github.com/speechbrain/speechbrain' target='_blank'><b>SpeechBrain</b></a> untuk mengkodekan dan mengklasifikasikan pembicara.
             </div>
         ''')