# daswer123's picture
# Upload 2 files
# 6bcd893 verified
import gradio as gr
import whisperx
def transcribe_audio(whisper_model: str, filename: str, whisper_compute_type: str, whisper_device: str = "cpu", whisper_batch_size: int = 4, source_lang=None) -> str:
    """Transcribe an audio file with WhisperX and return the text.

    Args:
        whisper_model: Model name or repo id handed to ``whisperx.load_model``.
        filename: Path of the audio file handed to ``whisperx.load_audio``.
        whisper_compute_type: Compute type forwarded to ``whisperx.load_model``.
        whisper_device: Device forwarded to ``whisperx.load_model``.
        whisper_batch_size: Batch size forwarded to ``transcribe``.
        source_lang: Language code forwarded to ``transcribe``. ``"auto"`` is
            translated to ``None``; ``None`` itself is passed through as-is
            (presumably both let the model detect the language — confirm
            against the WhisperX documentation).

    Returns:
        The ``"text"`` of every segment, each followed by a newline.
    """
    # NOTE: the model is loaded again on every call; nothing is cached.
    model = whisperx.load_model(
        whisper_model,
        device=whisper_device,
        compute_type=whisper_compute_type,
    )
    print("Whisper model loaded")

    audio = whisperx.load_audio(filename)

    # The UI uses the string "auto" where the transcribe call expects None.
    language_for_whisper = None if source_lang == "auto" else source_lang
    result = model.transcribe(
        audio,
        batch_size=whisper_batch_size,
        language=language_for_whisper,
    )
    print(f"Detected language {result['language']}")

    # One line per segment; join avoids repeated string reallocation.
    return "".join(segment["text"] + "\n" for segment in result["segments"])
def process_audio(audio_input, whisper_model, whisper_language, whisper_compute_type):
    """Handle the "Start!" click: transcribe the given audio and return its text.

    Args:
        audio_input: File path of the audio to transcribe; falsy when no
            audio was provided.
        whisper_model: Model name or repo id chosen in the UI.
        whisper_language: Language code chosen in the UI, or ``"auto"``.
        whisper_compute_type: Compute type chosen in the UI.

    Returns:
        The transcription text produced by ``transcribe_audio``.

    Raises:
        gr.Error: If no audio file was provided.
    """
    # Without this guard a missing upload fails deep inside the audio loader
    # with an error that tells the user nothing useful.
    if not audio_input:
        raise gr.Error("Please upload an audio file first.")

    return transcribe_audio(
        filename=audio_input,
        whisper_model=whisper_model,
        source_lang=whisper_language,
        whisper_compute_type=whisper_compute_type,
    )
# Gradio UI: the first column holds the Whisper settings and the audio
# upload, the second column holds the result box and the start button.
# NOTE(review): this file arrived with its indentation stripped, so the
# nesting below is reconstructed from the component order — confirm it
# matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Any whisper ct2 test")

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Accordion(label="Whisper settings", open=False):
                whisper_model = gr.Dropdown(
                    value="daswer123/whisper-medium-uz-ct2",
                    choices=["tiny", "daswer123/whisper-medium-uz-ct2"],
                    allow_custom_value=True,
                )
                whisper_language = gr.Dropdown(
                    value="uz",
                    choices=["auto", "uz"],
                    allow_custom_value=True,
                )
                whisper_compute_type = gr.Dropdown(
                    value="int8",
                    choices=["int8", "float32"],
                )
            audio_input = gr.Audio(type="filepath", label="Upload your audio")

        with gr.Column(scale=1):
            text_output = gr.Textbox(label="Result")
            submit_btn = gr.Button("Start!")

    # Input order must match the parameter order of process_audio.
    submit_btn.click(
        process_audio,
        [audio_input, whisper_model, whisper_language, whisper_compute_type],
        text_output,
    )

demo.launch()
# result = transcribe_audio("tiny","bezbeka.mp3","float32",source_lang="uz")
# print(result)