# Hugging Face Spaces page banner (not part of the program): Space status "Sleeping".
import gradio as gr | |
import whisperx | |
def transcribe_audio(whisper_model, filename, whisper_compute_type, whisper_device="cpu", whisper_batch_size=4, source_lang=None) -> str:
    """Transcribe an audio file with WhisperX and return the text, one segment per line.

    Args:
        whisper_model: Model identifier forwarded to ``whisperx.load_model``.
        filename: Audio file path forwarded to ``whisperx.load_audio``.
        whisper_compute_type: Compute type forwarded to ``whisperx.load_model``.
        whisper_device: Device forwarded to ``whisperx.load_model``.
        whisper_batch_size: Batch size forwarded to the model's ``transcribe``.
        source_lang: Language forwarded to ``transcribe``. The special value
            ``"auto"`` is translated to ``None``; any other value (including
            the default ``None``) is passed through unchanged.

    Returns:
        The ``"text"`` field of every returned segment, each followed by a
        newline. An empty string when there are no segments.
    """
    # NOTE: the model is loaded anew on every call; nothing is cached between
    # invocations.
    model = whisperx.load_model(
        whisper_model,
        device=whisper_device,
        compute_type=whisper_compute_type,
    )
    print("Whisper model loaded")

    audio = whisperx.load_audio(filename)

    # "auto" is a UI-level sentinel; the transcribe call receives None instead.
    language_for_whisper = None if source_lang == "auto" else source_lang
    result = model.transcribe(
        audio,
        batch_size=whisper_batch_size,
        language=language_for_whisper,
    )

    language = result["language"]
    print(f"Detected language {language}")

    # One segment per line, with a trailing newline after the last segment.
    return "".join(segment["text"] + "\n" for segment in result["segments"])
def process_audio(audio_input, whisper_model, whisper_language, whisper_compute_type):
    """Click handler: transcribe the uploaded audio with the selected settings.

    Args:
        audio_input: Path of the uploaded audio file (the Audio component is
            configured with ``type="filepath"``).
        whisper_model: Model identifier chosen in the UI.
        whisper_language: Language chosen in the UI; ``"auto"`` is handled by
            ``transcribe_audio``.
        whisper_compute_type: Compute type chosen in the UI.

    Returns:
        The transcript text produced by ``transcribe_audio``.

    Raises:
        gr.Error: If no audio file was provided.
    """
    # Fail early with a readable message instead of letting an empty path
    # reach the audio loader.
    if not audio_input:
        raise gr.Error("Please upload an audio file first.")

    return transcribe_audio(
        filename=audio_input,
        whisper_model=whisper_model,
        source_lang=whisper_language,
        whisper_compute_type=whisper_compute_type,
    )
# Gradio UI: Whisper settings and audio upload in the first column, the
# transcript and the start button in the second.
# NOTE(review): the nesting below is reconstructed from the component order;
# the original indentation was not available — confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Any whisper ct2 test")

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Accordion(label="Whisper settings", open=False):
                whisper_model = gr.Dropdown(
                    choices=["tiny", "daswer123/whisper-medium-uz-ct2"],
                    value="daswer123/whisper-medium-uz-ct2",
                    allow_custom_value=True,
                )
                whisper_language = gr.Dropdown(
                    choices=["auto", "uz"],
                    value="uz",
                    allow_custom_value=True,
                )
                whisper_compute_type = gr.Dropdown(
                    choices=["int8", "float32"],
                    value="int8",
                )
            audio_input = gr.Audio(label="Upload your audio", type="filepath")

        with gr.Column(scale=1):
            text_output = gr.Textbox(label="Result")
            submit_btn = gr.Button("Start!")

    # Wire the button to the transcription handler; the input order matches
    # the positional parameters of process_audio.
    submit_btn.click(
        fn=process_audio,
        inputs=[
            audio_input,
            whisper_model,
            whisper_language,
            whisper_compute_type,
        ],
        outputs=text_output,
    )

demo.launch()

# Manual check without the UI:
# result = transcribe_audio("tiny","bezbeka.mp3","float32",source_lang="uz")
# print(result)