Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +63 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import whisperx
|
3 |
+
|
4 |
+
|
5 |
+
|
6 |
+
def transcribe_audio(whisper_model,filename,whisper_compute_type,whisper_device="cpu",whisper_batch_size=4,source_lang=None):
    """Transcribe an audio file with a WhisperX (CTranslate2) model.

    Parameters:
        whisper_model: model name or Hugging Face repo id passed to whisperx.load_model.
        filename: path to the audio file to transcribe.
        whisper_compute_type: CTranslate2 compute type, e.g. "int8" or "float32".
        whisper_device: inference device (default "cpu").
        whisper_batch_size: transcription batch size (default 4).
        source_lang: language code; "auto" (or None) lets Whisper detect the language.

    Returns:
        The full transcript as a single string, one segment per line.
    """
    # NOTE(review): the model is re-loaded on every call; caching it in a
    # module-level variable would make repeated requests much faster.
    model = whisperx.load_model(whisper_model, device=whisper_device, compute_type=whisper_compute_type)
    print("Whisper model loaded")

    audio = whisperx.load_audio(filename)

    # "auto" means: pass language=None so Whisper auto-detects it.
    language_for_whisper = None if source_lang == "auto" else source_lang

    result = model.transcribe(audio, batch_size=whisper_batch_size, language=language_for_whisper)
    print(f"Detected language {result['language']}")

    # Join segment texts, one per line.  (The original quadratic `+=` string
    # loop and the computed-but-unused `detected_language` were removed.)
    return "".join(segment["text"] + "\n" for segment in result["segments"])
40 |
+
def process_audio(audio_input, whisper_model, whisper_language, whisper_compute_type):
    """Gradio click handler: forward the UI widget values to transcribe_audio."""
    return transcribe_audio(
        whisper_model=whisper_model,
        filename=audio_input,
        whisper_compute_type=whisper_compute_type,
        source_lang=whisper_language,
    )
44 |
+
# Build the Gradio UI: settings + audio upload on the left, result on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Any whisper ct2 test")

    with gr.Row():
        with gr.Column(scale=1):
            # Model / language / precision settings, collapsed by default.
            with gr.Accordion(open=False, label="Whisper settings"):
                model_choice = gr.Dropdown(
                    choices=["tiny", "daswer123/whisper-medium-uz-ct2"],
                    value="daswer123/whisper-medium-uz-ct2",
                    allow_custom_value=True,
                )
                language_choice = gr.Dropdown(
                    choices=["auto", "uz"],
                    value="uz",
                    allow_custom_value=True,
                )
                compute_type_choice = gr.Dropdown(choices=["int8", "float32"], value="int8")
            audio_in = gr.Audio(label="Upload your audio", type="filepath")

        with gr.Column(scale=1):
            transcript_box = gr.Textbox(label="Result")
            run_button = gr.Button("Start!")

    run_button.click(
        fn=process_audio,
        inputs=[audio_in, model_choice, language_choice, compute_type_choice],
        outputs=transcript_box,
    )

demo.launch()
# result = transcribe_audio("tiny","bezbeka.mp3","float32",source_lang="uz")
# print(result)
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio==4.40
|
2 |
+
whisperx
|
3 |
+
transformers==4.39.3
|