daswer123 committed on
Commit
6bcd893
1 Parent(s): 4aca2eb

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +63 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisperx
3
+
4
+
5
+
6
# Cache of loaded WhisperX models, keyed by (model, device, compute_type),
# so repeated transcriptions with the same settings reuse the instance
# instead of reloading the weights on every call.
_WHISPER_MODEL_CACHE = {}


def transcribe_audio(whisper_model, filename, whisper_compute_type, whisper_device="cpu", whisper_batch_size=4, source_lang=None):
    """Transcribe an audio file with a WhisperX model.

    Args:
        whisper_model: Model name or HF repo id passed to ``whisperx.load_model``.
        filename: Path of the audio file to transcribe.
        whisper_compute_type: CTranslate2 compute type, e.g. "int8" or "float32".
        whisper_device: Inference device ("cpu" or "cuda"). Defaults to "cpu".
        whisper_batch_size: Batch size for transcription. Defaults to 4.
        source_lang: Language code, or "auto"/None to let Whisper detect it.

    Returns:
        str: The transcript, one segment per line (trailing newline included).
    """
    cache_key = (whisper_model, whisper_device, whisper_compute_type)
    model = _WHISPER_MODEL_CACHE.get(cache_key)
    if model is None:
        model = whisperx.load_model(whisper_model, device=whisper_device, compute_type=whisper_compute_type)
        _WHISPER_MODEL_CACHE[cache_key] = model
        print("Whisper model loaded")

    audio = whisperx.load_audio(filename)

    # "auto" (or None) means auto-detect: pass language=None to WhisperX.
    language_for_whisper = None if source_lang == "auto" else source_lang

    result = model.transcribe(audio, batch_size=whisper_batch_size, language=language_for_whisper)
    print(f"Detected language {result['language']}")

    # Join segment texts, one per line (same output format as before).
    return "".join(segment["text"] + "\n" for segment in result["segments"])
39
+
40
def process_audio(audio_input, whisper_model, whisper_language, whisper_compute_type):
    """Gradio callback: transcribe the uploaded audio and return the text."""
    return transcribe_audio(
        whisper_model=whisper_model,
        filename=audio_input,
        source_lang=whisper_language,
        whisper_compute_type=whisper_compute_type,
    )
43
+
44
# --- Gradio UI ---------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Any whisper ct2 test")

    with gr.Row():
        with gr.Column(scale=1):
            # Model / language / precision settings, collapsed by default.
            with gr.Accordion(open=False, label="Whisper settings"):
                whisper_model = gr.Dropdown(
                    choices=["tiny", "daswer123/whisper-medium-uz-ct2"],
                    value="daswer123/whisper-medium-uz-ct2",
                    allow_custom_value=True,
                )
                whisper_language = gr.Dropdown(
                    choices=["auto", "uz"],
                    value="uz",
                    allow_custom_value=True,
                )
                whisper_compute_type = gr.Dropdown(
                    choices=["int8", "float32"],
                    value="int8",
                )
            audio_input = gr.Audio(label="Upload your audio", type="filepath")

        with gr.Column(scale=1):
            text_output = gr.Textbox(label="Result")
            submit_btn = gr.Button("Start!")

    # Wire the button to the transcription callback.
    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input, whisper_model, whisper_language, whisper_compute_type],
        outputs=text_output,
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==4.40
2
+ whisperx
3
+ transformers==4.39.3