mikr committed on
Commit
44f2969
·
1 Parent(s): ba532dc

initial commit

Browse files
Files changed (1) hide show
  1. app.py +54 -0
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
from transformers import AutoFeatureExtractor, Wav2Vec2BertModel, pipeline
4
+
5
# Hub identifier of the fine-tuned checkpoint served by this demo.
MODEL_NAME = "mikr/w2v-bert-2.0-czech-colab-cv16"
# Language code of the audio this demo targets; kept as a module-level constant.
lang = "cs"

# Use the first CUDA device when one is available, otherwise run on the CPU.
device = 0 if torch.cuda.is_available() else "cpu"

# Speech-recognition pipeline shared by every request. `chunk_length_s=30`
# asks the pipeline to process long recordings in 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

# NOTE: no `forced_decoder_ids` are configured here. That setting relies on
# `tokenizer.get_decoder_prompt_ids`, a Whisper tokenizer method; the tokenizer
# loaded for this Wav2Vec2-BERT checkpoint is not expected to provide it, so
# calling it would abort start-up with an AttributeError.
17
+
18
def transcribe(microphone, file_upload):
    """Transcribe one audio input and return the recognised text.

    Args:
        microphone: Value from the microphone input, or None when unused.
        file_upload: Value from the upload input, or None when unused.

    Returns:
        The transcription text. When both inputs are given, the microphone
        input is transcribed and the text is prefixed with a warning. When
        neither is given, an error message is returned instead.
    """
    has_microphone = microphone is not None
    has_upload = file_upload is not None

    # Nothing to transcribe: report the problem without touching the model.
    if not (has_microphone or has_upload):
        return "ERROR: You have to either use the microphone or upload an audio file"

    notice = ""
    if has_microphone and has_upload:
        notice = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )

    # The microphone input takes precedence over the uploaded one.
    audio_source = microphone if has_microphone else file_upload
    result = pipe(audio_source)
    return notice + result["text"]
34
+
35
+
36
# The two optional audio sources, handed to `transcribe` in this order
# (microphone first, uploaded file second).
# NOTE(review): `gr.inputs.Audio` belongs to Gradio's legacy input namespace;
# confirm that the Gradio version this app is deployed with still provides it.
_audio_inputs = [
    gr.inputs.Audio(source="microphone", type="filepath", optional=True),
    gr.inputs.Audio(source="upload", type="filepath", optional=True),
]

# Text shown under the title; links to the checkpoint's model page.
# NOTE(review): the text mentions a Whisper fine-tuning event while the title
# says Wav2Vec2-Bert - confirm the wording is accurate for this checkpoint.
_description = (
    "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the fine-tuned"
    f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) from Whisper Fine Tuning Sprint Event 2022 "
    "and 🤗 Transformers to transcribe audio files of arbitrary length."
)

# Web UI wrapping `transcribe`: two audio inputs in, plain text out.
iface = gr.Interface(
    fn=transcribe,
    inputs=_audio_inputs,
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="Wav2Vec2-Bert demo - transcribe Czech Audio",
    description=_description,
    allow_flagging="never",
)

# Start serving the demo.
iface.launch()