Tamerstito committed on
Commit
dc41b9b
·
verified ·
1 Parent(s): 5596c16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -7
app.py CHANGED
@@ -1,7 +1,19 @@
1
- from nemo.collections.asr.models import EncDecMultiTaskModel
2
- # load model
3
- canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b-flash')
4
- # update decode params
5
- decode_cfg = canary_model.cfg.decoding
6
- decode_cfg.beam.beam_size = 1
7
- canary_model.change_decoding_strategy(decode_cfg)
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
2
+ from datasets import load_dataset
3
+
4
+ # load model and processor
5
+ processor = WhisperProcessor.from_pretrained("openai/whisper-base")
6
+ model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
7
+ model.config.forced_decoder_ids = None
8
+
9
+ # load dummy dataset and read audio files
10
+ ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
11
+ sample = ds[0]["audio"]
12
+ input_features = processor(sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt").input_features
13
+
14
+ # generate token ids
15
+ predicted_ids = model.generate(input_features)
16
+ # decode token ids to text
17
+ transcription = processor.batch_decode(predicted_ids, skip_special_tokens=False)
18
+
19
+ transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)