sha1779 committed on
Commit
676724b
1 Parent(s): 50d045b
Files changed (1) hide show
  1. README.md +31 -1
README.md CHANGED
@@ -5,4 +5,34 @@ language:
5
  base_model:
6
  - openai/whisper-small
7
  pipeline_tag: automatic-speech-recognition
8
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  base_model:
6
  - openai/whisper-small
7
  pipeline_tag: automatic-speech-recognition
8
+
9
+ ---
10
+ ```py
11
+
12
+ import os
13
+ import librosa
14
+ import torch, torchaudio
15
+ import numpy as np
16
+ from transformers import WhisperTokenizer ,WhisperProcessor, WhisperFeatureExtractor, WhisperForConditionalGeneration
17
# Hub repository that the model, processor, tokenizer and feature extractor
# are all loaded from.
model_path_ = "sha1779/BengaliRegionalASR"
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
feature_extractor = WhisperFeatureExtractor.from_pretrained(model_path_)
tokenizer = WhisperTokenizer.from_pretrained(model_path_)
processor = WhisperProcessor.from_pretrained(model_path_)
model = WhisperForConditionalGeneration.from_pretrained(model_path_).to(device)
# Pin the decoder prompt so generation transcribes in Bengali.
model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(language="bengali", task="transcribe")

# Recording to transcribe. Despite the variable name, this example points at
# a WAV file.
mp3_path = "/kaggle/input/barishal-data/valid_barishal (104).wav"
# librosa.load resamples while reading when `sr` is given, so the returned
# array is already at 16 kHz and no separate resampling step is required.
speech_array, sampling_rate = librosa.load(mp3_path, sr=16000)

# Turn the waveform into model input features, returned as a PyTorch tensor.
input_features = feature_extractor(speech_array, sampling_rate=sampling_rate, return_tensors="pt").input_features

# The features are passed positionally so the call does not depend on the
# keyword name of generate()'s first parameter.
# [0] selects the token ids of the single clip in the batch.
predicted_ids = model.generate(input_features.to(device))[0]

# Decode the token ids to text, dropping the special tokens.
transcription = processor.decode(predicted_ids, skip_special_tokens=True)

print(transcription)
36
+
37
+
38
+ ```