SujithPulikodan committed
Commit 85e8b0b
1 Parent(s): efedf8a

Update README.md

Files changed (1): README.md (+60 -3)
README.md CHANGED
@@ -1,3 +1,60 @@
- ---
- license: mit
- ---
+ ---
+ license: mit
+ datasets:
+ - ARTPARK-IISc/Vaani
+ language:
+ - te
+ base_model:
+ - openai/whisper-medium
+ pipeline_tag: automatic-speech-recognition
+ ---
+ ```python
+ import torch
+ import soundfile as sf
+ from transformers import (
+     WhisperFeatureExtractor,
+     WhisperForConditionalGeneration,
+     WhisperProcessor,
+     WhisperTokenizer,
+ )
+
+ model_id = "ARTPARK-IISc/whisper-small-vaani-telugu"
+
+ # Load the feature extractor and tokenizer individually
+ feature_extractor = WhisperFeatureExtractor.from_pretrained(model_id)
+ tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="Telugu", task="transcribe")
+
+ # Create the processor manually from the two components
+ processor = WhisperProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Load the fine-tuned model
+ model = WhisperForConditionalGeneration.from_pretrained(model_id).to(device)
+
+ # Load the audio file
+ audio_file_path = "Sample_Audio.wav"  # replace with your audio file path
+ audio_data, sample_rate = sf.read(audio_file_path)
+
+ # Convert multi-channel audio to mono
+ if audio_data.ndim > 1:
+     audio_data = audio_data.mean(axis=1)
+
+ # Whisper expects 16 kHz audio; resample if necessary
+ if sample_rate != 16000:
+     import torchaudio
+     resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
+     audio_data = resampler(torch.tensor(audio_data, dtype=torch.float32).unsqueeze(0)).squeeze().numpy()
+
+ # Use the processor to prepare the input features
+ input_features = processor(audio_data, sampling_rate=16000, return_tensors="pt").input_features.to(device)
+
+ # Generate the transcription (disable gradient calculation during inference)
+ with torch.no_grad():
+     predicted_ids = model.generate(input_features)
+
+ # Decode the generated IDs into human-readable text
+ transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+ print(transcription)
+ ```
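
For quick experiments, the same checkpoint can also be used through the `transformers` `pipeline` API, which bundles feature extraction, generation, and decoding into one call. A minimal sketch, not from the model card itself: the audio path is a placeholder, and passing a file path this way requires ffmpeg to be installed for decoding.

```python
import torch
from transformers import pipeline

# Sketch only: wraps the manual feature-extraction/generate/decode steps above.
asr = pipeline(
    "automatic-speech-recognition",
    model="ARTPARK-IISc/whisper-small-vaani-telugu",
    device=0 if torch.cuda.is_available() else -1,
    chunk_length_s=30,  # split long recordings into 30-second windows
)

# "Sample_Audio.wav" is a placeholder path, as in the example above
result = asr("Sample_Audio.wav", generate_kwargs={"language": "telugu", "task": "transcribe"})
print(result["text"])
```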