ankush13r committed
Commit 0454f45 · verified · 1 parent: 84a0fab

Update whisper2.py

Files changed (1):
  1. whisper2.py +7 -1
whisper2.py CHANGED
@@ -10,7 +10,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 print("[ INFO ] Device: ", device)
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-torch_dtype = torch.float32
+
 
 model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, torch_dtype=torch_dtype).to(device)
 processor = WhisperProcessor.from_pretrained(MODEL_NAME)
@@ -26,7 +26,13 @@ def convert_forced_to_tokens(forced_decoder_ids):
     return forced_decoder_tokens
 
 
+def change_formate(input_file):
+    ffmpeg.input(input_file).output("16_" + input_file, loglevel='quiet', **{'ar': '16000'}).run(overwrite_output=True)
+    return "16_" + input_file
+
+
 def generate(audio):
+    audio = change_formate(audio)
     input_audio, sample_rate = torchaudio.load(audio)
 
     #metadata = torchaudio.info(audio)
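
Context for the second hunk: Whisper checkpoints expect 16 kHz input, so the new change_formate helper resamples the file with ffmpeg (the call chain matches the ffmpeg-python package, assuming that is the ffmpeg module imported elsewhere in the file) and writes a "16_<input_file>" copy before torchaudio.load reads it. The snippet below is a minimal sketch of the same idea done in memory with torchaudio.transforms.Resample rather than an intermediate file; the helper name load_resampled and the path "sample.wav" are illustrative assumptions, not part of the commit.

# Minimal sketch (not from the commit): resample to 16 kHz in memory with
# torchaudio instead of writing a "16_<name>" copy through ffmpeg-python.
# The name load_resampled and the path "sample.wav" are illustrative only.
import torchaudio
import torchaudio.transforms as T

TARGET_SR = 16000  # Whisper models are trained on 16 kHz audio

def load_resampled(path):
    waveform, sample_rate = torchaudio.load(path)  # waveform: (channels, samples)
    if sample_rate != TARGET_SR:
        waveform = T.Resample(orig_freq=sample_rate, new_freq=TARGET_SR)(waveform)
    return waveform, TARGET_SR

# Example usage:
# waveform, sr = load_resampled("sample.wav")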