ankush13r committed
Commit 0454f45 · verified · 1 parent: 84a0fab

Update whisper2.py

Files changed (1):
  1. whisper2.py +7 -1
whisper2.py CHANGED
@@ -10,7 +10,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 print("[ INFO ] Device: ", device)
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-torch_dtype = torch.float32
+
 
 model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, torch_dtype=torch_dtype).to(device)
 processor = WhisperProcessor.from_pretrained(MODEL_NAME)
@@ -26,7 +26,13 @@ def convert_forced_to_tokens(forced_decoder_ids):
     return forced_decoder_tokens
 
 
+def change_formate(input_file):
+    ffmpeg.input(input_file).output("16_" + input_file, loglevel='quiet', **{'ar': '16000'}).run(overwrite_output=True)
+    return "16_" + input_file
+
+
 def generate(audio):
+    audio = change_formate(audio)
     input_audio, sample_rate = torchaudio.load(audio)
 
     #metadata = torchaudio.info(audio)
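
Context for the second hunk: Whisper checkpoints expect 16 kHz input, so the new change_formate helper resamples the file with ffmpeg (the call chain matches the ffmpeg-python package, assuming that is the ffmpeg module imported elsewhere in the file) and writes a "16_<input_file>" copy before torchaudio.load reads it. The snippet below is a minimal sketch of the same idea done in memory with torchaudio.transforms.Resample rather than an intermediate file; the helper name load_resampled and the path "sample.wav" are illustrative assumptions, not part of the commit.

# Minimal sketch (not from the commit): resample to 16 kHz in memory with
# torchaudio instead of writing a "16_<name>" copy through ffmpeg-python.
# The name load_resampled and the path "sample.wav" are illustrative only.
import torchaudio
import torchaudio.transforms as T

TARGET_SR = 16000  # Whisper models are trained on 16 kHz audio

def load_resampled(path):
    waveform, sample_rate = torchaudio.load(path)  # waveform: (channels, samples)
    if sample_rate != TARGET_SR:
        waveform = T.Resample(orig_freq=sample_rate, new_freq=TARGET_SR)(waveform)
    return waveform, TARGET_SR

# Example usage:
# waveform, sr = load_resampled("sample.wav")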