Spaces:
Running
Running
Update whisper2.py
Browse files- whisper2.py +7 -1
whisper2.py
CHANGED
@@ -10,7 +10,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
10 |
|
11 |
print("[ INFO ] Device: ", device)
|
12 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
13 |
-
|
14 |
|
15 |
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, torch_dtype=torch_dtype).to(device)
|
16 |
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
|
@@ -26,7 +26,13 @@ def convert_forced_to_tokens(forced_decoder_ids):
|
|
26 |
return forced_decoder_tokens
|
27 |
|
28 |
|
|
|
|
|
|
|
|
|
|
|
29 |
def generate(audio):
|
|
|
30 |
input_audio, sample_rate = torchaudio.load(audio)
|
31 |
|
32 |
#metadata = torchaudio.info(audio)
|
|
|
10 |
|
11 |
print("[ INFO ] Device: ", device)
|
12 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
13 |
+
|
14 |
|
15 |
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, torch_dtype=torch_dtype).to(device)
|
16 |
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
|
|
|
26 |
return forced_decoder_tokens
|
27 |
|
28 |
|
29 |
+
def change_formate(input_file):
    """Resample an audio file to 16 kHz with ffmpeg and return the new path.

    The converted copy is written next to the original, with ``16_``
    prepended to the file name rather than to the whole path. An input
    such as ``/tmp/upload/clip.wav`` therefore produces
    ``/tmp/upload/16_clip.wav`` instead of the invalid
    ``16_/tmp/upload/clip.wav``. A bare file name still maps to
    ``16_<name>``, exactly as before.

    Args:
        input_file: Path of the audio file to convert.

    Returns:
        Path of the resampled file.
    """
    import os  # local import: the file's import block is not visible here

    directory, file_name = os.path.split(input_file)
    output_file = os.path.join(directory, "16_" + file_name)

    # 'ar' is ffmpeg's audio sample-rate option; an existing output file
    # is overwritten rather than prompting.
    ffmpeg.input(input_file).output(
        output_file, loglevel='quiet', **{'ar': '16000'}
    ).run(overwrite_output=True)
    return output_file
|
32 |
+
|
33 |
+
|
34 |
def generate(audio):
|
35 |
+
audio = change_formate(audio)
|
36 |
input_audio, sample_rate = torchaudio.load(audio)
|
37 |
|
38 |
#metadata = torchaudio.info(audio)
|