Spaces: Running on T4
Update app.py
Browse files
app.py
CHANGED
@@ -23,18 +23,18 @@ auth_token = os.environ.get("AUTH_TOKEN") or True
23 |   device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
24 |   print(f"Using device: {device}")
25 |
26 | - @spaces.GPU(duration=60)
27 |   def pipe(file, return_timestamps=False):
28 | -     model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, low_cpu_mem_usage=True)
29 | -     model.to(device)
30 | -     processor = WhisperProcessor.from_pretrained(MODEL_NAME)
31 | -     model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
32 | -     model.generation_config.cache_implementation = "static"
33 |       asr = pipeline(
34 |           task="automatic-speech-recognition",
35 | -         model=
36 | -         tokenizer=AutoTokenizer.from_pretrained(MODEL_NAME),
37 | -         feature_extractor=AutoFeatureExtractor.from_pretrained(MODEL_NAME),
38 |           chunk_length_s=30,
39 |           device=device,
40 |           token=auth_token,
23 |   device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
24 |   print(f"Using device: {device}")
25 |
26 | + @spaces.GPU(duration=60 * 2)
27 |   def pipe(file, return_timestamps=False):
28 | +     # model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, low_cpu_mem_usage=True)
29 | +     # model.to(device)
30 | +     # processor = WhisperProcessor.from_pretrained(MODEL_NAME)
31 | +     # model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
32 | +     # model.generation_config.cache_implementation = "static"
33 |       asr = pipeline(
34 |           task="automatic-speech-recognition",
35 | +         model=MODEL_NAME,
36 | +         # tokenizer=AutoTokenizer.from_pretrained(MODEL_NAME),
37 | +         # feature_extractor=AutoFeatureExtractor.from_pretrained(MODEL_NAME),
38 |           chunk_length_s=30,
39 |           device=device,
40 |           token=auth_token,