rbiojout
/

whisperX-endpoint

Model card Files Files and versions Community

raphaelbiojout committed on Nov 23, 2023

Commit

d9a0ef6

·

1 Parent(s): a7da197

update

Files changed (1) hide show

handler.py +3 -2

handler.py CHANGED Viewed

@@ -23,7 +23,7 @@ SAMPLE_RATE = 16000
 def whisper_config():
     device = "cuda" if torch.cuda.is_available() else "cpu"
     whisper_model = "large-v2"
-    batch_size = 8  # reduce if low on GPU mem, 16 initially
     # change to "int8" if low on GPU mem (may reduce accuracy)
     compute_type = "float16" if device == "cuda" else "int8"
     return device, batch_size, compute_type, whisper_model
@@ -172,7 +172,7 @@ class EndpointHandler():
     def __init__(self, path=""):
         # load the model
         device, batch_size, compute_type, whisper_model = whisper_config()
-        self.model = whisperx.load_model(whisper_model, device=device, compute_type=compute_type, threads=2)
         # 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 ERIC
         # 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 OLD
         logger.info(f"Model {whisper_model} initialized")
@@ -230,6 +230,7 @@ class EndpointHandler():
             # 2. transcribe
             device, batch_size, compute_type, whisper_model = whisper_config()
             logger.info("--------------- STARTING TRANSCRIPTION ------------------------")
             transcription = self.model.transcribe(audio_nparray, batch_size=batch_size,language=language)
             if info:
                 print(transcription["segments"]) # before alignment

 def whisper_config():
     device = "cuda" if torch.cuda.is_available() else "cpu"
     whisper_model = "large-v2"
+    batch_size = 16  # reduce if low on GPU mem, 16 initially
     # change to "int8" if low on GPU mem (may reduce accuracy)
     compute_type = "float16" if device == "cuda" else "int8"
     return device, batch_size, compute_type, whisper_model
     def __init__(self, path=""):
         # load the model
         device, batch_size, compute_type, whisper_model = whisper_config()
+        self.model = whisperx.load_model(whisper_model, device=device, compute_type=compute_type)
         # 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 ERIC
         # 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 OLD
         logger.info(f"Model {whisper_model} initialized")
             # 2. transcribe
             device, batch_size, compute_type, whisper_model = whisper_config()
             logger.info("--------------- STARTING TRANSCRIPTION ------------------------")
+            logger.info(f"device: {device}, batch_size: {batch_size}, compute_type:{compute_type}, whisper_model: {whisper_model}")
             transcription = self.model.transcribe(audio_nparray, batch_size=batch_size,language=language)
             if info:
                 print(transcription["segments"]) # before alignment