Ngoufack committed on
Commit
f6c51ca
·
1 Parent(s): 01daa7a
Files changed (2) hide show
  1. app.py +4 -2
  2. requirements.txt +1 -0
app.py CHANGED
@@ -20,7 +20,7 @@ import numpy as np
20
  device = "cuda" if torch.cuda.is_available() else "cpu"
21
  BATCH_SIZE = 8
22
  FILE_LIMIT_MB = 1000
23
- COMPUTE_TYPE = "float16"
24
  YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
25
 
26
  num_speakers = 2
@@ -34,9 +34,11 @@ def getpreferredencoding(do_setlocale = True):
34
  locale.getpreferredencoding = getpreferredencoding
35
  embedding_model = PretrainedSpeakerEmbedding(
36
  "speechbrain/spkrec-ecapa-voxceleb",
37
- device=torch.device("cuda"))
38
  model = whisper.load_model(model_size)
39
  audio = Audio()
 
 
40
  def segment_embedding(segment,duration,path):
41
  start = segment["start"]
42
  # Whisper overshoots the end timestamp in the last segment
 
20
  device = "cuda" if torch.cuda.is_available() else "cpu"
21
  BATCH_SIZE = 8
22
  FILE_LIMIT_MB = 1000
23
+ COMPUTE_TYPE = "float32"
24
  YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
25
 
26
  num_speakers = 2
 
34
  locale.getpreferredencoding = getpreferredencoding
35
  embedding_model = PretrainedSpeakerEmbedding(
36
  "speechbrain/spkrec-ecapa-voxceleb",
37
+ device=torch.device("cpu"))
38
  model = whisper.load_model(model_size)
39
  audio = Audio()
40
+ torch.set_default_dtype(torch.float32)
41
+
42
  def segment_embedding(segment,duration,path):
43
  start = segment["start"]
44
  # Whisper overshoots the end timestamp in the last segment
requirements.txt CHANGED
@@ -12,3 +12,4 @@ yt-dlp
12
  more_itertools
13
  faster-whisper
14
  git+https://github.com/openai/whisper.git
 
 
12
  more_itertools
13
  faster-whisper
14
  git+https://github.com/openai/whisper.git
15
+ gradio_client