fsd
Browse files
- app.py +4 -2
- requirements.txt +1 -0
app.py
CHANGED
@@ -20,7 +20,7 @@ import numpy as np
|
|
20 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
21 |
BATCH_SIZE = 8
|
22 |
FILE_LIMIT_MB = 1000
|
23 |
-
COMPUTE_TYPE = "
|
24 |
YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
|
25 |
|
26 |
num_speakers = 2
|
@@ -34,9 +34,11 @@ def getpreferredencoding(do_setlocale = True):
|
|
34 |
locale.getpreferredencoding = getpreferredencoding
|
35 |
embedding_model = PretrainedSpeakerEmbedding(
|
36 |
"speechbrain/spkrec-ecapa-voxceleb",
|
37 |
-
device=torch.device("
|
38 |
model = whisper.load_model(model_size)
|
39 |
audio = Audio()
|
|
|
|
|
40 |
def segment_embedding(segment,duration,path):
|
41 |
start = segment["start"]
|
42 |
# Whisper overshoots the end timestamp in the last segment
|
|
|
20 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
21 |
BATCH_SIZE = 8
|
22 |
FILE_LIMIT_MB = 1000
|
23 |
+
COMPUTE_TYPE = "float32"
|
24 |
YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
|
25 |
|
26 |
num_speakers = 2
|
|
|
34 |
locale.getpreferredencoding = getpreferredencoding
|
35 |
embedding_model = PretrainedSpeakerEmbedding(
|
36 |
"speechbrain/spkrec-ecapa-voxceleb",
|
37 |
+
device=torch.device("cpu"))
|
38 |
model = whisper.load_model(model_size)
|
39 |
audio = Audio()
|
40 |
+
torch.set_default_dtype(torch.float32)
|
41 |
+
|
42 |
def segment_embedding(segment,duration,path):
|
43 |
start = segment["start"]
|
44 |
# Whisper overshoots the end timestamp in the last segment
|
requirements.txt
CHANGED
@@ -12,3 +12,4 @@ yt-dlp
|
|
12 |
more_itertools
|
13 |
faster-whisper
|
14 |
git+https://github.com/openai/whisper.git
|
|
|
|
12 |
more_itertools
|
13 |
faster-whisper
|
14 |
git+https://github.com/openai/whisper.git
|
15 |
+
gradio_client
|