tert
app.py
CHANGED
@@ -11,7 +11,7 @@ from pyannote.audio.pipelines.speaker_diarization import SpeakerDiarization
 from pyannote.audio import Model
 from pyannote.core import Segment
 from transformers.pipelines.audio_utils import ffmpeg_read
-
+from pyannote.audio import Pipeline
 
 MODEL_NAME = "medium"
 BATCH_SIZE = 8
@@ -21,8 +21,10 @@ YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model = WhisperModel(MODEL_NAME, device=device, compute_type="float16" if torch.cuda.is_available() else "int8")
 
-model_pyannote = Model.from_pretrained("pyannote/speaker-diarization")
-pipeline =
+#model_pyannote = Model.from_pretrained("pyannote/speaker-diarization")
+pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
+
+#pipeline = SpeakerDiarization(model_pyannote)
 
 @spaces.GPU
 def transcribe(inputs, task):
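The model setup in this hunk, as a standalone sketch: `pyannote/speaker-diarization` is a gated checkpoint on the Hugging Face Hub, so `Pipeline.from_pretrained` generally needs an access token, and the pipeline stays on CPU unless moved explicitly. The `HF_TOKEN` environment variable and the `.to(device)` call are assumptions, not part of this commit.

```python
import os

import torch
from faster_whisper import WhisperModel
from pyannote.audio import Pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

# float16 requires CUDA; int8 keeps the CPU fallback usable.
model = WhisperModel(
    "medium",
    device=device,
    compute_type="float16" if device == "cuda" else "int8",
)

# Gated checkpoint: loading typically needs a Hub token
# (HF_TOKEN is a hypothetical environment variable).
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization",
    use_auth_token=os.environ.get("HF_TOKEN"),
)
# Recent pyannote.audio releases allow moving the pipeline to GPU.
pipeline.to(torch.device(device))
```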
@@ -31,11 +33,13 @@ def transcribe(inputs, task):
 
     segments, _ = model.transcribe(inputs, task=task)
     text = " ".join([segment.text for segment in segments])
-
-
-
-
-
+    diarization = pipeline(inputs)
+    speaker_segments = []
+    for segment, _, speaker in diarization.itertracks(yield_label=True):
+        speaker_segments.append((segment.start, segment.end, speaker))
+
+    # Associate the transcription segments with speakers
+    speaker_texts = []
     #diarization = pipeline({"uri": "audio", "audio": audio_path})
     #speaker_segments = []
     return text
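The hunk above collects the diarization turns but leaves `speaker_texts` empty and still returns the plain transcript. A sketch of how the association could be finished, reusing `model` and `pipeline` from the setup above and assigning each Whisper segment to the turn that covers its midpoint; the midpoint rule and the `UNKNOWN` fallback are assumptions, not the author's method.

```python
segments, _ = model.transcribe(inputs, task=task)
segments = list(segments)  # faster-whisper returns a one-shot generator

diarization = pipeline(inputs)
speaker_segments = []
for turn, _, speaker in diarization.itertracks(yield_label=True):
    speaker_segments.append((turn.start, turn.end, speaker))

# Associate each transcription segment with the speaker whose turn
# covers the segment's midpoint (midpoint matching is an assumption).
speaker_texts = []
for seg in segments:
    midpoint = (seg.start + seg.end) / 2
    speaker = next(
        (spk for start, end, spk in speaker_segments if start <= midpoint <= end),
        "UNKNOWN",
    )
    speaker_texts.append(f"{speaker}: {seg.text.strip()}")

text = "\n".join(speaker_texts)
```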
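A usage note on `pipeline(inputs)`: pyannote pipelines accept a path to an audio file, and also an in-memory `{"waveform", "sample_rate"}` mapping, which pairs naturally with the `ffmpeg_read` import this file already has. A sketch under the assumption that the audio can arrive as raw bytes; `diarize_bytes` is a hypothetical helper and 16 kHz matches Whisper's expected sample rate.

```python
import torch
from transformers.pipelines.audio_utils import ffmpeg_read

SAMPLE_RATE = 16000  # Whisper models expect 16 kHz audio

def diarize_bytes(audio_bytes: bytes):
    # Decode raw bytes to a mono float32 array, then hand pyannote an
    # in-memory waveform instead of a file path.
    audio = ffmpeg_read(audio_bytes, SAMPLE_RATE)
    waveform = torch.from_numpy(audio).unsqueeze(0)  # shape: (channel, time)
    return pipeline({"waveform": waveform, "sample_rate": SAMPLE_RATE})
```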