Update app.py
Browse files
app.py
CHANGED
@@ -49,63 +49,48 @@ pipe = pipeline(
|
|
49 |
|
50 |
|
51 |
|
52 |
-
def associate_speakers_with_timestamps(transcription_result, diarization
|
53 |
word_segments = transcription_result['chunks']
|
54 |
diarization_segments = list(diarization.itertracks(yield_label=True))
|
55 |
speaker_transcription = []
|
56 |
-
|
57 |
-
|
58 |
-
last_word_end = 0
|
59 |
-
|
60 |
-
def flush_current_segment():
|
61 |
-
nonlocal current_speaker, current_text
|
62 |
-
if current_speaker and current_text:
|
63 |
-
speaker_transcription.append((current_speaker, ' '.join(current_text)))
|
64 |
-
current_text = []
|
65 |
|
66 |
for word in word_segments:
|
67 |
word_start, word_end = word['timestamp']
|
68 |
-
word_text = word['text']
|
69 |
|
70 |
# Trouver le segment de diarisation correspondant
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
if
|
78 |
-
|
79 |
-
if
|
80 |
-
|
81 |
-
current_speaker = speaker
|
82 |
-
|
83 |
-
# Gérer les pauses longues
|
84 |
-
if word_start - last_word_end > 1.0: # Pause de plus d'une seconde
|
85 |
-
flush_current_segment()
|
86 |
-
|
87 |
-
current_text.append(word_text)
|
88 |
-
last_word_end = word_end
|
89 |
-
else:
|
90 |
-
# Si aucun segment ne correspond, attribuer au dernier locuteur connu
|
91 |
-
if current_speaker:
|
92 |
-
current_text.append(word_text)
|
93 |
else:
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
-
|
99 |
|
100 |
-
|
101 |
-
|
102 |
for speaker, text in speaker_transcription:
|
103 |
-
if not
|
104 |
-
|
105 |
else:
|
106 |
-
|
107 |
-
|
108 |
-
return merged_transcription
|
109 |
|
110 |
def simplify_diarization_output(speaker_transcription):
|
111 |
simplified = []
|
|
|
49 |
|
50 |
|
51 |
|
52 |
+
def associate_speakers_with_timestamps(transcription_result, diarization):
    """Attribute each transcribed word to a speaker and group consecutive words.

    Args:
        transcription_result: Mapping whose ``'chunks'`` entry is a sequence of
            dicts, each carrying a ``'timestamp'`` pair ``(start, end)`` and a
            ``'text'`` string.
        diarization: Object exposing ``itertracks(yield_label=True)`` that
            yields ``(segment, track, speaker)`` triples where ``segment`` has
            an ``end`` attribute. Presumably a pyannote ``Annotation`` whose
            tracks come out in chronological order -- confirm with the caller.

    Returns:
        A list of ``(speaker, text)`` tuples in word order. Consecutive words
        attributed to the same speaker are joined with a single space. When the
        diarization yields no segment at all, every word is kept under a single
        ``None`` speaker instead of raising ``IndexError``.
    """
    word_segments = transcription_result['chunks']
    diarization_segments = list(diarization.itertracks(yield_label=True))

    # Without any speaker turn there is nothing to index into; keep the text
    # rather than crashing, and leave the speaker unknown.
    if not diarization_segments:
        if not word_segments:
            return []
        return [(None, " ".join(word['text'] for word in word_segments))]

    speaker_transcription = []
    last_index = len(diarization_segments) - 1
    current_segment_index = 0

    for word in word_segments:
        # Only the start time drives the attribution; the pair is still
        # unpacked so a malformed timestamp fails loudly here.
        word_start, _word_end = word['timestamp']

        # Find the matching diarization segment: move the cursor past every
        # segment that ended at or before this word, stopping on the last one.
        while (current_segment_index < last_index
               and diarization_segments[current_segment_index][0].end <= word_start):
            current_segment_index += 1

        _, _, current_speaker = diarization_segments[current_segment_index]

        # Check for an overlap with the previous segment: a word that starts
        # before the previous segment (owned by another speaker) has ended is
        # wrapped in square brackets.
        # NOTE(review): the cursor only advances past segments whose end is
        # <= word_start, so this fires only when a word starts earlier than a
        # previously processed word -- confirm that is the intended trigger.
        word_text = word['text']
        if current_segment_index > 0:
            previous_segment, _, previous_speaker = diarization_segments[current_segment_index - 1]
            if previous_segment.end > word_start and previous_speaker != current_speaker:
                word_text = f"[{word_text}]"

        # Append the word to the current speaker's running segment, or open a
        # new segment when the speaker changes.
        if not speaker_transcription or speaker_transcription[-1][0] != current_speaker:
            speaker_transcription.append((current_speaker, word_text))
        else:
            speaker_transcription[-1] = (current_speaker, speaker_transcription[-1][1] + " " + word_text)

    return speaker_transcription
|
85 |
|
86 |
+
def merge_short_segments(speaker_transcription, min_words=3):
    """Fold a segment into the previous one when that one is still short.

    Args:
        speaker_transcription: Iterable of ``(speaker, text)`` pairs.
        min_words: Whitespace-separated word count at which the previously
            kept segment is considered long enough to stop absorbing text.

    Returns:
        A new list of ``(speaker, text)`` tuples. An entry is appended to the
        previously kept entry (joined with one space) only when both share the
        same speaker and the kept entry has fewer than ``min_words`` words;
        otherwise it starts a new entry.
    """
    result = []
    for speaker, text in speaker_transcription:
        if result:
            last_speaker, last_text = result[-1]
            same_speaker = last_speaker == speaker
            still_short = len(last_text.split()) < min_words
            if same_speaker and still_short:
                # Grow the short entry in place instead of opening a new one.
                result[-1] = (speaker, last_text + " " + text)
                continue
        result.append((speaker, text))
    return result
|
|
|
94 |
|
95 |
def simplify_diarization_output(speaker_transcription):
|
96 |
simplified = []
|