Update app.py
Browse files
app.py
CHANGED
@@ -49,10 +49,9 @@ pipe = pipeline(
|
|
49 |
|
50 |
|
51 |
|
52 |
-
def associate_speakers_with_timestamps(transcription_result, diarization, tolerance=0.
|
53 |
word_segments = transcription_result['chunks']
|
54 |
diarization_segments = list(diarization.itertracks(yield_label=True))
|
55 |
-
|
56 |
speaker_transcription = []
|
57 |
current_speaker = None
|
58 |
current_text = []
|
@@ -62,51 +61,53 @@ def associate_speakers_with_timestamps(transcription_result, diarization, tolera
|
|
62 |
def flush_current_segment():
|
63 |
nonlocal current_speaker, current_text
|
64 |
if current_speaker and current_text:
|
65 |
-
|
|
|
|
|
|
|
|
|
66 |
current_text = []
|
67 |
|
68 |
for word in word_segments:
|
69 |
word_start, word_end = word['timestamp']
|
70 |
word_text = word['text']
|
71 |
-
|
72 |
assigned = False
|
|
|
73 |
for i in range(last_segment_index, len(diarization_segments)):
|
74 |
segment, _, speaker = diarization_segments[i]
|
75 |
if segment.start - tolerance <= word_start < segment.end + tolerance:
|
76 |
if speaker != current_speaker:
|
77 |
-
|
78 |
-
|
79 |
-
current_text.append(word_text)
|
80 |
-
else:
|
81 |
-
flush_current_segment()
|
82 |
-
current_speaker = speaker
|
83 |
current_text.append(word_text)
|
84 |
last_segment_index = i
|
85 |
assigned = True
|
86 |
break
|
87 |
-
|
88 |
if not assigned:
|
89 |
unassigned_words.append((word_start, word_text))
|
90 |
-
|
|
|
|
|
91 |
# Traitement des mots non assignés
|
|
|
92 |
for word_start, word_text in unassigned_words:
|
93 |
-
closest_segment = min(diarization_segments, key=lambda x: abs(x[0].start - word_start))
|
94 |
speaker = closest_segment[2]
|
95 |
if speaker != current_speaker:
|
96 |
flush_current_segment()
|
97 |
current_speaker = speaker
|
98 |
current_text.append(word_text)
|
99 |
-
|
100 |
flush_current_segment()
|
101 |
-
|
102 |
# Fusion des segments courts
|
103 |
merged_transcription = []
|
104 |
for speaker, text in speaker_transcription:
|
105 |
-
if not merged_transcription or merged_transcription[-1][0] != speaker
|
106 |
merged_transcription.append((speaker, text))
|
107 |
else:
|
108 |
merged_transcription[-1] = (speaker, merged_transcription[-1][1] + " " + text)
|
109 |
-
|
110 |
return merged_transcription
|
111 |
|
112 |
def simplify_diarization_output(speaker_transcription):
|
|
|
49 |
|
50 |
|
51 |
|
52 |
+
def associate_speakers_with_timestamps(transcription_result, diarization, tolerance=0.05, min_segment_duration=0.1):
    """Attribute every transcribed chunk to a speaker and group the text by turn.

    Each chunk is matched to the first diarization segment (scanning forward
    from the last match) whose span, widened by ``tolerance`` on both sides,
    contains the chunk's start time. Chunks that match no segment, and chunks
    belonging to a same-speaker run shorter than ``min_segment_duration``, are
    instead attributed to the speaker of the segment whose start or end
    boundary is closest to the chunk's start time. Consecutive chunks with the
    same speaker are then joined with single spaces.

    The output keeps the chunks in their original (transcript) order; no chunk
    is dropped or duplicated.

    Args:
        transcription_result: Mapping with a ``'chunks'`` list; each chunk is a
            mapping with ``'timestamp'`` (a ``(start, end)`` pair) and
            ``'text'``. An ``end`` of ``None`` is treated as equal to ``start``.
        diarization: Object whose ``itertracks(yield_label=True)`` yields
            ``(segment, track, label)`` triples, where ``segment`` exposes
            ``start`` and ``end``.
        tolerance: Slack, in the same unit as the timestamps, added on both
            sides of a segment when matching a chunk's start time.
        min_segment_duration: Minimum span (first chunk start to last chunk
            end) for a same-speaker run to keep its direct attribution.

    Returns:
        A list of ``(speaker_label, text)`` tuples, one per speaker turn.

    Raises:
        ValueError: If there are chunks to attribute but the diarization
            yields no segments.
    """
    word_segments = transcription_result['chunks']
    diarization_segments = list(diarization.itertracks(yield_label=True))

    if not word_segments:
        return []
    if not diarization_segments:
        raise ValueError("diarization contains no speaker segments; cannot attribute words")

    # Sentinel for "no speaker decided yet"; a private object cannot collide
    # with any real label.
    unresolved = object()
    starts, ends, texts, speakers = [], [], [], []

    # Step 1: direct match of each chunk against the diarization segments.
    last_segment_index = 0
    for word in word_segments:
        word_start, word_end = word['timestamp']
        if word_end is None:
            # Open-ended chunk: count it as zero-length instead of crashing
            # in the duration arithmetic below.
            word_end = word_start

        speaker = unresolved
        # Chunks are scanned in order, so resume from the last matched segment.
        for i in range(last_segment_index, len(diarization_segments)):
            segment, _, label = diarization_segments[i]
            if segment.start - tolerance <= word_start < segment.end + tolerance:
                speaker = label
                last_segment_index = i
                break

        starts.append(word_start)
        ends.append(word_end)
        texts.append(word['text'])
        speakers.append(speaker)

    # Step 2: demote same-speaker runs that are too short. A run is a stretch
    # of directly matched chunks with one speaker; unmatched chunks in between
    # do not break it. Duration is measured on the run itself.
    def demote_if_short(indices):
        if indices and ends[indices[-1]] - starts[indices[0]] < min_segment_duration:
            for idx in indices:
                speakers[idx] = unresolved

    run, run_speaker = [], unresolved
    # Demotion only rewrites indices already visited, so mutating ``speakers``
    # during this scan is safe.
    for idx, speaker in enumerate(speakers):
        if speaker is unresolved:
            continue
        if speaker != run_speaker:
            demote_if_short(run)
            run, run_speaker = [], speaker
        run.append(idx)
    demote_if_short(run)

    # Step 3: attribute the remaining chunks to the nearest segment boundary.
    def nearest_speaker(moment):
        def boundary_distance(track):
            return min(abs(track[0].start - moment), abs(track[0].end - moment))

        return min(diarization_segments, key=boundary_distance)[2]

    for idx, speaker in enumerate(speakers):
        if speaker is unresolved:
            speakers[idx] = nearest_speaker(starts[idx])

    # Step 4: merge consecutive chunks of the same speaker, in transcript order.
    turns = []
    for speaker, text in zip(speakers, texts):
        if turns and turns[-1][0] == speaker:
            turns[-1][1].append(text)
        else:
            turns.append((speaker, [text]))

    return [(speaker, ' '.join(parts)) for speaker, parts in turns]
|
112 |
|
113 |
def simplify_diarization_output(speaker_transcription):
|