Spaces:

Woziii
/

scribe

Sleeping

App Files Community

Woziii committed on Aug 20, 2024

Commit

41f4b8f

verified ·

1 Parent(s): 02ae966

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -55

app.py CHANGED Viewed

@@ -50,68 +50,34 @@ pipe = pipeline(
 def associate_speakers_with_timestamps(transcription_result, diarization):
     word_segments = transcription_result['chunks']
     diarization_segments = list(diarization.itertracks(yield_label=True))
-    # Pré-calcul des chevauchements
-    overlaps = set()
-    for i in range(1, len(diarization_segments)):
-        if diarization_segments[i-1][0].end > diarization_segments[i][0].start:
-            overlaps.add(diarization_segments[i][0].start)
     speaker_transcription = []
-    current_speaker = None
-    current_text = []
-    diar_iter = iter(diarization_segments)
-    current_segment, _, current_speaker = next(diar_iter)
     for word in word_segments:
-        word_start, _ = word['timestamp']
-        while word_start >= current_segment.end:
-            if current_text:
-                speaker_transcription.append((current_speaker, ' '.join(current_text)))
-                current_text = []
-            current_segment, _, current_speaker = next(diar_iter, (None, None, None))
-            if current_segment is None:
-                break
-        if current_segment is None:
-            break
-        if word_start in overlaps:
-            current_text.append(f"[{word['text']}]")
-        else:
-            current_text.append(word['text'])
-    if current_text:
-        speaker_transcription.append((current_speaker, ' '.join(current_text)))
-    return speaker_transcription
-def merge_short_segments(speaker_transcription, min_words=3):
-    def merge_group(group):
-        speaker, texts = group
-        merged = []
-        current = []
-        for text in texts:
-            current.extend(text.split())
-            if len(current) >= min_words:
-                merged.append(' '.join(current))
-                current = []
-        if current:
-            merged.append(' '.join(current))
-        return [(speaker, text) for text in merged]
-    return [item for group in groupby(speaker_transcription, key=lambda x: x[0]) for item in merge_group(group)]
-def merge_short_segments(speaker_transcription, min_words=3):
-    merged = []
-    for speaker, text in speaker_transcription:
-        if not merged or merged[-1][0] != speaker or len(merged[-1][1].split()) >= min_words:
-            merged.append((speaker, text))
         else:
-            merged[-1] = (speaker, merged[-1][1] + " " + text)
-    return merged
 def simplify_diarization_output(speaker_transcription):
     simplified = []

 def associate_speakers_with_timestamps(transcription_result, diarization):
     word_segments = transcription_result['chunks']
     diarization_segments = list(diarization.itertracks(yield_label=True))
     speaker_transcription = []
+    current_segment_index = 0
+    previous_speaker = None
     for word in word_segments:
+        word_start, word_end = word['timestamp']
+        while current_segment_index < len(diarization_segments) - 1 and diarization_segments[current_segment_index][0].end <= word_start:
+            current_segment_index += 1
+        current_segment, _, current_speaker = diarization_segments[current_segment_index]
+        # Vérifier s'il y a un chevauchement avec le segment précédent
+        if current_segment_index > 0:
+            previous_segment, _, previous_speaker = diarization_segments[current_segment_index - 1]
+            if previous_segment.end > word_start and previous_speaker != current_speaker:
+                word_text = f"[{word['text']}]"
+            else:
+                word_text = word['text']
+        else:
+            word_text = word['text']
+        if not speaker_transcription or speaker_transcription[-1][0] != current_speaker:
+            speaker_transcription.append((current_speaker, word_text))
         else:
+            speaker_transcription[-1] = (current_speaker, speaker_transcription[-1][1] + " " + word_text)
+    return speaker_transcription
 def simplify_diarization_output(speaker_transcription):
     simplified = []