Woziii committed on
Commit
3ad44f6
·
verified ·
1 Parent(s): 51ecff6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -44
app.py CHANGED
@@ -49,63 +49,48 @@ pipe = pipeline(
49
 
50
 
51
 
52
def associate_speakers_with_timestamps(transcription_result, diarization, tolerance=0.02, min_segment_duration=0.05):
    """Group transcribed words into per-speaker text segments.

    Each word is attributed to the first diarization segment whose time range,
    widened by ``tolerance`` on both sides, contains the word's start time.
    A new output segment is started when the speaker changes or when the gap
    since the previous matched word exceeds one second. Words that match no
    diarization segment are appended to the current speaker, or to
    ``"SPEAKER_UNKNOWN"`` when no speaker has been seen yet.

    Args:
        transcription_result: Mapping with a ``'chunks'`` list; each chunk has
            a ``'text'`` string and a ``'timestamp'`` pair ``(start, end)``.
            Either timestamp may be ``None``.
        diarization: Object exposing ``itertracks(yield_label=True)`` that
            yields ``(segment, track, speaker)`` triples, where ``segment``
            has ``start`` and ``end`` attributes.
        tolerance: Slack, in the same unit as the timestamps, applied to both
            ends of a diarization segment when matching a word.
        min_segment_duration: Unused; kept so existing callers that pass it
            keep working.

    Returns:
        A list of ``(speaker, text)`` tuples in chronological order.
    """
    word_segments = transcription_result['chunks']
    diarization_segments = list(diarization.itertracks(yield_label=True))
    speaker_transcription = []
    current_speaker = None
    current_text = []
    last_word_end = 0

    def flush_current_segment():
        # Emit the words collected so far for the current speaker, if any.
        nonlocal current_text
        if current_speaker and current_text:
            speaker_transcription.append((current_speaker, ' '.join(current_text)))
            current_text = []

    for word in word_segments:
        word_start, word_end = word['timestamp']
        word_text = word['text']

        # Find the matching diarization segment. A word without a start time
        # cannot be located, so it is handled like an unmatched word instead
        # of crashing on the comparison.
        matched = False
        speaker = None
        if word_start is not None:
            for segment, _, label in diarization_segments:
                if segment.start - tolerance <= word_start < segment.end + tolerance:
                    matched = True
                    speaker = label
                    break

        if matched:
            if speaker != current_speaker:
                flush_current_segment()
                current_speaker = speaker

            # Handle long pauses: a gap of more than one second starts a new segment.
            if word_start - last_word_end > 1.0:
                flush_current_segment()

            current_text.append(word_text)
            # A missing end time must not poison the next pause computation;
            # fall back to the word's start time.
            last_word_end = word_end if word_end is not None else word_start
        elif current_speaker:
            # No matching segment: attribute the word to the last known speaker.
            current_text.append(word_text)
        else:
            # First word with no match at all: open a segment for an unknown speaker.
            current_speaker = "SPEAKER_UNKNOWN"
            current_text.append(word_text)

    flush_current_segment()

    # Merge short segments (three words or fewer) into the previous segment
    # of the same speaker.
    merged_transcription = []
    for speaker, text in speaker_transcription:
        if not merged_transcription or merged_transcription[-1][0] != speaker or len(text.split()) > 3:
            merged_transcription.append((speaker, text))
        else:
            merged_transcription[-1] = (speaker, merged_transcription[-1][1] + " " + text)

    return merged_transcription
109
 
110
  def simplify_diarization_output(speaker_transcription):
111
  simplified = []
 
49
 
50
 
51
 
52
def associate_speakers_with_timestamps(transcription_result, diarization):
    """Attribute each transcribed word to a diarization speaker.

    Words are walked in order while a cursor advances through the diarization
    segments: the cursor moves forward while the current segment ends at or
    before the word's start time, and never moves past the last segment.
    Consecutive words attributed to the same speaker are joined with single
    spaces into one entry.

    Args:
        transcription_result: Mapping with a ``'chunks'`` list; each chunk has
            a ``'text'`` string and a ``'timestamp'`` pair ``(start, end)``.
            A ``None`` start time leaves the cursor where it is.
        diarization: Object exposing ``itertracks(yield_label=True)`` that
            yields ``(segment, track, speaker)`` triples, where ``segment``
            has an ``end`` attribute.

    Returns:
        A list of ``(speaker, text)`` tuples. When the diarization yields no
        segments at all, every word is attributed to ``"SPEAKER_UNKNOWN"``.
    """
    word_segments = transcription_result['chunks']
    diarization_segments = list(diarization.itertracks(yield_label=True))
    last_segment_index = len(diarization_segments) - 1
    # Collect words per speaker run and join once at the end instead of
    # rebuilding the accumulated string for every word.
    grouped = []
    current_segment_index = 0

    for word in word_segments:
        word_start, _ = word['timestamp']
        word_text = word['text']

        if last_segment_index < 0:
            # No diarization available: there is no segment to index into.
            current_speaker = "SPEAKER_UNKNOWN"
        else:
            # Find the matching diarization segment. A word without a start
            # time cannot move the cursor, so it stays with the current segment.
            if word_start is not None:
                while (current_segment_index < last_segment_index
                       and diarization_segments[current_segment_index][0].end <= word_start):
                    current_segment_index += 1

            current_speaker = diarization_segments[current_segment_index][2]

            # Check for an overlap with the previous segment; such words are
            # wrapped in square brackets.
            # NOTE(review): the cursor only leaves a segment once its end is
            # <= word_start, so this looks reachable only when word start
            # times go backwards - confirm this is the intended overlap rule.
            if word_start is not None and current_segment_index > 0:
                previous_segment, _, previous_speaker = diarization_segments[current_segment_index - 1]
                if previous_segment.end > word_start and previous_speaker != current_speaker:
                    word_text = f"[{word_text}]"

        # Add the word to the current speaker's run, opening a new run on a
        # speaker change.
        if grouped and grouped[-1][0] == current_speaker:
            grouped[-1][1].append(word_text)
        else:
            grouped.append((current_speaker, [word_text]))

    return [(speaker, " ".join(words)) for speaker, words in grouped]
85
 
86
def merge_short_segments(speaker_transcription, min_words=3):
    """Fold a segment into the preceding one when that one is still short.

    A ``(speaker, text)`` entry is appended to the previous output entry
    (joined with a single space) when both share the same speaker and the
    previous entry's text has fewer than ``min_words`` whitespace-separated
    words. Otherwise the entry starts a new output segment.

    Args:
        speaker_transcription: Iterable of ``(speaker, text)`` pairs.
        min_words: Word count at which the previous segment stops absorbing
            following segments.

    Returns:
        A new list of ``(speaker, text)`` tuples.
    """
    result = []
    for label, utterance in speaker_transcription:
        if result:
            last_label, last_utterance = result[-1]
            same_speaker = last_label == label
            # Only a previous segment that is still below the threshold absorbs more text.
            if same_speaker and len(last_utterance.split()) < min_words:
                result[-1] = (label, last_utterance + " " + utterance)
                continue
        result.append((label, utterance))
    return result
 
94
 
95
  def simplify_diarization_output(speaker_transcription):
96
  simplified = []