Woziii committed on
Commit
457f829
·
verified ·
1 Parent(s): b0bbd2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -17
app.py CHANGED
@@ -49,10 +49,9 @@ pipe = pipeline(
49
 
50
 
51
 
52
- def associate_speakers_with_timestamps(transcription_result, diarization, tolerance=0.1, min_segment_duration=0.1):
53
  word_segments = transcription_result['chunks']
54
  diarization_segments = list(diarization.itertracks(yield_label=True))
55
-
56
  speaker_transcription = []
57
  current_speaker = None
58
  current_text = []
@@ -62,51 +61,53 @@ def associate_speakers_with_timestamps(transcription_result, diarization, tolera
62
  def flush_current_segment():
63
  nonlocal current_speaker, current_text
64
  if current_speaker and current_text:
65
- speaker_transcription.append((current_speaker, ' '.join(current_text)))
 
 
 
 
66
  current_text = []
67
 
68
  for word in word_segments:
69
  word_start, word_end = word['timestamp']
70
  word_text = word['text']
71
-
72
  assigned = False
 
73
  for i in range(last_segment_index, len(diarization_segments)):
74
  segment, _, speaker = diarization_segments[i]
75
  if segment.start - tolerance <= word_start < segment.end + tolerance:
76
  if speaker != current_speaker:
77
- if current_speaker and len(current_text) == 1 and len(current_text[0].split()) <= 2:
78
- # Si le segment précédent est très court, ne changez pas de locuteur
79
- current_text.append(word_text)
80
- else:
81
- flush_current_segment()
82
- current_speaker = speaker
83
  current_text.append(word_text)
84
  last_segment_index = i
85
  assigned = True
86
  break
87
-
88
  if not assigned:
89
  unassigned_words.append((word_start, word_text))
90
-
 
 
91
  # Traitement des mots non assignés
 
92
  for word_start, word_text in unassigned_words:
93
- closest_segment = min(diarization_segments, key=lambda x: abs(x[0].start - word_start))
94
  speaker = closest_segment[2]
95
  if speaker != current_speaker:
96
  flush_current_segment()
97
  current_speaker = speaker
98
  current_text.append(word_text)
99
-
100
  flush_current_segment()
101
-
102
  # Fusion des segments courts
103
  merged_transcription = []
104
  for speaker, text in speaker_transcription:
105
- if not merged_transcription or merged_transcription[-1][0] != speaker or len(text.split()) > 3:
106
  merged_transcription.append((speaker, text))
107
  else:
108
  merged_transcription[-1] = (speaker, merged_transcription[-1][1] + " " + text)
109
-
110
  return merged_transcription
111
 
112
  def simplify_diarization_output(speaker_transcription):
 
49
 
50
 
51
 
52
+ def associate_speakers_with_timestamps(transcription_result, diarization, tolerance=0.05, min_segment_duration=0.1):
53
  word_segments = transcription_result['chunks']
54
  diarization_segments = list(diarization.itertracks(yield_label=True))
 
55
  speaker_transcription = []
56
  current_speaker = None
57
  current_text = []
 
61
  def flush_current_segment():
62
  nonlocal current_speaker, current_text
63
  if current_speaker and current_text:
64
+ segment_duration = word_segments[-1]['timestamp'][1] - word_segments[0]['timestamp'][0]
65
+ if segment_duration >= min_segment_duration:
66
+ speaker_transcription.append((current_speaker, ' '.join(current_text)))
67
+ else:
68
+ unassigned_words.extend([(word['timestamp'][0], word['text']) for word in word_segments])
69
  current_text = []
70
 
71
  for word in word_segments:
72
  word_start, word_end = word['timestamp']
73
  word_text = word['text']
 
74
  assigned = False
75
+
76
  for i in range(last_segment_index, len(diarization_segments)):
77
  segment, _, speaker = diarization_segments[i]
78
  if segment.start - tolerance <= word_start < segment.end + tolerance:
79
  if speaker != current_speaker:
80
+ flush_current_segment()
81
+ current_speaker = speaker
 
 
 
 
82
  current_text.append(word_text)
83
  last_segment_index = i
84
  assigned = True
85
  break
86
+
87
  if not assigned:
88
  unassigned_words.append((word_start, word_text))
89
+
90
+ flush_current_segment()
91
+
92
  # Traitement des mots non assignés
93
+ unassigned_words.sort(key=lambda x: x[0]) # Trier par timestamp
94
  for word_start, word_text in unassigned_words:
95
+ closest_segment = min(diarization_segments, key=lambda x: min(abs(x[0].start - word_start), abs(x[0].end - word_start)))
96
  speaker = closest_segment[2]
97
  if speaker != current_speaker:
98
  flush_current_segment()
99
  current_speaker = speaker
100
  current_text.append(word_text)
 
101
  flush_current_segment()
102
+
103
  # Fusion des segments courts
104
  merged_transcription = []
105
  for speaker, text in speaker_transcription:
106
+ if not merged_transcription or merged_transcription[-1][0] != speaker:
107
  merged_transcription.append((speaker, text))
108
  else:
109
  merged_transcription[-1] = (speaker, merged_transcription[-1][1] + " " + text)
110
+
111
  return merged_transcription
112
 
113
  def simplify_diarization_output(speaker_transcription):