Woziii committed on
Commit
2b05a52
·
verified ·
1 Parent(s): c3d36ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -31
app.py CHANGED
@@ -49,61 +49,58 @@ pipe = pipeline(
49
 
50
 
51
 
52
- def associate_speakers_with_timestamps(transcription_result, diarization, tolerance=0.04, min_segment_duration=0.08):
53
  word_segments = transcription_result['chunks']
54
  diarization_segments = list(diarization.itertracks(yield_label=True))
55
  speaker_transcription = []
56
  current_speaker = None
57
  current_text = []
58
- unassigned_words = []
59
- last_segment_index = 0
60
 
61
  def flush_current_segment():
62
  nonlocal current_speaker, current_text
63
  if current_speaker and current_text:
64
- segment_duration = word_segments[-1]['timestamp'][1] - word_segments[0]['timestamp'][0]
65
- if segment_duration >= min_segment_duration:
66
- speaker_transcription.append((current_speaker, ' '.join(current_text)))
67
- else:
68
- unassigned_words.extend([(word['timestamp'][0], word['text']) for word in word_segments])
69
  current_text = []
70
 
71
  for word in word_segments:
72
  word_start, word_end = word['timestamp']
73
  word_text = word['text']
74
- assigned = False
75
 
76
- for i in range(last_segment_index, len(diarization_segments)):
77
- segment, _, speaker = diarization_segments[i]
 
78
  if segment.start - tolerance <= word_start < segment.end + tolerance:
79
- if speaker != current_speaker:
80
- flush_current_segment()
81
- current_speaker = speaker
82
- current_text.append(word_text)
83
- last_segment_index = i
84
- assigned = True
85
  break
86
 
87
- if not assigned:
88
- unassigned_words.append((word_start, word_text))
 
 
 
89
 
90
- flush_current_segment()
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- # Traitement des mots non assignés
93
- unassigned_words.sort(key=lambda x: x[0]) # Trier par timestamp
94
- for word_start, word_text in unassigned_words:
95
- closest_segment = min(diarization_segments, key=lambda x: min(abs(x[0].start - word_start), abs(x[0].end - word_start)))
96
- speaker = closest_segment[2]
97
- if speaker != current_speaker:
98
- flush_current_segment()
99
- current_speaker = speaker
100
- current_text.append(word_text)
101
  flush_current_segment()
102
 
103
- # Fusion des segments courts
104
  merged_transcription = []
105
  for speaker, text in speaker_transcription:
106
- if not merged_transcription or merged_transcription[-1][0] != speaker:
107
  merged_transcription.append((speaker, text))
108
  else:
109
  merged_transcription[-1] = (speaker, merged_transcription[-1][1] + " " + text)
 
49
 
50
 
51
 
52
+ def associate_speakers_with_timestamps(transcription_result, diarization, tolerance=0.02, min_segment_duration=0.05):
53
  word_segments = transcription_result['chunks']
54
  diarization_segments = list(diarization.itertracks(yield_label=True))
55
  speaker_transcription = []
56
  current_speaker = None
57
  current_text = []
58
+ last_word_end = 0
 
59
 
60
  def flush_current_segment():
61
  nonlocal current_speaker, current_text
62
  if current_speaker and current_text:
63
+ speaker_transcription.append((current_speaker, ' '.join(current_text)))
 
 
 
 
64
  current_text = []
65
 
66
  for word in word_segments:
67
  word_start, word_end = word['timestamp']
68
  word_text = word['text']
 
69
 
70
+ # Trouver le segment de diarisation correspondant
71
+ matching_segment = None
72
+ for segment, _, speaker in diarization_segments:
73
  if segment.start - tolerance <= word_start < segment.end + tolerance:
74
+ matching_segment = (segment, speaker)
 
 
 
 
 
75
  break
76
 
77
+ if matching_segment:
78
+ segment, speaker = matching_segment
79
+ if speaker != current_speaker:
80
+ flush_current_segment()
81
+ current_speaker = speaker
82
 
83
+ # Gérer les pauses longues
84
+ if word_start - last_word_end > 1.0: # Pause de plus d'une seconde
85
+ flush_current_segment()
86
+
87
+ current_text.append(word_text)
88
+ last_word_end = word_end
89
+ else:
90
+ # Si aucun segment ne correspond, attribuer au dernier locuteur connu
91
+ if current_speaker:
92
+ current_text.append(word_text)
93
+ else:
94
+ # Si c'est le premier mot sans correspondance, créer un nouveau segment
95
+ current_speaker = "SPEAKER_UNKNOWN"
96
+ current_text.append(word_text)
97
 
 
 
 
 
 
 
 
 
 
98
  flush_current_segment()
99
 
100
+ # Fusionner les segments courts du même locuteur
101
  merged_transcription = []
102
  for speaker, text in speaker_transcription:
103
+ if not merged_transcription or merged_transcription[-1][0] != speaker or len(text.split()) > 3:
104
  merged_transcription.append((speaker, text))
105
  else:
106
  merged_transcription[-1] = (speaker, merged_transcription[-1][1] + " " + text)