Spaces:
Running
on
Zero
Running
on
Zero
Update whisper_cs.py
Browse files- whisper_cs.py +11 -9
whisper_cs.py
CHANGED
@@ -25,9 +25,15 @@ def clean_text(input_text):
|
|
25 |
|
26 |
|
27 |
def split_stereo_channels(audio_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
-
audio = AudioSegment.from_wav(audio_path)
|
30 |
-
|
31 |
channels = audio.split_to_mono()
|
32 |
if len(channels) != 2:
|
33 |
raise ValueError(f"Audio {audio_path} does not have 2 channels.")
|
@@ -127,10 +133,8 @@ def post_process_transcription(transcription, max_repeats=2):
|
|
127 |
|
128 |
return cleaned_transcription
|
129 |
|
130 |
-
def post_merge_consecutive_segments(input_file, output_file): #check
|
131 |
-
with open(input_file, "r") as f:
|
132 |
-
transcription_text = f.read()
|
133 |
|
|
|
134 |
segments = re.split(r'(\[SPEAKER_\d{2}\])', transcription_text)
|
135 |
merged_transcription = ''
|
136 |
current_speaker = None
|
@@ -153,8 +157,7 @@ def post_merge_consecutive_segments(input_file, output_file): #check
|
|
153 |
if current_speaker is not None:
|
154 |
merged_transcription += f'[SPEAKER_{current_speaker}] {" ".join(current_segment)}\n'
|
155 |
|
156 |
-
|
157 |
-
f.write(merged_transcription.strip())
|
158 |
|
159 |
def cleanup_temp_files(*file_paths):
|
160 |
for path in file_paths:
|
@@ -262,8 +265,6 @@ def generate(audio_path, use_v2):
|
|
262 |
model = load_whisper_model(MODEL_PATH_2)
|
263 |
split_stereo_channels(audio_path)
|
264 |
|
265 |
-
audio_id = os.path.splitext(os.path.basename(audio_path))[0]
|
266 |
-
|
267 |
left_channel_path = "temp_mono_speaker2.wav"
|
268 |
right_channel_path = "temp_mono_speaker1.wav"
|
269 |
|
@@ -309,6 +310,7 @@ def generate(audio_path, use_v2):
|
|
309 |
clean_output = ""
|
310 |
for line in aligned_text:
|
311 |
clean_output += f"{line}\n"
|
|
|
312 |
cleanup_temp_files(mono_audio_path,tmp_full_path)
|
313 |
|
314 |
cleanup_temp_files(
|
|
|
25 |
|
26 |
|
27 |
def split_stereo_channels(audio_path):
|
28 |
+
ext = os.path.splitext(audio_path)[1].lower()
|
29 |
+
|
30 |
+
if ext == ".wav":
|
31 |
+
audio = AudioSegment.from_wav(audio_path)
|
32 |
+
elif ext == ".mp3":
|
33 |
+
audio = AudioSegment.from_file(audio_path, format="mp3")
|
34 |
+
else:
|
35 |
+
raise ValueError(f"Unsupported file format: {audio_path}")
|
36 |
|
|
|
|
|
37 |
channels = audio.split_to_mono()
|
38 |
if len(channels) != 2:
|
39 |
raise ValueError(f"Audio {audio_path} does not have 2 channels.")
|
|
|
133 |
|
134 |
return cleaned_transcription
|
135 |
|
|
|
|
|
|
|
136 |
|
137 |
+
def post_merge_consecutive_segments_from_text(transcription_text: str) -> str:
|
138 |
segments = re.split(r'(\[SPEAKER_\d{2}\])', transcription_text)
|
139 |
merged_transcription = ''
|
140 |
current_speaker = None
|
|
|
157 |
if current_speaker is not None:
|
158 |
merged_transcription += f'[SPEAKER_{current_speaker}] {" ".join(current_segment)}\n'
|
159 |
|
160 |
+
return merged_transcription.strip()
|
|
|
161 |
|
162 |
def cleanup_temp_files(*file_paths):
|
163 |
for path in file_paths:
|
|
|
265 |
model = load_whisper_model(MODEL_PATH_2)
|
266 |
split_stereo_channels(audio_path)
|
267 |
|
|
|
|
|
268 |
left_channel_path = "temp_mono_speaker2.wav"
|
269 |
right_channel_path = "temp_mono_speaker1.wav"
|
270 |
|
|
|
310 |
clean_output = ""
|
311 |
for line in aligned_text:
|
312 |
clean_output += f"{line}\n"
|
313 |
+
clean_output = post_merge_consecutive_segments_from_text(clean_output)
|
314 |
cleanup_temp_files(mono_audio_path,tmp_full_path)
|
315 |
|
316 |
cleanup_temp_files(
|