ttasd
app.py CHANGED
@@ -20,7 +20,18 @@ def transcribe(inputs, task):
 
     audio = whisperx.load_audio(inputs)
     result = model.transcribe(audio, batch_size=BATCH_SIZE)
-
+    model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
+    result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
+    diarize_model = whisperx.DiarizationPipeline(use_auth_token=os.getenv("HF_TOKEN"), device=device)
+    diarize_segments = diarize_model(audio)
+    result = whisperx.assign_word_speakers(diarize_segments, result)
+    output_text = ""
+    for segment in result['segments']:
+        speaker = segment.get('speaker', 'Unknown Speaker')
+        text = segment['text']
+        output_text += f"{speaker}: {text}\n"
+
+    return output_text
 
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
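The hunk extends transcribe() so that the plain WhisperX transcription is followed by word-level alignment, speaker diarization, and a per-segment "speaker: text" rendering. Below is a minimal standalone sketch of the same flow; the definitions of model, device, and BATCH_SIZE are assumptions about what app.py sets up outside this hunk (only the function body is visible here), and HF_TOKEN must grant access to the gated pyannote diarization models that whisperx.DiarizationPipeline loads.

import os

import whisperx

# Assumed setup (not shown in the hunk): device, batch size, and the ASR model.
device = "cuda"  # use "cpu" (and e.g. compute_type="int8") if no GPU is available
BATCH_SIZE = 16
model = whisperx.load_model("large-v2", device, compute_type="float16")


def transcribe_with_speakers(path: str) -> str:
    # 1. Transcribe with the batched WhisperX model.
    audio = whisperx.load_audio(path)
    result = model.transcribe(audio, batch_size=BATCH_SIZE)

    # 2. Align word timestamps using the alignment model for the detected language.
    model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
    result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)

    # 3. Diarize the audio and attach speaker labels to the aligned segments/words.
    diarize_model = whisperx.DiarizationPipeline(use_auth_token=os.getenv("HF_TOKEN"), device=device)
    diarize_segments = diarize_model(audio)
    result = whisperx.assign_word_speakers(diarize_segments, result)

    # 4. Render one "SPEAKER_XX: text" line per segment; unlabeled segments fall back.
    return "".join(
        f"{segment.get('speaker', 'Unknown Speaker')}: {segment['text']}\n"
        for segment in result["segments"]
    )


if __name__ == "__main__":
    print(transcribe_with_speakers("sample.wav"))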