Ngoufack committed on
Commit
ef46332
·
1 Parent(s): 9c3f60a
Files changed (1) hide show
  1. app.py +12 -1
app.py CHANGED
@@ -20,7 +20,18 @@ def transcribe(inputs, task):
20
 
21
  audio = whisperx.load_audio(inputs)
22
  result = model.transcribe(audio, batch_size=BATCH_SIZE)
23
- return result["text"]
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def _return_yt_html_embed(yt_url):
26
  video_id = yt_url.split("?v=")[-1]
 
20
 
21
  audio = whisperx.load_audio(inputs)
22
  result = model.transcribe(audio, batch_size=BATCH_SIZE)
23
+ model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
24
+ result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
25
+ diarize_model = whisperx.DiarizationPipeline(use_auth_token=os.getenv("HF_TOKEN"), device=device)
26
+ diarize_segments = diarize_model(audio)
27
+ result = whisperx.assign_word_speakers(diarize_segments, result)
28
+ output_text = ""
29
+ for segment in result['segments']:
30
+ speaker = segment.get('speaker', 'Unknown Speaker')
31
+ text = segment['text']
32
+ output_text += f"{speaker}: {text}\n"
33
+
34
+ return output_text
35
 
36
  def _return_yt_html_embed(yt_url):
37
  video_id = yt_url.split("?v=")[-1]