Nitzantry1 committed on
Commit 16d67e4 · verified · 1 Parent(s): d1ba8a0

Update app.py

Files changed (1)
app.py +23 -20
app.py CHANGED
@@ -1,30 +1,33 @@
-import os
-os.system('pip install pyannote.audio')
-
-import gradio as gr
 from pyannote.audio import Pipeline
+from faster_whisper import WhisperModel
 
 # Load the diarization pipeline
 pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
 
-# Function to handle diarization
-def diarize_audio(file_path):
-    diarization = pipeline(file_path)
+# Load the transcription model (Whisper)
+model = WhisperModel("openai/whisper-large", device="cuda")
+
+# Function to handle diarization and transcription
+def diarize_and_transcribe(audio_file):
+    # Step 1: Diarization
+    diarization = pipeline(audio_file)
 
-    # Create a result to show the speaker segments
+    # Step 2: Transcription
     result = []
     for segment, _, speaker in diarization.itertracks(yield_label=True):
-        result.append(f"Speaker {speaker}: from {segment.start:.1f} to {segment.end:.1f}")
-    return "\n".join(result)
+        # Extract the segment audio (use an external tool like ffmpeg for extraction)
+        segment_audio = extract_audio_segment(audio_file, segment.start, segment.end)
 
-# Create Gradio interface
-interface = gr.Interface(
-    fn=diarize_audio,
-    inputs=gr.Audio(source="upload", type="filepath"),
-    outputs="text",
-    title="Speaker Diarization",
-    description="Upload an audio file to perform speaker diarization."
-)
+        # Transcribe the segment
+        transcription_segments, _ = model.transcribe(segment_audio, language="he")
+        transcription = " ".join([seg.text for seg in transcription_segments])
+
+        # Append result with speaker and transcription
+        result.append(f"Speaker {speaker}: {transcription}")
+
+    return "\n".join(result)
 
-# Launch the Gradio app
-interface.launch()
+# Example usage
+audio_file_path = "example_audio.wav"
+output = diarize_and_transcribe(audio_file_path)
+print(output)
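
The new code calls extract_audio_segment(), which this commit never defines (its inline comment defers to an external tool like ffmpeg). A minimal sketch of such a helper, assuming pydub (which shells out to ffmpeg) is installed; the name, signature, and temp-file approach are illustrative, not part of pyannote.audio or faster-whisper:

import tempfile
from pydub import AudioSegment

def extract_audio_segment(audio_file, start, end):
    # Hypothetical helper, not defined in the commit: slice
    # [start, end] (in seconds) out of audio_file and return the
    # path of a temporary WAV clip. pydub slices in milliseconds.
    audio = AudioSegment.from_file(audio_file)
    clip = audio[int(start * 1000):int(end * 1000)]
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    clip.export(tmp.name, format="wav")
    return tmp.name

faster-whisper's model.transcribe() accepts a file path, so the returned clip path plugs straight into the loop above. One caveat: WhisperModel expects a CTranslate2-format model (e.g. "large-v2" or a converted repository), so the "openai/whisper-large" checkpoint may need conversion with ct2-transformers-converter before this runs.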