Nitzantry1 committed on
Commit 16d67e4 · verified · 1 Parent(s): d1ba8a0

Update app.py

Files changed (1)
app.py +23 -20
app.py CHANGED
@@ -1,30 +1,33 @@
-import os
-os.system('pip install pyannote.audio')
-
-import gradio as gr
 from pyannote.audio import Pipeline
+from faster_whisper import WhisperModel
 
 # Load the diarization pipeline
 pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
 
-# Function to handle diarization
-def diarize_audio(file_path):
-    diarization = pipeline(file_path)
+# Load the transcription model (Whisper)
+model = WhisperModel("openai/whisper-large", device="cuda")
+
+# Function to handle diarization and transcription
+def diarize_and_transcribe(audio_file):
+    # Step 1: Diarization
+    diarization = pipeline(audio_file)
 
-    # Create a result to show the speaker segments
+    # Step 2: Transcription
     result = []
     for segment, _, speaker in diarization.itertracks(yield_label=True):
-        result.append(f"Speaker {speaker}: from {segment.start:.1f} to {segment.end:.1f}")
-    return "\n".join(result)
+        # Extract the segment audio (use an external tool like ffmpeg for extraction)
+        segment_audio = extract_audio_segment(audio_file, segment.start, segment.end)
 
-# Create Gradio interface
-interface = gr.Interface(
-    fn=diarize_audio,
-    inputs=gr.Audio(source="upload", type="filepath"),
-    outputs="text",
-    title="Speaker Diarization",
-    description="Upload an audio file to perform speaker diarization."
-)
+        # Transcribe the segment
+        transcription_segments, _ = model.transcribe(segment_audio, language="he")
+        transcription = " ".join([seg.text for seg in transcription_segments])
+
+        # Append result with speaker and transcription
+        result.append(f"Speaker {speaker}: {transcription}")
+
+    return "\n".join(result)
 
-# Launch the Gradio app
-interface.launch()
+# Example usage
+audio_file_path = "example_audio.wav"
+output = diarize_and_transcribe(audio_file_path)
+print(output)
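
The new code calls extract_audio_segment(), which this commit never defines (its inline comment defers to an external tool like ffmpeg). A minimal sketch of such a helper, assuming pydub (which shells out to ffmpeg) is installed; the name, signature, and temp-file approach are illustrative, not part of pyannote.audio or faster-whisper:

import tempfile
from pydub import AudioSegment

def extract_audio_segment(audio_file, start, end):
    # Hypothetical helper, not defined in the commit: slice
    # [start, end] (in seconds) out of audio_file and return the
    # path of a temporary WAV clip. pydub slices in milliseconds.
    audio = AudioSegment.from_file(audio_file)
    clip = audio[int(start * 1000):int(end * 1000)]
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    clip.export(tmp.name, format="wav")
    return tmp.name

faster-whisper's model.transcribe() accepts a file path, so the returned clip path plugs straight into the loop above. One caveat: WhisperModel expects a CTranslate2-format model (e.g. "large-v2" or a converted repository), so the "openai/whisper-large" checkpoint may need conversion with ct2-transformers-converter before this runs.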