Spaces:

Nitzantry1
/

pyannote-speaker-diarization22

Sleeping

App Files Files Community

Nitzantry1 committed on Nov 21, 2024

Commit

ebec731

verified ·

1 Parent(s): 7f4d630

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -19

app.py CHANGED Viewed

@@ -1,28 +1,62 @@
 import gradio as gr
 from pyannote.audio import Pipeline
 import torch
-# יצירת הפייפליין - שים לב שצריך להחליף את הטוקן
-pipeline = Pipeline.from_pretrained(
-    "pyannote/[email protected]",
-    use_auth_token="YOUR_HF_TOKEN"
-)
-def process_audio(audio_file, min_speakers=None, max_speakers=None):
     try:
-        # הפעלת הדיאריזציה
         diarization = pipeline(
-            audio_file,
             min_speakers=min_speakers if min_speakers > 0 else None,
             max_speakers=max_speakers if max_speakers > 0 else None
         )
-        # המרת התוצאות לטקסט מובנה
-        result = ""
         for turn, _, speaker in diarization.itertracks(yield_label=True):
             line = f"[{turn.start:.1f}s -> {turn.end:.1f}s] {speaker}\n"
             result += line
         return result
     except Exception as e:
@@ -32,14 +66,53 @@ def process_audio(audio_file, min_speakers=None, max_speakers=None):
 demo = gr.Interface(
     fn=process_audio,
     inputs=[
-        gr.Audio(label="קובץ אודיו"),
-        gr.Number(label="מינימום דוברים", value=0),
-        gr.Number(label="מקסימום דוברים", value=0)
     ],
-    outputs=gr.Text(label="תוצאות הזיהוי"),
-    title="זיהוי דוברים בהקלטה",
-    description="העלה קובץ אודיו לזיהוי הדוברים השונים והזמנים שלהם",
-    examples=[["example.wav", 2, 5]]
 )
-demo.launch()

 import gradio as gr
 from pyannote.audio import Pipeline
+import os
 import torch
+def initialize_pipeline():
+    # Build the pyannote diarization pipeline, authenticated with the token
+    # read from the HF_TOKEN environment variable.
+    # Returns the pipeline (moved to CUDA when torch reports a GPU), or None
+    # if any step raised; the error is printed, not re-raised.
+    try:
+        # Read the token from the environment variable
+        hf_token = os.getenv('HF_TOKEN')
+        if not hf_token:
+            raise ValueError("חסר טוקן. הגדר HF_TOKEN במשתני הסביבה")
+        # Create the pipeline
+        # NOTE(review): the model id below looks mangled by e-mail obfuscation
+        # on the scraped page; confirm the real "name@version" id in the repo.
+        pipeline = Pipeline.from_pretrained(
+            "pyannote/[email protected]",
+            use_auth_token=hf_token
+        )
+        # Move to the GPU if one is available
+        if torch.cuda.is_available():
+            pipeline = pipeline.to(torch.device("cuda"))
+        return pipeline
+    except Exception as e:
+        # Broad on purpose: the missing-token ValueError raised above and any
+        # loading failure are reported the same way, by returning None.
+        print(f"שגיאה באתחול הפייפליין: {str(e)}")
+        return None
+def process_audio(audio_path, min_speakers=None, max_speakers=None):
+    # Run speaker diarization on audio_path and return a text report: one
+    # "[start -> end] speaker" line per turn, followed by a short summary.
+    # Early-returns a message string when no file was given or when the
+    # pipeline could not be initialized.
     try:
+        # Check that a file was provided
+        if not audio_path:
+            return "לא נבחר קובץ אודיו"
+        # NOTE(review): the pipeline is rebuilt on every call; consider
+        # creating it once and reusing it.
+        pipeline = initialize_pipeline()
+        if pipeline is None:
+            return "שגיאה באתחול המודל. בדוק את הטוקן וההרשאות"
+        # Process the file
+        # Values that are not > 0 are forwarded as None (no constraint).
+        # NOTE(review): with the declared default of None, `None > 0` raises
+        # TypeError, which lands in the except clause below.
         diarization = pipeline(
+            audio_path,
             min_speakers=min_speakers if min_speakers > 0 else None,
             max_speakers=max_speakers if max_speakers > 0 else None
         )
+        # Build the formatted output
+        result = "תוצאות זיהוי הדוברים:\n\n"
         for turn, _, speaker in diarization.itertracks(yield_label=True):
             line = f"[{turn.start:.1f}s -> {turn.end:.1f}s] {speaker}\n"
             result += line
+        # Append statistics
+        unique_speakers = len(set(diarization.labels()))
+        # Sum of the individual turn durations.
+        # NOTE(review): if turns can overlap, this exceeds the audio length - confirm.
+        total_duration = sum(turn.duration for turn, _, _ in diarization.itertracks(yield_label=True))
+        result += f"\n---\nסיכום:\n"
+        result += f"מספר דוברים שזוהו: {unique_speakers}\n"
+        result += f"משך כולל: {total_duration:.1f} שניות"
         return result
     except Exception as e:  # handler body is elided in this diff view
 demo = gr.Interface(
     fn=process_audio,
+    # The three inputs below are passed to process_audio in order:
+    # audio_path, min_speakers, max_speakers.
     inputs=[
+        # NOTE(review): `source=` is the Gradio 3.x keyword (later releases
+        # use `sources=[...]`); confirm against the pinned Gradio version.
+        gr.Audio(
+            label="קובץ אודיו",
+            source="upload",
+            type="filepath"
+        ),
+        # 0 is the "not specified" value: process_audio forwards None to the
+        # pipeline for any value that is not > 0.
+        gr.Number(
+            label="מינימום דוברים (אופציונלי)",
+            value=0,
+            minimum=0,
+            step=1
+        ),
+        gr.Number(
+            label="מקסימום דוברים (אופציונלי)",
+            value=0,
+            minimum=0,
+            step=1
+        )
     ],
+    outputs=gr.Textbox(
+        label="תוצאות הזיהוי",
+        lines=10
+    ),
+    title="זיהוי דוברים בהקלטות",
+    description="""
+    העלה קובץ אודיו לזיהוי הדוברים השונים והזמנים שלהם.
+    הערות:
+    - אם ידוע לך מספר הדוברים, הזן אותו כדי לשפר את הדיוק
+    - תומך בפורמטים: WAV, MP3, FLAC
+    - מומלץ להשתמש בהקלטות באיכות טובה
+    - משך מקסימלי: 2 שעות
+    """,
+    # Each example row is [audio file, min_speakers, max_speakers].
+    # NOTE(review): presumably these files must exist in the Space repo - verify.
+    examples=[
+        ["example.wav", 2, 4],
+        ["interview.mp3", 2, 2]
+    ]
 )
+if __name__ == "__main__":
+    # Print information about the runtime environment
+    space_name = os.getenv('SPACE_ID', 'unknown')
+    print(f"Space name: {space_name}")
+    print(f"GPU available: {torch.cuda.is_available()}")
+    # Launch the interface
+    # NOTE(review): `enable_queue` appears to be a Gradio 3.x launch() argument
+    # that later releases removed; confirm against the pinned Gradio version.
+    demo.launch(
+        share=True,
+        enable_queue=True,
+        debug=True
+    )