Spaces:

Nitzantry1
/

pyannote-speaker-diarization22

Sleeping

App Files Files Community

Nitzantry1 committed on Nov 21, 2024

Commit

e254e59

verified ·

1 Parent(s): 38751f9

Update app.py

Browse files

Files changed (1) hide show

app.py +202 -103

app.py CHANGED Viewed

@@ -1,118 +1,217 @@
-import gradio as gr
-from pyannote.audio import Pipeline
 import os
 import torch
-def initialize_pipeline():
-    try:
-        # קבלת הטוקן ממשתנה הסביבה
-        hf_token = os.getenv('HF_TOKEN')
-        if not hf_token:
-            raise ValueError("חסר טוקן. הגדר HF_TOKEN במשתני הסביבה")
-        # יצירת הפייפליין
-        pipeline = Pipeline.from_pretrained(
-            "pyannote/speaker-diarization@2.1",
-            use_auth_token=hf_token
-        )
-        # העברה ל-GPU אם זמין
-        if torch.cuda.is_available():
-            pipeline = pipeline.to(torch.device("cuda"))
-        return pipeline
-    except Exception as e:
-        print(f"שגיאה באתחול הפייפליין: {str(e)}")
-        return None
-def process_audio(audio_path, min_speakers=None, max_speakers=None):
-    try:
-        # בדיקה שהקובץ קיים
-        if not audio_path:
-            return "לא נבחר קובץ אודיו"
-        pipeline = initialize_pipeline()
-        if pipeline is None:
-            return "שגיאה באתחול המודל. בדוק את הטוקן וההרשאות"
-        # עיבוד הקובץ
-        diarization = pipeline(
-            audio_path,
-            min_speakers=min_speakers if min_speakers > 0 else None,
-            max_speakers=max_speakers if max_speakers > 0 else None
-        )
-        # יצירת פלט מאורגן
-        result = "תוצאות זיהוי הדוברים:\n\n"
-        for turn, _, speaker in diarization.itertracks(yield_label=True):
-            line = f"[{turn.start:.1f}s -> {turn.end:.1f}s] {speaker}\n"
-            result += line
-        # הוספת סטטיסטיקות
-        unique_speakers = len(set(diarization.labels()))
-        total_duration = sum(turn.duration for turn, _, _ in diarization.itertracks(yield_label=True))
-        result += f"\n---\nסיכום:\n"
-        result += f"מספר דוברים שזוהו: {unique_speakers}\n"
-        result += f"משך כולל: {total_duration:.1f} שניות"
-        return result
     except Exception as e:
-        return f"אירעה שגיאה: {str(e)}"
-# יצירת ממשק משתמש
-demo = gr.Interface(
-    fn=process_audio,
-    inputs=[
-        gr.Audio(
-            label="קובץ אודיו",
-            source="upload",
-            type="filepath"
-        ),
-        gr.Number(
-            label="מינימום דוברים (אופציונלי)",
-            value=0,
-            minimum=0,
-            step=1
-        ),
-        gr.Number(
-            label="מקסימום דוברים (אופציונלי)",
-            value=0,
-            minimum=0,
-            step=1
-        )
-    ],
-    outputs=gr.Textbox(
-        label="תוצאות הזיהוי",
-        lines=10
-    ),
-    title="זיהוי דוברים בהקלטות",
-    description="""
-    העלה קובץ אודיו לזיהוי הדוברים השונים והזמנים שלהם.
-    הערות:
-    - אם ידוע לך מספר הדוברים, הזן אותו כדי לשפר את הדיוק
-    - תומך בפורמטים: WAV, MP3, FLAC
-    - מומלץ להשתמש בהקלטות באיכות טובה
-    - משך מקסימלי: 2 שעות
-    """,
-    examples=[
-        ["example.wav", 2, 4],
-        ["interview.mp3", 2, 2]
-    ]
-)
-if __name__ == "__main__":
-    # הדפסת מידע על הסביבה
-    space_name = os.getenv('SPACE_ID', 'unknown')
-    print(f"Space name: {space_name}")
-    print(f"GPU available: {torch.cuda.is_available()}")
-    # הפעלת הממשק
-    demo.launch(
-        share=True,
-        enable_queue=True,
-        debug=True
-    )

 import os
+import sys
+import logging
+import warnings
 import torch
+import numpy as np
+from typing import Optional, Union, Dict
+# Logging configuration: timestamped INFO-level records for the whole app.
+logging.basicConfig(
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    level=logging.INFO
+)
+logger = logging.getLogger(__name__)
+# Silence warning categories that only add noise to the application log.
+for _ignored_category in (UserWarning, FutureWarning):
+    warnings.filterwarnings("ignore", category=_ignored_category)
+# The UI and the model library are mandatory; abort start-up if either is missing.
+try:
+    import gradio as gr
+    from pyannote.audio import Pipeline
+except ImportError as e:
+    logger.error("שגיאה בטעינת ספריות: %s", e)
+    sys.exit(1)
+class DiarizationPipeline:
+    """Lazily loaded pyannote speaker-diarization pipeline with error reporting.
+
+    Failures are reported as values (an error string from ``initialize`` or an
+    ``{"error": ...}`` dict from ``process_audio``) rather than raised, so the
+    caller can show them to the user directly.
+    """
+    # Largest accepted audio file, in megabytes.
+    MAX_FILE_SIZE_MB = 100
+    def __init__(self):
+        # The model itself is loaded on first use by process_audio().
+        self.pipeline = None
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        logger.info(f"Using device: {self.device}")
+    @staticmethod
+    def _speaker_count(value) -> Optional[int]:
+        """Return *value* as a positive int, or None when unset or unusable."""
+        if value is None:
+            return None
+        try:
+            count = int(value)
+        except (TypeError, ValueError, OverflowError):
+            # Non-numeric input, NaN or infinity: treat as "no bound".
+            return None
+        return count if count > 0 else None
+    def initialize(self) -> Optional[str]:
+        """Load the pretrained pipeline and move it to the selected device.
+
+        Returns:
+            None on success, otherwise a user-facing error message.
+        """
+        try:
+            hf_token = os.getenv('HF_TOKEN')
+            if not hf_token:
+                return "חסר טוקן HF_TOKEN. אנא הגדר אותו בהגדרות."
+            # NOTE(review): the pinned revision was obfuscated in the captured
+            # page; "@2.1" is a reconstruction -- confirm against the repository.
+            loaded = Pipeline.from_pretrained(
+                "pyannote/speaker-diarization@2.1",
+                use_auth_token=hf_token
+            )
+            # pyannote can signal an inaccessible (gated) model by returning
+            # None instead of raising; treat that as a failed initialisation.
+            if loaded is None:
+                error_msg = "שגיאה באתחול המודל. בדוק את הטוקן וההרשאות"
+                logger.error(error_msg)
+                return error_msg
+            if self.device == "cuda":
+                loaded = loaded.to(torch.device("cuda"))
+            # Publish only a fully prepared pipeline so that a failed attempt
+            # leaves self.pipeline as None and is retried on the next request.
+            self.pipeline = loaded
+            return None  # initialisation succeeded
+        except Exception as e:
+            error_msg = f"שגיאה באתחול המודל: {str(e)}"
+            # logger.exception keeps the traceback alongside the message.
+            logger.exception(error_msg)
+            return error_msg
+    def process_audio(
+        self,
+        audio_path: str,
+        min_speakers: Optional[int] = None,
+        max_speakers: Optional[int] = None
+    ) -> Dict[str, Union[str, float, int, list]]:
+        """Diarize an audio file and return a formatted report plus raw data.
+
+        Args:
+            audio_path: Path of the audio file on disk.
+            min_speakers: Lower bound on the number of speakers. None, zero or
+                a negative value means "no bound"; floats are truncated to int.
+            max_speakers: Upper bound, with the same conventions.
+
+        Returns:
+            On success, a dict with the keys "text" (report), "num_speakers",
+            "duration" (sum of segment durations, seconds), "file_size" (MB)
+            and "segments" (list of start/end/duration/speaker dicts).
+            On failure, ``{"error": message}``.
+        """
+        try:
+            if not audio_path or not os.path.exists(audio_path):
+                return {"error": "קובץ האודיו לא נמצא"}
+            file_size = os.path.getsize(audio_path) / (1024 * 1024)  # MB
+            if file_size > self.MAX_FILE_SIZE_MB:
+                return {"error": f"גודל הקובץ ({file_size:.1f}MB) גדול מדי. המקסימום הוא {self.MAX_FILE_SIZE_MB}MB"}
+            # Number inputs may arrive as floats or None; normalise once so the
+            # model call and the printed summary use the same values.
+            min_count = self._speaker_count(min_speakers)
+            max_count = self._speaker_count(max_speakers)
+            # Make sure the pipeline is loaded before the first inference.
+            if self.pipeline is None:
+                init_error = self.initialize()
+                if init_error:
+                    return {"error": init_error}
+            # Run diarization.
+            diarization = self.pipeline(
+                audio_path,
+                min_speakers=min_count,
+                max_speakers=max_count
+            )
+            # Collect one record per speaker turn.
+            segments = [
+                {
+                    "start": turn.start,
+                    "end": turn.end,
+                    "duration": turn.duration,
+                    "speaker": speaker
+                }
+                for turn, _, speaker in diarization.itertracks(yield_label=True)
+            ]
+            speakers = {segment["speaker"] for segment in segments}
+            # Sum of per-turn durations; overlapping turns are counted once each.
+            total_duration = sum(segment["duration"] for segment in segments)
+            # Build the report from parts and join once.
+            parts = ["תוצאות זיהוי הדוברים:\n\n"]
+            parts.extend(
+                f"[{segment['start']:.1f}s -> {segment['end']:.1f}s] "
+                f"{segment['speaker']}\n"
+                for segment in segments
+            )
+            # Summary statistics.
+            parts.append("\nסיכום:\n")
+            parts.append(f"מספר דוברים שזוהו: {len(speakers)}\n")
+            parts.append(f"משך כולל: {total_duration:.1f} שניות\n")
+            parts.append(f"גודל הקובץ: {file_size:.1f}MB\n")
+            limits = []
+            if min_count:
+                limits.append(f"מינימום {min_count} דוברים")
+            if max_count:
+                limits.append(f"מקסימום {max_count} דוברים")
+            if limits:
+                # Joining avoids a dangling separator when only one bound is set.
+                parts.append("הגבלות שהוגדרו: " + ", ".join(limits) + "\n")
+            return {
+                "text": "".join(parts),
+                "num_speakers": len(speakers),
+                "duration": total_duration,
+                "file_size": file_size,
+                "segments": segments
+            }
+        except Exception as e:
+            error_msg = f"שגיאה בעיבוד האודיו: {str(e)}"
+            logger.exception(error_msg)
+            return {"error": error_msg}
+# Build the user-interface wrapper around the diarization pipeline.
+def create_interface(pipeline: DiarizationPipeline) -> gr.Interface:
+    """Return a Gradio interface that runs *pipeline* on an uploaded audio file.
+
+    The interface takes an audio file plus optional minimum/maximum speaker
+    counts and shows either the textual report or an error message.
+    """
+    def process_wrapper(audio_path, min_speakers, max_speakers):
+        # Nothing uploaded: answer immediately without touching the model.
+        if audio_path is None:
+            return "לא נבחר קובץ אודיו"
+        outcome = pipeline.process_audio(audio_path, min_speakers, max_speakers)
+        if "error" not in outcome:
+            return outcome["text"]
+        return f"שגיאה: {outcome['error']}"
+    # Components are created in the same order as they appear in the form.
+    audio_input = gr.Audio(
+        label="קובץ אודיו",
+        source="upload",
+        type="filepath"
+    )
+    min_input = gr.Number(
+        label="מינימום דוברים (אופציונלי)",
+        value=0,
+        minimum=0,
+        step=1
+    )
+    max_input = gr.Number(
+        label="מקסימום דוברים (אופציונלי)",
+        value=0,
+        minimum=0,
+        step=1
+    )
+    result_box = gr.Textbox(
+        label="תוצאות הזיהוי",
+        lines=10
+    )
+    return gr.Interface(
+        fn=process_wrapper,
+        inputs=[audio_input, min_input, max_input],
+        outputs=result_box,
+        title="זיהוי דוברים בהקלטות",
+        description="""
+        העלה קובץ אודיו לזיהוי הדוברים השונים והזמנים שלהם.
+        הערות:
+        - תומך בפורמטים: WAV, MP3, FLAC
+        - גודל קובץ מקסימלי: 100MB
+        - מומלץ להשתמש בהקלטות באיכות טובה
+        - אם ידוע לך מספר הדוברים, הזן אותו לשיפור הדיוק
+        """,
+        examples=[
+            ["example.wav", 2, 4],
+            ["interview.mp3", 2, 2]
+        ],
+        allow_flagging="never",
+        theme="default"
+    )
+if __name__ == "__main__":
+    try:
+        # Log the runtime environment to make deployment issues easier to trace.
+        logger.info("Python version: %s", sys.version)
+        logger.info("PyTorch version: %s", torch.__version__)
+        logger.info("NumPy version: %s", np.__version__)
+        logger.info("Space ID: %s", os.getenv('SPACE_ID', 'unknown'))
+        gpu_available = torch.cuda.is_available()
+        logger.info("GPU available: %s", gpu_available)
+        if gpu_available:
+            logger.info("GPU model: %s", torch.cuda.get_device_name(0))
+        # Create the pipeline wrapper and the interface built on top of it.
+        pipeline = DiarizationPipeline()
+        demo = create_interface(pipeline)
+        # Start serving the interface.
+        launch_options = {
+            "share": True,
+            "enable_queue": True,
+            "max_threads": 4,
+            "debug": True,
+        }
+        demo.launch(**launch_options)
     except Exception as e:
+        # Any failure during start-up is fatal: log it and exit non-zero.
+        logger.error("שגיאה קריטית: %s", str(e))
+        sys.exit(1)