File size: 1,516 Bytes
e58bff7
d9e71ec
 
 
e58bff7
d9e71ec
e58bff7
c82bffb
d9e71ec
e58bff7
d9e71ec
c82bffb
 
d9e71ec
c82bffb
 
 
 
e58bff7
 
 
c82bffb
8640564
1c24637
d9e71ec
 
8640564
d9e71ec
 
c82bffb
 
 
d9e71ec
c82bffb
d9e71ec
c82bffb
 
e58bff7
d9e71ec
e58bff7
d9e71ec
 
 
e58bff7
d9e71ec
e58bff7
 
d9e71ec
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import os
import tempfile
import gradio as gr
from pyannote.audio import Pipeline

# Fetch the Hugging Face token from the Space secret
# Read the access token from the HF_TOKEN environment variable and fail fast
# when it is absent or empty, since the model download below needs it.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN is missing. Please set it in the Secrets section.")

# Load the pyannote model for speaker identification
# Load the pretrained diarization pipeline once at import time so every
# request reuses the same model instance.
try:
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization",
        use_auth_token=hf_token,
    )
except Exception as e:
    # Keep the original exception attached as the explicit cause.
    raise RuntimeError(f"Failed to load the pipeline: {e}") from e

# NOTE(review): some pyannote.audio releases report a gated/private pipeline
# (user conditions not accepted, or an invalid token) by printing a message
# and returning None instead of raising. Fail here with a clear error rather
# than later with "'NoneType' object is not callable".
if pipeline is None:
    raise RuntimeError(
        "Failed to load the pipeline: Pipeline.from_pretrained returned None. "
        "Check that HF_TOKEN is valid and that the model's user conditions "
        "have been accepted on Hugging Face."
    )

# Speaker identification (diarization) function
def diarize(audio):
    """Run speaker diarization on a Gradio audio value and format the turns.

    Args:
        audio: The value delivered by the Gradio audio input. Accepted forms:
            a file path; a ``(sample_rate, samples)`` pair, which is what the
            ``"audio"`` shortcut delivers by default (``samples`` being a
            NumPy array); or a legacy ``(file_path, sample_rate)`` pair.

    Returns:
        One line per speaker turn in the form ``"<speaker>: <start>s - <end>s"``
        joined with newlines, or a message starting with
        ``"Error during diarization: "`` when the input is missing or
        unsupported, or when the pipeline raises.
    """
    try:
        if audio is None:
            # Gradio passes None when the form is submitted without audio.
            raise ValueError("no audio input was provided")

        # Resolve the input before touching the pipeline so that input errors
        # are reported as such.
        pipeline_input = _to_pipeline_input(audio)
        diarization = pipeline(pipeline_input)

        return "\n".join(
            f"{speaker}: {turn.start:.1f}s - {turn.end:.1f}s"
            for turn, _, speaker in diarization.itertracks(yield_label=True)
        )

    except Exception as e:
        # UI boundary: show the failure as text instead of crashing the app.
        return f"Error during diarization: {e}"


def _to_pipeline_input(audio):
    """Convert a Gradio audio value into an input the pyannote pipeline accepts.

    Returns either a file path or a ``{"waveform", "sample_rate"}`` mapping
    (pyannote's in-memory audio format). Raises ``TypeError`` for any other
    kind of value.
    """
    if isinstance(audio, (str, os.PathLike)):
        return audio

    if isinstance(audio, (tuple, list)) and len(audio) == 2:
        first, second = audio
        if isinstance(first, (str, os.PathLike)):
            # Legacy (file_path, sample_rate) ordering.
            return first

        # Gradio's default numpy format: (sample_rate, samples).
        import torch  # local import: only needed for in-memory audio

        sample_rate, samples = first, second
        waveform = torch.as_tensor(samples)
        if waveform.dim() == 1:
            # Mono (samples,) -> (channel, time)
            waveform = waveform.unsqueeze(0)
        else:
            # Gradio uses (samples, channels); pyannote expects (channel, time).
            waveform = waveform.T.contiguous()

        if waveform.is_floating_point():
            waveform = waveform.to(torch.float32)
        else:
            # Signed integer PCM -> float in [-1, 1).
            full_scale = float(torch.iinfo(waveform.dtype).max) + 1.0
            waveform = waveform.to(torch.float32) / full_scale

        return {"waveform": waveform, "sample_rate": int(sample_rate)}

    raise TypeError(f"unsupported audio input of type {type(audio).__name__}")

# Create the Gradio interface
interface = gr.Interface(
    title="Speaker Diarization",
    description="Upload an audio file (WAV, MP3, etc.) to detect speakers and their timestamps.",
    # `diarize` receives the audio input's value and returns the text shown
    # in the output box.
    fn=diarize,
    inputs="audio",
    outputs="text",
)

# Launch the interface only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()