File size: 1,031 Bytes
678798d
3ab5767
30323fc
678798d
30323fc
 
 
 
 
678798d
3ab5767
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30323fc
3ab5767
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import gradio as gr
from pyannote.audio import Pipeline
import os

# Read the Hugging Face access token from the environment variables
auth_token = os.getenv("HF_AUTH_TOKEN")

# Load the pretrained pyannote speaker-diarization pipeline, authenticating
# against the Hugging Face Hub with the token read above.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization",
    use_auth_token=auth_token,
)

# pyannote.audio 3.x prints a hint and returns None (rather than raising)
# when the pipeline cannot be downloaded, e.g. because the token is missing
# or the model's user conditions were not accepted. Fail fast here so the
# app does not start up only to crash on every request.
if pipeline is None:
    raise RuntimeError(
        "Could not load the 'pyannote/speaker-diarization' pipeline. "
        "Set the HF_AUTH_TOKEN environment variable to a valid Hugging Face "
        "access token and accept the model's user conditions on the Hub."
    )

# Function that performs speaker diarization
def diarize(audio_file):
    """Run speaker diarization on an audio file and list the speaker turns.

    Args:
        audio_file: Path to the audio file to process, as handed over by the
            Gradio audio input. Gradio passes ``None`` when the user submits
            the form without providing audio.

    Returns:
        A list with one dict per speaker turn, in the order yielded by the
        diarization result. Each dict has the keys ``"start"`` and ``"end"``
        (the turn boundaries reported by pyannote, presumably in seconds)
        and ``"speaker"`` (the speaker label).

    Raises:
        gr.Error: If no audio file was provided.
    """
    # Surface a readable message in the UI instead of letting the pipeline
    # fail with an opaque error on a missing input.
    if not audio_file:
        raise gr.Error("Please upload an audio file before submitting.")

    diarization = pipeline(audio_file)
    # The middle element of each yielded tuple is the track name, unused here.
    return [
        {"start": turn.start, "end": turn.end, "speaker": speaker}
        for turn, _, speaker in diarization.itertracks(yield_label=True)
    ]

# Define the Gradio interface
# type="filepath" makes Gradio pass the uploaded audio to the function as a
# path on disk, which is then handed to the diarization function.
_audio_input = gr.Audio(type="filepath")

_description = "Upload an audio file to get speaker diarization (timestamps and speaker IDs only)."

# Wire the diarization function to an audio input and a JSON output.
interface = gr.Interface(
    description=_description,
    outputs="json",
    inputs=_audio_input,
    fn=diarize,
)

# Start the web UI
interface.launch()