"""Gradio demo: run pyannote speaker diarization on an uploaded audio file."""

import os
from typing import Optional

import gradio as gr
from dotenv import load_dotenv

# Load variables from a local .env file (if any) before reading HF_API_KEY.
load_dotenv()
HF_API_KEY = os.getenv("HF_API_KEY")

# Kept after load_dotenv(), matching the original statement order, so that any
# environment variables supplied through .env are set before pyannote is imported.
from pyannote.audio import Pipeline  # noqa: E402

# Setup required before the pretrained pipeline can be used:
# 1. visit hf.co/pyannote/speaker-diarization and accept user conditions
# 2. visit hf.co/pyannote/segmentation and accept user conditions
# 3. visit hf.co/settings/tokens to create an access token
# 4. instantiate pretrained speaker diarization pipeline
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization@2.1",
    use_auth_token=HF_API_KEY,
)


def transcribe_audio(audio: Optional[str]) -> str:
    """Return a per-turn speaker diarization report for an audio file.

    Despite its name, this function does not produce a transcript: it applies
    the module-level diarization ``pipeline`` and reports, for every speaker
    turn, its start time, end time and speaker label.

    Args:
        audio: Path to the audio file to analyse, as delivered by the
            ``gr.Audio(type="filepath")`` input below. ``None`` when the form
            was submitted without a file.

    Returns:
        One line per turn, formatted as
        ``start=<s>s stop=<s>s speaker_<label>`` and terminated by a newline.
        An empty string when ``audio`` is ``None`` or no turns are produced.
    """
    if audio is None:
        # Nothing was uploaded; do not hand None to the pipeline.
        return ""

    # Apply the pipeline to the audio file.
    diarization = pipeline(audio)

    # Each item is (segment, track, label); the track name is not reported.
    return "".join(
        f"start={turn.start:.1f}s stop={turn.end:.1f}s speaker_{speaker}" + "\n"
        for turn, _, speaker in diarization.itertracks(yield_label=True)
    )


iface = gr.Interface(
    fn=transcribe_audio,
    # type="filepath" makes Gradio pass a path string to the callback instead
    # of its default (sample_rate, numpy array) tuple, which is not an input
    # the diarization pipeline is called with a usable form of.
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs="text",
)

iface.launch()