# Nitzantry1's picture
# Update app.py
# 4d2cffe verified
import os
import tempfile
import gradio as gr
from pyannote.audio import Pipeline
# Read the Hugging Face token from the Space secret.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    # Fail fast: the token is required by Pipeline.from_pretrained below.
    raise ValueError("HF_TOKEN is missing. Please set it in the Secrets section.")

# Load the pyannote speaker-diarization pipeline once at import time; the
# module-level `pipeline` is what `diarize` calls for every request.
try:
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization",
        use_auth_token=hf_token,
    )
except Exception as e:
    # Chain explicitly so the original traceback is reported as the cause.
    raise RuntimeError(f"Failed to load the pipeline: {e}") from e

# NOTE(review): pyannote's from_pretrained is documented to return None
# (instead of raising) for some download/authentication failures -- confirm
# against the installed version. Without this check the failure would only
# surface later as "'NoneType' object is not callable" on the first request.
if pipeline is None:
    raise RuntimeError(
        "Failed to load the pipeline: Pipeline.from_pretrained returned None "
        "(check that HF_TOKEN is valid and has access to the model)."
    )
# Speaker diarization function.
def diarize(audio):
    """Run speaker diarization on an uploaded audio file.

    Args:
        audio: A binary file-like object exposing ``read()`` that holds the
            uploaded audio, or ``None`` when nothing was uploaded.

    Returns:
        A string with one line per speaker turn, formatted as
        ``"<speaker>: <start>s - <end>s"`` (one decimal place), or a message
        starting with ``"Error during diarization:"`` if anything fails.
        Errors are returned rather than raised so they show up in the text
        output instead of crashing the request.
    """
    if audio is None:
        return "Error during diarization: no audio file was provided."

    temp_audio_path = None
    try:
        # Copy the upload to a named temp file so the pipeline can be given a
        # path. delete=False keeps the file on disk after the handle closes.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
            temp_audio_path = temp_audio.name
            temp_audio.write(audio.read())

        # The handle is closed (and therefore flushed) before the pipeline
        # reads the file.
        diarization = pipeline(temp_audio_path)

        # One output line per (segment, speaker-label) pair.
        return "\n".join(
            f"{speaker}: {turn.start:.1f}s - {turn.end:.1f}s"
            for turn, _, speaker in diarization.itertracks(yield_label=True)
        )
    except Exception as e:
        return f"Error during diarization: {e}"
    finally:
        # Remove the temp file on success AND failure; previously it was
        # leaked whenever an exception occurred after it was created.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                # Best-effort cleanup: never mask the real result or error.
                pass
# Build the Gradio interface.
# Upload-only audio input; `diarize` calls .read() on the value it receives.
# NOTE(review): `gr.inputs` looks like the legacy Gradio namespace -- confirm
# it is available in the Gradio version this app is pinned to.
_audio_upload = gr.inputs.Audio(type="file", source="upload")

# Single-function UI: uploaded audio in, plain-text speaker timeline out.
interface = gr.Interface(
    title="Speaker Diarization",
    description="Upload an audio file (WAV, MP3, etc.) to detect speakers and their timestamps.",
    fn=diarize,
    inputs=_audio_upload,
    outputs="text",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()