# Nitzantry1's picture
# Update app.py
# ff841ad verified
import torch
import spaces
import gradio as gr
import os
from pyannote.audio import Pipeline
# Instantiate the diarization pipeline once, at import time. On any failure
# `pipeline` is set to None so the request handlers below can report the
# problem instead of crashing.
try:
    # Make sure the token is present in the environment and is not empty.
    auth_token = os.environ.get("HUGGINGFACE_READ_TOKEN")
    if not auth_token:
        raise ValueError("HUGGINGFACE_READ_TOKEN not found or is empty")

    print("Token found, attempting to initialize pipeline...")

    # Try to initialize the pipeline.
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1",
        use_auth_token=auth_token,
        cache_dir="./cache",  # try to reuse a local cache between runs
    )

    # NOTE(review): pyannote.audio 3.x is documented to print a hint and
    # return None (rather than raise) when a gated/private pipeline cannot be
    # downloaded. Fail with an explicit message instead of the opaque
    # "'NoneType' object has no attribute 'to'" that would follow.
    if pipeline is None:
        raise RuntimeError(
            "Pipeline.from_pretrained returned None; check that the token "
            "grants access to pyannote/speaker-diarization-3.1"
        )

    # Keep the pipeline on CPU only (original note: the Space runs on the
    # free tier).
    device = torch.device("cpu")
    pipeline.to(device)
    print("Pipeline initialized successfully!")
except Exception as e:
    print(f"Error initializing pipeline: {e}")
    pipeline = None
def save_audio(audio):
    """Copy the uploaded audio file to a private temporary ``.wav`` path.

    Args:
        audio: Filesystem path of the uploaded audio file.

    Returns:
        The path of the temporary copy on success, ``None`` when the copy
        fails (the error is printed), or the string
        ``"Error: Pipeline not initialized"`` when the module-level
        ``pipeline`` is ``None``.
    """
    import shutil
    import tempfile

    if pipeline is None:
        return "Error: Pipeline not initialized"

    temp_file = None
    try:
        # Use a unique file per call: a fixed name such as "temp.wav" would be
        # overwritten (and later deleted) by any request running concurrently.
        fd, temp_file = tempfile.mkstemp(prefix="diarization_", suffix=".wav")
        os.close(fd)
        # Stream the copy instead of loading the whole upload into memory.
        shutil.copyfile(audio, temp_file)
    except Exception as e:
        print(f"Error saving audio file: {e}")
        # Best-effort cleanup of a partially written temporary file.
        if temp_file is not None:
            try:
                os.remove(temp_file)
            except OSError:
                pass
        return None

    print(f"Audio file saved to {temp_file}")
    return temp_file
@spaces.GPU(duration=60 * 2)
def diarize_audio(temp_file, num_speakers, min_speakers, max_speakers):
    """Run the diarization pipeline on ``temp_file`` and return its text form.

    Args:
        temp_file: Path of the audio file to process. The file is deleted
            before this function returns, whether or not processing succeeded.
        num_speakers: Exact number of speakers; ``None`` or a value that is
            not positive means "not specified".
        min_speakers: Lower bound on the number of speakers; same convention.
        max_speakers: Upper bound on the number of speakers; same convention.

    Returns:
        ``str(diarization)`` on success, or a message starting with
        ``"Error"`` when the pipeline is unavailable or processing fails.
    """
    if pipeline is None:
        return "Error: Pipeline not initialized"

    try:
        # Build the optional speaker-count constraints from the user input.
        requested = (
            ("num_speakers", num_speakers),
            ("min_speakers", min_speakers),
            ("max_speakers", max_speakers),
        )
        params = {}
        for name, value in requested:
            # A cleared numeric field arrives as None: treat it as "not set".
            if value is None:
                continue
            # Numeric UI fields may deliver floats (e.g. 2.0); the speaker
            # counts are passed on as plain integers.
            count = int(value)
            if count > 0:
                params[name] = count

        print(f"Processing audio file {temp_file} with parameters: {params}")
        diarization = pipeline(temp_file, **params)
        print("Diarization completed successfully!")
    except Exception as e:
        print(f"Error processing audio: {e}")
        return f"Error processing audio: {e}"
    finally:
        # Remove the temporary file after processing, including when the
        # pipeline raised, so failed requests do not leave files behind.
        try:
            os.remove(temp_file)
            print(f"Temporary file {temp_file} removed successfully.")
        except Exception as cleanup_error:
            print(f"Error removing temporary file {temp_file}: {cleanup_error}")

    return str(diarization)
def timestamp_to_seconds(timestamp):
    """Convert an ``H:M:S`` timestamp string to a number of seconds.

    Args:
        timestamp: Text made of exactly three colon-separated numeric fields
            (hours, minutes, seconds), e.g. ``"00:01:02.500"``.

    Returns:
        The total number of seconds as a float, or ``None`` when the value
        cannot be converted (a diagnostic message is printed in that case).
    """
    try:
        # Convert the timestamp fields to numbers.
        h, m, s = map(float, timestamp.split(':'))
    except (ValueError, AttributeError, TypeError) as e:
        # ValueError: wrong field count or non-numeric field.
        # AttributeError/TypeError: the input is not a text string (e.g. None).
        print(f"Error converting timestamp to seconds: '{timestamp}'. Error: {e}")
        return None
    return 3600 * h + 60 * m + s
def generate_labels_from_diarization(diarization_output):
    """Write a tab-separated label file from textual diarization output.

    Every non-blank input line is expected to have the shape
    ``[<start> --> <end>] ... <label>``, where ``<start>`` and ``<end>`` are
    timestamps accepted by ``timestamp_to_seconds`` and the label is the last
    whitespace-separated token after the closing bracket. For each line that
    parses, one ``start_seconds<TAB>end_seconds<TAB>label`` row is written to
    ``labels.txt`` in the current working directory.

    Args:
        diarization_output: Multi-line text, one segment per line.

    Returns:
        The path ``'labels.txt'`` if at least one row was written, otherwise
        ``None`` (also when the file cannot be written).
    """
    successful_lines = 0
    labels_path = 'labels.txt'
    try:
        with open(labels_path, 'w', encoding='utf-8') as outfile:
            for line in diarization_output.strip().split('\n'):
                entry = line.strip()
                if not entry:
                    # Blank lines carry no segment; skip them quietly.
                    continue
                try:
                    # Split on the real delimiters rather than slicing fixed
                    # character positions, which would also cut into the label.
                    segment, closing, remainder = entry.partition(']')
                    start_time, arrow, end_time = (
                        segment.lstrip('[').partition('-->')
                    )
                    label_tokens = remainder.split()
                    if not (closing and arrow and label_tokens):
                        raise ValueError("expected '[start --> end] ... label'")

                    start_seconds = timestamp_to_seconds(start_time.strip())
                    end_seconds = timestamp_to_seconds(end_time.strip())
                    if start_seconds is None or end_seconds is None:
                        continue

                    # The label is the last token on the line.
                    label = label_tokens[-1]
                    outfile.write(f"{start_seconds}\t{end_seconds}\t{label}\n")
                    successful_lines += 1
                except Exception as e:
                    print(f"Error processing line: '{entry}'. Error: {e}")
        print(f"Processed {successful_lines} lines successfully.")
        return labels_path if successful_lines > 0 else None
    except Exception as e:
        print(f"Cannot write to file '{labels_path}'. Error: {e}")
        return None
def process_audio(audio, num_speakers, min_speakers, max_speakers):
    """Run the full flow for one request: save, diarize, export labels.

    Args:
        audio: Filesystem path of the uploaded audio file.
        num_speakers: Exact number of speakers (forwarded to ``diarize_audio``).
        min_speakers: Minimum number of speakers (forwarded likewise).
        max_speakers: Maximum number of speakers (forwarded likewise).

    Returns:
        A ``(text, label_file)`` tuple. ``text`` is the diarization output or
        a message starting with ``"Error"``; ``label_file`` is the path
        returned by ``generate_labels_from_diarization`` or ``None``.
    """
    # Stop early when the pipeline is unavailable. Otherwise the error string
    # returned by save_audio would be forwarded as if it were a file path.
    if pipeline is None:
        return "Error: Pipeline not initialized", None

    temp_file = save_audio(audio)
    if temp_file is None:
        return "Error saving audio file", None

    diarization_result = diarize_audio(
        temp_file, num_speakers, min_speakers, max_speakers
    )
    # diarize_audio reports failures as text beginning with "Error".
    if diarization_result.startswith("Error"):
        return diarization_result, None

    label_file = generate_labels_from_diarization(diarization_result)
    return diarization_result, label_file
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("""
# 🗣️Pyannote Speaker Diarization 3.1🗣️
This model takes an audio file as input and outputs the diarization of the speakers in the audio.
Please upload an audio file and adjust the parameters as needed.
If you find this space helpful, please ❤ it.
""")
    audio_input = gr.Audio(type="filepath", label="Upload Audio File")
    # Speaker counts are whole numbers: precision=0 makes the fields deliver
    # integers instead of floats. A value of 0 means "not specified".
    num_speakers_input = gr.Number(
        label="Number of Speakers", value=0, precision=0
    )
    min_speakers_input = gr.Number(
        label="Minimum Number of Speakers", value=0, precision=0
    )
    max_speakers_input = gr.Number(
        label="Maximum Number of Speakers", value=0, precision=0
    )
    process_button = gr.Button("Process")
    diarization_output = gr.Textbox(label="Diarization Output")
    label_file_link = gr.File(label="Download DAW Labels")

    # Wire the button to the processing function: four inputs in, the
    # diarization text and the label file out.
    process_button.click(
        fn=process_audio,
        inputs=[
            audio_input,
            num_speakers_input,
            min_speakers_input,
            max_speakers_input,
        ],
        outputs=[diarization_output, label_file_link],
    )

demo.launch(share=False)