Update app.py
Browse files
app.py
CHANGED
@@ -1,28 +1,62 @@
|
|
1 |
import gradio as gr
|
2 |
from pyannote.audio import Pipeline
|
|
|
3 |
import torch
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
def process_audio(
|
12 |
try:
|
13 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
diarization = pipeline(
|
15 |
-
|
16 |
min_speakers=min_speakers if min_speakers > 0 else None,
|
17 |
max_speakers=max_speakers if max_speakers > 0 else None
|
18 |
)
|
19 |
|
20 |
-
#
|
21 |
-
result = ""
|
22 |
for turn, _, speaker in diarization.itertracks(yield_label=True):
|
23 |
line = f"[{turn.start:.1f}s -> {turn.end:.1f}s] {speaker}\n"
|
24 |
result += line
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
return result
|
27 |
|
28 |
except Exception as e:
|
@@ -32,14 +66,53 @@ def process_audio(audio_file, min_speakers=None, max_speakers=None):
|
|
32 |
demo = gr.Interface(
|
33 |
fn=process_audio,
|
34 |
inputs=[
|
35 |
-
gr.Audio(
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
],
|
39 |
-
outputs=gr.
|
40 |
-
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
)
|
44 |
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
from pyannote.audio import Pipeline
|
3 |
+
import os
|
4 |
import torch
|
5 |
|
6 |
+
def initialize_pipeline():
    """Build the speaker-diarization pipeline once and reuse it afterwards.

    The Hugging Face access token is read from the ``HF_TOKEN`` environment
    variable. A successfully loaded pipeline is stored on the function object
    so later calls return it directly instead of loading the model again.

    Returns:
        The loaded pipeline (moved to CUDA when available), or ``None`` when
        the token is missing or loading fails; the error is printed.
    """
    # Reuse the pipeline from a previous successful call; failures are not
    # cached, so a later call retries initialization.
    cached = getattr(initialize_pipeline, "_cached_pipeline", None)
    if cached is not None:
        return cached

    try:
        # Read the access token from the environment.
        hf_token = os.getenv('HF_TOKEN')
        if not hf_token:
            raise ValueError("讞住专 讟讜拽谉. 讛讙讚专 HF_TOKEN 讘诪砖转谞讬 讛住讘讬讘讛")

        # Create the pipeline.
        # NOTE(review): "[email protected]" looks like an e-mail-obfuscation
        # artifact of a versioned model id (presumably
        # "pyannote/speaker-diarization@<version>") - confirm against the
        # real file before running.
        pipeline = Pipeline.from_pretrained(
            "pyannote/[email protected]",
            use_auth_token=hf_token
        )

        # Move to the GPU when one is available.
        if torch.cuda.is_available():
            pipeline = pipeline.to(torch.device("cuda"))

    except Exception as e:
        # Boundary handler: report the problem and signal failure with None.
        print(f"砖讙讬讗讛 讘讗转讞讜诇 讛驻讬讬驻诇讬讬谉: {str(e)}")
        return None

    initialize_pipeline._cached_pipeline = pipeline
    return pipeline
|
28 |
|
29 |
+
def process_audio(audio_path, min_speakers=None, max_speakers=None):
|
30 |
try:
|
31 |
+
# 讘讚讬拽讛 砖讛拽讜讘抓 拽讬讬诐
|
32 |
+
if not audio_path:
|
33 |
+
return "诇讗 谞讘讞专 拽讜讘抓 讗讜讚讬讜"
|
34 |
+
|
35 |
+
pipeline = initialize_pipeline()
|
36 |
+
if pipeline is None:
|
37 |
+
return "砖讙讬讗讛 讘讗转讞讜诇 讛诪讜讚诇. 讘讚讜拽 讗转 讛讟讜拽谉 讜讛讛专砖讗讜转"
|
38 |
+
|
39 |
+
# 注讬讘讜讚 讛拽讜讘抓
|
40 |
diarization = pipeline(
|
41 |
+
audio_path,
|
42 |
min_speakers=min_speakers if min_speakers > 0 else None,
|
43 |
max_speakers=max_speakers if max_speakers > 0 else None
|
44 |
)
|
45 |
|
46 |
+
# 讬爪讬专转 驻诇讟 诪讗讜专讙谉
|
47 |
+
result = "转讜爪讗讜转 讝讬讛讜讬 讛讚讜讘专讬诐:\n\n"
|
48 |
for turn, _, speaker in diarization.itertracks(yield_label=True):
|
49 |
line = f"[{turn.start:.1f}s -> {turn.end:.1f}s] {speaker}\n"
|
50 |
result += line
|
51 |
+
|
52 |
+
# 讛讜住驻转 住讟讟讬住讟讬拽讜转
|
53 |
+
unique_speakers = len(set(diarization.labels()))
|
54 |
+
total_duration = sum(turn.duration for turn, _, _ in diarization.itertracks(yield_label=True))
|
55 |
+
|
56 |
+
result += f"\n---\n住讬讻讜诐:\n"
|
57 |
+
result += f"诪住驻专 讚讜讘专讬诐 砖讝讜讛讜: {unique_speakers}\n"
|
58 |
+
result += f"诪砖讱 讻讜诇诇: {total_duration:.1f} 砖谞讬讜转"
|
59 |
+
|
60 |
return result
|
61 |
|
62 |
except Exception as e:
|
|
|
66 |
# Gradio UI: one uploaded audio file plus optional minimum/maximum speaker
# counts go to process_audio; its text result is shown in a textbox.
demo = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(
            label="拽讜讘抓 讗讜讚讬讜",
            source="upload",
            type="filepath"
        ),
        # precision=0 makes Gradio round to and deliver an int; speaker counts
        # are whole numbers. 0 means "not specified" (process_audio maps
        # non-positive values to None).
        gr.Number(
            label="诪讬谞讬诪讜诐 讚讜讘专讬诐 (讗讜驻爪讬讜谞诇讬)",
            value=0,
            minimum=0,
            step=1,
            precision=0
        ),
        gr.Number(
            label="诪拽住讬诪讜诐 讚讜讘专讬诐 (讗讜驻爪讬讜谞诇讬)",
            value=0,
            minimum=0,
            step=1,
            precision=0
        )
    ],
    outputs=gr.Textbox(
        label="转讜爪讗讜转 讛讝讬讛讜讬",
        lines=10
    ),
    title="讝讬讛讜讬 讚讜讘专讬诐 讘讛拽诇讟讜转",
    description="""
讛注诇讛 拽讜讘抓 讗讜讚讬讜 诇讝讬讛讜讬 讛讚讜讘专讬诐 讛砖讜谞讬诐 讜讛讝诪谞讬诐 砖诇讛诐.

讛注专讜转:
- 讗诐 讬讚讜注 诇讱 诪住驻专 讛讚讜讘专讬诐, 讛讝谉 讗讜转讜 讻讚讬 诇砖驻专 讗转 讛讚讬讜拽
- 转讜诪讱 讘驻讜专诪讟讬诐: WAV, MP3, FLAC
- 诪讜诪诇抓 诇讛砖转诪砖 讘讛拽诇讟讜转 讘讗讬讻讜转 讟讜讘讛
- 诪砖讱 诪拽住讬诪诇讬: 2 砖注讜转
""",
    # Each example row is [audio file, minimum speakers, maximum speakers].
    # NOTE(review): these files are not visible here - confirm they are
    # shipped alongside app.py.
    examples=[
        ["example.wav", 2, 4],
        ["interview.mp3", 2, 2]
    ]
)
|
106 |
|
107 |
+
if __name__ == "__main__":
    # Print basic information about the runtime environment.
    space_name = os.getenv('SPACE_ID', 'unknown')
    print(f"Space name: {space_name}")
    print(f"GPU available: {torch.cuda.is_available()}")

    # Enable request queuing through .queue(); passing enable_queue to
    # launch() is deprecated in Gradio 3.x and no longer accepted afterwards.
    demo.queue()

    # Launch the interface.
    demo.launch(
        share=True,
        debug=True
    )
|