Nitzantry1 committed on
Commit
ebec731
verified
1 Parent(s): 7f4d630

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -19
app.py CHANGED
@@ -1,28 +1,62 @@
1
  import gradio as gr
2
  from pyannote.audio import Pipeline
 
3
  import torch
4
 
5
- # יצירת הפייפליין - שים לב שצריך להחליף את הטוקן
6
- pipeline = Pipeline.from_pretrained(
7
- "pyannote/[email protected]",
8
- use_auth_token="YOUR_HF_TOKEN"
9
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- def process_audio(audio_file, min_speakers=None, max_speakers=None):
12
  try:
13
- # 讛驻注诇转 讛讚讬讗专讬讝爪讬讛
 
 
 
 
 
 
 
 
14
  diarization = pipeline(
15
- audio_file,
16
  min_speakers=min_speakers if min_speakers > 0 else None,
17
  max_speakers=max_speakers if max_speakers > 0 else None
18
  )
19
 
20
- # 讛诪专转 讛转讜爪讗讜转 诇讟拽住讟 诪讜讘谞讛
21
- result = ""
22
  for turn, _, speaker in diarization.itertracks(yield_label=True):
23
  line = f"[{turn.start:.1f}s -> {turn.end:.1f}s] {speaker}\n"
24
  result += line
25
-
 
 
 
 
 
 
 
 
26
  return result
27
 
28
  except Exception as e:
@@ -32,14 +66,53 @@ def process_audio(audio_file, min_speakers=None, max_speakers=None):
32
  demo = gr.Interface(
33
  fn=process_audio,
34
  inputs=[
35
- gr.Audio(label="拽讜讘抓 讗讜讚讬讜"),
36
- gr.Number(label="诪讬谞讬诪讜诐 讚讜讘专讬诐", value=0),
37
- gr.Number(label="诪拽住讬诪讜诐 讚讜讘专讬诐", value=0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  ],
39
- outputs=gr.Text(label="转讜爪讗讜转 讛讝讬讛讜讬"),
40
- title="讝讬讛讜讬 讚讜讘专讬诐 讘讛拽诇讟讛",
41
- description="讛注诇讛 拽讜讘抓 讗讜讚讬讜 诇讝讬讛讜讬 讛讚讜讘专讬诐 讛砖讜谞讬诐 讜讛讝诪谞讬诐 砖诇讛诐",
42
- examples=[["example.wav", 2, 5]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  )
44
 
45
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from pyannote.audio import Pipeline
3
+ import os
4
  import torch
5
 
6
# Process-wide cache: process_audio calls initialize_pipeline() on every
# request, so without this the model would be re-loaded for each upload.
_PIPELINE_CACHE = None


def initialize_pipeline():
    """Load the pyannote speaker-diarization pipeline, reusing a cached copy.

    The Hugging Face access token is read from the ``HF_TOKEN`` environment
    variable. After the first successful load, the same pipeline object is
    returned by later calls. Failed attempts are not cached, so a later call
    tries again.

    Returns:
        The loaded pipeline (moved to CUDA when ``torch.cuda.is_available()``
        is true), or ``None`` when the token is missing or loading raised.
        The error is printed rather than propagated.
    """
    global _PIPELINE_CACHE
    if _PIPELINE_CACHE is not None:
        return _PIPELINE_CACHE

    try:
        # Read the token from the environment instead of hard-coding it.
        hf_token = os.getenv('HF_TOKEN')
        if not hf_token:
            # Message text restored from mis-decoded (mojibake) Hebrew.
            raise ValueError("חסר טוקן. הגדר HF_TOKEN במשתני הסביבה")

        # NOTE(review): the checkpoint id appeared as
        # "pyannote/[email protected]", an e-mail-obfuscation placeholder
        # that is not a valid model id. "speaker-diarization@2.1" is the
        # presumed original -- confirm the exact revision.
        pipeline = Pipeline.from_pretrained(
            "pyannote/speaker-diarization@2.1",
            use_auth_token=hf_token
        )

        # Move to the GPU when one is available.
        if torch.cuda.is_available():
            pipeline = pipeline.to(torch.device("cuda"))

        _PIPELINE_CACHE = pipeline
        return pipeline

    except Exception as e:
        # Best-effort by design: the caller turns None into a user-facing
        # error message, so report the cause and do not raise.
        print(f"שגיאה באתחול הפייפליין: {str(e)}")
        return None
28
 
29
+ def process_audio(audio_path, min_speakers=None, max_speakers=None):
30
  try:
31
+ # 讘讚讬拽讛 砖讛拽讜讘抓 拽讬讬诐
32
+ if not audio_path:
33
+ return "诇讗 谞讘讞专 拽讜讘抓 讗讜讚讬讜"
34
+
35
+ pipeline = initialize_pipeline()
36
+ if pipeline is None:
37
+ return "砖讙讬讗讛 讘讗转讞讜诇 讛诪讜讚诇. 讘讚讜拽 讗转 讛讟讜拽谉 讜讛讛专砖讗讜转"
38
+
39
+ # 注讬讘讜讚 讛拽讜讘抓
40
  diarization = pipeline(
41
+ audio_path,
42
  min_speakers=min_speakers if min_speakers > 0 else None,
43
  max_speakers=max_speakers if max_speakers > 0 else None
44
  )
45
 
46
+ # 讬爪讬专转 驻诇讟 诪讗讜专讙谉
47
+ result = "转讜爪讗讜转 讝讬讛讜讬 讛讚讜讘专讬诐:\n\n"
48
  for turn, _, speaker in diarization.itertracks(yield_label=True):
49
  line = f"[{turn.start:.1f}s -> {turn.end:.1f}s] {speaker}\n"
50
  result += line
51
+
52
+ # 讛讜住驻转 住讟讟讬住讟讬拽讜转
53
+ unique_speakers = len(set(diarization.labels()))
54
+ total_duration = sum(turn.duration for turn, _, _ in diarization.itertracks(yield_label=True))
55
+
56
+ result += f"\n---\n住讬讻讜诐:\n"
57
+ result += f"诪住驻专 讚讜讘专讬诐 砖讝讜讛讜: {unique_speakers}\n"
58
+ result += f"诪砖讱 讻讜诇诇: {total_duration:.1f} 砖谞讬讜转"
59
+
60
  return result
61
 
62
  except Exception as e:
 
66
# Gradio UI: one audio upload plus two optional speaker-count bounds, wired to
# process_audio. All user-visible Hebrew text below was restored from
# mis-decoded (mojibake) literals.

# Both speaker-count fields share the same numeric options. 0 is the
# "not specified" value: process_audio passes None to the pipeline unless the
# number is greater than 0.
_speaker_count_options = dict(value=0, minimum=0, step=1)

demo = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(
            label="קובץ אודיו",
            source="upload",
            # Per the Gradio docs, "filepath" hands the callback a path on
            # disk rather than decoded samples.
            type="filepath"
        ),
        gr.Number(
            label="מינימום דוברים (אופציונלי)",
            **_speaker_count_options
        ),
        gr.Number(
            label="מקסימום דוברים (אופציונלי)",
            **_speaker_count_options
        )
    ],
    outputs=gr.Textbox(
        label="תוצאות הזיהוי",
        lines=10
    ),
    title="זיהוי דוברים בהקלטות",
    # NOTE(review): the original indentation inside this string is not
    # recoverable from the diff view; lines are kept flush-left.
    description="""
העלה קובץ אודיו לזיהוי הדוברים השונים והזמנים שלהם.

הערות:
- אם ידוע לך מספר הדוברים, הזן אותו כדי לשפר את הדיוק
- תומך בפורמטים: WAV, MP3, FLAC
- מומלץ להשתמש בהקלטות באיכות טובה
- משך מקסימלי: 2 שעות
""",
    # NOTE(review): presumably these files ship next to app.py -- confirm
    # they exist, otherwise the examples cannot be loaded.
    examples=[
        ["example.wav", 2, 4],
        ["interview.mp3", 2, 2]
    ]
)
106
 
107
if __name__ == "__main__":
    # Print a little information about the runtime environment.
    space_id = os.environ.get('SPACE_ID', 'unknown')
    print(f"Space name: {space_id}")
    has_gpu = torch.cuda.is_available()
    print(f"GPU available: {has_gpu}")

    # Start the web interface.
    launch_options = {
        "share": True,
        "enable_queue": True,
        "debug": True,
    }
    demo.launch(**launch_options)