Allow using any pyannote pretrained pipeline
Browse files- app.py +25 -11
- pyannote_viewer/README.md +19 -18
- pyannote_viewer/backend/pyannote_viewer/pyannote_viewer.py +22 -11
- pyannote_viewer/frontend/Index.svelte +2 -3
- pyannote_viewer/frontend/interactive/InteractiveAudio.svelte +7 -7
- pyannote_viewer/frontend/player/AudioPlayer.svelte +49 -29
- pyannote_viewer/frontend/shared/types.ts +9 -0
- pyannote_viewer/frontend/static/StaticAudio.svelte +3 -4
- requirements.txt +1 -1
app.py
CHANGED
@@ -2,13 +2,18 @@ import gradio as gr
|
|
2 |
from pyannote_viewer import PyannoteViewer
|
3 |
from pyannote.audio import Pipeline
|
4 |
import os
|
|
|
5 |
|
6 |
-
|
7 |
-
def apply_pipeline(audio: str) -> tuple:
|
8 |
pipeline = Pipeline.from_pretrained(
|
9 |
-
|
10 |
)
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
|
14 |
with gr.Blocks() as demo:
|
@@ -22,20 +27,29 @@ with gr.Blocks() as demo:
|
|
22 |
)
|
23 |
# space title and description
|
24 |
with gr.Column(scale=10):
|
25 |
-
gr.Markdown('#
|
26 |
|
27 |
gr.Markdown(
|
28 |
-
"
|
29 |
-
"\
|
30 |
-
"\n - Click on the apply pipeline button"
|
31 |
-
"\n - After pipeline processed the audio, you can then listen for each speaker separetely. Annotations on waveforms correspond to the speaker diarization produced by the pipeline, with one color per speaker."
|
32 |
)
|
|
|
|
|
|
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
audio = gr.Audio(type="filepath")
|
35 |
-
|
|
|
|
|
36 |
source_viewer = PyannoteViewer(interactive=False)
|
37 |
|
38 |
-
btn.click(fn=apply_pipeline, inputs=[audio], outputs=[source_viewer])
|
39 |
|
40 |
|
41 |
if __name__ == "__main__":
|
|
|
2 |
from pyannote_viewer import PyannoteViewer
|
3 |
from pyannote.audio import Pipeline
|
4 |
import os
|
5 |
+
from huggingface_hub import HfApi
|
6 |
|
7 |
+
def apply_pipeline(audio: str, pipeline_name: str) -> tuple:
    """Run a pretrained pyannote pipeline on an audio file.

    Args:
        audio: Path to the audio file to process (the ``gr.Audio``
            component is configured with ``type="filepath"``).
        pipeline_name: Hub identifier of the pipeline to load, as selected
            in the dropdown (e.g. a ``pyannote/...`` repository name).

    Returns:
        The pipeline output unchanged when it is already a tuple; otherwise
        a ``(output, audio)`` pair, so the viewer always receives the
        annotation together with something it can play back.

    Raises:
        gr.Error: If no audio was provided, or if the pipeline could not be
            loaded (for instance because it is gated or the token is
            missing or invalid).
    """
    # Clicking the button before uploading/recording yields an empty value;
    # report it to the user instead of failing inside the pipeline.
    if not audio:
        raise gr.Error("Please upload or record an audio file first.")

    # `.get` avoids a bare KeyError when HF_TOKEN is not set; public
    # pipelines can still be loaded without a token.
    pipeline = Pipeline.from_pretrained(
        pipeline_name, use_auth_token=os.environ.get("HF_TOKEN")
    )

    # from_pretrained may return None instead of raising when the pipeline
    # cannot be downloaded, which would otherwise surface as
    # "'NoneType' object is not callable".
    if pipeline is None:
        raise gr.Error(
            f"Could not load pipeline '{pipeline_name}'. Make sure HF_TOKEN "
            "is set and that you have accepted the pipeline's user conditions."
        )

    outputs = pipeline(audio)
    if isinstance(outputs, tuple):
        return outputs
    return (outputs, audio)
|
17 |
|
18 |
|
19 |
with gr.Blocks() as demo:
|
|
|
27 |
)
|
28 |
# space title and description
|
29 |
with gr.Column(scale=10):
|
30 |
+
gr.Markdown('# pyannote pretrained pipelines')
|
31 |
|
32 |
gr.Markdown(
|
33 |
+
"You like [pyannote.audio](https://github.com/pyannote/pyannote-audio)? Consider using [pyannoteAI](https://pyannote.ai/) for better and faster options.\n"
|
34 |
+
"\nGo [here](https://huggingface.co/pyannote) for more detail on each pipeline available in this space."
|
|
|
|
|
35 |
)
|
36 |
+
|
37 |
+
gr.Markdown()
|
38 |
+
|
39 |
|
40 |
+
gr.Markdown("#### Select a pretrained pipeline:")
|
41 |
+
available_pipelines = [p.modelId for p in HfApi().list_models(filter="pyannote-audio-pipeline")]
|
42 |
+
available_pipelines = list(filter(lambda p: p.startswith("pyannote/"), available_pipelines))
|
43 |
+
dropdown = gr.Dropdown(choices=available_pipelines, value=available_pipelines[0], interactive=True, label="Pretrained pipeline")
|
44 |
+
|
45 |
+
gr.Markdown("#### Upload or record an audio:")
|
46 |
audio = gr.Audio(type="filepath")
|
47 |
+
|
48 |
+
btn = gr.Button("Apply pipeline")
|
49 |
+
|
50 |
source_viewer = PyannoteViewer(interactive=False)
|
51 |
|
52 |
+
btn.click(fn=apply_pipeline, inputs=[audio, dropdown], outputs=[source_viewer])
|
53 |
|
54 |
|
55 |
if __name__ == "__main__":
|
pyannote_viewer/README.md
CHANGED
@@ -1,20 +1,20 @@
|
|
1 |
|
2 |
-
# `
|
3 |
<img alt="Static Badge" src="https://img.shields.io/badge/version%20-%201.0.0%20-%20orange">
|
4 |
|
5 |
-
|
6 |
|
7 |
## Installation
|
8 |
|
9 |
```bash
|
10 |
-
pip install
|
11 |
```
|
12 |
|
13 |
## Usage
|
14 |
|
15 |
```python
|
16 |
import gradio as gr
|
17 |
-
from
|
18 |
from pyannote.audio import Pipeline
|
19 |
import os
|
20 |
|
@@ -29,9 +29,9 @@ def apply_pipeline(audio: str) -> tuple:
|
|
29 |
with gr.Blocks() as demo:
|
30 |
audio = gr.Audio(type="filepath")
|
31 |
btn = gr.Button("Apply separation pipeline")
|
32 |
-
|
33 |
|
34 |
-
btn.click(fn=apply_pipeline, inputs=[audio], outputs=[
|
35 |
|
36 |
|
37 |
if __name__ == "__main__":
|
@@ -39,7 +39,7 @@ if __name__ == "__main__":
|
|
39 |
|
40 |
```
|
41 |
|
42 |
-
## `
|
43 |
|
44 |
### Initialization
|
45 |
|
@@ -362,16 +362,16 @@ WaveformOptions | dict | None
|
|
362 |
|
363 |
| name | description |
|
364 |
|:-----|:------------|
|
365 |
-
| `stream` | This listener is triggered when the user streams the
|
366 |
-
| `change` | Triggered when the value of the
|
367 |
-
| `clear` | This listener is triggered when the user clears the
|
368 |
-
| `play` | This listener is triggered when the user plays the media in the
|
369 |
-
| `pause` | This listener is triggered when the media in the
|
370 |
-
| `stop` | This listener is triggered when the user reaches the end of the media playing in the
|
371 |
-
| `start_recording` | This listener is triggered when the user starts recording with the
|
372 |
-
| `pause_recording` | This listener is triggered when the user pauses recording with the
|
373 |
-
| `stop_recording` | This listener is triggered when the user stops recording with the
|
374 |
-
| `upload` | This listener is triggered when the user uploads a file into the
|
375 |
|
376 |
|
377 |
|
@@ -391,7 +391,8 @@ The code snippet below is accurate in cases where the component is used as both
|
|
391 |
def predict(
|
392 |
value: str | tuple[int, numpy.ndarray] | None
|
393 |
) -> tuple[
|
394 |
-
pyannote.core.annotation.Annotation,
|
|
|
395 |
]
|
396 |
| None:
|
397 |
return value
|
|
|
1 |
|
2 |
+
# `pyannote_viewer`
|
3 |
<img alt="Static Badge" src="https://img.shields.io/badge/version%20-%201.0.0%20-%20orange">
|
4 |
|
5 |
+
Gradio custom component to visualize the outputs of pyannote pipelines
|
6 |
|
7 |
## Installation
|
8 |
|
9 |
```bash
|
10 |
+
pip install pyannote-viewer
|
11 |
```
|
12 |
|
13 |
## Usage
|
14 |
|
15 |
```python
|
16 |
import gradio as gr
|
17 |
+
from pyannote_viewer import PyannoteViewer
|
18 |
from pyannote.audio import Pipeline
|
19 |
import os
|
20 |
|
|
|
29 |
with gr.Blocks() as demo:
|
30 |
audio = gr.Audio(type="filepath")
|
31 |
btn = gr.Button("Apply separation pipeline")
|
32 |
+
pyannote_viewer = PyannoteViewer(interactive=False)
|
33 |
|
34 |
+
btn.click(fn=apply_pipeline, inputs=[audio], outputs=[pyannote_viewer])
|
35 |
|
36 |
|
37 |
if __name__ == "__main__":
|
|
|
39 |
|
40 |
```
|
41 |
|
42 |
+
## `PyannoteViewer`
|
43 |
|
44 |
### Initialization
|
45 |
|
|
|
362 |
|
363 |
| name | description |
|
364 |
|:-----|:------------|
|
365 |
+
| `stream` | This listener is triggered when the user streams the PyannoteViewer. |
|
366 |
+
| `change` | Triggered when the value of the PyannoteViewer changes either because of user input (e.g. a user types in a textbox) OR because of a function update (e.g. an image receives a value from the output of an event trigger). See `.input()` for a listener that is only triggered by user input. |
|
367 |
+
| `clear` | This listener is triggered when the user clears the PyannoteViewer using the X button for the component. |
|
368 |
+
| `play` | This listener is triggered when the user plays the media in the PyannoteViewer. |
|
369 |
+
| `pause` | This listener is triggered when the media in the PyannoteViewer stops for any reason. |
|
370 |
+
| `stop` | This listener is triggered when the user reaches the end of the media playing in the PyannoteViewer. |
|
371 |
+
| `start_recording` | This listener is triggered when the user starts recording with the PyannoteViewer. |
|
372 |
+
| `pause_recording` | This listener is triggered when the user pauses recording with the PyannoteViewer. |
|
373 |
+
| `stop_recording` | This listener is triggered when the user stops recording with the PyannoteViewer. |
|
374 |
+
| `upload` | This listener is triggered when the user uploads a file into the PyannoteViewer. |
|
375 |
|
376 |
|
377 |
|
|
|
391 |
def predict(
|
392 |
value: str | tuple[int, numpy.ndarray] | None
|
393 |
) -> tuple[
|
394 |
+
pyannote.core.annotation.Annotation,
|
395 |
+
numpy.ndarray | pathlib.Path | str,
|
396 |
]
|
397 |
| None:
|
398 |
return value
|
pyannote_viewer/backend/pyannote_viewer/pyannote_viewer.py
CHANGED
@@ -19,6 +19,7 @@ from gradio.events import Events
|
|
19 |
from gradio.exceptions import Error
|
20 |
|
21 |
from pyannote.core.annotation import Annotation
|
|
|
22 |
|
23 |
|
24 |
@dataclasses.dataclass
|
@@ -249,7 +250,7 @@ class PyannoteViewer(
|
|
249 |
)
|
250 |
|
251 |
def postprocess(
|
252 |
-
self, value: Tuple[Annotation, np.ndarray] | None
|
253 |
) -> FileData | bytes | None:
|
254 |
"""
|
255 |
Parameters:
|
@@ -260,7 +261,8 @@ class PyannoteViewer(
|
|
260 |
if value is None:
|
261 |
return None
|
262 |
|
263 |
-
annotations,
|
|
|
264 |
labels = annotations.labels()
|
265 |
|
266 |
# format diarization output
|
@@ -271,19 +273,28 @@ class PyannoteViewer(
|
|
271 |
Segment(start=segment.start, end=segment.end, channel=label_idx)
|
272 |
)
|
273 |
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
282 |
|
283 |
return {
|
284 |
"segments": segments,
|
285 |
"labels": labels,
|
286 |
-
"
|
|
|
287 |
}
|
288 |
|
289 |
def stream_output(
|
|
|
19 |
from gradio.exceptions import Error
|
20 |
|
21 |
from pyannote.core.annotation import Annotation
|
22 |
+
from pyannote.core.feature import SlidingWindowFeature
|
23 |
|
24 |
|
25 |
@dataclasses.dataclass
|
|
|
250 |
)
|
251 |
|
252 |
def postprocess(
|
253 |
+
self, value: Tuple[Annotation, np.ndarray | Path | str] | None
|
254 |
) -> FileData | bytes | None:
|
255 |
"""
|
256 |
Parameters:
|
|
|
261 |
if value is None:
|
262 |
return None
|
263 |
|
264 |
+
annotations, audio = value
|
265 |
+
|
266 |
labels = annotations.labels()
|
267 |
|
268 |
# format diarization output
|
|
|
273 |
Segment(start=segment.start, end=segment.end, channel=label_idx)
|
274 |
)
|
275 |
|
276 |
+
if isinstance(audio, SlidingWindowFeature):
|
277 |
+
# save sources in cache
|
278 |
+
audio_filepath = processing_utils.save_audio_to_cache(
|
279 |
+
data=audio.data,
|
280 |
+
sample_rate=16_000,
|
281 |
+
format=self.format,
|
282 |
+
cache_dir=self.GRADIO_CACHE,
|
283 |
+
)
|
284 |
+
multichannel = True
|
285 |
+
elif isinstance(audio, (Path, str)):
|
286 |
+
audio_filepath = audio
|
287 |
+
multichannel = False
|
288 |
+
else:
|
289 |
+
raise ValueError("Unknown type for audio value")
|
290 |
+
|
291 |
+
orig_name = Path(audio_filepath).name
|
292 |
|
293 |
return {
|
294 |
"segments": segments,
|
295 |
"labels": labels,
|
296 |
+
"multichannel": multichannel,
|
297 |
+
"audio_file": FileData(path=audio_filepath, orig_name=orig_name),
|
298 |
}
|
299 |
|
300 |
def stream_output(
|
pyannote_viewer/frontend/Index.svelte
CHANGED
@@ -10,13 +10,13 @@
|
|
10 |
import InteractiveAudio from "./interactive/InteractiveAudio.svelte";
|
11 |
import { StatusTracker } from "@gradio/statustracker";
|
12 |
import { Block, UploadText } from "@gradio/atoms";
|
13 |
-
import type { WaveformOptions,
|
14 |
|
15 |
export let elem_id = "";
|
16 |
export let elem_classes: string[] = [];
|
17 |
export let visible = true;
|
18 |
export let interactive: boolean;
|
19 |
-
export let value:
|
20 |
export let sources:
|
21 |
| ["microphone"]
|
22 |
| ["upload"]
|
@@ -106,7 +106,6 @@
|
|
106 |
minPxPerSec: 20,
|
107 |
mediaControls: waveform_options.show_controls,
|
108 |
sampleRate: waveform_options.sample_rate || 44100,
|
109 |
-
splitChannels: true,
|
110 |
};
|
111 |
|
112 |
|
|
|
10 |
import InteractiveAudio from "./interactive/InteractiveAudio.svelte";
|
11 |
import { StatusTracker } from "@gradio/statustracker";
|
12 |
import { Block, UploadText } from "@gradio/atoms";
|
13 |
+
import type { WaveformOptions, PipelineOutput } from "./shared/types";
|
14 |
|
15 |
export let elem_id = "";
|
16 |
export let elem_classes: string[] = [];
|
17 |
export let visible = true;
|
18 |
export let interactive: boolean;
|
19 |
+
export let value: PipelineOutput | null = null;
|
20 |
export let sources:
|
21 |
| ["microphone"]
|
22 |
| ["upload"]
|
|
|
106 |
minPxPerSec: 20,
|
107 |
mediaControls: waveform_options.show_controls,
|
108 |
sampleRate: waveform_options.sample_rate || 44100,
|
|
|
109 |
};
|
110 |
|
111 |
|
pyannote_viewer/frontend/interactive/InteractiveAudio.svelte
CHANGED
@@ -16,9 +16,9 @@
|
|
16 |
import AudioRecorder from "../recorder/AudioRecorder.svelte";
|
17 |
import StreamAudio from "../streaming/StreamAudio.svelte";
|
18 |
import { SelectSource } from "@gradio/atoms";
|
19 |
-
import type { WaveformOptions,
|
20 |
|
21 |
-
export let value:
|
22 |
export let label: string;
|
23 |
export let root: string;
|
24 |
export let show_label = true;
|
@@ -74,7 +74,7 @@
|
|
74 |
|
75 |
const dispatch = createEventDispatcher<{
|
76 |
change: typeof value;
|
77 |
-
stream:
|
78 |
edit: never;
|
79 |
play: never;
|
80 |
pause: never;
|
@@ -95,7 +95,7 @@
|
|
95 |
): Promise<void> => {
|
96 |
let _audio_blob = new File(blobs, "audio.wav");
|
97 |
const val = await prepare_files([_audio_blob], event === "stream");
|
98 |
-
value.
|
99 |
(await upload(val, root, undefined, upload_fn))?.filter(
|
100 |
Boolean
|
101 |
) as FileData[]
|
@@ -192,8 +192,8 @@
|
|
192 |
}
|
193 |
|
194 |
function handle_load({ detail }: { detail: FileData }): void {
|
195 |
-
value = {"segments": [], "labels": [], "
|
196 |
-
value.
|
197 |
dispatch("change", value);
|
198 |
dispatch("upload", detail);
|
199 |
}
|
@@ -264,7 +264,7 @@
|
|
264 |
{i18n}
|
265 |
on:clear={clear}
|
266 |
on:edit={() => (mode = "edit")}
|
267 |
-
download={show_download_button ? value.
|
268 |
absolute={true}
|
269 |
/>
|
270 |
|
|
|
16 |
import AudioRecorder from "../recorder/AudioRecorder.svelte";
|
17 |
import StreamAudio from "../streaming/StreamAudio.svelte";
|
18 |
import { SelectSource } from "@gradio/atoms";
|
19 |
+
import type { WaveformOptions, PipelineOutput } from "../shared/types";
|
20 |
|
21 |
+
export let value: PipelineOutput | null = null;
|
22 |
export let label: string;
|
23 |
export let root: string;
|
24 |
export let show_label = true;
|
|
|
74 |
|
75 |
const dispatch = createEventDispatcher<{
|
76 |
change: typeof value;
|
77 |
+
stream: typeof value;
|
78 |
edit: never;
|
79 |
play: never;
|
80 |
pause: never;
|
|
|
95 |
): Promise<void> => {
|
96 |
let _audio_blob = new File(blobs, "audio.wav");
|
97 |
const val = await prepare_files([_audio_blob], event === "stream");
|
98 |
+
value.audio_file = (
|
99 |
(await upload(val, root, undefined, upload_fn))?.filter(
|
100 |
Boolean
|
101 |
) as FileData[]
|
|
|
192 |
}
|
193 |
|
194 |
/**
 * Handle a freshly uploaded audio file.
 *
 * Resets the component value to an empty, single-channel pipeline output
 * that points at the uploaded file, then notifies listeners through the
 * "change" and "upload" events.
 */
function handle_load({ detail }: { detail: FileData }): void {
	// Use the `audio_file` key that the rest of the component reads; the
	// camelCase `audioFile` spelling would leave a stray, unused field on
	// the dispatched value.
	value = { segments: [], labels: [], multichannel: false, audio_file: detail };
	dispatch("change", value);
	dispatch("upload", detail);
}
|
|
|
264 |
{i18n}
|
265 |
on:clear={clear}
|
266 |
on:edit={() => (mode = "edit")}
|
267 |
+
download={show_download_button ? value.audio_file.url : null}
|
268 |
absolute={true}
|
269 |
/>
|
270 |
|
pyannote_viewer/frontend/player/AudioPlayer.svelte
CHANGED
@@ -3,15 +3,15 @@
|
|
3 |
import { Music } from "@gradio/icons";
|
4 |
import { format_time, type I18nFormatter } from "@gradio/utils";
|
5 |
import WaveSurfer from "wavesurfer.js";
|
6 |
-
import RegionsPlugin
|
7 |
-
import { skip_audio
|
8 |
import WaveformControls from "../shared/WaveformControls.svelte";
|
9 |
import { Empty } from "@gradio/atoms";
|
10 |
-
import type {
|
11 |
-
import type { WaveformOptions, Segment } from "../shared/types";
|
12 |
import { createEventDispatcher } from "svelte";
|
|
|
13 |
|
14 |
-
export let value:
|
15 |
export let label: string;
|
16 |
export let root: string;
|
17 |
export let i18n: I18nFormatter;
|
@@ -50,7 +50,7 @@
|
|
50 |
}>();
|
51 |
|
52 |
const create_waveform = (): void => {
|
53 |
-
const audio = new Audio(root + `/file=${value.
|
54 |
audio.crossOrigin = "anonymous"
|
55 |
|
56 |
audioContext = new AudioContext();
|
@@ -58,6 +58,7 @@
|
|
58 |
waveform = WaveSurfer.create({
|
59 |
container: container,
|
60 |
media: audio,
|
|
|
61 |
...waveform_settings
|
62 |
});
|
63 |
};
|
@@ -72,6 +73,7 @@
|
|
72 |
$: waveform?.on("decode", (duration: any) => {
|
73 |
audioDecoded = true;
|
74 |
const numChannels = waveform.getDecodedData().numberOfChannels;
|
|
|
75 |
audio_duration = duration;
|
76 |
durationRef && (durationRef.textContent = format_time(duration));
|
77 |
|
@@ -80,6 +82,10 @@
|
|
80 |
splitter = audioContext.createChannelSplitter(numChannels);
|
81 |
mediaNode.connect(splitter);
|
82 |
|
|
|
|
|
|
|
|
|
83 |
// add diarization annotation on each source:
|
84 |
if(!wsRegion){
|
85 |
wsRegion = waveform.registerPlugin(RegionsPlugin.create())
|
@@ -87,13 +93,13 @@
|
|
87 |
const region = wsRegion.addRegion({
|
88 |
start: segment.start,
|
89 |
end: segment.end,
|
90 |
-
channelIdx: segment.channel,
|
91 |
drag: false,
|
92 |
resize: false,
|
93 |
color: colors[segment.channel % colors.length],
|
94 |
});
|
95 |
|
96 |
-
const regionHeight = 100 / numChannels;
|
97 |
region.element.style.cssText += `height: ${regionHeight}% !important;`;
|
98 |
// TODO: Can we do better than force region color ?
|
99 |
region.element.style.cssText += `background-color: ${region.color} !important;`;
|
@@ -144,10 +150,10 @@
|
|
144 |
<Empty size="small">
|
145 |
<Music />
|
146 |
</Empty>
|
147 |
-
{:else if value.
|
148 |
<audio
|
149 |
class="standard-player"
|
150 |
-
src={value.
|
151 |
controls
|
152 |
autoplay={waveform_settings.autoplay}
|
153 |
/>
|
@@ -159,20 +165,30 @@
|
|
159 |
<div class="viewer">
|
160 |
<div class="source-selection">
|
161 |
{#if audioDecoded}
|
162 |
-
{#
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
{/if}
|
177 |
</div>
|
178 |
<div class="waveform-container">
|
@@ -225,6 +241,15 @@
|
|
225 |
background-color: var(--color-accent);
|
226 |
}
|
227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
.component-wrapper {
|
229 |
padding: var(--size-3);
|
230 |
width: 100%;
|
@@ -240,11 +265,6 @@
|
|
240 |
margin-right: 1em;
|
241 |
}
|
242 |
|
243 |
-
.source {
|
244 |
-
display: flex;
|
245 |
-
align-items: center;
|
246 |
-
}
|
247 |
-
|
248 |
:global(::part(wrapper)) {
|
249 |
margin-bottom: var(--size-2);
|
250 |
}
|
|
|
3 |
import { Music } from "@gradio/icons";
|
4 |
import { format_time, type I18nFormatter } from "@gradio/utils";
|
5 |
import WaveSurfer from "wavesurfer.js";
|
6 |
+
import RegionsPlugin from "wavesurfer.js/dist/plugins/regions";
|
7 |
+
import { skip_audio } from "../shared/utils";
|
8 |
import WaveformControls from "../shared/WaveformControls.svelte";
|
9 |
import { Empty } from "@gradio/atoms";
|
10 |
+
import type { WaveformOptions, PipelineOutput } from "../shared/types";
|
|
|
11 |
import { createEventDispatcher } from "svelte";
|
12 |
+
import Color from "@gradio/icons/src/Color.svelte";
|
13 |
|
14 |
+
export let value: PipelineOutput | null = null;
|
15 |
export let label: string;
|
16 |
export let root: string;
|
17 |
export let i18n: I18nFormatter;
|
|
|
50 |
}>();
|
51 |
|
52 |
const create_waveform = (): void => {
|
53 |
+
const audio = new Audio(root + `/file=${value.audio_file.path}`)
|
54 |
audio.crossOrigin = "anonymous"
|
55 |
|
56 |
audioContext = new AudioContext();
|
|
|
58 |
waveform = WaveSurfer.create({
|
59 |
container: container,
|
60 |
media: audio,
|
61 |
+
splitChannels: value.multichannel,
|
62 |
...waveform_settings
|
63 |
});
|
64 |
};
|
|
|
73 |
$: waveform?.on("decode", (duration: any) => {
|
74 |
audioDecoded = true;
|
75 |
const numChannels = waveform.getDecodedData().numberOfChannels;
|
76 |
+
console.log(numChannels);
|
77 |
audio_duration = duration;
|
78 |
durationRef && (durationRef.textContent = format_time(duration));
|
79 |
|
|
|
82 |
splitter = audioContext.createChannelSplitter(numChannels);
|
83 |
mediaNode.connect(splitter);
|
84 |
|
85 |
+
if(!value.multichannel){
|
86 |
+
splitter.connect(audioContext.destination, 0);
|
87 |
+
}
|
88 |
+
|
89 |
// add diarization annotation on each source:
|
90 |
if(!wsRegion){
|
91 |
wsRegion = waveform.registerPlugin(RegionsPlugin.create())
|
|
|
93 |
const region = wsRegion.addRegion({
|
94 |
start: segment.start,
|
95 |
end: segment.end,
|
96 |
+
channelIdx: value.multichannel ? segment.channel : 0,
|
97 |
drag: false,
|
98 |
resize: false,
|
99 |
color: colors[segment.channel % colors.length],
|
100 |
});
|
101 |
|
102 |
+
const regionHeight = 100 / (value.multichannel ? numChannels : 1);
|
103 |
region.element.style.cssText += `height: ${regionHeight}% !important;`;
|
104 |
// TODO: Can we do better than force region color ?
|
105 |
region.element.style.cssText += `background-color: ${region.color} !important;`;
|
|
|
150 |
<Empty size="small">
|
151 |
<Music />
|
152 |
</Empty>
|
153 |
+
{:else if value.audio_file.is_stream}
|
154 |
<audio
|
155 |
class="standard-player"
|
156 |
+
src={value.audio_file.url}
|
157 |
controls
|
158 |
autoplay={waveform_settings.autoplay}
|
159 |
/>
|
|
|
165 |
<div class="viewer">
|
166 |
<div class="source-selection">
|
167 |
{#if audioDecoded}
|
168 |
+
{#if value.multichannel}
|
169 |
+
<!-- Separation pipeline case -->
|
170 |
+
{#each [...Array(waveform.getDecodedData().numberOfChannels).keys()] as channelIdx}
|
171 |
+
<label style={`height: ${waveform_settings.height}px; background-color: ${colors[channelIdx % colors.length]}`}>
|
172 |
+
<input
|
173 |
+
type="radio"
|
174 |
+
name="channels"
|
175 |
+
value={`${channelIdx}`}
|
176 |
+
on:change={(ev) => {
|
177 |
+
splitter.disconnect()
|
178 |
+
splitter.connect(audioContext.destination, Number(ev.target.value), 0);
|
179 |
+
}}
|
180 |
+
/>
|
181 |
+
{value.labels[channelIdx]}
|
182 |
+
</label>
|
183 |
+
{/each}
|
184 |
+
{:else}
|
185 |
+
{#each [...Array(value.labels.length)].keys() as labelIdx}
|
186 |
+
<label style={`background-color: ${colors[labelIdx % colors.length]};`}>
|
187 |
+
<input type="hidden">
|
188 |
+
{value.labels[labelIdx]}
|
189 |
+
</label>
|
190 |
+
{/each}
|
191 |
+
{/if}
|
192 |
{/if}
|
193 |
</div>
|
194 |
<div class="waveform-container">
|
|
|
241 |
background-color: var(--color-accent);
|
242 |
}
|
243 |
|
244 |
+
label {
|
245 |
+
display: flex;
|
246 |
+
align-items: center;
|
247 |
+
margin-bottom: 0.25em;
|
248 |
+
padding-left: 0.5em;
|
249 |
+
padding-right: 0.5em;
|
250 |
+
}
|
251 |
+
|
252 |
+
|
253 |
.component-wrapper {
|
254 |
padding: var(--size-3);
|
255 |
width: 100%;
|
|
|
265 |
margin-right: 1em;
|
266 |
}
|
267 |
|
|
|
|
|
|
|
|
|
|
|
268 |
:global(::part(wrapper)) {
|
269 |
margin-bottom: var(--size-2);
|
270 |
}
|
pyannote_viewer/frontend/shared/types.ts
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
export type WaveformOptions = {
|
2 |
waveform_color?: string;
|
3 |
waveform_progress_color?: string;
|
@@ -13,3 +15,10 @@ export type Segment = {
|
|
13 |
end: number;
|
14 |
channel: number;
|
15 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { FileData } from "@gradio/client";
|
2 |
+
|
3 |
export type WaveformOptions = {
|
4 |
waveform_color?: string;
|
5 |
waveform_progress_color?: string;
|
|
|
15 |
end: number;
|
16 |
channel: number;
|
17 |
}
|
18 |
+
|
19 |
+
export type PipelineOutput = {
|
20 |
+
segments: Segment[];
|
21 |
+
labels: string[];
|
22 |
+
multichannel: boolean;
|
23 |
+
audio_file: FileData;
|
24 |
+
}
|
pyannote_viewer/frontend/static/StaticAudio.svelte
CHANGED
@@ -6,11 +6,10 @@
|
|
6 |
import type { I18nFormatter } from "@gradio/utils";
|
7 |
import AudioPlayer from "../player/AudioPlayer.svelte";
|
8 |
import { createEventDispatcher } from "svelte";
|
9 |
-
import type { FileData } from "@gradio/client";
|
10 |
import { DownloadLink } from "@gradio/wasm/svelte";
|
11 |
-
import type { WaveformOptions,
|
12 |
|
13 |
-
export let value:
|
14 |
export let label: string;
|
15 |
export let root: string;
|
16 |
export let show_label = true;
|
@@ -42,7 +41,7 @@
|
|
42 |
{#if value !== null}
|
43 |
<div class="icon-buttons">
|
44 |
{#if show_download_button}
|
45 |
-
<DownloadLink href={value.
|
46 |
<IconButton Icon={Download} label={i18n("common.download")} />
|
47 |
</DownloadLink>
|
48 |
{/if}
|
|
|
6 |
import type { I18nFormatter } from "@gradio/utils";
|
7 |
import AudioPlayer from "../player/AudioPlayer.svelte";
|
8 |
import { createEventDispatcher } from "svelte";
|
|
|
9 |
import { DownloadLink } from "@gradio/wasm/svelte";
|
10 |
+
import type { WaveformOptions, PipelineOutput } from "../shared/types";
|
11 |
|
12 |
+
export let value: PipelineOutput | null = null;
|
13 |
export let label: string;
|
14 |
export let root: string;
|
15 |
export let show_label = true;
|
|
|
41 |
{#if value !== null}
|
42 |
<div class="icon-buttons">
|
43 |
{#if show_download_button}
|
44 |
+
<DownloadLink href={value.audio_file.url} download={value.audio_file.orig_name || value.audio_file.path}>
|
45 |
<IconButton Icon={Download} label={i18n("common.download")} />
|
46 |
</DownloadLink>
|
47 |
{/if}
|
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
asteroid==0.7.0
|
2 |
fastapi==0.112.4
|
3 |
-
|
4 |
pyannote-audio==3.3.2
|
5 |
spaces==0.30.2
|
6 |
transformers==4.46.3
|
|
|
1 |
asteroid==0.7.0
|
2 |
fastapi==0.112.4
|
3 |
+
pyannote-viewer==1.0.0
|
4 |
pyannote-audio==3.3.2
|
5 |
spaces==0.30.2
|
6 |
transformers==4.46.3
|