github-actions[bot] committed on
Commit
3f16dbd
·
0 Parent(s):

Sync to HuggingFace Spaces

Browse files
Files changed (11) hide show
  1. .gitattributes +3 -0
  2. .github/workflows/sync.yml +27 -0
  3. .gitignore +152 -0
  4. LICENSE +21 -0
  5. README.md +36 -0
  6. app.py +3 -0
  7. fonts/NotoSansTC-Regular.ttf +3 -0
  8. fonts/OFL.txt +93 -0
  9. gen.py +195 -0
  10. requirements.txt +6 -0
  11. ui.py +119 -0
.gitattributes ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
3
+ *.ttf filter=lfs diff=lfs merge=lfs -text
.github/workflows/sync.yml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face Spaces
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ sync:
10
+ name: Sync
11
+ runs-on: ubuntu-latest
12
+
13
+ steps:
14
+ - name: Checkout Repository
15
+ uses: actions/checkout@v4
16
+ with:
17
+ lfs: true
18
+
19
+ - run: rm -rf images
20
+
21
+ - name: Sync to Hugging Face Spaces
22
+ uses: JacobLinCool/huggingface-sync@v1
23
+ with:
24
+ github: ${{ secrets.GITHUB_TOKEN }}
25
+ user: jacoblincool # Hugging Face username or organization name
26
+ space: audio-vis # Hugging Face space name
27
+ token: ${{ secrets.HF_TOKEN }} # Hugging Face token
.gitignore ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
105
+ __pypackages__/
106
+
107
+ # Celery stuff
108
+ celerybeat-schedule
109
+ celerybeat.pid
110
+
111
+ # SageMath parsed files
112
+ *.sage.py
113
+
114
+ # Environments
115
+ .env
116
+ .venv
117
+ env/
118
+ venv/
119
+ ENV/
120
+ env.bak/
121
+ venv.bak/
122
+
123
+ # Spyder project settings
124
+ .spyderproject
125
+ .spyproject
126
+
127
+ # Rope project settings
128
+ .ropeproject
129
+
130
+ # mkdocs documentation
131
+ /site
132
+
133
+ # mypy
134
+ .mypy_cache/
135
+ .dmypy.json
136
+ dmypy.json
137
+
138
+ # Pyre type checker
139
+ .pyre/
140
+
141
+ # pytype static type analyzer
142
+ .pytype/
143
+
144
+ # Cython debug symbols
145
+ cython_debug/
146
+
147
+ # PyCharm
148
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
149
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
150
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
151
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
152
+ #.idea/
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 JacobLinCool
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: audio vis
3
+ emoji: 🦄
4
+ colorFrom: indigo
5
+ colorTo: pink
6
+ sdk: gradio
7
+ short_description: Visualizing Audio Features.
8
+ ---
9
+
10
+ # audio-vis
11
+
12
+ Visualizing Audio Features.
13
+
14
+ <a href="https://huggingface.co/spaces/jacoblincool/audio-vis" target="_blank">
15
+ <picture>
16
+ <source media="(prefers-color-scheme: dark)" srcset="https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-md-dark.svg">
17
+ <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-md.svg">
18
+ </picture>
19
+ </a>
20
+
21
+ ## Setup
22
+
23
+ ```sh
24
+ micromamba create -n audio-vis python==3.12 librosa matplotlib numpy scipy pillow gradio -c conda-forge
25
+ ```
26
+
27
+ ## Run
28
+
29
+ ```sh
30
+ micromamba activate audio-vis
31
+ python app.py
32
+ ```
33
+
34
+ ## Screenshot
35
+
36
+ ![Screenshot](./images/audio-vis.png)
app.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from ui import app
2
+
3
+ app.queue(status_update_rate=5.0, max_size=10).launch()
fonts/NotoSansTC-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b137e2eb57a2d1e4cf391c886ab1b783a0e5ddb5c75254748bde00c15cb8ff5
3
+ size 7110560
fonts/OFL.txt ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2014-2021 Adobe (http://www.adobe.com/), with Reserved Font Name 'Source'
2
+
3
+ This Font Software is licensed under the SIL Open Font License, Version 1.1.
4
+ This license is copied below, and is also available with a FAQ at:
5
+ http://scripts.sil.org/OFL
6
+
7
+
8
+ -----------------------------------------------------------
9
+ SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
10
+ -----------------------------------------------------------
11
+
12
+ PREAMBLE
13
+ The goals of the Open Font License (OFL) are to stimulate worldwide
14
+ development of collaborative font projects, to support the font creation
15
+ efforts of academic and linguistic communities, and to provide a free and
16
+ open framework in which fonts may be shared and improved in partnership
17
+ with others.
18
+
19
+ The OFL allows the licensed fonts to be used, studied, modified and
20
+ redistributed freely as long as they are not sold by themselves. The
21
+ fonts, including any derivative works, can be bundled, embedded,
22
+ redistributed and/or sold with any software provided that any reserved
23
+ names are not used by derivative works. The fonts and derivatives,
24
+ however, cannot be released under any other type of license. The
25
+ requirement for fonts to remain under this license does not apply
26
+ to any document created using the fonts or their derivatives.
27
+
28
+ DEFINITIONS
29
+ "Font Software" refers to the set of files released by the Copyright
30
+ Holder(s) under this license and clearly marked as such. This may
31
+ include source files, build scripts and documentation.
32
+
33
+ "Reserved Font Name" refers to any names specified as such after the
34
+ copyright statement(s).
35
+
36
+ "Original Version" refers to the collection of Font Software components as
37
+ distributed by the Copyright Holder(s).
38
+
39
+ "Modified Version" refers to any derivative made by adding to, deleting,
40
+ or substituting -- in part or in whole -- any of the components of the
41
+ Original Version, by changing formats or by porting the Font Software to a
42
+ new environment.
43
+
44
+ "Author" refers to any designer, engineer, programmer, technical
45
+ writer or other person who contributed to the Font Software.
46
+
47
+ PERMISSION & CONDITIONS
48
+ Permission is hereby granted, free of charge, to any person obtaining
49
+ a copy of the Font Software, to use, study, copy, merge, embed, modify,
50
+ redistribute, and sell modified and unmodified copies of the Font
51
+ Software, subject to the following conditions:
52
+
53
+ 1) Neither the Font Software nor any of its individual components,
54
+ in Original or Modified Versions, may be sold by itself.
55
+
56
+ 2) Original or Modified Versions of the Font Software may be bundled,
57
+ redistributed and/or sold with any software, provided that each copy
58
+ contains the above copyright notice and this license. These can be
59
+ included either as stand-alone text files, human-readable headers or
60
+ in the appropriate machine-readable metadata fields within text or
61
+ binary files as long as those fields can be easily viewed by the user.
62
+
63
+ 3) No Modified Version of the Font Software may use the Reserved Font
64
+ Name(s) unless explicit written permission is granted by the corresponding
65
+ Copyright Holder. This restriction only applies to the primary font name as
66
+ presented to the users.
67
+
68
+ 4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
69
+ Software shall not be used to promote, endorse or advertise any
70
+ Modified Version, except to acknowledge the contribution(s) of the
71
+ Copyright Holder(s) and the Author(s) or with their explicit written
72
+ permission.
73
+
74
+ 5) The Font Software, modified or unmodified, in part or in whole,
75
+ must be distributed entirely under this license, and must not be
76
+ distributed under any other license. The requirement for fonts to
77
+ remain under this license does not apply to any document created
78
+ using the Font Software.
79
+
80
+ TERMINATION
81
+ This license becomes null and void if any of the above conditions are
82
+ not met.
83
+
84
+ DISCLAIMER
85
+ THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
86
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
87
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
88
+ OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
89
+ COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
90
+ INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
91
+ DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
92
+ FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
93
+ OTHER DEALINGS IN THE FONT SOFTWARE.
gen.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import os
3
+ import librosa
4
+ import librosa.display
5
+ import numpy as np
6
+ import matplotlib
7
+ from matplotlib.font_manager import fontManager
8
+ import matplotlib.pyplot as plt
9
+ from scipy.signal import butter, lfilter
10
+ from PIL import Image
11
+
12
# Bounds (Hz) for the filter cutoff controls. apply_filters() only enables a
# filter when its cutoff is strictly greater than FILTER_LOWER_BOUND.
FILTER_UPPER_BOUND = 20000
FILTER_LOWER_BOUND = 0

# Register the bundled ./fonts/NotoSansTC-Regular.ttf and make it the default
# plot font (presumably so that CJK file names render in plot titles).
# The path is resolved relative to this module instead of the current working
# directory, so importing this module from another directory still finds it.
_FONT_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "fonts", "NotoSansTC-Regular.ttf"
)
fontManager.addfont(_FONT_PATH)
matplotlib.rc("font", family="Noto Sans TC")
18
+
19
+
20
def butter_filter(
    data: np.ndarray,
    cutoff: "float | list[float]",
    fs: int,
    btype: str,
    order=5,
):
    """Filter ``data`` with a digital Butterworth filter.

    Args:
        data: Samples to filter.
        cutoff: Cutoff frequency in Hz. A single number for "low"/"high"
            filters, or a ``[low, high]`` pair for "band" filters.
        fs: Sampling rate of ``data`` in Hz.
        btype: Filter type forwarded to ``scipy.signal.butter``
            (e.g. "low", "high", "band").
        order: Order of the Butterworth design.

    Returns:
        The filtered samples as produced by ``scipy.signal.lfilter``.
    """
    nyquist = 0.5 * fs
    # butter() takes cutoffs as a fraction of the Nyquist frequency. Dividing
    # through numpy normalises a scalar and a [low, high] pair the same way,
    # so no per-btype branching is needed.
    normal_cutoff = np.asarray(cutoff, dtype=float) / nyquist
    b, a = butter(order, normal_cutoff, btype=btype, analog=False)
    return lfilter(b, a, data)
29
+
30
+
31
def plt_to_numpy(plt: plt.Figure) -> np.ndarray:
    """Render a matplotlib figure to a PNG and return it as an image array.

    The rendered figure is closed afterwards. Without this, pyplot keeps every
    figure alive, so a long-running server accumulates one open figure per
    plot it has ever produced.

    Args:
        plt: Either the ``matplotlib.pyplot`` module (which is what the callers
            in this file pass; its current figure is rendered) or a
            ``Figure`` instance.

    Returns:
        The PNG rendering decoded into a numpy array via PIL.
    """
    # The parameter shadows the module-level ``plt``, so import pyplot under a
    # different local name to be able to close the figure.
    import matplotlib.pyplot as pyplot

    # The pyplot module exposes gcf(); a Figure instance does not.
    fig = plt.gcf() if hasattr(plt, "gcf") else plt

    buf = io.BytesIO()
    try:
        fig.savefig(buf, format="png")
    finally:
        # Release the figure even if rendering fails.
        pyplot.close(fig)
    buf.seek(0)
    return np.array(Image.open(buf))
36
+
37
+
38
def apply_filters(
    y: np.ndarray,
    sr: int,
    highpass_cutoff: int,
    lowpass_cutoff: int,
    bandpass_low: int,
    bandpass_high: int,
):
    """Apply the optional high-pass, low-pass and band-pass filters to ``y``.

    A filter is applied only when its cutoff lies strictly between
    ``FILTER_LOWER_BOUND`` and the Nyquist frequency (``sr / 2``); any other
    value leaves that filter disabled. The band-pass filter additionally
    requires ``bandpass_low < bandpass_high``. Out-of-range values are skipped
    rather than passed on, because ``scipy.signal.butter`` rejects normalised
    cutoffs outside the open interval (0, 1).

    Args:
        y: Audio samples.
        sr: Sampling rate of ``y`` in Hz.
        highpass_cutoff: High-pass cutoff in Hz.
        lowpass_cutoff: Low-pass cutoff in Hz.
        bandpass_low: Lower band-pass edge in Hz.
        bandpass_high: Upper band-pass edge in Hz.

    Returns:
        The samples after every enabled filter has been applied, in the order
        high-pass, low-pass, band-pass.
    """
    nyquist = sr / 2

    def _usable(freq) -> bool:
        # True when freq is a cutoff that butter() can accept for this sr.
        return FILTER_LOWER_BOUND < freq < nyquist

    if _usable(highpass_cutoff):
        y = butter_filter(y, highpass_cutoff, sr, "high")
    if _usable(lowpass_cutoff):
        y = butter_filter(y, lowpass_cutoff, sr, "low")
    # Both edges must be valid and ordered; otherwise a band edge left at its
    # disabled value would reach butter() and raise.
    if (
        _usable(bandpass_low)
        and _usable(bandpass_high)
        and bandpass_low < bandpass_high
    ):
        y = butter_filter(y, [bandpass_low, bandpass_high], sr, "band")
    return y
53
+
54
+
55
def analyze_audio(
    file: str,
    highpass_cutoff: int,
    lowpass_cutoff: int,
    bandpass_low: int,
    bandpass_high: int,
):
    """Load an audio file, filter it, and render its feature plots.

    Args:
        file: Path of the audio file to analyse.
        highpass_cutoff: High-pass cutoff in Hz, forwarded to apply_filters.
        lowpass_cutoff: Low-pass cutoff in Hz, forwarded to apply_filters.
        bandpass_low: Lower band-pass edge in Hz, forwarded to apply_filters.
        bandpass_high: Upper band-pass edge in Hz, forwarded to apply_filters.

    Returns:
        A tuple of eleven image arrays, in this order: waveform, spectrogram,
        MFCC, zero crossing rate, spectral centroid, spectral bandwidth,
        RMS energy, spectral contrast, spectral rolloff, tempo, tempogram.

    Raises:
        ValueError: If ``file`` is empty or ``None`` (no audio was provided).
    """
    if not file:
        # Fail with a readable message instead of a TypeError from os.path.
        raise ValueError("No audio file provided.")

    filename = os.path.basename(file)
    y, sr = librosa.load(file)
    y = apply_filters(
        y, sr, highpass_cutoff, lowpass_cutoff, bandpass_low, bandpass_high
    )

    def frame_times(n_frames: int) -> np.ndarray:
        # Always convert with the actual sampling rate; relying on
        # frames_to_time's default only works when it happens to equal sr.
        return librosa.frames_to_time(range(n_frames), sr=sr)

    def plot_waveform(y: np.ndarray, sr: int) -> np.ndarray:
        plt.figure(figsize=(14, 5))
        librosa.display.waveshow(y, sr=sr)
        plt.title(f"Waveform ({filename})")
        plt.xlabel("Time")
        plt.ylabel("Amplitude")
        return plt_to_numpy(plt)

    def plot_spectrogram(y: np.ndarray, sr: int) -> np.ndarray:
        plt.figure(figsize=(14, 5))
        # Magnitude STFT in dB relative to its peak.
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
        librosa.display.specshow(D, sr=sr, x_axis="time", y_axis="log")
        plt.colorbar(format="%+2.0f dB")
        plt.title(f"Spectrogram ({filename})")
        return plt_to_numpy(plt)

    def plot_mfcc(y: np.ndarray, sr: int) -> np.ndarray:
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        plt.figure(figsize=(14, 5))
        librosa.display.specshow(mfccs, sr=sr, x_axis="time")
        plt.colorbar()
        plt.title(f"MFCC ({filename})")
        return plt_to_numpy(plt)

    def plot_zcr(y: np.ndarray) -> np.ndarray:
        zcr = librosa.feature.zero_crossing_rate(y=y)
        plt.figure(figsize=(14, 5))
        plt.plot(zcr[0])
        plt.title(f"Zero Crossing Rate ({filename})")
        plt.xlabel("Frames")
        plt.ylabel("Rate")
        return plt_to_numpy(plt)

    def plot_spectral_centroid(y: np.ndarray, sr: int) -> np.ndarray:
        spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
        t = frame_times(len(spectral_centroids))
        plt.figure(figsize=(14, 5))
        plt.semilogy(t, spectral_centroids, label="Spectral centroid")
        plt.title(f"Spectral Centroid ({filename})")
        plt.xlabel("Time")
        plt.ylabel("Hz")
        return plt_to_numpy(plt)

    def plot_spectral_bandwidth(y: np.ndarray, sr: int) -> np.ndarray:
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
        t = frame_times(len(spectral_bandwidth))
        plt.figure(figsize=(14, 5))
        plt.semilogy(t, spectral_bandwidth, label="Spectral bandwidth")
        plt.title(f"Spectral Bandwidth ({filename})")
        plt.xlabel("Time")
        plt.ylabel("Hz")
        return plt_to_numpy(plt)

    def plot_rms(y: np.ndarray) -> np.ndarray:
        rms = librosa.feature.rms(y=y)[0]
        plt.figure(figsize=(14, 5))
        plt.plot(rms)
        plt.title(f"RMS Energy ({filename})")
        plt.xlabel("Frames")
        plt.ylabel("RMS")
        return plt_to_numpy(plt)

    def plot_spectral_contrast(y: np.ndarray, sr: int) -> np.ndarray:
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        plt.figure(figsize=(14, 5))
        librosa.display.specshow(spectral_contrast, sr=sr, x_axis="time")
        plt.colorbar()
        plt.title(f"Spectral Contrast ({filename})")
        return plt_to_numpy(plt)

    def plot_spectral_rolloff(y: np.ndarray, sr: int) -> np.ndarray:
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
        t = frame_times(len(spectral_rolloff))
        plt.figure(figsize=(14, 5))
        plt.semilogy(t, spectral_rolloff, label="Spectral rolloff")
        plt.xlabel("Time")
        plt.ylabel("Hz")
        plt.title(f"Spectral Rolloff ({filename})")
        return plt_to_numpy(plt)

    def plot_tempo(onset_env: np.ndarray, sr: int) -> np.ndarray:
        # aggregate=None requests a per-frame tempo estimate, not one global value.
        dtempo = librosa.feature.tempo(onset_envelope=onset_env, sr=sr, aggregate=None)
        t = frame_times(len(dtempo))
        plt.figure(figsize=(14, 5))
        plt.plot(t, dtempo, label="Tempo")
        plt.title(f"Tempo ({filename})")
        plt.xlabel("Time")
        plt.ylabel("Tempo")
        return plt_to_numpy(plt)

    def plot_tempogram(onset_env: np.ndarray, sr: int) -> np.ndarray:
        tempogram = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr)
        plt.figure(figsize=(14, 5))
        librosa.display.specshow(tempogram, sr=sr, x_axis="time")
        plt.colorbar()
        plt.title(f"Tempogram ({filename})")
        return plt_to_numpy(plt)

    waveform = plot_waveform(y, sr)
    spectrogram = plot_spectrogram(y, sr)
    mfcc = plot_mfcc(y, sr)
    zcr = plot_zcr(y)
    spectral_centroid = plot_spectral_centroid(y, sr)
    spectral_bandwidth = plot_spectral_bandwidth(y, sr)
    rms = plot_rms(y)
    spectral_contrast = plot_spectral_contrast(y, sr)
    spectral_rolloff = plot_spectral_rolloff(y, sr)
    # The onset envelope is computed once and shared by both tempo plots.
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo = plot_tempo(onset_env, sr)
    tempogram = plot_tempogram(onset_env, sr)

    return (
        waveform,
        spectrogram,
        mfcc,
        zcr,
        spectral_centroid,
        spectral_bandwidth,
        rms,
        spectral_contrast,
        spectral_rolloff,
        tempo,
        tempogram,
    )
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ librosa
3
+ matplotlib
4
+ numpy
5
+ scipy
6
+ pillow
ui.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gen import FILTER_LOWER_BOUND, FILTER_UPPER_BOUND, analyze_audio
3
+
4
def _cutoff_slider(label: str, info: str) -> gr.Slider:
    """Create one filter-cutoff slider; all four share range, step and start value."""
    # The slider starts at its minimum, one below FILTER_LOWER_BOUND.
    lowest = FILTER_LOWER_BOUND - 1
    return gr.Slider(
        lowest,
        FILTER_UPPER_BOUND,
        step=1,
        label=label,
        info=info,
        value=lowest,
    )


with gr.Blocks() as app:
    gr.Markdown(
        """
        # Audio Feature Visualization

        Upload an audio file to visualize its features and optionally apply filters. Each visualization helps in understanding different aspects of the audio signal.
        """
    )

    # Input: an uploaded or recorded clip, handed to the callback as a file path.
    with gr.Row():
        audio = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            label="Audio File",
        )

    # Filter controls.
    with gr.Row():
        highpass = _cutoff_slider(
            "High-pass filter cutoff frequency (Hz).",
            "Frequency above which signals are allowed to pass through.",
        )
        lowpass = _cutoff_slider(
            "Low-pass filter cutoff frequency (Hz).",
            "Frequency below which signals are allowed to pass through.",
        )

    with gr.Row():
        bandpass_low = _cutoff_slider(
            "Band-pass filter low cutoff frequency (Hz).",
            "Lower frequency bound for band-pass filter.",
        )
        bandpass_high = _cutoff_slider(
            "Band-pass filter high cutoff frequency (Hz).",
            "Higher frequency bound for band-pass filter.",
        )

    btn = gr.Button("Visualize Features", variant="primary")

    # Output panels, one image per feature plot.
    with gr.Row():
        waveform = gr.Image(
            label="Waveform: Visual representation of the audio signal over time."
        )

    with gr.Row():
        spectrogram = gr.Image(
            label="Spectrogram: Graphical representation of the spectrum of frequencies in a sound signal as they vary with time."
        )
        mfcc = gr.Image(
            label="MFCC: Mel-frequency cepstral coefficients, representing the short-term power spectrum of a sound."
        )

    with gr.Row():
        rms_energy = gr.Image(
            label="RMS Energy: Root Mean Square energy of the audio signal."
        )
        zero_crossing_rate = gr.Image(
            label="Zero Crossing Rate: Rate at which the signal changes from positive to negative or back."
        )

    with gr.Row():
        spectral_centroid = gr.Image(
            label="Spectral Centroid: Indicates where the center of mass of the spectrum is located."
        )
        spectral_bandwidth = gr.Image(
            label="Spectral Bandwidth: The width of a range of frequencies."
        )

    with gr.Row():
        spectral_rolloff = gr.Image(
            label="Spectral Rolloff: Frequency below which a specified percentage of the total spectral energy lies."
        )
        spectral_contrast = gr.Image(
            label="Spectral Contrast: Difference in amplitude between peaks and valleys in a sound spectrum."
        )

    with gr.Row():
        tempo = gr.Image(label="Tempo: Estimated tempo of the audio signal.")
        tempogram = gr.Image(
            label="Tempogram: Localized autocorrelation of the onset strength envelope."
        )

    # The order of these lists must match analyze_audio's parameters and the
    # order of the tuple it returns.
    filter_inputs = [audio, highpass, lowpass, bandpass_low, bandpass_high]
    plot_outputs = [
        waveform,
        spectrogram,
        mfcc,
        zero_crossing_rate,
        spectral_centroid,
        spectral_bandwidth,
        rms_energy,
        spectral_contrast,
        spectral_rolloff,
        tempo,
        tempogram,
    ]
    btn.click(fn=analyze_audio, inputs=filter_inputs, outputs=plot_outputs)
+ )