Spaces:
Running
on
T4
Running
on
T4
enhuiz
committed on
Commit
•
693611a
0
Parent(s):
Init
Browse files- .gitignore +9 -0
- LICENSE +21 -0
- README.md +15 -0
- app.py +59 -0
- packages.txt +1 -0
- pyproject.toml +6 -0
- requirements.txt +1 -0
.gitignore
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/data
|
2 |
+
/runs
|
3 |
+
/scripts
|
4 |
+
/dist
|
5 |
+
/build
|
6 |
+
/*.egg-info
|
7 |
+
/flagged
|
8 |
+
version.py
|
9 |
+
__pycache__
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 Resemble AI
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Resemble Enhance
|
3 |
+
emoji: π
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: pink
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 4.8.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: mit
|
11 |
+
---
|
12 |
+
|
13 |
+
# Resemble Enhance
|
14 |
+
|
15 |
+
Resemble Enhance is an AI-powered tool that aims to improve the overall quality of speech by performing denoising and enhancement. It consists of two modules: a denoiser, which separates speech from noisy audio, and an enhancer, which further boosts the perceptual audio quality by restoring audio distortions and extending the audio bandwidth. The two models are trained on high-quality 44.1kHz speech data that guarantees the enhancement of your speech with high quality.
|
app.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import torch
|
3 |
+
import torchaudio
|
4 |
+
|
5 |
+
from resemble_enhance.enhancer.inference import denoise, enhance
|
6 |
+
|
7 |
+
# Run inference on the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
|
11 |
+
|
12 |
+
|
13 |
+
def _fn(path, solver, nfe, tau, denoising):
|
14 |
+
if path is None:
|
15 |
+
return None, None
|
16 |
+
|
17 |
+
solver = solver.lower()
|
18 |
+
nfe = int(nfe)
|
19 |
+
lambd = 0.9 if denoising else 0.1
|
20 |
+
|
21 |
+
dwav, sr = torchaudio.load(path)
|
22 |
+
dwav = dwav.mean(dim=0)
|
23 |
+
|
24 |
+
wav1, new_sr = denoise(dwav, sr, device)
|
25 |
+
wav2, new_sr = enhance(dwav, sr, device, nfe=nfe, solver=solver, lambd=lambd, tau=tau)
|
26 |
+
|
27 |
+
wav1 = wav1.cpu().numpy()
|
28 |
+
wav2 = wav2.cpu().numpy()
|
29 |
+
|
30 |
+
return (new_sr, wav1), (new_sr, wav2)
|
31 |
+
|
32 |
+
|
33 |
+
def main():
    """Assemble the Resemble Enhance Gradio interface around ``_fn`` and launch it."""
    # Input widgets, in the positional order expected by ``_fn``.
    audio_in = gr.Audio(type="filepath", label="Input Audio")
    solver_choice = gr.Dropdown(choices=["Midpoint", "RK4", "Euler"], value="Midpoint", label="CFM ODE Solver")
    nfe_slider = gr.Slider(minimum=1, maximum=128, value=64, step=1, label="CFM Number of Function Evaluations")
    tau_slider = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.01, label="CFM Prior Temperature")
    denoise_toggle = gr.Checkbox(value=False, label="Denoise Before Enhancement")

    # Output widgets, in the order ``_fn`` returns its results.
    denoised_out = gr.Audio(label="Output Denoised Audio")
    enhanced_out = gr.Audio(label="Output Enhanced Audio")

    demo = gr.Interface(
        fn=_fn,
        title="Resemble Enhance",
        description="AI-driven audio enhancement for your audio files, powered by Resemble AI.",
        inputs=[audio_in, solver_choice, nfe_slider, tau_slider, denoise_toggle],
        outputs=[denoised_out, enhanced_out],
    )
    demo.launch()


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
libsox-dev
|
pyproject.toml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.black]
|
2 |
+
line-length = 120
|
3 |
+
target-version = ['py310']
|
4 |
+
|
5 |
+
[tool.isort]
|
6 |
+
line_length = 120
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
resemble-enhance
|