Simonlob
committed on
Commit
•
6703e27
0
Parent(s):
Release version 0.1.13
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .env.example +6 -0
- .gitignore +163 -0
- .pre-commit-config.yaml +59 -0
- .project-root +2 -0
- .pylintrc +525 -0
- Create_dataset/__init__.py +1 -0
- Create_dataset/cr_dataset_script.py +99 -0
- LICENSE +21 -0
- MANIFEST.in +14 -0
- Makefile +42 -0
- README.md +268 -0
- configs/__init__.py +1 -0
- configs/callbacks/default.yaml +5 -0
- configs/callbacks/model_checkpoint.yaml +17 -0
- configs/callbacks/model_summary.yaml +5 -0
- configs/callbacks/none.yaml +0 -0
- configs/callbacks/rich_progress_bar.yaml +4 -0
- configs/data/akylai.yaml +21 -0
- configs/data/hi-fi_en-US_female.yaml +14 -0
- configs/data/ljspeech.yaml +22 -0
- configs/data/vctk.yaml +14 -0
- configs/debug/default.yaml +35 -0
- configs/debug/fdr.yaml +9 -0
- configs/debug/limit.yaml +12 -0
- configs/debug/overfit.yaml +13 -0
- configs/debug/profiler.yaml +15 -0
- configs/eval.yaml +18 -0
- configs/experiment/akylai.yaml +14 -0
- configs/experiment/hifi_dataset_piper_phonemizer.yaml +14 -0
- configs/experiment/ljspeech.yaml +14 -0
- configs/experiment/ljspeech_min_memory.yaml +18 -0
- configs/experiment/multispeaker.yaml +14 -0
- configs/extras/default.yaml +8 -0
- configs/hparams_search/mnist_optuna.yaml +52 -0
- configs/hydra/default.yaml +19 -0
- configs/local/.gitkeep +0 -0
- configs/logger/aim.yaml +28 -0
- configs/logger/comet.yaml +12 -0
- configs/logger/csv.yaml +7 -0
- configs/logger/many_loggers.yaml +9 -0
- configs/logger/mlflow.yaml +12 -0
- configs/logger/neptune.yaml +9 -0
- configs/logger/tensorboard.yaml +10 -0
- configs/logger/wandb.yaml +16 -0
- configs/model/cfm/default.yaml +3 -0
- configs/model/decoder/default.yaml +7 -0
- configs/model/encoder/default.yaml +18 -0
- configs/model/matcha.yaml +15 -0
- configs/model/optimizer/adam.yaml +4 -0
- configs/paths/default.yaml +18 -0
.env.example
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# example of file for storing private and user specific environment variables, like keys or system paths
|
2 |
+
# rename it to ".env" (excluded from version control by default)
|
3 |
+
# .env is loaded by train.py automatically
|
4 |
+
# hydra allows you to reference variables in .yaml configs with special syntax: ${oc.env:MY_VAR}
|
5 |
+
|
6 |
+
MY_VAR="/home/user/my/system/path"
|
.gitignore
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
pip-wheel-metadata/
|
24 |
+
share/python-wheels/
|
25 |
+
*.egg-info/
|
26 |
+
.installed.cfg
|
27 |
+
*.egg
|
28 |
+
MANIFEST
|
29 |
+
|
30 |
+
# PyInstaller
|
31 |
+
# Usually these files are written by a python script from a template
|
32 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
33 |
+
*.manifest
|
34 |
+
*.spec
|
35 |
+
|
36 |
+
# Installer logs
|
37 |
+
pip-log.txt
|
38 |
+
pip-delete-this-directory.txt
|
39 |
+
|
40 |
+
# Unit test / coverage reports
|
41 |
+
htmlcov/
|
42 |
+
.tox/
|
43 |
+
.nox/
|
44 |
+
.coverage
|
45 |
+
.coverage.*
|
46 |
+
.cache
|
47 |
+
nosetests.xml
|
48 |
+
coverage.xml
|
49 |
+
*.cover
|
50 |
+
*.py,cover
|
51 |
+
.hypothesis/
|
52 |
+
.pytest_cache/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
target/
|
76 |
+
|
77 |
+
# Jupyter Notebook
|
78 |
+
.ipynb_checkpoints
|
79 |
+
|
80 |
+
# IPython
|
81 |
+
profile_default/
|
82 |
+
ipython_config.py
|
83 |
+
|
84 |
+
# pyenv
|
85 |
+
.python-version
|
86 |
+
|
87 |
+
# pipenv
|
88 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
89 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
90 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
91 |
+
# install all needed dependencies.
|
92 |
+
#Pipfile.lock
|
93 |
+
|
94 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
95 |
+
__pypackages__/
|
96 |
+
|
97 |
+
# Celery stuff
|
98 |
+
celerybeat-schedule
|
99 |
+
celerybeat.pid
|
100 |
+
|
101 |
+
# SageMath parsed files
|
102 |
+
*.sage.py
|
103 |
+
|
104 |
+
# Environments
|
105 |
+
.venv
|
106 |
+
env/
|
107 |
+
venv/
|
108 |
+
ENV/
|
109 |
+
env.bak/
|
110 |
+
venv.bak/
|
111 |
+
|
112 |
+
# Spyder project settings
|
113 |
+
.spyderproject
|
114 |
+
.spyproject
|
115 |
+
|
116 |
+
# Rope project settings
|
117 |
+
.ropeproject
|
118 |
+
|
119 |
+
# mkdocs documentation
|
120 |
+
/site
|
121 |
+
|
122 |
+
# mypy
|
123 |
+
.mypy_cache/
|
124 |
+
.dmypy.json
|
125 |
+
dmypy.json
|
126 |
+
|
127 |
+
# Pyre type checker
|
128 |
+
.pyre/
|
129 |
+
|
130 |
+
### VisualStudioCode
|
131 |
+
.vscode/*
|
132 |
+
!.vscode/settings.json
|
133 |
+
!.vscode/tasks.json
|
134 |
+
!.vscode/launch.json
|
135 |
+
!.vscode/extensions.json
|
136 |
+
*.code-workspace
|
137 |
+
**/.vscode
|
138 |
+
|
139 |
+
# JetBrains
|
140 |
+
.idea/
|
141 |
+
|
142 |
+
# Data & Models
|
143 |
+
*.h5
|
144 |
+
*.tar
|
145 |
+
*.tar.gz
|
146 |
+
|
147 |
+
# Lightning-Hydra-Template
|
148 |
+
configs/local/default.yaml
|
149 |
+
/data/
|
150 |
+
/logs/
|
151 |
+
.env
|
152 |
+
|
153 |
+
# Aim logging
|
154 |
+
.aim
|
155 |
+
|
156 |
+
# Cython compiled files
|
157 |
+
matcha/utils/monotonic_align/core.c
|
158 |
+
|
159 |
+
# Ignoring hifigan checkpoint
|
160 |
+
generator_v1
|
161 |
+
g_02500000
|
162 |
+
gradio_cached_examples/
|
163 |
+
synth_output/
|
.pre-commit-config.yaml
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
default_language_version:
|
2 |
+
python: python3.10
|
3 |
+
|
4 |
+
repos:
|
5 |
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
6 |
+
rev: v4.5.0
|
7 |
+
hooks:
|
8 |
+
# list of supported hooks: https://pre-commit.com/hooks.html
|
9 |
+
- id: trailing-whitespace
|
10 |
+
- id: end-of-file-fixer
|
11 |
+
# - id: check-docstring-first
|
12 |
+
- id: check-yaml
|
13 |
+
- id: debug-statements
|
14 |
+
- id: detect-private-key
|
15 |
+
- id: check-toml
|
16 |
+
- id: check-case-conflict
|
17 |
+
- id: check-added-large-files
|
18 |
+
|
19 |
+
# python code formatting
|
20 |
+
- repo: https://github.com/psf/black
|
21 |
+
rev: 23.12.1
|
22 |
+
hooks:
|
23 |
+
- id: black
|
24 |
+
args: [--line-length, "120"]
|
25 |
+
|
26 |
+
# python import sorting
|
27 |
+
- repo: https://github.com/PyCQA/isort
|
28 |
+
rev: 5.13.2
|
29 |
+
hooks:
|
30 |
+
- id: isort
|
31 |
+
args: ["--profile", "black", "--filter-files"]
|
32 |
+
|
33 |
+
# python upgrading syntax to newer version
|
34 |
+
- repo: https://github.com/asottile/pyupgrade
|
35 |
+
rev: v3.15.0
|
36 |
+
hooks:
|
37 |
+
- id: pyupgrade
|
38 |
+
args: [--py38-plus]
|
39 |
+
|
40 |
+
# python check (PEP8), programming errors and code complexity
|
41 |
+
- repo: https://github.com/PyCQA/flake8
|
42 |
+
rev: 7.0.0
|
43 |
+
hooks:
|
44 |
+
- id: flake8
|
45 |
+
args:
|
46 |
+
[
|
47 |
+
"--max-line-length", "120",
|
48 |
+
"--extend-ignore",
|
49 |
+
"E203,E402,E501,F401,F841,RST2,RST301",
|
50 |
+
"--exclude",
|
51 |
+
"logs/*,data/*,matcha/hifigan/*",
|
52 |
+
]
|
53 |
+
additional_dependencies: [flake8-rst-docstrings==0.3.0]
|
54 |
+
|
55 |
+
# pylint
|
56 |
+
- repo: https://github.com/pycqa/pylint
|
57 |
+
rev: v3.0.3
|
58 |
+
hooks:
|
59 |
+
- id: pylint
|
.project-root
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
# this file is required for inferring the project root directory
|
2 |
+
# do not delete
|
.pylintrc
ADDED
@@ -0,0 +1,525 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[MASTER]
|
2 |
+
|
3 |
+
# A comma-separated list of package or module names from where C extensions may
|
4 |
+
# be loaded. Extensions are loaded into the active Python interpreter and may
|
5 |
+
# run arbitrary code.
|
6 |
+
extension-pkg-whitelist=
|
7 |
+
|
8 |
+
# Add files or directories to the blacklist. They should be base names, not
|
9 |
+
# paths.
|
10 |
+
ignore=CVS
|
11 |
+
|
12 |
+
# Add files or directories matching the regex patterns to the blacklist. The
|
13 |
+
# regex matches against base names, not paths.
|
14 |
+
ignore-patterns=
|
15 |
+
|
16 |
+
# Python code to execute, usually for sys.path manipulation such as
|
17 |
+
# pygtk.require().
|
18 |
+
#init-hook=
|
19 |
+
|
20 |
+
# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
|
21 |
+
# number of processors available to use.
|
22 |
+
jobs=1
|
23 |
+
|
24 |
+
# Control the amount of potential inferred values when inferring a single
|
25 |
+
# object. This can help the performance when dealing with large functions or
|
26 |
+
# complex, nested conditions.
|
27 |
+
limit-inference-results=100
|
28 |
+
|
29 |
+
# List of plugins (as comma separated values of python modules names) to load,
|
30 |
+
# usually to register additional checkers.
|
31 |
+
load-plugins=
|
32 |
+
|
33 |
+
# Pickle collected data for later comparisons.
|
34 |
+
persistent=yes
|
35 |
+
|
36 |
+
# Specify a configuration file.
|
37 |
+
#rcfile=
|
38 |
+
|
39 |
+
# When enabled, pylint would attempt to guess common misconfiguration and emit
|
40 |
+
# user-friendly hints instead of false-positive error messages.
|
41 |
+
suggestion-mode=yes
|
42 |
+
|
43 |
+
# Allow loading of arbitrary C extensions. Extensions are imported into the
|
44 |
+
# active Python interpreter and may run arbitrary code.
|
45 |
+
unsafe-load-any-extension=no
|
46 |
+
|
47 |
+
|
48 |
+
[MESSAGES CONTROL]
|
49 |
+
|
50 |
+
# Only show warnings with the listed confidence levels. Leave empty to show
|
51 |
+
# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
|
52 |
+
confidence=
|
53 |
+
|
54 |
+
# Disable the message, report, category or checker with the given id(s). You
|
55 |
+
# can either give multiple identifiers separated by comma (,) or put this
|
56 |
+
# option multiple times (only on the command line, not in the configuration
|
57 |
+
# file where it should appear only once). You can also use "--disable=all" to
|
58 |
+
# disable everything first and then reenable specific checks. For example, if
|
59 |
+
# you want to run only the similarities checker, you can use "--disable=all
|
60 |
+
# --enable=similarities". If you want to run only the classes checker, but have
|
61 |
+
# no Warning level messages displayed, use "--disable=all --enable=classes
|
62 |
+
# --disable=W".
|
63 |
+
disable=missing-docstring,
|
64 |
+
too-many-public-methods,
|
65 |
+
too-many-lines,
|
66 |
+
bare-except,
|
67 |
+
## for avoiding weird p3.6 CI linter error
|
68 |
+
## TODO: see later if we can remove this
|
69 |
+
assigning-non-slot,
|
70 |
+
unsupported-assignment-operation,
|
71 |
+
## end
|
72 |
+
line-too-long,
|
73 |
+
fixme,
|
74 |
+
wrong-import-order,
|
75 |
+
ungrouped-imports,
|
76 |
+
wrong-import-position,
|
77 |
+
import-error,
|
78 |
+
invalid-name,
|
79 |
+
too-many-instance-attributes,
|
80 |
+
arguments-differ,
|
81 |
+
arguments-renamed,
|
82 |
+
no-name-in-module,
|
83 |
+
no-member,
|
84 |
+
unsubscriptable-object,
|
85 |
+
raw-checker-failed,
|
86 |
+
bad-inline-option,
|
87 |
+
locally-disabled,
|
88 |
+
file-ignored,
|
89 |
+
suppressed-message,
|
90 |
+
useless-suppression,
|
91 |
+
deprecated-pragma,
|
92 |
+
use-symbolic-message-instead,
|
93 |
+
useless-object-inheritance,
|
94 |
+
too-few-public-methods,
|
95 |
+
too-many-branches,
|
96 |
+
too-many-arguments,
|
97 |
+
too-many-locals,
|
98 |
+
too-many-statements,
|
99 |
+
duplicate-code,
|
100 |
+
not-callable,
|
101 |
+
import-outside-toplevel,
|
102 |
+
logging-fstring-interpolation,
|
103 |
+
logging-not-lazy,
|
104 |
+
unused-argument,
|
105 |
+
no-else-return,
|
106 |
+
chained-comparison,
|
107 |
+
redefined-outer-name
|
108 |
+
|
109 |
+
# Enable the message, report, category or checker with the given id(s). You can
|
110 |
+
# either give multiple identifiers separated by comma (,) or put this option
|
111 |
+
# multiple times (only on the command line, not in the configuration file where
|
112 |
+
# it should appear only once). See also the "--disable" option for examples.
|
113 |
+
enable=c-extension-no-member
|
114 |
+
|
115 |
+
|
116 |
+
[REPORTS]
|
117 |
+
|
118 |
+
# Python expression which should return a note less than 10 (10 is the highest
|
119 |
+
# note). You have access to the variables error, warning, statement, which
|
120 |
+
# respectively contain the number of errors / warnings messages and the total
|
121 |
+
# number of statements analyzed. This is used by the global evaluation report
|
122 |
+
# (RP0004).
|
123 |
+
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
|
124 |
+
|
125 |
+
# Template used to display messages. This is a python new-style format string
|
126 |
+
# used to format the message information. See doc for all details.
|
127 |
+
#msg-template=
|
128 |
+
|
129 |
+
# Set the output format. Available formats are text, parseable, colorized, json
|
130 |
+
# and msvs (visual studio). You can also give a reporter class, e.g.
|
131 |
+
# mypackage.mymodule.MyReporterClass.
|
132 |
+
output-format=text
|
133 |
+
|
134 |
+
# Tells whether to display a full report or only the messages.
|
135 |
+
reports=no
|
136 |
+
|
137 |
+
# Activate the evaluation score.
|
138 |
+
score=yes
|
139 |
+
|
140 |
+
|
141 |
+
[REFACTORING]
|
142 |
+
|
143 |
+
# Maximum number of nested blocks for function / method body
|
144 |
+
max-nested-blocks=5
|
145 |
+
|
146 |
+
# Complete name of functions that never returns. When checking for
|
147 |
+
# inconsistent-return-statements if a never returning function is called then
|
148 |
+
# it will be considered as an explicit return statement and no message will be
|
149 |
+
# printed.
|
150 |
+
never-returning-functions=sys.exit
|
151 |
+
|
152 |
+
|
153 |
+
[LOGGING]
|
154 |
+
|
155 |
+
# Format style used to check logging format string. `old` means using %
|
156 |
+
# formatting, while `new` is for `{}` formatting.
|
157 |
+
logging-format-style=old
|
158 |
+
|
159 |
+
# Logging modules to check that the string format arguments are in logging
|
160 |
+
# function parameter format.
|
161 |
+
logging-modules=logging
|
162 |
+
|
163 |
+
|
164 |
+
[SPELLING]
|
165 |
+
|
166 |
+
# Limits count of emitted suggestions for spelling mistakes.
|
167 |
+
max-spelling-suggestions=4
|
168 |
+
|
169 |
+
# Spelling dictionary name. Available dictionaries: none. To make it working
|
170 |
+
# install the python-enchant package.
|
171 |
+
spelling-dict=
|
172 |
+
|
173 |
+
# List of comma separated words that should not be checked.
|
174 |
+
spelling-ignore-words=
|
175 |
+
|
176 |
+
# A path to a file that contains private dictionary; one word per line.
|
177 |
+
spelling-private-dict-file=
|
178 |
+
|
179 |
+
# Tells whether to store unknown words to indicated private dictionary in
|
180 |
+
# --spelling-private-dict-file option instead of raising a message.
|
181 |
+
spelling-store-unknown-words=no
|
182 |
+
|
183 |
+
|
184 |
+
[MISCELLANEOUS]
|
185 |
+
|
186 |
+
# List of note tags to take in consideration, separated by a comma.
|
187 |
+
notes=FIXME,
|
188 |
+
XXX,
|
189 |
+
TODO
|
190 |
+
|
191 |
+
|
192 |
+
[TYPECHECK]
|
193 |
+
|
194 |
+
# List of decorators that produce context managers, such as
|
195 |
+
# contextlib.contextmanager. Add to this list to register other decorators that
|
196 |
+
# produce valid context managers.
|
197 |
+
contextmanager-decorators=contextlib.contextmanager
|
198 |
+
|
199 |
+
# List of members which are set dynamically and missed by pylint inference
|
200 |
+
# system, and so shouldn't trigger E1101 when accessed. Python regular
|
201 |
+
# expressions are accepted.
|
202 |
+
generated-members=numpy.*,torch.*
|
203 |
+
|
204 |
+
# Tells whether missing members accessed in mixin class should be ignored. A
|
205 |
+
# mixin class is detected if its name ends with "mixin" (case insensitive).
|
206 |
+
ignore-mixin-members=yes
|
207 |
+
|
208 |
+
# Tells whether to warn about missing members when the owner of the attribute
|
209 |
+
# is inferred to be None.
|
210 |
+
ignore-none=yes
|
211 |
+
|
212 |
+
# This flag controls whether pylint should warn about no-member and similar
|
213 |
+
# checks whenever an opaque object is returned when inferring. The inference
|
214 |
+
# can return multiple potential results while evaluating a Python object, but
|
215 |
+
# some branches might not be evaluated, which results in partial inference. In
|
216 |
+
# that case, it might be useful to still emit no-member and other checks for
|
217 |
+
# the rest of the inferred objects.
|
218 |
+
ignore-on-opaque-inference=yes
|
219 |
+
|
220 |
+
# List of class names for which member attributes should not be checked (useful
|
221 |
+
# for classes with dynamically set attributes). This supports the use of
|
222 |
+
# qualified names.
|
223 |
+
ignored-classes=optparse.Values,thread._local,_thread._local
|
224 |
+
|
225 |
+
# List of module names for which member attributes should not be checked
|
226 |
+
# (useful for modules/projects where namespaces are manipulated during runtime
|
227 |
+
# and thus existing member attributes cannot be deduced by static analysis. It
|
228 |
+
# supports qualified module names, as well as Unix pattern matching.
|
229 |
+
ignored-modules=
|
230 |
+
|
231 |
+
# Show a hint with possible names when a member name was not found. The aspect
|
232 |
+
# of finding the hint is based on edit distance.
|
233 |
+
missing-member-hint=yes
|
234 |
+
|
235 |
+
# The minimum edit distance a name should have in order to be considered a
|
236 |
+
# similar match for a missing member name.
|
237 |
+
missing-member-hint-distance=1
|
238 |
+
|
239 |
+
# The total number of similar names that should be taken in consideration when
|
240 |
+
# showing a hint for a missing member.
|
241 |
+
missing-member-max-choices=1
|
242 |
+
|
243 |
+
|
244 |
+
[VARIABLES]
|
245 |
+
|
246 |
+
# List of additional names supposed to be defined in builtins. Remember that
|
247 |
+
# you should avoid defining new builtins when possible.
|
248 |
+
additional-builtins=
|
249 |
+
|
250 |
+
# Tells whether unused global variables should be treated as a violation.
|
251 |
+
allow-global-unused-variables=yes
|
252 |
+
|
253 |
+
# List of strings which can identify a callback function by name. A callback
|
254 |
+
# name must start or end with one of those strings.
|
255 |
+
callbacks=cb_,
|
256 |
+
_cb
|
257 |
+
|
258 |
+
# A regular expression matching the name of dummy variables (i.e. expected to
|
259 |
+
# not be used).
|
260 |
+
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
|
261 |
+
|
262 |
+
# Argument names that match this expression will be ignored. Default to name
|
263 |
+
# with leading underscore.
|
264 |
+
ignored-argument-names=_.*|^ignored_|^unused_
|
265 |
+
|
266 |
+
# Tells whether we should check for unused import in __init__ files.
|
267 |
+
init-import=no
|
268 |
+
|
269 |
+
# List of qualified module names which can have objects that can redefine
|
270 |
+
# builtins.
|
271 |
+
redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
|
272 |
+
|
273 |
+
|
274 |
+
[FORMAT]
|
275 |
+
|
276 |
+
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
|
277 |
+
expected-line-ending-format=
|
278 |
+
|
279 |
+
# Regexp for a line that is allowed to be longer than the limit.
|
280 |
+
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
|
281 |
+
|
282 |
+
# Number of spaces of indent required inside a hanging or continued line.
|
283 |
+
indent-after-paren=4
|
284 |
+
|
285 |
+
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
|
286 |
+
# tab).
|
287 |
+
indent-string=' '
|
288 |
+
|
289 |
+
# Maximum number of characters on a single line.
|
290 |
+
max-line-length=120
|
291 |
+
|
292 |
+
# Maximum number of lines in a module.
|
293 |
+
max-module-lines=1000
|
294 |
+
|
295 |
+
# Allow the body of a class to be on the same line as the declaration if body
|
296 |
+
# contains single statement.
|
297 |
+
single-line-class-stmt=no
|
298 |
+
|
299 |
+
# Allow the body of an if to be on the same line as the test if there is no
|
300 |
+
# else.
|
301 |
+
single-line-if-stmt=no
|
302 |
+
|
303 |
+
|
304 |
+
[SIMILARITIES]
|
305 |
+
|
306 |
+
# Ignore comments when computing similarities.
|
307 |
+
ignore-comments=yes
|
308 |
+
|
309 |
+
# Ignore docstrings when computing similarities.
|
310 |
+
ignore-docstrings=yes
|
311 |
+
|
312 |
+
# Ignore imports when computing similarities.
|
313 |
+
ignore-imports=no
|
314 |
+
|
315 |
+
# Minimum lines number of a similarity.
|
316 |
+
min-similarity-lines=4
|
317 |
+
|
318 |
+
|
319 |
+
[BASIC]
|
320 |
+
|
321 |
+
# Naming style matching correct argument names.
|
322 |
+
argument-naming-style=snake_case
|
323 |
+
|
324 |
+
# Regular expression matching correct argument names. Overrides argument-
|
325 |
+
# naming-style.
|
326 |
+
argument-rgx=[a-z_][a-z0-9_]{0,30}$
|
327 |
+
|
328 |
+
# Naming style matching correct attribute names.
|
329 |
+
attr-naming-style=snake_case
|
330 |
+
|
331 |
+
# Regular expression matching correct attribute names. Overrides attr-naming-
|
332 |
+
# style.
|
333 |
+
#attr-rgx=
|
334 |
+
|
335 |
+
# Bad variable names which should always be refused, separated by a comma.
|
336 |
+
bad-names=
|
337 |
+
|
338 |
+
# Naming style matching correct class attribute names.
|
339 |
+
class-attribute-naming-style=any
|
340 |
+
|
341 |
+
# Regular expression matching correct class attribute names. Overrides class-
|
342 |
+
# attribute-naming-style.
|
343 |
+
#class-attribute-rgx=
|
344 |
+
|
345 |
+
# Naming style matching correct class names.
|
346 |
+
class-naming-style=PascalCase
|
347 |
+
|
348 |
+
# Regular expression matching correct class names. Overrides class-naming-
|
349 |
+
# style.
|
350 |
+
#class-rgx=
|
351 |
+
|
352 |
+
# Naming style matching correct constant names.
|
353 |
+
const-naming-style=UPPER_CASE
|
354 |
+
|
355 |
+
# Regular expression matching correct constant names. Overrides const-naming-
|
356 |
+
# style.
|
357 |
+
#const-rgx=
|
358 |
+
|
359 |
+
# Minimum line length for functions/classes that require docstrings, shorter
|
360 |
+
# ones are exempt.
|
361 |
+
docstring-min-length=-1
|
362 |
+
|
363 |
+
# Naming style matching correct function names.
|
364 |
+
function-naming-style=snake_case
|
365 |
+
|
366 |
+
# Regular expression matching correct function names. Overrides function-
|
367 |
+
# naming-style.
|
368 |
+
#function-rgx=
|
369 |
+
|
370 |
+
# Good variable names which should always be accepted, separated by a comma.
|
371 |
+
good-names=i,
|
372 |
+
j,
|
373 |
+
k,
|
374 |
+
x,
|
375 |
+
ex,
|
376 |
+
Run,
|
377 |
+
_
|
378 |
+
|
379 |
+
# Include a hint for the correct naming format with invalid-name.
|
380 |
+
include-naming-hint=no
|
381 |
+
|
382 |
+
# Naming style matching correct inline iteration names.
|
383 |
+
inlinevar-naming-style=any
|
384 |
+
|
385 |
+
# Regular expression matching correct inline iteration names. Overrides
|
386 |
+
# inlinevar-naming-style.
|
387 |
+
#inlinevar-rgx=
|
388 |
+
|
389 |
+
# Naming style matching correct method names.
|
390 |
+
method-naming-style=snake_case
|
391 |
+
|
392 |
+
# Regular expression matching correct method names. Overrides method-naming-
|
393 |
+
# style.
|
394 |
+
#method-rgx=
|
395 |
+
|
396 |
+
# Naming style matching correct module names.
|
397 |
+
module-naming-style=snake_case
|
398 |
+
|
399 |
+
# Regular expression matching correct module names. Overrides module-naming-
|
400 |
+
# style.
|
401 |
+
#module-rgx=
|
402 |
+
|
403 |
+
# Colon-delimited sets of names that determine each other's naming style when
|
404 |
+
# the name regexes allow several styles.
|
405 |
+
name-group=
|
406 |
+
|
407 |
+
# Regular expression which should only match function or class names that do
|
408 |
+
# not require a docstring.
|
409 |
+
no-docstring-rgx=^_
|
410 |
+
|
411 |
+
# List of decorators that produce properties, such as abc.abstractproperty. Add
|
412 |
+
# to this list to register other decorators that produce valid properties.
|
413 |
+
# These decorators are taken in consideration only for invalid-name.
|
414 |
+
property-classes=abc.abstractproperty
|
415 |
+
|
416 |
+
# Naming style matching correct variable names.
|
417 |
+
variable-naming-style=snake_case
|
418 |
+
|
419 |
+
# Regular expression matching correct variable names. Overrides variable-
|
420 |
+
# naming-style.
|
421 |
+
variable-rgx=[a-z_][a-z0-9_]{0,30}$
|
422 |
+
|
423 |
+
|
424 |
+
[STRING]
|
425 |
+
|
426 |
+
# This flag controls whether the implicit-str-concat-in-sequence should
|
427 |
+
# generate a warning on implicit string concatenation in sequences defined over
|
428 |
+
# several lines.
|
429 |
+
check-str-concat-over-line-jumps=no
|
430 |
+
|
431 |
+
|
432 |
+
[IMPORTS]
|
433 |
+
|
434 |
+
# Allow wildcard imports from modules that define __all__.
|
435 |
+
allow-wildcard-with-all=no
|
436 |
+
|
437 |
+
# Analyse import fallback blocks. This can be used to support both Python 2 and
|
438 |
+
# 3 compatible code, which means that the block might have code that exists
|
439 |
+
# only in one or another interpreter, leading to false positives when analysed.
|
440 |
+
analyse-fallback-blocks=no
|
441 |
+
|
442 |
+
# Deprecated modules which should not be used, separated by a comma.
|
443 |
+
deprecated-modules=optparse,tkinter.tix
|
444 |
+
|
445 |
+
# Create a graph of external dependencies in the given file (report RP0402 must
|
446 |
+
# not be disabled).
|
447 |
+
ext-import-graph=
|
448 |
+
|
449 |
+
# Create a graph of every (i.e. internal and external) dependencies in the
|
450 |
+
# given file (report RP0402 must not be disabled).
|
451 |
+
import-graph=
|
452 |
+
|
453 |
+
# Create a graph of internal dependencies in the given file (report RP0402 must
|
454 |
+
# not be disabled).
|
455 |
+
int-import-graph=
|
456 |
+
|
457 |
+
# Force import order to recognize a module as part of the standard
|
458 |
+
# compatibility libraries.
|
459 |
+
known-standard-library=
|
460 |
+
|
461 |
+
# Force import order to recognize a module as part of a third party library.
|
462 |
+
known-third-party=enchant
|
463 |
+
|
464 |
+
|
465 |
+
[CLASSES]
|
466 |
+
|
467 |
+
# List of method names used to declare (i.e. assign) instance attributes.
|
468 |
+
defining-attr-methods=__init__,
|
469 |
+
__new__,
|
470 |
+
setUp
|
471 |
+
|
472 |
+
# List of member names, which should be excluded from the protected access
|
473 |
+
# warning.
|
474 |
+
exclude-protected=_asdict,
|
475 |
+
_fields,
|
476 |
+
_replace,
|
477 |
+
_source,
|
478 |
+
_make
|
479 |
+
|
480 |
+
# List of valid names for the first argument in a class method.
|
481 |
+
valid-classmethod-first-arg=cls
|
482 |
+
|
483 |
+
# List of valid names for the first argument in a metaclass class method.
|
484 |
+
valid-metaclass-classmethod-first-arg=cls
|
485 |
+
|
486 |
+
|
487 |
+
[DESIGN]
|
488 |
+
|
489 |
+
# Maximum number of arguments for function / method.
|
490 |
+
max-args=5
|
491 |
+
|
492 |
+
# Maximum number of attributes for a class (see R0902).
|
493 |
+
max-attributes=7
|
494 |
+
|
495 |
+
# Maximum number of boolean expressions in an if statement.
|
496 |
+
max-bool-expr=5
|
497 |
+
|
498 |
+
# Maximum number of branch for function / method body.
|
499 |
+
max-branches=12
|
500 |
+
|
501 |
+
# Maximum number of locals for function / method body.
|
502 |
+
max-locals=15
|
503 |
+
|
504 |
+
# Maximum number of parents for a class (see R0901).
|
505 |
+
max-parents=15
|
506 |
+
|
507 |
+
# Maximum number of public methods for a class (see R0904).
|
508 |
+
max-public-methods=20
|
509 |
+
|
510 |
+
# Maximum number of return / yield for function / method body.
|
511 |
+
max-returns=6
|
512 |
+
|
513 |
+
# Maximum number of statements in function / method body.
|
514 |
+
max-statements=50
|
515 |
+
|
516 |
+
# Minimum number of public methods for a class (see R0903).
|
517 |
+
min-public-methods=2
|
518 |
+
|
519 |
+
|
520 |
+
[EXCEPTIONS]
|
521 |
+
|
522 |
+
# Exceptions that will emit a warning when being caught. Defaults to
|
523 |
+
# "BaseException, Exception".
|
524 |
+
overgeneral-exceptions=builtins.BaseException,
|
525 |
+
builtins.Exception
|
Create_dataset/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
Create_dataset/cr_dataset_script.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
from datasets import load_dataset
|
4 |
+
from datasets import Dataset, DatasetDict
|
5 |
+
from IPython.display import Audio
|
6 |
+
import scipy
|
7 |
+
import librosa
|
8 |
+
from tqdm import tqdm
|
9 |
+
import re
|
10 |
+
import os
|
11 |
+
|
12 |
+
|
13 |
+
def load_audio(audio_dict: dict, target_sr: int = 22050) -> None:
    """Resample one decoded clip and write it to disk as a 16-bit PCM WAV file.

    Args:
        audio_dict: Mapping with the three keys read below: ``'array'`` (the
            samples), ``'sampling_rate'`` (source rate, passed to librosa as
            ``orig_sr``) and ``'path'`` (destination handed to
            ``scipy.io.wavfile.write``).
        target_sr: Output sampling rate in Hz. Defaults to 22050, the value
            that used to be hard-coded here.
    """
    # Import the submodule explicitly instead of relying on a bare
    # ``import scipy`` to expose ``scipy.io.wavfile``.
    from scipy.io import wavfile

    # NOTE(review): the 32767 scaling assumes float samples nominally within
    # [-1.0, 1.0] -- confirm against the datasets you feed in.
    audio_resampled = librosa.resample(
        np.asarray(audio_dict['array']),
        orig_sr=audio_dict['sampling_rate'],
        target_sr=target_sr,
    )

    # Resampling can overshoot slightly past +/-1.0; without clipping, the
    # int16 cast wraps around and produces loud clicks in the written file.
    pcm = (np.clip(audio_resampled, -1.0, 1.0) * 32767).astype(np.int16)

    wavfile.write(audio_dict['path'], rate=target_sr, data=pcm)
|
21 |
+
|
22 |
+
def remove_outer_quotes_regex(sen: str) -> str:
    """Strip one enclosing pair of quote characters from *sen*.

    The pattern only matches when the string both starts and ends with a
    single or double quote (the two need not be the same character); in that
    case the text between them is returned. Any other input comes back
    unchanged.
    """
    outer_quotes = re.compile(r'^["\'](.*)["\']$')
    return outer_quotes.sub(r'\1', sen)
|
24 |
+
|
25 |
+
# ASCII banner printed at start-up. It must be a raw string: in a normal
# literal "\_" is an invalid escape sequence and the backslash that ends the
# first art line acts as a line continuation, which mangles the drawing.
_BANNER = r"""
 /\_/\
( o.o )
 > ^ <

V O I C E
"""


def _export_split(split, dataset_dir: str, filelist_path: str, num_shards: int = 1) -> None:
    """Write every clip of one split to ``<dataset_dir>/wavs`` and save its filelist.

    Args:
        split: One split of the loaded dataset. Each row is read through
            ``row['audio']`` and ``row['raw_transcription']``.
        dataset_dir: Absolute path of the dataset output directory.
        filelist_path: Destination of the ``path|text`` metadata file.
        num_shards: When greater than 1 the split is walked shard by shard
            with ``split.shard``; otherwise it is iterated directly.
    """
    wavs_dir = os.path.join(dataset_dir, 'wavs')
    # Prefix recorded in the filelist (kept in the '<dataset_dir>/wavs/' form).
    wavs_prefix = f'{dataset_dir}/wavs/'

    if num_shards > 1:
        shards = (split.shard(num_shards=num_shards, index=ind) for ind in range(num_shards))
    else:
        shards = (split,)

    paths = []
    texts = []
    # A single progress bar per split, advanced once per exported row.
    with tqdm(total=len(split), leave=False) as pbar:
        for shard in shards:
            for row in shard:
                audio = row['audio']
                # Hand load_audio an explicit destination inside wavs/ so the
                # process working directory never has to be changed.
                load_audio({
                    'array': audio['array'],
                    'sampling_rate': audio['sampling_rate'],
                    'path': os.path.join(wavs_dir, audio['path']),
                })
                paths.append(wavs_prefix + audio['path'])
                texts.append(remove_outer_quotes_regex(row['raw_transcription']))
                pbar.update(1)

    df = pd.DataFrame({'path': paths, 'text': texts})
    df.to_csv(filelist_path, sep='|', header=False, index=False)


def main() -> None:
    """Download a Hugging Face dataset and export it as WAV files plus filelists.

    Prompts for ``<REPO_NAME/DATASET_NAME>``, creates ``<DATASET_NAME>/wavs``
    under the current directory, writes every ``train`` and ``test`` clip
    there through ``load_audio`` and saves
    ``<DATASET_NAME>_filelist_train.txt`` / ``<DATASET_NAME>_filelist_test.txt``
    (``path|text`` lines) in ``<DATASET_NAME>``.

    Raises:
        ValueError: If the entered name has no ``/<DATASET_NAME>`` part.
        KeyError: If the loaded dataset lacks a ``train`` or ``test`` split.
    """
    name_dataset = input('Write HF dataset name as <REPO_NAME/DATASET_NAME>: ').strip()
    name_parts = name_dataset.split('/')
    if len(name_parts) < 2 or not name_parts[1]:
        raise ValueError(
            f'Expected a dataset name like <REPO_NAME/DATASET_NAME>, got {name_dataset!r}'
        )
    sub_name_dataset = name_parts[1]

    # exist_ok lets an interrupted export be re-run into the same folder.
    dataset_dir = os.path.abspath(sub_name_dataset)
    os.makedirs(os.path.join(dataset_dir, 'wavs'), exist_ok=True)

    print(_BANNER)

    print('--- LOADING DATASET ---')
    your_dataset = load_dataset(name_dataset)

    # Fail before the long train export if a required split is missing.
    for split_name in ('train', 'test'):
        if split_name not in your_dataset:
            raise KeyError(f'Dataset {name_dataset!r} has no {split_name!r} split')

    # mk TRAIN
    print()
    print('--- CONVERTIND AND SAVING THE TRAIN DATASET ---')
    _export_split(
        your_dataset['train'],
        dataset_dir,
        os.path.join(dataset_dir, f'{sub_name_dataset}_filelist_train.txt'),
        num_shards=20,
    )

    # mk TEST
    print()
    print('--- CONVERTIND AND SAVING THE TEST DATASET ---')
    _export_split(
        your_dataset['test'],
        dataset_dir,
        os.path.join(dataset_dir, f'{sub_name_dataset}_filelist_test.txt'),
    )

    print()
    print('--- THE DATASET IS READY ---')
    print(f'Dir of data is "{dataset_dir}"')
|
96 |
+
|
97 |
+
|
98 |
+
# Script entry point: main() is called only when this file is executed directly.
if __name__ == "__main__":
|
99 |
+
main()
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 Shivam Mehta
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
MANIFEST.in
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
include README.md
|
2 |
+
include LICENSE.txt
|
3 |
+
include requirements.*.txt
|
4 |
+
include *.cff
|
5 |
+
include requirements.txt
|
6 |
+
include matcha/VERSION
|
7 |
+
recursive-include matcha *.json
|
8 |
+
recursive-include matcha *.html
|
9 |
+
recursive-include matcha *.png
|
10 |
+
recursive-include matcha *.md
|
11 |
+
recursive-include matcha *.py
|
12 |
+
recursive-include matcha *.pyx
|
13 |
+
recursive-exclude tests *
|
14 |
+
prune tests*
|
Makefile
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
help: ## Show help
|
3 |
+
@grep -E '^[.a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
|
4 |
+
|
5 |
+
clean: ## Clean autogenerated files
|
6 |
+
rm -rf dist
|
7 |
+
find . -type f -name "*.DS_Store" -ls -delete
|
8 |
+
find . | grep -E "(__pycache__|\.pyc|\.pyo)" | xargs rm -rf
|
9 |
+
find . | grep -E ".pytest_cache" | xargs rm -rf
|
10 |
+
find . | grep -E ".ipynb_checkpoints" | xargs rm -rf
|
11 |
+
rm -f .coverage
|
12 |
+
|
13 |
+
clean-logs: ## Clean logs
|
14 |
+
rm -rf logs/**
|
15 |
+
|
16 |
+
create-package: ## Create wheel and tar gz
|
17 |
+
rm -rf dist/
|
18 |
+
python setup.py bdist_wheel --plat-name=manylinux1_x86_64
|
19 |
+
python setup.py sdist
|
20 |
+
python -m twine upload dist/* --verbose --skip-existing
|
21 |
+
|
22 |
+
format: ## Run pre-commit hooks
|
23 |
+
pre-commit run -a
|
24 |
+
|
25 |
+
sync: ## Merge changes from main branch to your current branch
|
26 |
+
git pull
|
27 |
+
git pull origin main
|
28 |
+
|
29 |
+
test: ## Run not slow tests
|
30 |
+
pytest -k "not slow"
|
31 |
+
|
32 |
+
test-full: ## Run all tests
|
33 |
+
pytest
|
34 |
+
|
35 |
+
train-ljspeech: ## Train the model
|
36 |
+
python matcha/train.py experiment=ljspeech
|
37 |
+
|
38 |
+
train-ljspeech-min: ## Train the model with minimum memory
|
39 |
+
python matcha/train.py experiment=ljspeech_min_memory
|
40 |
+
|
41 |
+
start_app: ## Start the app
|
42 |
+
python matcha/app.py
|
README.md
ADDED
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<div align="center">
|
2 |
+
|
3 |
+
|
4 |
+
|
5 |
+
# AkylAI TTS
|
6 |
+
|
7 |
+
|
8 |
+
[![python](https://img.shields.io/badge/-Python_3.10-blue?logo=python&logoColor=white)](https://www.python.org/downloads/release/python-3100/)
|
9 |
+
[![pytorch](https://img.shields.io/badge/PyTorch_2.0+-ee4c2c?logo=pytorch&logoColor=white)](https://pytorch.org/get-started/locally/)
|
10 |
+
[![lightning](https://img.shields.io/badge/-Lightning_2.0+-792ee5?logo=pytorchlightning&logoColor=white)](https://pytorchlightning.ai/)
|
11 |
+
[![hydra](https://img.shields.io/badge/Config-Hydra_1.3-89b8cd)](https://hydra.cc/)
|
12 |
+
[![black](https://img.shields.io/badge/Code%20Style-Black-black.svg?labelColor=gray)](https://black.readthedocs.io/en/stable/)
|
13 |
+
[![isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
|
14 |
+
|
15 |
+
<img src="https://github.com/simonlobgromov/Matcha-TTS/blob/main/photo_2024-04-07_15-59-52.png" height="400"/>
|
16 |
+
</div>
|
17 |
+
|
18 |
+
# AkylAI-TTS for Kyrgyz language
|
19 |
+
|
20 |
+
We present a text-to-speech model for the Kyrgyz language, trained on 13 hours of speech (7,000 samples), complete with source code and training scripts. The architecture is based on Matcha-TTS.
|
21 |
+
It is a new approach to non-autoregressive neural TTS that uses [conditional flow matching](https://arxiv.org/abs/2210.02747) (similar to [rectified flows](https://arxiv.org/abs/2209.03003)) to speed up ODE-based speech synthesis. Our method:
|
22 |
+
|
23 |
+
- Is probabilistic
|
24 |
+
- Has compact memory footprint
|
25 |
+
- Sounds highly natural
|
26 |
+
- Is very fast to synthesise from
|
27 |
+
|
28 |
+
You can try our *AkylAI TTS* by visiting [SPACE](https://huggingface.co/spaces/the-cramer-project/akylai-tts-mini) and read [ICASSP 2024 paper](https://arxiv.org/abs/2309.03199) for more details.
|
29 |
+
|
30 |
+
# Inference
|
31 |
+
|
32 |
+
## Run via terminal
|
33 |
+
|
34 |
+
|
35 |
+
It is recommended to start by setting up a virtual environment using `venv`.
|
36 |
+
|
37 |
+
1. Clone this repository and install all modules and dependencies by running the commands:
|
38 |
+
|
39 |
+
```
|
40 |
+
git clone https://github.com/simonlobgromov/Matcha-TTS
|
41 |
+
cd Matcha-TTS
|
42 |
+
pip install -e .
|
43 |
+
apt-get install espeak-ng
|
44 |
+
```
|
45 |
+
|
46 |
+
|
47 |
+
2. Run with CLI arguments:
|
48 |
+
|
49 |
+
- To synthesise from given text, run:
|
50 |
+
|
51 |
+
```bash
|
52 |
+
matcha-tts --text "<INPUT TEXT>"
|
53 |
+
```
|
54 |
+
|
55 |
+
- To synthesise from a file, run:
|
56 |
+
|
57 |
+
```bash
|
58 |
+
matcha-tts --file <PATH TO FILE>
|
59 |
+
```
|
60 |
+
- Speaking rate
|
61 |
+
|
62 |
+
```bash
|
63 |
+
matcha-tts --text "<INPUT TEXT>" --speaking_rate 1.0
|
64 |
+
```
|
65 |
+
|
66 |
+
- Sampling temperature
|
67 |
+
|
68 |
+
```bash
|
69 |
+
matcha-tts --text "<INPUT TEXT>" --temperature 0.667
|
70 |
+
```
|
71 |
+
|
72 |
+
- Euler ODE solver steps
|
73 |
+
|
74 |
+
```bash
|
75 |
+
matcha-tts --text "<INPUT TEXT>" --steps 10
|
76 |
+
```
|
77 |
+
|
78 |
+
|
79 |
+
# Train with your own dataset.
|
80 |
+
|
81 |
+
## Dataset
|
82 |
+
|
83 |
+
To train this model, organize your data similarly to [LJ Speech](https://keithito.com/LJ-Speech-Dataset/). Each audio file should be single-channel 16-bit PCM WAV with a sample rate of 22050 Hz. WAV files must have unique names, for example:
|
84 |
+
|
85 |
+
```
|
86 |
+
file_1.wav
|
87 |
+
file_2.wav
|
88 |
+
file_3.wav
|
89 |
+
file_4.wav
|
90 |
+
....
|
91 |
+
file_12454.wav
|
92 |
+
file_12455.wav
|
93 |
+
```
|
94 |
+
|
95 |
+
|
96 |
+
They should also be placed at the root of the project directory in a separate folder.
|
97 |
+
|
98 |
+
Additionally, the project should include two `.txt` files for Train and Test with metadata for the files. The names of these files can be arbitrary, and their structure is as follows:
|
99 |
+
```
|
100 |
+
.../Matcha-TTS/<your folder name>/wavs/<filename>.wav|Баарыңарга салам, менин атым Акылай.
|
101 |
+
.../Matcha-TTS/<your folder name>/wavs/<filename>.wav|Мен бардыгын бул жерде Инновация борборунда көргөнүмө абдан кубанычтамын.
|
102 |
+
.../Matcha-TTS/<your folder name>/wavs/<filename>.wav|<your sentence>
|
103 |
+
.../Matcha-TTS/<your folder name>/wavs/<filename>.wav|<your sentence>
|
104 |
+
.../Matcha-TTS/<your folder name>/wavs/<filename>.wav|<your sentence>
|
105 |
+
........
|
106 |
+
```
|
107 |
+
Where each line is the FULL path to the file located in the folder with the uploaded audio, and a sentence in its original form with punctuation is written after the delimiter '|'.
|
108 |
+
It is advisable to clean the text of unnecessary and unwanted characters beforehand. Be careful with abbreviations and contractions.
|
109 |
+
The text preprocessing does not include functionality for processing abbreviations and contractions; however, the built-in phonemizer can transcribe numbers, but to avoid errors, it is better to write numbers in words.
|
110 |
+
|
111 |
+
## Dataset from Hugging Face
|
112 |
+
|
113 |
+
If you want to use a dataset that you store on Hugging Face, it would be convenient to use the `create-dataset` script, which will handle the downloading and all the data preparation, including .txt files with metadata.
|
114 |
+
Here's what its structure might look like:
|
115 |
+
|
116 |
+
```
|
117 |
+
DatasetDict({
|
118 |
+
train: Dataset({
|
119 |
+
features: ['id', 'raw_transcription', 'transcription', 'sentence_type', 'speaker_id', 'gender', 'audio'],
|
120 |
+
num_rows: 7016
|
121 |
+
})
|
122 |
+
test: Dataset({
|
123 |
+
features: ['id', 'raw_transcription', 'transcription', 'sentence_type', 'speaker_id', 'gender', 'audio'],
|
124 |
+
num_rows: 31
|
125 |
+
})
|
126 |
+
})
|
127 |
+
```
|
128 |
+
|
129 |
+
Where the most important and mandatory features are:
|
130 |
+
```
|
131 |
+
['raw_transcription', 'audio']
|
132 |
+
```
|
133 |
+
|
134 |
+
Where:
|
135 |
+
|
136 |
+
`raw_transcription` - this is the text of your sentences in the original version (the requirements are the same as in the previous method).
|
137 |
+
|
138 |
+
`audio` - these are audio files with metadata, which are dictionaries with keys:
|
139 |
+
|
140 |
+
* `array` - audio in the form of a `numpy.ndarray` with a `float32` data type
|
141 |
+
* `path` - file name
|
142 |
+
* `sampling_rate` - Sampling rate, which should be no less than 22050 Hz.
|
143 |
+
|
144 |
+
Example of the `audio` field of a row:
|
145 |
+
|
146 |
+
```
|
147 |
+
{'array': array([-3.05175781e-05, -3.05175781e-05, 0.00000000e+00, ...,
|
148 |
+
0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
|
149 |
+
'path': '1353.wav',
|
150 |
+
'sampling_rate': 44100}
|
151 |
+
```
|
152 |
+
|
153 |
+
|
154 |
+
|
155 |
+
|
156 |
+
## Process by Terminal
|
157 |
+
|
158 |
+
* **Load this repo and connect to HF**
|
159 |
+
|
160 |
+
```
|
161 |
+
git clone https://github.com/simonlobgromov/Matcha-TTS
|
162 |
+
cd Matcha-TTS
|
163 |
+
pip install -e .
|
164 |
+
```
|
165 |
+
|
166 |
+
Install this:
|
167 |
+
|
168 |
+
```
|
169 |
+
apt-get install espeak-ng
|
170 |
+
```
|
171 |
+
Connect to HF (Skip this step if you are not using data from Hugging Face.)
|
172 |
+
|
173 |
+
```
|
174 |
+
git config --global credential.helper store
|
175 |
+
huggingface-cli login
|
176 |
+
```
|
177 |
+
|
178 |
+
* **Load the Data** (Skip this step if you are not using data from Hugging Face.)
|
179 |
+
|
180 |
+
The script will automatically create a folder with audio recordings and text files with metadata. During the process, enter the HF repository name and the dataset name.
|
181 |
+
|
182 |
+
|
183 |
+
```
|
184 |
+
create-dataset
|
185 |
+
|
186 |
+
# If you see a cat, then everything is fine!
|
187 |
+
```
|
188 |
+
|
189 |
+
* Go to `configs/data/akylai<OR YOUR FILE NAME>.yaml` and change
|
190 |
+
|
191 |
+
```yaml
|
192 |
+
train_filelist_path: data/filelists/akylai_audio_text_train_filelist.txt # path to your TXT with metadata
|
193 |
+
valid_filelist_path: data/filelists/akylai_audio_text_val_filelist.txt # path to your TXT with metadata
|
194 |
+
```
|
195 |
+
|
196 |
+
* Generate normalisation statistics with the yaml file of dataset configuration
|
197 |
+
|
198 |
+
```bash
|
199 |
+
matcha-data-stats -i akylai.yaml
|
200 |
+
# Output:
|
201 |
+
#{'mel_mean': -5.53662231756592, 'mel_std': 2.1161014277038574}
|
202 |
+
```
|
203 |
+
|
204 |
+
* Update these values in `configs/data/akylai.yaml` under `data_statistics` key.
|
205 |
+
|
206 |
+
```yaml
|
207 |
+
data_statistics: # Computed for akylai(or your) dataset
|
208 |
+
mel_mean: -5.536622
|
209 |
+
mel_std: 2.116101
|
210 |
+
```
|
211 |
+
|
212 |
+
|
213 |
+
|
214 |
+
* **Train**
|
215 |
+
|
216 |
+
```
|
217 |
+
python matcha/train.py experiment=akylai
|
218 |
+
```
|
219 |
+
|
220 |
+
OR
|
221 |
+
|
222 |
+
```
|
223 |
+
python matcha/train.py experiment=akylai trainer.devices=[0,1]
|
224 |
+
```
|
225 |
+
|
226 |
+
|
227 |
+
* **Checkpoints**
|
228 |
+
|
229 |
+
Checkpoints will be saved in `./Matcha-TTS/logs/train/<MODEL_NAME>/runs/<DATE>_<TIME>/checkpoints`. Unload them or select the last few checkpoints.
|
230 |
+
|
231 |
+
|
232 |
+
|
233 |
+
# Credits
|
234 |
+
|
235 |
+
|
236 |
+
- Shivam Mehta ([GitHub](https://github.com/shivammehta25))
|
237 |
+
- The Cramer Project (Data collection and preprocessing) [Official Space](https://thecramer.com/)
|
238 |
+
- Amantur Amatov (Expert)
|
239 |
+
- Timur Turatali (Expert, Research)
|
240 |
+
- Den Pavlov (Research, Data preprocessing and ML engineering) [GitHub](https://github.com/simonlobgromov/Matcha-TTS)
|
241 |
+
- Ulan Abdurazakov (Environment Developer)
|
242 |
+
- Nursultan Bakashov (CEO)
|
243 |
+
|
244 |
+
## Citation information
|
245 |
+
|
246 |
+
If you use our code or otherwise find this work useful, please cite our paper:
|
247 |
+
|
248 |
+
```text
|
249 |
+
@inproceedings{mehta2024matcha,
|
250 |
+
title={Matcha-{TTS}: A fast {TTS} architecture with conditional flow matching},
|
251 |
+
author={Mehta, Shivam and Tu, Ruibo and Beskow, Jonas and Sz{\'e}kely, {\'E}va and Henter, Gustav Eje},
|
252 |
+
booktitle={Proc. ICASSP},
|
253 |
+
year={2024}
|
254 |
+
}
|
255 |
+
```
|
256 |
+
|
257 |
+
## Acknowledgements
|
258 |
+
|
259 |
+
Since this code uses [Lightning-Hydra-Template](https://github.com/ashleve/lightning-hydra-template), you have all the powers that come with it.
|
260 |
+
|
261 |
+
Other source code we would like to acknowledge:
|
262 |
+
|
263 |
+
- [Coqui-TTS](https://github.com/coqui-ai/TTS/tree/dev): For helping me figure out how to make cython binaries pip installable and encouragement
|
264 |
+
- [Hugging Face Diffusers](https://huggingface.co/): For their awesome diffusers library and its components
|
265 |
+
- [Grad-TTS](https://github.com/huawei-noah/Speech-Backbones/tree/main/Grad-TTS): For the monotonic alignment search source code
|
266 |
+
- [torchdyn](https://github.com/DiffEqML/torchdyn): Useful for trying other ODE solvers during research and development
|
267 |
+
- [labml.ai](https://nn.labml.ai/transformers/rope/index.html): For the RoPE implementation
|
268 |
+
|
configs/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# this file is needed here to include configs when building project as a package
|
configs/callbacks/default.yaml
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defaults:
|
2 |
+
- model_checkpoint.yaml
|
3 |
+
- model_summary.yaml
|
4 |
+
- rich_progress_bar.yaml
|
5 |
+
- _self_
|
configs/callbacks/model_checkpoint.yaml
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html
|
2 |
+
|
3 |
+
model_checkpoint:
|
4 |
+
_target_: lightning.pytorch.callbacks.ModelCheckpoint
|
5 |
+
dirpath: ${paths.output_dir}/checkpoints # directory to save the model file
|
6 |
+
filename: checkpoint_{epoch:03d} # checkpoint filename
|
7 |
+
monitor: epoch # name of the logged metric which determines when model is improving
|
8 |
+
verbose: False # verbosity mode
|
9 |
+
save_last: true # additionally always save an exact copy of the last checkpoint to a file last.ckpt
|
10 |
+
save_top_k: 5 # save k best models (determined by above metric)
|
11 |
+
mode: "max" # "max" means higher metric value is better, can be also "min"
|
12 |
+
auto_insert_metric_name: True # when True, the checkpoints filenames will contain the metric name
|
13 |
+
save_weights_only: False # if True, then only the model’s weights will be saved
|
14 |
+
every_n_train_steps: null # number of training steps between checkpoints
|
15 |
+
train_time_interval: null # checkpoints are monitored at the specified time interval
|
16 |
+
every_n_epochs: 10 # number of epochs between checkpoints
|
17 |
+
save_on_train_epoch_end: null # whether to run checkpointing at the end of the training epoch or the end of validation
|
configs/callbacks/model_summary.yaml
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html
|
2 |
+
|
3 |
+
model_summary:
|
4 |
+
_target_: lightning.pytorch.callbacks.RichModelSummary
|
5 |
+
max_depth: 3 # the maximum depth of layer nesting that the summary will include
|
configs/callbacks/none.yaml
ADDED
File without changes
|
configs/callbacks/rich_progress_bar.yaml
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.RichProgressBar.html
|
2 |
+
|
3 |
+
rich_progress_bar:
|
4 |
+
_target_: lightning.pytorch.callbacks.RichProgressBar
|
configs/data/akylai.yaml
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_target_: matcha.data.text_mel_datamodule.TextMelDataModule
|
2 |
+
name: akylai
|
3 |
+
train_filelist_path: ./Kany_dataset_mk4/Kany_dataset_mk4_filelist_train.txt
|
4 |
+
valid_filelist_path: ./Kany_dataset_mk4/Kany_dataset_mk4_filelist_test.txt
|
5 |
+
batch_size: 32
|
6 |
+
num_workers: 20
|
7 |
+
pin_memory: True
|
8 |
+
cleaners: [kyrgyz_cleaners]
|
9 |
+
add_blank: True
|
10 |
+
n_spks: 1
|
11 |
+
n_fft: 1024
|
12 |
+
n_feats: 80
|
13 |
+
sample_rate: 22050
|
14 |
+
hop_length: 256
|
15 |
+
win_length: 1024
|
16 |
+
f_min: 0
|
17 |
+
f_max: 8000
|
18 |
+
data_statistics: # TODO confirm: previously labelled "Computed for ljspeech dataset"; recompute with matcha-data-stats for the akylai data
|
19 |
+
mel_mean: -5.6814561
|
20 |
+
mel_std: 2.7337122
|
21 |
+
seed: ${seed}
|
configs/data/hi-fi_en-US_female.yaml
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defaults:
|
2 |
+
- ljspeech
|
3 |
+
- _self_
|
4 |
+
|
5 |
+
# Dataset URL: https://ast-astrec.nict.go.jp/en/release/hi-fi-captain/
|
6 |
+
_target_: matcha.data.text_mel_datamodule.TextMelDataModule
|
7 |
+
name: hi-fi_en-US_female
|
8 |
+
train_filelist_path: data/filelists/hi-fi-captain-en-us-female_train.txt
|
9 |
+
valid_filelist_path: data/filelists/hi-fi-captain-en-us-female_val.txt
|
10 |
+
batch_size: 32
|
11 |
+
cleaners: [english_cleaners_piper]
|
12 |
+
data_statistics: # Computed for this dataset
|
13 |
+
mel_mean: -6.38385
|
14 |
+
mel_std: 2.541796
|
configs/data/ljspeech.yaml
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_target_: matcha.data.text_mel_datamodule.TextMelDataModule
|
2 |
+
name: ljspeech
|
3 |
+
train_filelist_path: /content/kany_dataset/kany_filelist_train.txt
|
4 |
+
valid_filelist_path: /content/kany_dataset/kany_filelist_test.txt
|
5 |
+
batch_size: 16
|
6 |
+
num_workers: 20
|
7 |
+
pin_memory: True
|
8 |
+
cleaners: [kyrgyz_cleaners]
|
9 |
+
add_blank: True
|
10 |
+
n_spks: 1
|
11 |
+
n_fft: 1024
|
12 |
+
n_feats: 80
|
13 |
+
sample_rate: 22050
|
14 |
+
hop_length: 256
|
15 |
+
win_length: 1024
|
16 |
+
f_min: 0
|
17 |
+
f_max: 8000
|
18 |
+
data_statistics: # Computed for ljspeech dataset
|
19 |
+
mel_mean: -5.68145561
|
20 |
+
mel_std: 2.7337122
|
21 |
+
seed: ${seed}
|
22 |
+
|
configs/data/vctk.yaml
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defaults:
|
2 |
+
- ljspeech
|
3 |
+
- _self_
|
4 |
+
|
5 |
+
_target_: matcha.data.text_mel_datamodule.TextMelDataModule
|
6 |
+
name: vctk
|
7 |
+
train_filelist_path: data/filelists/vctk_audio_sid_text_train_filelist.txt
|
8 |
+
valid_filelist_path: data/filelists/vctk_audio_sid_text_val_filelist.txt
|
9 |
+
batch_size: 32
|
10 |
+
add_blank: True
|
11 |
+
n_spks: 109
|
12 |
+
data_statistics: # Computed for vctk dataset
|
13 |
+
mel_mean: -6.630575
|
14 |
+
mel_std: 2.482914
|
configs/debug/default.yaml
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
# default debugging setup, runs 1 full epoch
|
4 |
+
# other debugging configs can inherit from this one
|
5 |
+
|
6 |
+
# overwrite task name so debugging logs are stored in separate folder
|
7 |
+
task_name: "debug"
|
8 |
+
|
9 |
+
# disable callbacks and loggers during debugging
|
10 |
+
# callbacks: null
|
11 |
+
# logger: null
|
12 |
+
|
13 |
+
extras:
|
14 |
+
ignore_warnings: False
|
15 |
+
enforce_tags: False
|
16 |
+
|
17 |
+
# sets level of all command line loggers to 'DEBUG'
|
18 |
+
# https://hydra.cc/docs/tutorials/basic/running_your_app/logging/
|
19 |
+
hydra:
|
20 |
+
job_logging:
|
21 |
+
root:
|
22 |
+
level: DEBUG
|
23 |
+
|
24 |
+
# use this to also set hydra loggers to 'DEBUG'
|
25 |
+
# verbose: True
|
26 |
+
|
27 |
+
trainer:
|
28 |
+
max_epochs: 1
|
29 |
+
accelerator: cpu # debuggers don't like gpus
|
30 |
+
devices: 1 # debuggers don't like multiprocessing
|
31 |
+
detect_anomaly: true # raise exception if NaN or +/-inf is detected in any tensor
|
32 |
+
|
33 |
+
data:
|
34 |
+
num_workers: 0 # debuggers don't like multiprocessing
|
35 |
+
pin_memory: False # disable gpu memory pin
|
configs/debug/fdr.yaml
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
# runs 1 train, 1 validation and 1 test step
|
4 |
+
|
5 |
+
defaults:
|
6 |
+
- default
|
7 |
+
|
8 |
+
trainer:
|
9 |
+
fast_dev_run: true
|
configs/debug/limit.yaml
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
# uses only 1% of the training data and 5% of validation/test data
|
4 |
+
|
5 |
+
defaults:
|
6 |
+
- default
|
7 |
+
|
8 |
+
trainer:
|
9 |
+
max_epochs: 3
|
10 |
+
limit_train_batches: 0.01
|
11 |
+
limit_val_batches: 0.05
|
12 |
+
limit_test_batches: 0.05
|
configs/debug/overfit.yaml
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
# overfits to 3 batches
|
4 |
+
|
5 |
+
defaults:
|
6 |
+
- default
|
7 |
+
|
8 |
+
trainer:
|
9 |
+
max_epochs: 20
|
10 |
+
overfit_batches: 3
|
11 |
+
|
12 |
+
# model ckpt and early stopping need to be disabled during overfitting
|
13 |
+
callbacks: null
|
configs/debug/profiler.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
# runs with execution time profiling
|
4 |
+
|
5 |
+
defaults:
|
6 |
+
- default
|
7 |
+
|
8 |
+
trainer:
|
9 |
+
max_epochs: 1
|
10 |
+
# profiler: "simple"
|
11 |
+
profiler: "advanced"
|
12 |
+
# profiler: "pytorch"
|
13 |
+
accelerator: gpu
|
14 |
+
|
15 |
+
limit_train_batches: 0.02
|
configs/eval.yaml
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
defaults:
|
4 |
+
- _self_
|
5 |
+
- data: akylai # choose datamodule with `test_dataloader()` for evaluation
|
6 |
+
- model: matcha
|
7 |
+
- logger: null
|
8 |
+
- trainer: default
|
9 |
+
- paths: default
|
10 |
+
- extras: default
|
11 |
+
- hydra: default
|
12 |
+
|
13 |
+
task_name: "eval"
|
14 |
+
|
15 |
+
tags: ["dev"]
|
16 |
+
|
17 |
+
# passing checkpoint path is necessary for evaluation
|
18 |
+
ckpt_path: ???
|
configs/experiment/akylai.yaml
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
# to execute this experiment run:
|
4 |
+
# python matcha/train.py experiment=akylai
|
5 |
+
|
6 |
+
defaults:
|
7 |
+
- override /data: akylai.yaml
|
8 |
+
|
9 |
+
# all parameters below will be merged with parameters from default configurations set above
|
10 |
+
# this allows you to overwrite only specified parameters
|
11 |
+
|
12 |
+
tags: ["akylai"]
|
13 |
+
|
14 |
+
run_name: akylai
|
configs/experiment/hifi_dataset_piper_phonemizer.yaml
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
# to execute this experiment run:
|
4 |
+
# python matcha/train.py experiment=hifi_dataset_piper_phonemizer
|
5 |
+
|
6 |
+
defaults:
|
7 |
+
- override /data: hi-fi_en-US_female.yaml
|
8 |
+
|
9 |
+
# all parameters below will be merged with parameters from default configurations set above
|
10 |
+
# this allows you to overwrite only specified parameters
|
11 |
+
|
12 |
+
tags: ["hi-fi", "single_speaker", "piper_phonemizer", "en_US", "female"]
|
13 |
+
|
14 |
+
run_name: hi-fi_en-US_female_piper_phonemizer
|
configs/experiment/ljspeech.yaml
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
# to execute this experiment run:
|
4 |
+
# python matcha/train.py experiment=ljspeech
|
5 |
+
|
6 |
+
defaults:
|
7 |
+
- override /data: ljspeech.yaml
|
8 |
+
|
9 |
+
# all parameters below will be merged with parameters from default configurations set above
|
10 |
+
# this allows you to overwrite only specified parameters
|
11 |
+
|
12 |
+
tags: ["ljspeech"]
|
13 |
+
|
14 |
+
run_name: ljspeech
|
configs/experiment/ljspeech_min_memory.yaml
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
# to execute this experiment run:
|
4 |
+
# python matcha/train.py experiment=ljspeech_min_memory
|
5 |
+
|
6 |
+
defaults:
|
7 |
+
- override /data: ljspeech.yaml
|
8 |
+
|
9 |
+
# all parameters below will be merged with parameters from default configurations set above
|
10 |
+
# this allows you to overwrite only specified parameters
|
11 |
+
|
12 |
+
tags: ["ljspeech"]
|
13 |
+
|
14 |
+
run_name: ljspeech_min
|
15 |
+
|
16 |
+
|
17 |
+
model:
|
18 |
+
out_size: 172
|
configs/experiment/multispeaker.yaml
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
# to execute this experiment run:
|
4 |
+
# python train.py experiment=multispeaker
|
5 |
+
|
6 |
+
defaults:
|
7 |
+
- override /data: vctk.yaml
|
8 |
+
|
9 |
+
# all parameters below will be merged with parameters from default configurations set above
|
10 |
+
# this allows you to overwrite only specified parameters
|
11 |
+
|
12 |
+
tags: ["multispeaker"]
|
13 |
+
|
14 |
+
run_name: multispeaker
|
configs/extras/default.yaml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# disable python warnings if they annoy you
|
2 |
+
ignore_warnings: False
|
3 |
+
|
4 |
+
# ask user for tags if none are provided in the config
|
5 |
+
enforce_tags: True
|
6 |
+
|
7 |
+
# pretty print config tree at the start of the run using Rich library
|
8 |
+
print_config: True
|
configs/hparams_search/mnist_optuna.yaml
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
# example hyperparameter optimization of some experiment with Optuna:
|
4 |
+
# python train.py -m hparams_search=mnist_optuna experiment=example
|
5 |
+
|
6 |
+
defaults:
|
7 |
+
- override /hydra/sweeper: optuna
|
8 |
+
|
9 |
+
# choose metric which will be optimized by Optuna
|
10 |
+
# make sure this is the correct name of some metric logged in lightning module!
|
11 |
+
optimized_metric: "val/acc_best"
|
12 |
+
|
13 |
+
# here we define Optuna hyperparameter search
|
14 |
+
# it optimizes for value returned from function with @hydra.main decorator
|
15 |
+
# docs: https://hydra.cc/docs/next/plugins/optuna_sweeper
|
16 |
+
hydra:
|
17 |
+
mode: "MULTIRUN" # set hydra to multirun by default if this config is attached
|
18 |
+
|
19 |
+
sweeper:
|
20 |
+
_target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
|
21 |
+
|
22 |
+
# storage URL to persist optimization results
|
23 |
+
# for example, you can use SQLite if you set 'sqlite:///example.db'
|
24 |
+
storage: null
|
25 |
+
|
26 |
+
# name of the study to persist optimization results
|
27 |
+
study_name: null
|
28 |
+
|
29 |
+
# number of parallel workers
|
30 |
+
n_jobs: 1
|
31 |
+
|
32 |
+
# 'minimize' or 'maximize' the objective
|
33 |
+
direction: maximize
|
34 |
+
|
35 |
+
# total number of runs that will be executed
|
36 |
+
n_trials: 20
|
37 |
+
|
38 |
+
# choose Optuna hyperparameter sampler
|
39 |
+
# you can choose bayesian sampler (tpe), random search (without optimization), grid sampler, and others
|
40 |
+
# docs: https://optuna.readthedocs.io/en/stable/reference/samplers.html
|
41 |
+
sampler:
|
42 |
+
_target_: optuna.samplers.TPESampler
|
43 |
+
seed: 1234
|
44 |
+
n_startup_trials: 10 # number of random sampling runs before optimization starts
|
45 |
+
|
46 |
+
# define hyperparameter search space
|
47 |
+
params:
|
48 |
+
model.optimizer.lr: interval(0.0001, 0.1)
|
49 |
+
data.batch_size: choice(32, 64, 128, 256)
|
50 |
+
model.net.lin1_size: choice(64, 128, 256)
|
51 |
+
model.net.lin2_size: choice(64, 128, 256)
|
52 |
+
model.net.lin3_size: choice(32, 64, 128, 256)
|
configs/hydra/default.yaml
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://hydra.cc/docs/configure_hydra/intro/
|
2 |
+
|
3 |
+
# enable color logging
|
4 |
+
defaults:
|
5 |
+
- override hydra_logging: colorlog
|
6 |
+
- override job_logging: colorlog
|
7 |
+
|
8 |
+
# output directory, generated dynamically on each run
|
9 |
+
run:
|
10 |
+
dir: ${paths.log_dir}/${task_name}/${run_name}/runs/${now:%Y-%m-%d}_${now:%H-%M-%S}
|
11 |
+
sweep:
|
12 |
+
dir: ${paths.log_dir}/${task_name}/${run_name}/multiruns/${now:%Y-%m-%d}_${now:%H-%M-%S}
|
13 |
+
subdir: ${hydra.job.num}
|
14 |
+
|
15 |
+
job_logging:
|
16 |
+
handlers:
|
17 |
+
file:
|
18 |
+
# Incorporates fix from https://github.com/facebookresearch/hydra/pull/2242
|
19 |
+
filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
|
configs/local/.gitkeep
ADDED
File without changes
|
configs/logger/aim.yaml
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://aimstack.io/
|
2 |
+
|
3 |
+
# example usage in lightning module:
|
4 |
+
# https://github.com/aimhubio/aim/blob/main/examples/pytorch_lightning_track.py
|
5 |
+
|
6 |
+
# open the Aim UI with the following command (run in the folder containing the `.aim` folder):
|
7 |
+
# `aim up`
|
8 |
+
|
9 |
+
aim:
|
10 |
+
_target_: aim.pytorch_lightning.AimLogger
|
11 |
+
repo: ${paths.root_dir} # .aim folder will be created here
|
12 |
+
# repo: "aim://ip_address:port" # can instead provide IP address pointing to Aim remote tracking server which manages the repo, see https://aimstack.readthedocs.io/en/latest/using/remote_tracking.html#
|
13 |
+
|
14 |
+
# aim allows grouping runs under an experiment name
|
15 |
+
experiment: null # any string, set to "default" if not specified
|
16 |
+
|
17 |
+
train_metric_prefix: "train/"
|
18 |
+
val_metric_prefix: "val/"
|
19 |
+
test_metric_prefix: "test/"
|
20 |
+
|
21 |
+
# sets the tracking interval in seconds for system usage metrics (CPU, GPU, memory, etc.)
|
22 |
+
system_tracking_interval: 10 # set to null to disable system metrics tracking
|
23 |
+
|
24 |
+
# enable/disable logging of system params such as installed packages, git info, env vars, etc.
|
25 |
+
log_system_params: true
|
26 |
+
|
27 |
+
# enable/disable tracking console logs (default value is true)
|
28 |
+
capture_terminal_logs: false # set to false to avoid infinite console log loop issue https://github.com/aimhubio/aim/issues/2550
|
configs/logger/comet.yaml
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://www.comet.ml
|
2 |
+
|
3 |
+
comet:
|
4 |
+
_target_: lightning.pytorch.loggers.comet.CometLogger
|
5 |
+
api_key: ${oc.env:COMET_API_TOKEN} # api key is loaded from environment variable
|
6 |
+
save_dir: "${paths.output_dir}"
|
7 |
+
project_name: "lightning-hydra-template"
|
8 |
+
rest_api_key: null
|
9 |
+
# experiment_name: ""
|
10 |
+
experiment_key: null # set to resume experiment
|
11 |
+
offline: False
|
12 |
+
prefix: ""
|
configs/logger/csv.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# csv logger built in lightning
|
2 |
+
|
3 |
+
csv:
|
4 |
+
_target_: lightning.pytorch.loggers.csv_logs.CSVLogger
|
5 |
+
save_dir: "${paths.output_dir}"
|
6 |
+
name: "csv/"
|
7 |
+
prefix: ""
|
configs/logger/many_loggers.yaml
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# train with many loggers at once
|
2 |
+
|
3 |
+
defaults:
|
4 |
+
# - comet
|
5 |
+
- csv
|
6 |
+
# - mlflow
|
7 |
+
# - neptune
|
8 |
+
- tensorboard
|
9 |
+
- wandb
|
configs/logger/mlflow.yaml
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://mlflow.org
|
2 |
+
|
3 |
+
mlflow:
|
4 |
+
_target_: lightning.pytorch.loggers.mlflow.MLFlowLogger
|
5 |
+
# experiment_name: ""
|
6 |
+
# run_name: ""
|
7 |
+
tracking_uri: ${paths.log_dir}/mlflow/mlruns # run `mlflow ui` command inside the `logs/mlflow/` dir to open the UI
|
8 |
+
tags: null
|
9 |
+
# save_dir: "./mlruns"
|
10 |
+
prefix: ""
|
11 |
+
artifact_location: null
|
12 |
+
# run_id: ""
|
configs/logger/neptune.yaml
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://neptune.ai
|
2 |
+
|
3 |
+
neptune:
|
4 |
+
_target_: lightning.pytorch.loggers.neptune.NeptuneLogger
|
5 |
+
api_key: ${oc.env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable
|
6 |
+
project: username/lightning-hydra-template
|
7 |
+
# name: ""
|
8 |
+
log_model_checkpoints: True
|
9 |
+
prefix: ""
|
configs/logger/tensorboard.yaml
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://www.tensorflow.org/tensorboard/
|
2 |
+
|
3 |
+
tensorboard:
|
4 |
+
_target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
|
5 |
+
save_dir: "${paths.output_dir}/tensorboard/"
|
6 |
+
name: null
|
7 |
+
log_graph: False
|
8 |
+
default_hp_metric: True
|
9 |
+
prefix: ""
|
10 |
+
# version: ""
|
configs/logger/wandb.yaml
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://wandb.ai
|
2 |
+
|
3 |
+
wandb:
|
4 |
+
_target_: lightning.pytorch.loggers.wandb.WandbLogger
|
5 |
+
# name: "" # name of the run (normally generated by wandb)
|
6 |
+
save_dir: "${paths.output_dir}"
|
7 |
+
offline: False
|
8 |
+
id: null # pass correct id to resume experiment!
|
9 |
+
anonymous: null # enable anonymous logging
|
10 |
+
project: "lightning-hydra-template"
|
11 |
+
log_model: False # upload lightning ckpts
|
12 |
+
prefix: "" # a string to put at the beginning of metric keys
|
13 |
+
# entity: "" # set to name of your wandb team
|
14 |
+
group: ""
|
15 |
+
tags: []
|
16 |
+
job_type: ""
|
configs/model/cfm/default.yaml
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
name: CFM
|
2 |
+
solver: euler
|
3 |
+
sigma_min: 1e-4
|
configs/model/decoder/default.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
channels: [256, 256]
|
2 |
+
dropout: 0.05
|
3 |
+
attention_head_dim: 64
|
4 |
+
n_blocks: 1
|
5 |
+
num_mid_blocks: 2
|
6 |
+
num_heads: 2
|
7 |
+
act_fn: snakebeta
|
configs/model/encoder/default.yaml
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
encoder_type: RoPE Encoder
|
2 |
+
encoder_params:
|
3 |
+
n_feats: ${model.n_feats}
|
4 |
+
n_channels: 192
|
5 |
+
filter_channels: 768
|
6 |
+
filter_channels_dp: 256
|
7 |
+
n_heads: 2
|
8 |
+
n_layers: 6
|
9 |
+
kernel_size: 3
|
10 |
+
p_dropout: 0.1
|
11 |
+
spk_emb_dim: 64
|
12 |
+
n_spks: 1
|
13 |
+
prenet: true
|
14 |
+
|
15 |
+
duration_predictor_params:
|
16 |
+
filter_channels_dp: ${model.encoder.encoder_params.filter_channels_dp}
|
17 |
+
kernel_size: 3
|
18 |
+
p_dropout: ${model.encoder.encoder_params.p_dropout}
|
configs/model/matcha.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defaults:
|
2 |
+
- _self_
|
3 |
+
- encoder: default.yaml
|
4 |
+
- decoder: default.yaml
|
5 |
+
- cfm: default.yaml
|
6 |
+
- optimizer: adam.yaml
|
7 |
+
|
8 |
+
_target_: matcha.models.matcha_tts.MatchaTTS
|
9 |
+
n_vocab: 178
|
10 |
+
n_spks: ${data.n_spks}
|
11 |
+
spk_emb_dim: 64
|
12 |
+
n_feats: 80
|
13 |
+
data_statistics: ${data.data_statistics}
|
14 |
+
out_size: null # Must be divisible by 4
|
15 |
+
prior_loss: true
|
configs/model/optimizer/adam.yaml
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_target_: torch.optim.Adam
|
2 |
+
_partial_: true
|
3 |
+
lr: 1e-4
|
4 |
+
weight_decay: 0.0
|
configs/paths/default.yaml
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# path to root directory
|
2 |
+
# this requires PROJECT_ROOT environment variable to exist
|
3 |
+
# you can replace it with "." if you want the root to be the current working directory
|
4 |
+
root_dir: ${oc.env:PROJECT_ROOT}
|
5 |
+
|
6 |
+
# path to data directory
|
7 |
+
data_dir: ${paths.root_dir}/data/
|
8 |
+
|
9 |
+
# path to logging directory
|
10 |
+
log_dir: ${paths.root_dir}/logs/
|
11 |
+
|
12 |
+
# path to output directory, created dynamically by hydra
|
13 |
+
# path generation pattern is specified in `configs/hydra/default.yaml`
|
14 |
+
# use it to store all files generated during the run, like ckpts and metrics
|
15 |
+
output_dir: ${hydra:runtime.output_dir}
|
16 |
+
|
17 |
+
# path to working directory
|
18 |
+
work_dir: ${hydra:runtime.cwd}
|