Text-to-Speech
Kyrgyz
Simonlob commited on
Commit
6703e27
0 Parent(s):

Release version 0.1.13

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env.example +6 -0
  2. .gitignore +163 -0
  3. .pre-commit-config.yaml +59 -0
  4. .project-root +2 -0
  5. .pylintrc +525 -0
  6. Create_dataset/__init__.py +1 -0
  7. Create_dataset/cr_dataset_script.py +99 -0
  8. LICENSE +21 -0
  9. MANIFEST.in +14 -0
  10. Makefile +42 -0
  11. README.md +268 -0
  12. configs/__init__.py +1 -0
  13. configs/callbacks/default.yaml +5 -0
  14. configs/callbacks/model_checkpoint.yaml +17 -0
  15. configs/callbacks/model_summary.yaml +5 -0
  16. configs/callbacks/none.yaml +0 -0
  17. configs/callbacks/rich_progress_bar.yaml +4 -0
  18. configs/data/akylai.yaml +21 -0
  19. configs/data/hi-fi_en-US_female.yaml +14 -0
  20. configs/data/ljspeech.yaml +22 -0
  21. configs/data/vctk.yaml +14 -0
  22. configs/debug/default.yaml +35 -0
  23. configs/debug/fdr.yaml +9 -0
  24. configs/debug/limit.yaml +12 -0
  25. configs/debug/overfit.yaml +13 -0
  26. configs/debug/profiler.yaml +15 -0
  27. configs/eval.yaml +18 -0
  28. configs/experiment/akylai.yaml +14 -0
  29. configs/experiment/hifi_dataset_piper_phonemizer.yaml +14 -0
  30. configs/experiment/ljspeech.yaml +14 -0
  31. configs/experiment/ljspeech_min_memory.yaml +18 -0
  32. configs/experiment/multispeaker.yaml +14 -0
  33. configs/extras/default.yaml +8 -0
  34. configs/hparams_search/mnist_optuna.yaml +52 -0
  35. configs/hydra/default.yaml +19 -0
  36. configs/local/.gitkeep +0 -0
  37. configs/logger/aim.yaml +28 -0
  38. configs/logger/comet.yaml +12 -0
  39. configs/logger/csv.yaml +7 -0
  40. configs/logger/many_loggers.yaml +9 -0
  41. configs/logger/mlflow.yaml +12 -0
  42. configs/logger/neptune.yaml +9 -0
  43. configs/logger/tensorboard.yaml +10 -0
  44. configs/logger/wandb.yaml +16 -0
  45. configs/model/cfm/default.yaml +3 -0
  46. configs/model/decoder/default.yaml +7 -0
  47. configs/model/encoder/default.yaml +18 -0
  48. configs/model/matcha.yaml +15 -0
  49. configs/model/optimizer/adam.yaml +4 -0
  50. configs/paths/default.yaml +18 -0
.env.example ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # example of file for storing private and user specific environment variables, like keys or system paths
2
+ # rename it to ".env" (excluded from version control by default)
3
+ # .env is loaded by train.py automatically
4
+ # hydra allows you to reference variables in .yaml configs with special syntax: ${oc.env:MY_VAR}
5
+
6
+ MY_VAR="/home/user/my/system/path"
.gitignore ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .venv
106
+ env/
107
+ venv/
108
+ ENV/
109
+ env.bak/
110
+ venv.bak/
111
+
112
+ # Spyder project settings
113
+ .spyderproject
114
+ .spyproject
115
+
116
+ # Rope project settings
117
+ .ropeproject
118
+
119
+ # mkdocs documentation
120
+ /site
121
+
122
+ # mypy
123
+ .mypy_cache/
124
+ .dmypy.json
125
+ dmypy.json
126
+
127
+ # Pyre type checker
128
+ .pyre/
129
+
130
+ ### VisualStudioCode
131
+ .vscode/*
132
+ !.vscode/settings.json
133
+ !.vscode/tasks.json
134
+ !.vscode/launch.json
135
+ !.vscode/extensions.json
136
+ *.code-workspace
137
+ **/.vscode
138
+
139
+ # JetBrains
140
+ .idea/
141
+
142
+ # Data & Models
143
+ *.h5
144
+ *.tar
145
+ *.tar.gz
146
+
147
+ # Lightning-Hydra-Template
148
+ configs/local/default.yaml
149
+ /data/
150
+ /logs/
151
+ .env
152
+
153
+ # Aim logging
154
+ .aim
155
+
156
+ # Cython compiled files
157
+ matcha/utils/monotonic_align/core.c
158
+
159
+ # Ignoring hifigan checkpoint
160
+ generator_v1
161
+ g_02500000
162
+ gradio_cached_examples/
163
+ synth_output/
.pre-commit-config.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ default_language_version:
2
+ python: python3.10
3
+
4
+ repos:
5
+ - repo: https://github.com/pre-commit/pre-commit-hooks
6
+ rev: v4.5.0
7
+ hooks:
8
+ # list of supported hooks: https://pre-commit.com/hooks.html
9
+ - id: trailing-whitespace
10
+ - id: end-of-file-fixer
11
+ # - id: check-docstring-first
12
+ - id: check-yaml
13
+ - id: debug-statements
14
+ - id: detect-private-key
15
+ - id: check-toml
16
+ - id: check-case-conflict
17
+ - id: check-added-large-files
18
+
19
+ # python code formatting
20
+ - repo: https://github.com/psf/black
21
+ rev: 23.12.1
22
+ hooks:
23
+ - id: black
24
+ args: [--line-length, "120"]
25
+
26
+ # python import sorting
27
+ - repo: https://github.com/PyCQA/isort
28
+ rev: 5.13.2
29
+ hooks:
30
+ - id: isort
31
+ args: ["--profile", "black", "--filter-files"]
32
+
33
+ # python upgrading syntax to newer version
34
+ - repo: https://github.com/asottile/pyupgrade
35
+ rev: v3.15.0
36
+ hooks:
37
+ - id: pyupgrade
38
+ args: [--py38-plus]
39
+
40
+ # python check (PEP8), programming errors and code complexity
41
+ - repo: https://github.com/PyCQA/flake8
42
+ rev: 7.0.0
43
+ hooks:
44
+ - id: flake8
45
+ args:
46
+ [
47
+ "--max-line-length", "120",
48
+ "--extend-ignore",
49
+ "E203,E402,E501,F401,F841,RST2,RST301",
50
+ "--exclude",
51
+ "logs/*,data/*,matcha/hifigan/*",
52
+ ]
53
+ additional_dependencies: [flake8-rst-docstrings==0.3.0]
54
+
55
+ # pylint
56
+ - repo: https://github.com/pycqa/pylint
57
+ rev: v3.0.3
58
+ hooks:
59
+ - id: pylint
.project-root ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # this file is required for inferring the project root directory
2
+ # do not delete
.pylintrc ADDED
@@ -0,0 +1,525 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [MASTER]
2
+
3
+ # A comma-separated list of package or module names from where C extensions may
4
+ # be loaded. Extensions are loaded into the active Python interpreter and may
5
+ # run arbitrary code.
6
+ extension-pkg-whitelist=
7
+
8
+ # Add files or directories to the blacklist. They should be base names, not
9
+ # paths.
10
+ ignore=CVS
11
+
12
+ # Add files or directories matching the regex patterns to the blacklist. The
13
+ # regex matches against base names, not paths.
14
+ ignore-patterns=
15
+
16
+ # Python code to execute, usually for sys.path manipulation such as
17
+ # pygtk.require().
18
+ #init-hook=
19
+
20
+ # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
21
+ # number of processors available to use.
22
+ jobs=1
23
+
24
+ # Control the amount of potential inferred values when inferring a single
25
+ # object. This can help the performance when dealing with large functions or
26
+ # complex, nested conditions.
27
+ limit-inference-results=100
28
+
29
+ # List of plugins (as comma separated values of python modules names) to load,
30
+ # usually to register additional checkers.
31
+ load-plugins=
32
+
33
+ # Pickle collected data for later comparisons.
34
+ persistent=yes
35
+
36
+ # Specify a configuration file.
37
+ #rcfile=
38
+
39
+ # When enabled, pylint would attempt to guess common misconfiguration and emit
40
+ # user-friendly hints instead of false-positive error messages.
41
+ suggestion-mode=yes
42
+
43
+ # Allow loading of arbitrary C extensions. Extensions are imported into the
44
+ # active Python interpreter and may run arbitrary code.
45
+ unsafe-load-any-extension=no
46
+
47
+
48
+ [MESSAGES CONTROL]
49
+
50
+ # Only show warnings with the listed confidence levels. Leave empty to show
51
+ # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
52
+ confidence=
53
+
54
+ # Disable the message, report, category or checker with the given id(s). You
55
+ # can either give multiple identifiers separated by comma (,) or put this
56
+ # option multiple times (only on the command line, not in the configuration
57
+ # file where it should appear only once). You can also use "--disable=all" to
58
+ # disable everything first and then reenable specific checks. For example, if
59
+ # you want to run only the similarities checker, you can use "--disable=all
60
+ # --enable=similarities". If you want to run only the classes checker, but have
61
+ # no Warning level messages displayed, use "--disable=all --enable=classes
62
+ # --disable=W".
63
+ disable=missing-docstring,
64
+ too-many-public-methods,
65
+ too-many-lines,
66
+ bare-except,
67
+ ## for avoiding weird p3.6 CI linter error
68
+ ## TODO: see later if we can remove this
69
+ assigning-non-slot,
70
+ unsupported-assignment-operation,
71
+ ## end
72
+ line-too-long,
73
+ fixme,
74
+ wrong-import-order,
75
+ ungrouped-imports,
76
+ wrong-import-position,
77
+ import-error,
78
+ invalid-name,
79
+ too-many-instance-attributes,
80
+ arguments-differ,
81
+ arguments-renamed,
82
+ no-name-in-module,
83
+ no-member,
84
+ unsubscriptable-object,
85
+ raw-checker-failed,
86
+ bad-inline-option,
87
+ locally-disabled,
88
+ file-ignored,
89
+ suppressed-message,
90
+ useless-suppression,
91
+ deprecated-pragma,
92
+ use-symbolic-message-instead,
93
+ useless-object-inheritance,
94
+ too-few-public-methods,
95
+ too-many-branches,
96
+ too-many-arguments,
97
+ too-many-locals,
98
+ too-many-statements,
99
+ duplicate-code,
100
+ not-callable,
101
+ import-outside-toplevel,
102
+ logging-fstring-interpolation,
103
+ logging-not-lazy,
104
+ unused-argument,
105
+ no-else-return,
106
+ chained-comparison,
107
+ redefined-outer-name
108
+
109
+ # Enable the message, report, category or checker with the given id(s). You can
110
+ # either give multiple identifiers separated by comma (,) or put this option
111
+ # multiple times (only on the command line, not in the configuration file where
112
+ # it should appear only once). See also the "--disable" option for examples.
113
+ enable=c-extension-no-member
114
+
115
+
116
+ [REPORTS]
117
+
118
+ # Python expression which should return a note less than 10 (10 is the highest
119
+ # note). You have access to the variables errors warning, statement which
120
+ # respectively contain the number of errors / warnings messages and the total
121
+ # number of statements analyzed. This is used by the global evaluation report
122
+ # (RP0004).
123
+ evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
124
+
125
+ # Template used to display messages. This is a python new-style format string
126
+ # used to format the message information. See doc for all details.
127
+ #msg-template=
128
+
129
+ # Set the output format. Available formats are text, parseable, colorized, json
130
+ # and msvs (visual studio). You can also give a reporter class, e.g.
131
+ # mypackage.mymodule.MyReporterClass.
132
+ output-format=text
133
+
134
+ # Tells whether to display a full report or only the messages.
135
+ reports=no
136
+
137
+ # Activate the evaluation score.
138
+ score=yes
139
+
140
+
141
+ [REFACTORING]
142
+
143
+ # Maximum number of nested blocks for function / method body
144
+ max-nested-blocks=5
145
+
146
+ # Complete name of functions that never returns. When checking for
147
+ # inconsistent-return-statements if a never returning function is called then
148
+ # it will be considered as an explicit return statement and no message will be
149
+ # printed.
150
+ never-returning-functions=sys.exit
151
+
152
+
153
+ [LOGGING]
154
+
155
+ # Format style used to check logging format string. `old` means using %
156
+ # formatting, while `new` is for `{}` formatting.
157
+ logging-format-style=old
158
+
159
+ # Logging modules to check that the string format arguments are in logging
160
+ # function parameter format.
161
+ logging-modules=logging
162
+
163
+
164
+ [SPELLING]
165
+
166
+ # Limits count of emitted suggestions for spelling mistakes.
167
+ max-spelling-suggestions=4
168
+
169
+ # Spelling dictionary name. Available dictionaries: none. To make it working
170
+ # install the python-enchant package.
171
+ spelling-dict=
172
+
173
+ # List of comma separated words that should not be checked.
174
+ spelling-ignore-words=
175
+
176
+ # A path to a file that contains private dictionary; one word per line.
177
+ spelling-private-dict-file=
178
+
179
+ # Tells whether to store unknown words to indicated private dictionary in
180
+ # --spelling-private-dict-file option instead of raising a message.
181
+ spelling-store-unknown-words=no
182
+
183
+
184
+ [MISCELLANEOUS]
185
+
186
+ # List of note tags to take in consideration, separated by a comma.
187
+ notes=FIXME,
188
+ XXX,
189
+ TODO
190
+
191
+
192
+ [TYPECHECK]
193
+
194
+ # List of decorators that produce context managers, such as
195
+ # contextlib.contextmanager. Add to this list to register other decorators that
196
+ # produce valid context managers.
197
+ contextmanager-decorators=contextlib.contextmanager
198
+
199
+ # List of members which are set dynamically and missed by pylint inference
200
+ # system, and so shouldn't trigger E1101 when accessed. Python regular
201
+ # expressions are accepted.
202
+ generated-members=numpy.*,torch.*
203
+
204
+ # Tells whether missing members accessed in mixin class should be ignored. A
205
+ # mixin class is detected if its name ends with "mixin" (case insensitive).
206
+ ignore-mixin-members=yes
207
+
208
+ # Tells whether to warn about missing members when the owner of the attribute
209
+ # is inferred to be None.
210
+ ignore-none=yes
211
+
212
+ # This flag controls whether pylint should warn about no-member and similar
213
+ # checks whenever an opaque object is returned when inferring. The inference
214
+ # can return multiple potential results while evaluating a Python object, but
215
+ # some branches might not be evaluated, which results in partial inference. In
216
+ # that case, it might be useful to still emit no-member and other checks for
217
+ # the rest of the inferred objects.
218
+ ignore-on-opaque-inference=yes
219
+
220
+ # List of class names for which member attributes should not be checked (useful
221
+ # for classes with dynamically set attributes). This supports the use of
222
+ # qualified names.
223
+ ignored-classes=optparse.Values,thread._local,_thread._local
224
+
225
+ # List of module names for which member attributes should not be checked
226
+ # (useful for modules/projects where namespaces are manipulated during runtime
227
+ # and thus existing member attributes cannot be deduced by static analysis. It
228
+ # supports qualified module names, as well as Unix pattern matching.
229
+ ignored-modules=
230
+
231
+ # Show a hint with possible names when a member name was not found. The aspect
232
+ # of finding the hint is based on edit distance.
233
+ missing-member-hint=yes
234
+
235
+ # The minimum edit distance a name should have in order to be considered a
236
+ # similar match for a missing member name.
237
+ missing-member-hint-distance=1
238
+
239
+ # The total number of similar names that should be taken in consideration when
240
+ # showing a hint for a missing member.
241
+ missing-member-max-choices=1
242
+
243
+
244
+ [VARIABLES]
245
+
246
+ # List of additional names supposed to be defined in builtins. Remember that
247
+ # you should avoid defining new builtins when possible.
248
+ additional-builtins=
249
+
250
+ # Tells whether unused global variables should be treated as a violation.
251
+ allow-global-unused-variables=yes
252
+
253
+ # List of strings which can identify a callback function by name. A callback
254
+ # name must start or end with one of those strings.
255
+ callbacks=cb_,
256
+ _cb
257
+
258
+ # A regular expression matching the name of dummy variables (i.e. expected to
259
+ # not be used).
260
+ dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
261
+
262
+ # Argument names that match this expression will be ignored. Default to name
263
+ # with leading underscore.
264
+ ignored-argument-names=_.*|^ignored_|^unused_
265
+
266
+ # Tells whether we should check for unused import in __init__ files.
267
+ init-import=no
268
+
269
+ # List of qualified module names which can have objects that can redefine
270
+ # builtins.
271
+ redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
272
+
273
+
274
+ [FORMAT]
275
+
276
+ # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
277
+ expected-line-ending-format=
278
+
279
+ # Regexp for a line that is allowed to be longer than the limit.
280
+ ignore-long-lines=^\s*(# )?<?https?://\S+>?$
281
+
282
+ # Number of spaces of indent required inside a hanging or continued line.
283
+ indent-after-paren=4
284
+
285
+ # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
286
+ # tab).
287
+ indent-string=' '
288
+
289
+ # Maximum number of characters on a single line.
290
+ max-line-length=120
291
+
292
+ # Maximum number of lines in a module.
293
+ max-module-lines=1000
294
+
295
+ # Allow the body of a class to be on the same line as the declaration if body
296
+ # contains single statement.
297
+ single-line-class-stmt=no
298
+
299
+ # Allow the body of an if to be on the same line as the test if there is no
300
+ # else.
301
+ single-line-if-stmt=no
302
+
303
+
304
+ [SIMILARITIES]
305
+
306
+ # Ignore comments when computing similarities.
307
+ ignore-comments=yes
308
+
309
+ # Ignore docstrings when computing similarities.
310
+ ignore-docstrings=yes
311
+
312
+ # Ignore imports when computing similarities.
313
+ ignore-imports=no
314
+
315
+ # Minimum lines number of a similarity.
316
+ min-similarity-lines=4
317
+
318
+
319
+ [BASIC]
320
+
321
+ # Naming style matching correct argument names.
322
+ argument-naming-style=snake_case
323
+
324
+ # Regular expression matching correct argument names. Overrides argument-
325
+ # naming-style.
326
+ argument-rgx=[a-z_][a-z0-9_]{0,30}$
327
+
328
+ # Naming style matching correct attribute names.
329
+ attr-naming-style=snake_case
330
+
331
+ # Regular expression matching correct attribute names. Overrides attr-naming-
332
+ # style.
333
+ #attr-rgx=
334
+
335
+ # Bad variable names which should always be refused, separated by a comma.
336
+ bad-names=
337
+
338
+ # Naming style matching correct class attribute names.
339
+ class-attribute-naming-style=any
340
+
341
+ # Regular expression matching correct class attribute names. Overrides class-
342
+ # attribute-naming-style.
343
+ #class-attribute-rgx=
344
+
345
+ # Naming style matching correct class names.
346
+ class-naming-style=PascalCase
347
+
348
+ # Regular expression matching correct class names. Overrides class-naming-
349
+ # style.
350
+ #class-rgx=
351
+
352
+ # Naming style matching correct constant names.
353
+ const-naming-style=UPPER_CASE
354
+
355
+ # Regular expression matching correct constant names. Overrides const-naming-
356
+ # style.
357
+ #const-rgx=
358
+
359
+ # Minimum line length for functions/classes that require docstrings, shorter
360
+ # ones are exempt.
361
+ docstring-min-length=-1
362
+
363
+ # Naming style matching correct function names.
364
+ function-naming-style=snake_case
365
+
366
+ # Regular expression matching correct function names. Overrides function-
367
+ # naming-style.
368
+ #function-rgx=
369
+
370
+ # Good variable names which should always be accepted, separated by a comma.
371
+ good-names=i,
372
+ j,
373
+ k,
374
+ x,
375
+ ex,
376
+ Run,
377
+ _
378
+
379
+ # Include a hint for the correct naming format with invalid-name.
380
+ include-naming-hint=no
381
+
382
+ # Naming style matching correct inline iteration names.
383
+ inlinevar-naming-style=any
384
+
385
+ # Regular expression matching correct inline iteration names. Overrides
386
+ # inlinevar-naming-style.
387
+ #inlinevar-rgx=
388
+
389
+ # Naming style matching correct method names.
390
+ method-naming-style=snake_case
391
+
392
+ # Regular expression matching correct method names. Overrides method-naming-
393
+ # style.
394
+ #method-rgx=
395
+
396
+ # Naming style matching correct module names.
397
+ module-naming-style=snake_case
398
+
399
+ # Regular expression matching correct module names. Overrides module-naming-
400
+ # style.
401
+ #module-rgx=
402
+
403
+ # Colon-delimited sets of names that determine each other's naming style when
404
+ # the name regexes allow several styles.
405
+ name-group=
406
+
407
+ # Regular expression which should only match function or class names that do
408
+ # not require a docstring.
409
+ no-docstring-rgx=^_
410
+
411
+ # List of decorators that produce properties, such as abc.abstractproperty. Add
412
+ # to this list to register other decorators that produce valid properties.
413
+ # These decorators are taken in consideration only for invalid-name.
414
+ property-classes=abc.abstractproperty
415
+
416
+ # Naming style matching correct variable names.
417
+ variable-naming-style=snake_case
418
+
419
+ # Regular expression matching correct variable names. Overrides variable-
420
+ # naming-style.
421
+ variable-rgx=[a-z_][a-z0-9_]{0,30}$
422
+
423
+
424
+ [STRING]
425
+
426
+ # This flag controls whether the implicit-str-concat-in-sequence should
427
+ # generate a warning on implicit string concatenation in sequences defined over
428
+ # several lines.
429
+ check-str-concat-over-line-jumps=no
430
+
431
+
432
+ [IMPORTS]
433
+
434
+ # Allow wildcard imports from modules that define __all__.
435
+ allow-wildcard-with-all=no
436
+
437
+ # Analyse import fallback blocks. This can be used to support both Python 2 and
438
+ # 3 compatible code, which means that the block might have code that exists
439
+ # only in one or another interpreter, leading to false positives when analysed.
440
+ analyse-fallback-blocks=no
441
+
442
+ # Deprecated modules which should not be used, separated by a comma.
443
+ deprecated-modules=optparse,tkinter.tix
444
+
445
+ # Create a graph of external dependencies in the given file (report RP0402 must
446
+ # not be disabled).
447
+ ext-import-graph=
448
+
449
+ # Create a graph of every (i.e. internal and external) dependencies in the
450
+ # given file (report RP0402 must not be disabled).
451
+ import-graph=
452
+
453
+ # Create a graph of internal dependencies in the given file (report RP0402 must
454
+ # not be disabled).
455
+ int-import-graph=
456
+
457
+ # Force import order to recognize a module as part of the standard
458
+ # compatibility libraries.
459
+ known-standard-library=
460
+
461
+ # Force import order to recognize a module as part of a third party library.
462
+ known-third-party=enchant
463
+
464
+
465
+ [CLASSES]
466
+
467
+ # List of method names used to declare (i.e. assign) instance attributes.
468
+ defining-attr-methods=__init__,
469
+ __new__,
470
+ setUp
471
+
472
+ # List of member names, which should be excluded from the protected access
473
+ # warning.
474
+ exclude-protected=_asdict,
475
+ _fields,
476
+ _replace,
477
+ _source,
478
+ _make
479
+
480
+ # List of valid names for the first argument in a class method.
481
+ valid-classmethod-first-arg=cls
482
+
483
+ # List of valid names for the first argument in a metaclass class method.
484
+ valid-metaclass-classmethod-first-arg=cls
485
+
486
+
487
+ [DESIGN]
488
+
489
+ # Maximum number of arguments for function / method.
490
+ max-args=5
491
+
492
+ # Maximum number of attributes for a class (see R0902).
493
+ max-attributes=7
494
+
495
+ # Maximum number of boolean expressions in an if statement.
496
+ max-bool-expr=5
497
+
498
+ # Maximum number of branch for function / method body.
499
+ max-branches=12
500
+
501
+ # Maximum number of locals for function / method body.
502
+ max-locals=15
503
+
504
+ # Maximum number of parents for a class (see R0901).
505
+ max-parents=15
506
+
507
+ # Maximum number of public methods for a class (see R0904).
508
+ max-public-methods=20
509
+
510
+ # Maximum number of return / yield for function / method body.
511
+ max-returns=6
512
+
513
+ # Maximum number of statements in function / method body.
514
+ max-statements=50
515
+
516
+ # Minimum number of public methods for a class (see R0903).
517
+ min-public-methods=2
518
+
519
+
520
+ [EXCEPTIONS]
521
+
522
+ # Exceptions that will emit a warning when being caught. Defaults to
523
+ # "BaseException, Exception".
524
+ overgeneral-exceptions=builtins.BaseException,
525
+ builtins.Exception
Create_dataset/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
Create_dataset/cr_dataset_script.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from datasets import load_dataset
4
+ from datasets import Dataset, DatasetDict
5
+ from IPython.display import Audio
6
+ import scipy
7
+ import librosa
8
+ from tqdm import tqdm
9
+ import re
10
+ import os
11
+
12
+
13
def load_audio(audio_dict: dict, target_sr: int = 22050) -> None:
    """Resample one decoded audio record and write it to disk as a 16-bit PCM WAV.

    Despite the name, this function *writes* audio; it returns nothing.

    Args:
        audio_dict: Mapping with the keys read below: ``'array'`` (the samples),
            ``'sampling_rate'`` (their original rate) and ``'path'`` (the output
            file path, passed unchanged to ``scipy.io.wavfile.write``).
        target_sr: Sampling rate of the written file. Defaults to 22050 Hz,
            the value that was previously hard-coded.
    """
    samples = np.array(audio_dict['array'])
    audio_resampled = librosa.resample(
        samples,
        orig_sr=audio_dict['sampling_rate'],
        target_sr=target_sr,
    )
    # NOTE(review): the 32767 scaling presumes float samples in [-1, 1] - confirm
    # against the dataset. Clip first so that any overshoot saturates instead of
    # wrapping around when cast to int16 (which produces loud clicks).
    pcm = (np.clip(audio_resampled, -1.0, 1.0) * 32767).astype(np.int16)
    scipy.io.wavfile.write(audio_dict['path'], rate=target_sr, data=pcm)
21
+
22
def remove_outer_quotes_regex(sen: str) -> str:
    """Strip one leading and one trailing quote character from ``sen``.

    The quotes are removed only when the text both starts and ends with a
    single or double quote (the two need not be the same kind). Any other
    input is returned unchanged.
    """
    wrapped = re.match(r'^["\'](.*)["\']$', sen)
    if wrapped is None:
        return sen
    # `$` may match just before a final newline, so keep whatever follows
    # the matched span.
    return wrapped.group(1) + sen[wrapped.end():]
24
+
25
def _convert_split(split, wavs_dir: str, num_shards: int = 1) -> pd.DataFrame:
    """Write every audio row of ``split`` into ``wavs_dir`` and return its filelist.

    Args:
        split: One split of the loaded dataset; rows are read via their
            ``'audio'`` and ``'raw_transcription'`` fields.
        wavs_dir: Absolute directory the WAV files are written to.
        num_shards: Number of shards the split is walked in (1 = no sharding).

    Returns:
        DataFrame with columns ``path`` (absolute WAV path) and ``text``
        (transcription with surrounding quotes removed).
    """
    paths = []
    texts = []
    with tqdm(total=len(split), leave=False) as pbar:
        for ind in range(num_shards):
            shard = split.shard(num_shards=num_shards, index=ind) if num_shards > 1 else split
            for row in shard:
                audio = row['audio']
                wav_path = os.path.join(wavs_dir, audio['path'])
                # Pass a copy with the absolute target path so that no
                # os.chdir() is needed to control where the file lands.
                load_audio({**audio, 'path': wav_path})
                paths.append(wav_path)
                texts.append(row['raw_transcription'])
                pbar.update(1)

    df = pd.DataFrame({'path': paths, 'text': texts})
    df['text'] = df['text'].map(remove_outer_quotes_regex)
    return df


def main() -> None:
    """Download a Hugging Face dataset and export it as WAV files plus filelists.

    Prompts for a dataset name, creates ``<DATASET_NAME>/wavs`` under the
    current directory, writes every ``train`` and ``test`` clip there, and
    saves ``<DATASET_NAME>_filelist_{train,test}.txt`` (``path|text`` rows,
    no header) inside ``<DATASET_NAME>``. The working directory is never
    changed, so a failure part-way through cannot leave the process stranded
    in a sub-directory.

    Raises:
        ValueError: If no dataset name is entered.
    """
    name_dataset = input('Write HF dataset name as <REPO_NAME/DATASET_NAME>: ').strip()
    if not name_dataset:
        raise ValueError('Dataset name must not be empty')
    name_parts = name_dataset.split('/')
    # Names without a namespace have no second component; fall back to the
    # name itself instead of failing with IndexError.
    sub_name_dataset = name_parts[1] if len(name_parts) > 1 else name_parts[0]

    absolute_path = os.path.abspath(sub_name_dataset)
    wavs_dir = os.path.join(absolute_path, 'wavs')
    # exist_ok lets the script be re-run after an interrupted export.
    os.makedirs(wavs_dir, exist_ok=True)

    # Raw string: in a normal literal the backslash at the end of the first
    # art line is a line continuation and `\_` is an invalid escape.
    art = r"""
     /\_/\
    ( o.o )
     > ^ <

    V O I C E
    """
    print(art)

    print('--- LOADING DATASET ---')
    your_dataset = load_dataset(name_dataset)

    # mk TRAIN
    print()
    print('--- CONVERTIND AND SAVING THE TRAIN DATASET ---')
    train_df = _convert_split(your_dataset['train'], wavs_dir, num_shards=20)
    train_df.to_csv(
        os.path.join(absolute_path, f'{sub_name_dataset}_filelist_train.txt'),
        sep='|',
        header=False,
        index=False,
    )

    # mk TEST
    print()
    print('--- CONVERTIND AND SAVING THE TEST DATASET ---')
    test_df = _convert_split(your_dataset['test'], wavs_dir)
    test_df.to_csv(
        os.path.join(absolute_path, f'{sub_name_dataset}_filelist_test.txt'),
        sep='|',
        header=False,
        index=False,
    )

    print()
    print('--- THE DATASET IS READY ---')
    print(f'Dir of data is "{absolute_path}"')
96
+
97
+
98
+ if __name__ == "__main__":
99
+ main()
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Shivam Mehta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
MANIFEST.in ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ include README.md
2
+ include LICENSE.txt
3
+ include requirements.*.txt
4
+ include *.cff
5
+ include requirements.txt
6
+ include matcha/VERSION
7
+ recursive-include matcha *.json
8
+ recursive-include matcha *.html
9
+ recursive-include matcha *.png
10
+ recursive-include matcha *.md
11
+ recursive-include matcha *.py
12
+ recursive-include matcha *.pyx
13
+ recursive-exclude tests *
14
+ prune tests*
Makefile ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ help: ## Show help
3
+ @grep -E '^[.a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
4
+
5
+ clean: ## Clean autogenerated files
6
+ rm -rf dist
7
+ find . -type f -name "*.DS_Store" -ls -delete
8
+ find . | grep -E "(__pycache__|\.pyc|\.pyo)" | xargs rm -rf
9
+ find . | grep -E ".pytest_cache" | xargs rm -rf
10
+ find . | grep -E ".ipynb_checkpoints" | xargs rm -rf
11
+ rm -f .coverage
12
+
13
+ clean-logs: ## Clean logs
14
+ rm -rf logs/**
15
+
16
+ create-package: ## Create wheel and tar gz
17
+ rm -rf dist/
18
+ python setup.py bdist_wheel --plat-name=manylinux1_x86_64
19
+ python setup.py sdist
20
+ python -m twine upload dist/* --verbose --skip-existing
21
+
22
+ format: ## Run pre-commit hooks
23
+ pre-commit run -a
24
+
25
+ sync: ## Merge changes from main branch to your current branch
26
+ git pull
27
+ git pull origin main
28
+
29
+ test: ## Run not slow tests
30
+ pytest -k "not slow"
31
+
32
+ test-full: ## Run all tests
33
+ pytest
34
+
35
+ train-ljspeech: ## Train the model
36
+ python matcha/train.py experiment=ljspeech
37
+
38
+ train-ljspeech-min: ## Train the model with minimum memory
39
+ python matcha/train.py experiment=ljspeech_min_memory
40
+
41
+ start_app: ## Start the app
42
+ python matcha/app.py
README.md ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+
3
+
4
+
5
+ # AkylAI TTS
6
+
7
+
8
+ [![python](https://img.shields.io/badge/-Python_3.10-blue?logo=python&logoColor=white)](https://www.python.org/downloads/release/python-3100/)
9
+ [![pytorch](https://img.shields.io/badge/PyTorch_2.0+-ee4c2c?logo=pytorch&logoColor=white)](https://pytorch.org/get-started/locally/)
10
+ [![lightning](https://img.shields.io/badge/-Lightning_2.0+-792ee5?logo=pytorchlightning&logoColor=white)](https://pytorchlightning.ai/)
11
+ [![hydra](https://img.shields.io/badge/Config-Hydra_1.3-89b8cd)](https://hydra.cc/)
12
+ [![black](https://img.shields.io/badge/Code%20Style-Black-black.svg?labelColor=gray)](https://black.readthedocs.io/en/stable/)
13
+ [![isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
14
+
15
+ <img src="https://github.com/simonlobgromov/Matcha-TTS/blob/main/photo_2024-04-07_15-59-52.png" height="400"/>
16
+ </div>
17
+
18
+ # AkylAI-TTS for Kyrgyz language
19
+
20
+ We present a TTS model for the Kyrgyz language, trained on 13 hours of speech (7,000 samples) and released together with source code and training scripts. The architecture is based on Matcha-TTS.
21
+ It's a new approach to non-autoregressive neural TTS that uses [conditional flow matching](https://arxiv.org/abs/2210.02747) (similar to [rectified flows](https://arxiv.org/abs/2209.03003)) to speed up ODE-based speech synthesis. Our method:
22
+
23
+ - Is probabilistic
24
+ - Has compact memory footprint
25
+ - Sounds highly natural
26
+ - Is very fast to synthesise from
27
+
28
+ You can try *AkylAI TTS* by visiting our [SPACE](https://huggingface.co/spaces/the-cramer-project/akylai-tts-mini), and read the [ICASSP 2024 paper](https://arxiv.org/abs/2309.03199) for more details.
29
+
30
+ # Inference
31
+
32
+ ## Run via terminal
33
+
34
+
35
+ It is recommended to start by setting up a virtual environment using `venv`.
36
+
37
+ 1. Clone this repository and install all modules and dependencies by running the commands:
38
+
39
+ ```
40
+ git clone https://github.com/simonlobgromov/Matcha-TTS
41
+ cd Matcha-TTS
42
+ pip install -e .
43
+ apt-get install espeak-ng
44
+ ```
45
+
46
+
47
+ 2. Run with CLI arguments:
48
+
49
+ - To synthesise from given text, run:
50
+
51
+ ```bash
52
+ matcha-tts --text "<INPUT TEXT>"
53
+ ```
54
+
55
+ - To synthesise from a file, run:
56
+
57
+ ```bash
58
+ matcha-tts --file <PATH TO FILE>
59
+ ```
60
+ - Speaking rate
61
+
62
+ ```bash
63
+ matcha-tts --text "<INPUT TEXT>" --speaking_rate 1.0
64
+ ```
65
+
66
+ - Sampling temperature
67
+
68
+ ```bash
69
+ matcha-tts --text "<INPUT TEXT>" --temperature 0.667
70
+ ```
71
+
72
+ - Euler ODE solver steps
73
+
74
+ ```bash
75
+ matcha-tts --text "<INPUT TEXT>" --steps 10
76
+ ```
77
+
78
+
79
+ # Train with your own dataset.
80
+
81
+ ## Dataset
82
+
83
+ To train this model, organize your data in the same way as [LJ Speech](https://keithito.com/LJ-Speech-Dataset/). Each audio file should be single-channel 16-bit PCM WAV with a sample rate of 22050 Hz. WAV files must have unique names, for example:
84
+
85
+ ```
86
+ file_1.wav
87
+ file_2.wav
88
+ file_3.wav
89
+ file_4.wav
90
+ ....
91
+ file_12454.wav
92
+ file_12455.wav
93
+ ```
94
+
95
+
96
+ They should also be placed at the root of the project directory in a separate folder.
97
+
98
+ Additionally, the project should include two `.txt` files for Train and Test with metadata for the files. The names of these files can be arbitrary, and their structure is as follows:
99
+ ```
100
+ .../Matcha-TTS/<your folder name>/wavs/<filename>.wav|Баарыңарга салам, менин атым Акылай.
101
+ .../Matcha-TTS/<your folder name>/wavs/<filename>.wav|Мен бардыгын бул жерде Инновация борборунда көргөнүмө абдан кубанычтамын.
102
+ .../Matcha-TTS/<your folder name>/wavs/<filename>.wav|<your sentence>
103
+ .../Matcha-TTS/<your folder name>/wavs/<filename>.wav|<your sentence>
104
+ .../Matcha-TTS/<your folder name>/wavs/<filename>.wav|<your sentence>
105
+ ........
106
+ ```
107
+ Each line consists of the FULL path to an audio file in the folder with the uploaded audio, followed by the delimiter '|' and the sentence in its original form, with punctuation.
108
+ It is advisable to clean the text of unnecessary and unwanted characters beforehand. Be careful with abbreviations and contractions.
109
+ The text preprocessing does not handle abbreviations or contractions. The built-in phonemizer can transcribe numbers, but to avoid errors it is better to write numbers out in words.
110
+
111
+ ## Dataset from Hugging Face
112
+
113
+ If you want to use a dataset that you store on Hugging Face, it would be convenient to use the `create-dataset` script, which will handle the downloading and all the data preparation, including .txt files with metadata.
114
+ Here's what the structure of such a dataset might look like:
115
+
116
+ ```
117
+ DatasetDict({
118
+ train: Dataset({
119
+ features: ['id', 'raw_transcription', 'transcription', 'sentence_type', 'speaker_id', 'gender', 'audio'],
120
+ num_rows: 7016
121
+ })
122
+ test: Dataset({
123
+ features: ['id', 'raw_transcription', 'transcription', 'sentence_type', 'speaker_id', 'gender', 'audio'],
124
+ num_rows: 31
125
+ })
126
+ })
127
+ ```
128
+
129
+ Where the most important and mandatory features are:
130
+ ```
131
+ ['raw_transcription', 'audio']
132
+ ```
133
+
134
+ Where:
135
+
136
+ `raw_transcription` - this is the text of your sentences in the original version (the requirements are the same as in the previous method).
137
+
138
+ `audio` - these are audio files with metadata, which are dictionaries with keys:
139
+
140
+ * `array` - audio in the form of a `numpy.ndarray` with a `float32` data type
141
+ * `path` - file name
142
+ * `sampling_rate` - Sampling rate, which should be no less than 22050 Hz.
143
+
144
+ Example of the `audio` field of a row:
145
+
146
+ ```
147
+ {'array': array([-3.05175781e-05, -3.05175781e-05, 0.00000000e+00, ...,
148
+ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00]),
149
+ 'path': '1353.wav',
150
+ 'sampling_rate': 44100}
151
+ ```
152
+
153
+
154
+
155
+
156
+ ## Process by Terminal
157
+
158
+ * **Load this repo and connect to HF**
159
+
160
+ ```
161
+ git clone https://github.com/simonlobgromov/Matcha-TTS
162
+ cd Matcha-TTS
163
+ pip install -e .
164
+ ```
165
+
166
+ Install `espeak-ng`:
167
+
168
+ ```
169
+ apt-get install espeak-ng
170
+ ```
171
+ Connect to HF (Skip this step if you are not using data from Hugging Face.)
172
+
173
+ ```
174
+ git config --global credential.helper store
175
+ huggingface-cli login
176
+ ```
177
+
178
+ * **Load the Data** (Skip this step if you are not using data from Hugging Face.)
179
+
180
+ The script will automatically create a folder with audio recordings and text files with metadata. During the process, enter the HF repository name and the dataset name.
181
+
182
+
183
+ ```
184
+ create-dataset
185
+
186
+ # If you see a cat, then everything is fine!
187
+ ```
188
+
189
+ * Go to `configs/data/akylai<OR YOUR FILE NAME>.yaml` and change
190
+
191
+ ```yaml
192
+ train_filelist_path: data/filelists/akylai_audio_text_train_filelist.txt # path to your TXT with metadata
193
+ valid_filelist_path: data/filelists/akylai_audio_text_val_filelist.txt # path to your TXT with metadata
194
+ ```
195
+
196
+ * Generate normalisation statistics using the dataset's YAML configuration file
197
+
198
+ ```bash
199
+ matcha-data-stats -i akylai.yaml
200
+ # Output:
201
+ #{'mel_mean': -5.53662231756592, 'mel_std': 2.1161014277038574}
202
+ ```
203
+
204
+ * Update these values in `configs/data/akylai.yaml` under the `data_statistics` key.
205
+
206
+ ```yaml
207
+ data_statistics: # Computed for akylai(or your) dataset
208
+ mel_mean: -5.536622
209
+ mel_std: 2.116101
210
+ ```
211
+
212
+
213
+
214
+ * **Train**
215
+
216
+ ```
217
+ python matcha/train.py experiment=akylai
218
+ ```
219
+
220
+ OR
221
+
222
+ ```
223
+ python matcha/train.py experiment=akylai trainer.devices=[0,1]
224
+ ```
225
+
226
+
227
+ * **Checkpoints**
228
+
229
+ Checkpoints will be saved in `./Matcha-TTS/logs/train/<MODEL_NAME>/runs/<DATE>_<TIME>/checkpoints`. Download all of them, or select only the last few checkpoints.
230
+
231
+
232
+
233
+ # Credits
234
+
235
+
236
+ - Shivam Mehta ([GitHub](https://github.com/shivammehta25))
237
+ - The Cramer Project (Data collection and preprocessing) [Official Space](https://thecramer.com/)
238
+ - Amantur Amatov (Expert)
239
+ - Timur Turatali (Expert, Research)
240
+ - Den Pavlov (Research, Data preprocessing and ML engineering) [GitHub](https://github.com/simonlobgromov/Matcha-TTS)
241
+ - Ulan Abdurazakov (Environment Developer)
242
+ - Nursultan Bakashov (CEO)
243
+
244
+ ## Citation information
245
+
246
+ If you use our code or otherwise find this work useful, please cite our paper:
247
+
248
+ ```text
249
+ @inproceedings{mehta2024matcha,
250
+ title={Matcha-{TTS}: A fast {TTS} architecture with conditional flow matching},
251
+ author={Mehta, Shivam and Tu, Ruibo and Beskow, Jonas and Sz{\'e}kely, {\'E}va and Henter, Gustav Eje},
252
+ booktitle={Proc. ICASSP},
253
+ year={2024}
254
+ }
255
+ ```
256
+
257
+ ## Acknowledgements
258
+
259
+ Since this code uses [Lightning-Hydra-Template](https://github.com/ashleve/lightning-hydra-template), you have all the powers that come with it.
260
+
261
+ Other source code we would like to acknowledge:
262
+
263
+ - [Coqui-TTS](https://github.com/coqui-ai/TTS/tree/dev): For helping me figure out how to make cython binaries pip installable and encouragement
264
+ - [Hugging Face Diffusers](https://huggingface.co/): For their awesome diffusers library and its components
265
+ - [Grad-TTS](https://github.com/huawei-noah/Speech-Backbones/tree/main/Grad-TTS): For the monotonic alignment search source code
266
+ - [torchdyn](https://github.com/DiffEqML/torchdyn): Useful for trying other ODE solvers during research and development
267
+ - [labml.ai](https://nn.labml.ai/transformers/rope/index.html): For the RoPE implementation
268
+
configs/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # this file is needed here to include configs when building project as a package
configs/callbacks/default.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ defaults:
2
+ - model_checkpoint.yaml
3
+ - model_summary.yaml
4
+ - rich_progress_bar.yaml
5
+ - _self_
configs/callbacks/model_checkpoint.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html
2
+
3
+ model_checkpoint:
4
+ _target_: lightning.pytorch.callbacks.ModelCheckpoint
5
+ dirpath: ${paths.output_dir}/checkpoints # directory to save the model file
6
+ filename: checkpoint_{epoch:03d} # checkpoint filename
7
+ monitor: epoch # name of the logged metric which determines when model is improving
8
+ verbose: False # verbosity mode
9
+ save_last: true # additionally always save an exact copy of the last checkpoint to a file last.ckpt
10
+ save_top_k: 5 # save k best models (determined by above metric)
11
+ mode: "max" # "max" means higher metric value is better, can be also "min"
12
+ auto_insert_metric_name: True # when True, the checkpoints filenames will contain the metric name
13
+ save_weights_only: False # if True, then only the model’s weights will be saved
14
+ every_n_train_steps: null # number of training steps between checkpoints
15
+ train_time_interval: null # checkpoints are monitored at the specified time interval
16
+ every_n_epochs: 10 # number of epochs between checkpoints
17
+ save_on_train_epoch_end: null # whether to run checkpointing at the end of the training epoch or the end of validation
configs/callbacks/model_summary.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html
2
+
3
+ model_summary:
4
+ _target_: lightning.pytorch.callbacks.RichModelSummary
5
+ max_depth: 3 # the maximum depth of layer nesting that the summary will include
configs/callbacks/none.yaml ADDED
File without changes
configs/callbacks/rich_progress_bar.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # https://lightning.ai/docs/pytorch/latest/api/lightning.pytorch.callbacks.RichProgressBar.html
2
+
3
+ rich_progress_bar:
4
+ _target_: lightning.pytorch.callbacks.RichProgressBar
configs/data/akylai.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: matcha.data.text_mel_datamodule.TextMelDataModule
2
+ name: akylai
3
+ train_filelist_path: ./Kany_dataset_mk4/Kany_dataset_mk4_filelist_train.txt
4
+ valid_filelist_path: ./Kany_dataset_mk4/Kany_dataset_mk4_filelist_test.txt
5
+ batch_size: 32
6
+ num_workers: 20
7
+ pin_memory: True
8
+ cleaners: [kyrgyz_cleaners]
9
+ add_blank: True
10
+ n_spks: 1
11
+ n_fft: 1024
12
+ n_feats: 80
13
+ sample_rate: 22050
14
+ hop_length: 256
15
+ win_length: 1024
16
+ f_min: 0
17
+ f_max: 8000
18
+ data_statistics: # Dataset-specific; recompute with `matcha-data-stats` for your own data
19
+ mel_mean: -5.6814561
20
+ mel_std: 2.7337122
21
+ seed: ${seed}
configs/data/hi-fi_en-US_female.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - ljspeech
3
+ - _self_
4
+
5
+ # Dataset URL: https://ast-astrec.nict.go.jp/en/release/hi-fi-captain/
6
+ _target_: matcha.data.text_mel_datamodule.TextMelDataModule
7
+ name: hi-fi_en-US_female
8
+ train_filelist_path: data/filelists/hi-fi-captain-en-us-female_train.txt
9
+ valid_filelist_path: data/filelists/hi-fi-captain-en-us-female_val.txt
10
+ batch_size: 32
11
+ cleaners: [english_cleaners_piper]
12
+ data_statistics: # Computed for this dataset
13
+ mel_mean: -6.38385
14
+ mel_std: 2.541796
configs/data/ljspeech.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: matcha.data.text_mel_datamodule.TextMelDataModule
2
+ name: ljspeech
3
+ train_filelist_path: /content/kany_dataset/kany_filelist_train.txt
4
+ valid_filelist_path: /content/kany_dataset/kany_filelist_test.txt
5
+ batch_size: 16
6
+ num_workers: 20
7
+ pin_memory: True
8
+ cleaners: [kyrgyz_cleaners]
9
+ add_blank: True
10
+ n_spks: 1
11
+ n_fft: 1024
12
+ n_feats: 80
13
+ sample_rate: 22050
14
+ hop_length: 256
15
+ win_length: 1024
16
+ f_min: 0
17
+ f_max: 8000
18
+ data_statistics: # Computed for ljspeech dataset
19
+ mel_mean: -5.68145561
20
+ mel_std: 2.7337122
21
+ seed: ${seed}
22
+
configs/data/vctk.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - ljspeech
3
+ - _self_
4
+
5
+ _target_: matcha.data.text_mel_datamodule.TextMelDataModule
6
+ name: vctk
7
+ train_filelist_path: data/filelists/vctk_audio_sid_text_train_filelist.txt
8
+ valid_filelist_path: data/filelists/vctk_audio_sid_text_val_filelist.txt
9
+ batch_size: 32
10
+ add_blank: True
11
+ n_spks: 109
12
+ data_statistics: # Computed for vctk dataset
13
+ mel_mean: -6.630575
14
+ mel_std: 2.482914
configs/debug/default.yaml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # default debugging setup, runs 1 full epoch
4
+ # other debugging configs can inherit from this one
5
+
6
+ # overwrite task name so debugging logs are stored in separate folder
7
+ task_name: "debug"
8
+
9
+ # disable callbacks and loggers during debugging
10
+ # callbacks: null
11
+ # logger: null
12
+
13
+ extras:
14
+ ignore_warnings: False
15
+ enforce_tags: False
16
+
17
+ # sets level of all command line loggers to 'DEBUG'
18
+ # https://hydra.cc/docs/tutorials/basic/running_your_app/logging/
19
+ hydra:
20
+ job_logging:
21
+ root:
22
+ level: DEBUG
23
+
24
+ # use this to also set hydra loggers to 'DEBUG'
25
+ # verbose: True
26
+
27
+ trainer:
28
+ max_epochs: 1
29
+ accelerator: cpu # debuggers don't like gpus
30
+ devices: 1 # debuggers don't like multiprocessing
31
+ detect_anomaly: true # raise exception if NaN or +/-inf is detected in any tensor
32
+
33
+ data:
34
+ num_workers: 0 # debuggers don't like multiprocessing
35
+ pin_memory: False # disable gpu memory pin
configs/debug/fdr.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # runs 1 train, 1 validation and 1 test step
4
+
5
+ defaults:
6
+ - default
7
+
8
+ trainer:
9
+ fast_dev_run: true
configs/debug/limit.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # uses only 1% of the training data and 5% of validation/test data
4
+
5
+ defaults:
6
+ - default
7
+
8
+ trainer:
9
+ max_epochs: 3
10
+ limit_train_batches: 0.01
11
+ limit_val_batches: 0.05
12
+ limit_test_batches: 0.05
configs/debug/overfit.yaml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # overfits to 3 batches
4
+
5
+ defaults:
6
+ - default
7
+
8
+ trainer:
9
+ max_epochs: 20
10
+ overfit_batches: 3
11
+
12
+ # model ckpt and early stopping need to be disabled during overfitting
13
+ callbacks: null
configs/debug/profiler.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # runs with execution time profiling
4
+
5
+ defaults:
6
+ - default
7
+
8
+ trainer:
9
+ max_epochs: 1
10
+ # profiler: "simple"
11
+ profiler: "advanced"
12
+ # profiler: "pytorch"
13
+ accelerator: gpu
14
+
15
+ limit_train_batches: 0.02
configs/eval.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ defaults:
4
+ - _self_
5
+ - data: akylai # choose datamodule with `test_dataloader()` for evaluation
6
+ - model: matcha
7
+ - logger: null
8
+ - trainer: default
9
+ - paths: default
10
+ - extras: default
11
+ - hydra: default
12
+
13
+ task_name: "eval"
14
+
15
+ tags: ["dev"]
16
+
17
+ # passing checkpoint path is necessary for evaluation
18
+ ckpt_path: ???
configs/experiment/akylai.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python train.py experiment=akylai
5
+
6
+ defaults:
7
+ - override /data: akylai.yaml
8
+
9
+ # all parameters below will be merged with parameters from default configurations set above
10
+ # this allows you to overwrite only specified parameters
11
+
12
+ tags: ["akylai"]
13
+
14
+ run_name: akylai
configs/experiment/hifi_dataset_piper_phonemizer.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python train.py experiment=hifi_dataset_piper_phonemizer
5
+
6
+ defaults:
7
+ - override /data: hi-fi_en-US_female.yaml
8
+
9
+ # all parameters below will be merged with parameters from default configurations set above
10
+ # this allows you to overwrite only specified parameters
11
+
12
+ tags: ["hi-fi", "single_speaker", "piper_phonemizer", "en_US", "female"]
13
+
14
+ run_name: hi-fi_en-US_female_piper_phonemizer
configs/experiment/ljspeech.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python train.py experiment=ljspeech
5
+
6
+ defaults:
7
+ - override /data: ljspeech.yaml
8
+
9
+ # all parameters below will be merged with parameters from default configurations set above
10
+ # this allows you to overwrite only specified parameters
11
+
12
+ tags: ["ljspeech"]
13
+
14
+ run_name: ljspeech
configs/experiment/ljspeech_min_memory.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python train.py experiment=ljspeech_min_memory
5
+
6
+ defaults:
7
+ - override /data: ljspeech.yaml
8
+
9
+ # all parameters below will be merged with parameters from default configurations set above
10
+ # this allows you to overwrite only specified parameters
11
+
12
+ tags: ["ljspeech"]
13
+
14
+ run_name: ljspeech_min
15
+
16
+
17
+ model:
18
+ out_size: 172
configs/experiment/multispeaker.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python train.py experiment=multispeaker
5
+
6
+ defaults:
7
+ - override /data: vctk.yaml
8
+
9
+ # all parameters below will be merged with parameters from default configurations set above
10
+ # this allows you to overwrite only specified parameters
11
+
12
+ tags: ["multispeaker"]
13
+
14
+ run_name: multispeaker
configs/extras/default.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # disable python warnings if they annoy you
2
+ ignore_warnings: False
3
+
4
+ # ask user for tags if none are provided in the config
5
+ enforce_tags: True
6
+
7
+ # pretty print config tree at the start of the run using Rich library
8
+ print_config: True
configs/hparams_search/mnist_optuna.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # example hyperparameter optimization of some experiment with Optuna:
4
+ # python train.py -m hparams_search=mnist_optuna experiment=example
5
+
6
+ defaults:
7
+ - override /hydra/sweeper: optuna
8
+
9
+ # choose metric which will be optimized by Optuna
10
+ # make sure this is the correct name of some metric logged in lightning module!
11
+ optimized_metric: "val/acc_best"
12
+
13
+ # here we define Optuna hyperparameter search
14
+ # it optimizes for value returned from function with @hydra.main decorator
15
+ # docs: https://hydra.cc/docs/next/plugins/optuna_sweeper
16
+ hydra:
17
+ mode: "MULTIRUN" # set hydra to multirun by default if this config is attached
18
+
19
+ sweeper:
20
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
21
+
22
+ # storage URL to persist optimization results
23
+ # for example, you can use SQLite if you set 'sqlite:///example.db'
24
+ storage: null
25
+
26
+ # name of the study to persist optimization results
27
+ study_name: null
28
+
29
+ # number of parallel workers
30
+ n_jobs: 1
31
+
32
+ # 'minimize' or 'maximize' the objective
33
+ direction: maximize
34
+
35
+ # total number of runs that will be executed
36
+ n_trials: 20
37
+
38
+ # choose Optuna hyperparameter sampler
39
+ # you can choose bayesian sampler (tpe), random search (without optimization), grid sampler, and others
40
+ # docs: https://optuna.readthedocs.io/en/stable/reference/samplers.html
41
+ sampler:
42
+ _target_: optuna.samplers.TPESampler
43
+ seed: 1234
44
+ n_startup_trials: 10 # number of random sampling runs before optimization starts
45
+
46
+ # define hyperparameter search space
47
+ params:
48
+ model.optimizer.lr: interval(0.0001, 0.1)
49
+ data.batch_size: choice(32, 64, 128, 256)
50
+ model.net.lin1_size: choice(64, 128, 256)
51
+ model.net.lin2_size: choice(64, 128, 256)
52
+ model.net.lin3_size: choice(32, 64, 128, 256)
configs/hydra/default.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://hydra.cc/docs/configure_hydra/intro/
2
+
3
+ # enable color logging
4
+ defaults:
5
+ - override hydra_logging: colorlog
6
+ - override job_logging: colorlog
7
+
8
+ # output directory, generated dynamically on each run
9
+ run:
10
+ dir: ${paths.log_dir}/${task_name}/${run_name}/runs/${now:%Y-%m-%d}_${now:%H-%M-%S}
11
+ sweep:
12
+ dir: ${paths.log_dir}/${task_name}/${run_name}/multiruns/${now:%Y-%m-%d}_${now:%H-%M-%S}
13
+ subdir: ${hydra.job.num}
14
+
15
+ job_logging:
16
+ handlers:
17
+ file:
18
+ # Incorporates fix from https://github.com/facebookresearch/hydra/pull/2242
19
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
configs/local/.gitkeep ADDED
File without changes
configs/logger/aim.yaml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://aimstack.io/
2
+
3
+ # example usage in lightning module:
4
+ # https://github.com/aimhubio/aim/blob/main/examples/pytorch_lightning_track.py
5
+
6
+ # open the Aim UI with the following command (run in the folder containing the `.aim` folder):
7
+ # `aim up`
8
+
9
+ aim:
10
+ _target_: aim.pytorch_lightning.AimLogger
11
+ repo: ${paths.root_dir} # .aim folder will be created here
12
+ # repo: "aim://ip_address:port" # can instead provide IP address pointing to Aim remote tracking server which manages the repo, see https://aimstack.readthedocs.io/en/latest/using/remote_tracking.html#
13
+
14
+ # aim allows to group runs under experiment name
15
+ experiment: null # any string, set to "default" if not specified
16
+
17
+ train_metric_prefix: "train/"
18
+ val_metric_prefix: "val/"
19
+ test_metric_prefix: "test/"
20
+
21
+ # sets the tracking interval in seconds for system usage metrics (CPU, GPU, memory, etc.)
22
+ system_tracking_interval: 10 # set to null to disable system metrics tracking
23
+
24
+ # enable/disable logging of system params such as installed packages, git info, env vars, etc.
25
+ log_system_params: true
26
+
27
+ # enable/disable tracking console logs (default value is true)
28
+ capture_terminal_logs: false # set to false to avoid infinite console log loop issue https://github.com/aimhubio/aim/issues/2550
configs/logger/comet.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://www.comet.ml
2
+
3
+ comet:
4
+ _target_: lightning.pytorch.loggers.comet.CometLogger
5
+ api_key: ${oc.env:COMET_API_TOKEN} # api key is loaded from environment variable
6
+ save_dir: "${paths.output_dir}"
7
+ project_name: "lightning-hydra-template"
8
+ rest_api_key: null
9
+ # experiment_name: ""
10
+ experiment_key: null # set to resume experiment
11
+ offline: False
12
+ prefix: ""
configs/logger/csv.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # csv logger built in lightning
2
+
3
+ csv:
4
+ _target_: lightning.pytorch.loggers.csv_logs.CSVLogger
5
+ save_dir: "${paths.output_dir}"
6
+ name: "csv/"
7
+ prefix: ""
configs/logger/many_loggers.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # train with many loggers at once
2
+
3
+ defaults:
4
+ # - comet
5
+ - csv
6
+ # - mlflow
7
+ # - neptune
8
+ - tensorboard
9
+ - wandb
configs/logger/mlflow.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://mlflow.org
2
+
3
+ mlflow:
4
+ _target_: lightning.pytorch.loggers.mlflow.MLFlowLogger
5
+ # experiment_name: ""
6
+ # run_name: ""
7
+ tracking_uri: ${paths.log_dir}/mlflow/mlruns # run `mlflow ui` command inside the `logs/mlflow/` dir to open the UI
8
+ tags: null
9
+ # save_dir: "./mlruns"
10
+ prefix: ""
11
+ artifact_location: null
12
+ # run_id: ""
configs/logger/neptune.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # https://neptune.ai
2
+
3
+ neptune:
4
+ _target_: lightning.pytorch.loggers.neptune.NeptuneLogger
5
+ api_key: ${oc.env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable
6
+ project: username/lightning-hydra-template
7
+ # name: ""
8
+ log_model_checkpoints: True
9
+ prefix: ""
configs/logger/tensorboard.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://www.tensorflow.org/tensorboard/
2
+
3
+ tensorboard:
4
+ _target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
5
+ save_dir: "${paths.output_dir}/tensorboard/"
6
+ name: null
7
+ log_graph: False
8
+ default_hp_metric: True
9
+ prefix: ""
10
+ # version: ""
configs/logger/wandb.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://wandb.ai
2
+
3
+ wandb:
4
+ _target_: lightning.pytorch.loggers.wandb.WandbLogger
5
+ # name: "" # name of the run (normally generated by wandb)
6
+ save_dir: "${paths.output_dir}"
7
+ offline: False
8
+ id: null # pass correct id to resume experiment!
9
+ anonymous: null # enable anonymous logging
10
+ project: "lightning-hydra-template"
11
+ log_model: False # upload lightning ckpts
12
+ prefix: "" # a string to put at the beginning of metric keys
13
+ # entity: "" # set to name of your wandb team
14
+ group: ""
15
+ tags: []
16
+ job_type: ""
configs/model/cfm/default.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ name: CFM
2
+ solver: euler
3
+ sigma_min: 1e-4
configs/model/decoder/default.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ channels: [256, 256]
2
+ dropout: 0.05
3
+ attention_head_dim: 64
4
+ n_blocks: 1
5
+ num_mid_blocks: 2
6
+ num_heads: 2
7
+ act_fn: snakebeta
configs/model/encoder/default.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ encoder_type: RoPE Encoder
2
+ encoder_params:
3
+ n_feats: ${model.n_feats}
4
+ n_channels: 192
5
+ filter_channels: 768
6
+ filter_channels_dp: 256
7
+ n_heads: 2
8
+ n_layers: 6
9
+ kernel_size: 3
10
+ p_dropout: 0.1
11
+ spk_emb_dim: 64
12
+ n_spks: 1
13
+ prenet: true
14
+
15
+ duration_predictor_params:
16
+ filter_channels_dp: ${model.encoder.encoder_params.filter_channels_dp}
17
+ kernel_size: 3
18
+ p_dropout: ${model.encoder.encoder_params.p_dropout}
configs/model/matcha.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - _self_
3
+ - encoder: default.yaml
4
+ - decoder: default.yaml
5
+ - cfm: default.yaml
6
+ - optimizer: adam.yaml
7
+
8
+ _target_: matcha.models.matcha_tts.MatchaTTS
9
+ n_vocab: 178
10
+ n_spks: ${data.n_spks}
11
+ spk_emb_dim: 64
12
+ n_feats: 80
13
+ data_statistics: ${data.data_statistics}
14
+ out_size: null # Must be divisible by 4
15
+ prior_loss: true
configs/model/optimizer/adam.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ _target_: torch.optim.Adam
2
+ _partial_: true
3
+ lr: 1e-4
4
+ weight_decay: 0.0
configs/paths/default.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # path to root directory
2
+ # this requires PROJECT_ROOT environment variable to exist
3
+ # you can replace it with "." if you want the root to be the current working directory
4
+ root_dir: ${oc.env:PROJECT_ROOT}
5
+
6
+ # path to data directory
7
+ data_dir: ${paths.root_dir}/data/
8
+
9
+ # path to logging directory
10
+ log_dir: ${paths.root_dir}/logs/
11
+
12
+ # path to output directory, created dynamically by hydra
13
+ # path generation pattern is specified in `configs/hydra/default.yaml`
14
+ # use it to store all files generated during the run, like ckpts and metrics
15
+ output_dir: ${hydra:runtime.output_dir}
16
+
17
+ # path to working directory
18
+ work_dir: ${hydra:runtime.cwd}