Spanicin committed on
Commit
182bf6b
·
verified ·
1 Parent(s): 73bde56

Upload 8 files

Browse files
Files changed (8) hide show
  1. .gitignore +170 -0
  2. .pre-commit-config.yaml +17 -0
  3. .pylintrc +633 -0
  4. LICENSE +21 -0
  5. README.md +400 -10
  6. accelerate_config.yaml +21 -0
  7. requirements.txt +33 -0
  8. setup.py +55 -0
.gitignore ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # running cache
2
+ mlruns/
3
+
4
+ # Test directories
5
+ test_data/
6
+ pretrained_models/
7
+
8
+ # Poetry project
9
+ poetry.lock
10
+
11
+ # Byte-compiled / optimized / DLL files
12
+ __pycache__/
13
+ *.py[cod]
14
+ *$py.class
15
+
16
+ # C extensions
17
+ *.so
18
+
19
+ # Distribution / packaging
20
+ .Python
21
+ build/
22
+ develop-eggs/
23
+ dist/
24
+ downloads/
25
+ eggs/
26
+ .eggs/
27
+ lib/
28
+ lib64/
29
+ parts/
30
+ sdist/
31
+ var/
32
+ wheels/
33
+ share/python-wheels/
34
+ *.egg-info/
35
+ .installed.cfg
36
+ *.egg
37
+ MANIFEST
38
+
39
+ # PyInstaller
40
+ # Usually these files are written by a python script from a template
41
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
42
+ *.manifest
43
+ *.spec
44
+
45
+ # Installer logs
46
+ pip-log.txt
47
+ pip-delete-this-directory.txt
48
+
49
+ # Unit test / coverage reports
50
+ htmlcov/
51
+ .tox/
52
+ .nox/
53
+ .coverage
54
+ .coverage.*
55
+ .cache
56
+ nosetests.xml
57
+ coverage.xml
58
+ *.cover
59
+ *.py,cover
60
+ .hypothesis/
61
+ .pytest_cache/
62
+ cover/
63
+
64
+ # Translations
65
+ *.mo
66
+ *.pot
67
+
68
+ # Django stuff:
69
+ *.log
70
+ local_settings.py
71
+ db.sqlite3
72
+ db.sqlite3-journal
73
+
74
+ # Flask stuff:
75
+ instance/
76
+ .webassets-cache
77
+
78
+ # Scrapy stuff:
79
+ .scrapy
80
+
81
+ # Sphinx documentation
82
+ docs/_build/
83
+
84
+ # PyBuilder
85
+ .pybuilder/
86
+ target/
87
+
88
+ # Jupyter Notebook
89
+ .ipynb_checkpoints
90
+
91
+ # IPython
92
+ profile_default/
93
+ ipython_config.py
94
+
95
+ # pyenv
96
+ # For a library or package, you might want to ignore these files since the code is
97
+ # intended to run in multiple environments; otherwise, check them in:
98
+ # .python-version
99
+
100
+ # pipenv
101
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
102
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
103
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
104
+ # install all needed dependencies.
105
+ #Pipfile.lock
106
+
107
+ # poetry
108
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
109
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
110
+ # commonly ignored for libraries.
111
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
112
+ #poetry.lock
113
+
114
+ # pdm
115
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
116
+ #pdm.lock
117
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
118
+ # in version control.
119
+ # https://pdm.fming.dev/#use-with-ide
120
+ .pdm.toml
121
+
122
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
123
+ __pypackages__/
124
+
125
+ # Celery stuff
126
+ celerybeat-schedule
127
+ celerybeat.pid
128
+
129
+ # SageMath parsed files
130
+ *.sage.py
131
+
132
+ # Environments
133
+ .env
134
+ .venv
135
+ env/
136
+ venv/
137
+ ENV/
138
+ env.bak/
139
+ venv.bak/
140
+
141
+ # Spyder project settings
142
+ .spyderproject
143
+ .spyproject
144
+
145
+ # Rope project settings
146
+ .ropeproject
147
+
148
+ # mkdocs documentation
149
+ /site
150
+
151
+ # mypy
152
+ .mypy_cache/
153
+ .dmypy.json
154
+ dmypy.json
155
+
156
+ # Pyre type checker
157
+ .pyre/
158
+
159
+ # pytype static type analyzer
160
+ .pytype/
161
+
162
+ # Cython debug symbols
163
+ cython_debug/
164
+
165
+ # IDE
166
+ .idea/
167
+ .vscode/
168
+ data
169
+ pretrained_models
170
+ test_data
.pre-commit-config.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+   - repo: local
3
+     hooks:
4
+       - id: isort
5
+         name: isort
6
+         language: system
7
+         types: [python]
8
+         pass_filenames: false
9
+         entry: isort
10
+         args: ["."]
11
+       - id: pylint
12
+         name: pylint
13
+         language: system
14
+         types: [python]
15
+         pass_filenames: false
16
+         entry: pylint
17
+         args: ["**/*.py"]
.pylintrc ADDED
@@ -0,0 +1,633 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [MAIN]
2
+
3
+ # Analyse import fallback blocks. This can be used to support both Python 2 and
4
+ # 3 compatible code, which means that the block might have code that exists
5
+ # only in one or another interpreter, leading to false positives when analysed.
6
+ analyse-fallback-blocks=no
7
+
8
+ # Clear in-memory caches upon conclusion of linting. Useful if running pylint
9
+ # in a server-like mode.
10
+ clear-cache-post-run=no
11
+
12
+ # Load and enable all available extensions. Use --list-extensions to see a list
13
+ # of all available extensions.
14
+ #enable-all-extensions=
15
+
16
+ # In error mode, messages with a category besides ERROR or FATAL are
17
+ # suppressed, and no reports are done by default. Error mode is compatible with
18
+ # disabling specific errors.
19
+ #errors-only=
20
+
21
+ # Always return a 0 (non-error) status code, even if lint errors are found.
22
+ # This is primarily useful in continuous integration scripts.
23
+ #exit-zero=
24
+
25
+ # A comma-separated list of package or module names from where C extensions may
26
+ # be loaded. Extensions are loaded into the active Python interpreter and may
27
+ # run arbitrary code.
28
+ extension-pkg-allow-list=
29
+
30
+ # A comma-separated list of package or module names from where C extensions may
31
+ # be loaded. Extensions are loaded into the active Python interpreter and may
32
+ # run arbitrary code. (This is an alternative name to extension-pkg-allow-list
33
+ # for backward compatibility.)
34
+ extension-pkg-whitelist=cv2
35
+
36
+ # Return non-zero exit code if any of these messages/categories are detected,
37
+ # even if score is above --fail-under value. Syntax same as enable. Messages
38
+ # specified are enabled, while categories only check already-enabled messages.
39
+ fail-on=
40
+
41
+ # Specify a score threshold under which the program will exit with error.
42
+ fail-under=10
43
+
44
+ # Interpret the stdin as a python script, whose filename needs to be passed as
45
+ # the module_or_package argument.
46
+ #from-stdin=
47
+
48
+ # Files or directories to be skipped. They should be base names, not paths.
49
+ ignore=CVS
50
+
51
+ # Add files or directories matching the regular expressions patterns to the
52
+ # ignore-list. The regex matches against paths and can be in Posix or Windows
53
+ # format. Because '\\' represents the directory delimiter on Windows systems,
54
+ # it can't be used as an escape character.
55
+ ignore-paths=
56
+
57
+ # Files or directories matching the regular expression patterns are skipped.
58
+ # The regex matches against base names, not paths. The default value ignores
59
+ # Emacs file locks
60
+ ignore-patterns=^\.#
61
+
62
+ # List of module names for which member attributes should not be checked
63
+ # (useful for modules/projects where namespaces are manipulated during runtime
64
+ # and thus existing member attributes cannot be deduced by static analysis). It
65
+ # supports qualified module names, as well as Unix pattern matching.
66
+ ignored-modules=cv2
67
+
68
+ # Python code to execute, usually for sys.path manipulation such as
69
+ # pygtk.require().
70
+ init-hook='import sys; sys.path.append(".")'
71
+
72
+ # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
73
+ # number of processors available to use, and will cap the count on Windows to
74
+ # avoid hangs.
75
+ jobs=1
76
+
77
+ # Control the amount of potential inferred values when inferring a single
78
+ # object. This can help the performance when dealing with large functions or
79
+ # complex, nested conditions.
80
+ limit-inference-results=100
81
+
82
+ # List of plugins (as comma separated values of python module names) to load,
83
+ # usually to register additional checkers.
84
+ load-plugins=
85
+
86
+ # Pickle collected data for later comparisons.
87
+ persistent=yes
88
+
89
+ # Minimum Python version to use for version dependent checks. Will default to
90
+ # the version used to run pylint.
91
+ py-version=3.10
92
+
93
+ # Discover python modules and packages in the file system subtree.
94
+ recursive=no
95
+
96
+ # Add paths to the list of the source roots. Supports globbing patterns. The
97
+ # source root is an absolute path or a path relative to the current working
98
+ # directory used to determine a package namespace for modules located under the
99
+ # source root.
100
+ source-roots=
101
+
102
+ # When enabled, pylint would attempt to guess common misconfiguration and emit
103
+ # user-friendly hints instead of false-positive error messages.
104
+ suggestion-mode=yes
105
+
106
+ # Allow loading of arbitrary C extensions. Extensions are imported into the
107
+ # active Python interpreter and may run arbitrary code.
108
+ unsafe-load-any-extension=no
109
+
110
+ # In verbose mode, extra non-checker-related info will be displayed.
111
+ #verbose=
112
+
113
+
114
+ [BASIC]
115
+
116
+ # Naming style matching correct argument names.
117
+ argument-naming-style=snake_case
118
+
119
+ # Regular expression matching correct argument names. Overrides argument-
120
+ # naming-style. If left empty, argument names will be checked with the set
121
+ # naming style.
122
+ #argument-rgx=
123
+
124
+ # Naming style matching correct attribute names.
125
+ attr-naming-style=snake_case
126
+
127
+ # Regular expression matching correct attribute names. Overrides attr-naming-
128
+ # style. If left empty, attribute names will be checked with the set naming
129
+ # style.
130
+ #attr-rgx=
131
+
132
+ # Bad variable names which should always be refused, separated by a comma.
133
+ bad-names=foo,
134
+ bar,
135
+ baz,
136
+ toto,
137
+ tutu,
138
+ tata
139
+
140
+ # Bad variable names regexes, separated by a comma. If names match any regex,
141
+ # they will always be refused
142
+ bad-names-rgxs=
143
+
144
+ # Naming style matching correct class attribute names.
145
+ class-attribute-naming-style=any
146
+
147
+ # Regular expression matching correct class attribute names. Overrides class-
148
+ # attribute-naming-style. If left empty, class attribute names will be checked
149
+ # with the set naming style.
150
+ #class-attribute-rgx=
151
+
152
+ # Naming style matching correct class constant names.
153
+ class-const-naming-style=UPPER_CASE
154
+
155
+ # Regular expression matching correct class constant names. Overrides class-
156
+ # const-naming-style. If left empty, class constant names will be checked with
157
+ # the set naming style.
158
+ #class-const-rgx=
159
+
160
+ # Naming style matching correct class names.
161
+ class-naming-style=PascalCase
162
+
163
+ # Regular expression matching correct class names. Overrides class-naming-
164
+ # style. If left empty, class names will be checked with the set naming style.
165
+ #class-rgx=
166
+
167
+ # Naming style matching correct constant names.
168
+ const-naming-style=UPPER_CASE
169
+
170
+ # Regular expression matching correct constant names. Overrides const-naming-
171
+ # style. If left empty, constant names will be checked with the set naming
172
+ # style.
173
+ #const-rgx=
174
+
175
+ # Minimum line length for functions/classes that require docstrings, shorter
176
+ # ones are exempt.
177
+ docstring-min-length=-1
178
+
179
+ # Naming style matching correct function names.
180
+ function-naming-style=snake_case
181
+
182
+ # Regular expression matching correct function names. Overrides function-
183
+ # naming-style. If left empty, function names will be checked with the set
184
+ # naming style.
185
+ #function-rgx=
186
+
187
+ # Good variable names which should always be accepted, separated by a comma.
188
+ good-names=i,
189
+ j,
190
+ k,
191
+ ex,
192
+ Run,
193
+ _
194
+
195
+ # Good variable names regexes, separated by a comma. If names match any regex,
196
+ # they will always be accepted
197
+ good-names-rgxs=
198
+
199
+ # Include a hint for the correct naming format with invalid-name.
200
+ include-naming-hint=no
201
+
202
+ # Naming style matching correct inline iteration names.
203
+ inlinevar-naming-style=any
204
+
205
+ # Regular expression matching correct inline iteration names. Overrides
206
+ # inlinevar-naming-style. If left empty, inline iteration names will be checked
207
+ # with the set naming style.
208
+ #inlinevar-rgx=
209
+
210
+ # Naming style matching correct method names.
211
+ method-naming-style=snake_case
212
+
213
+ # Regular expression matching correct method names. Overrides method-naming-
214
+ # style. If left empty, method names will be checked with the set naming style.
215
+ #method-rgx=
216
+
217
+ # Naming style matching correct module names.
218
+ module-naming-style=snake_case
219
+
220
+ # Regular expression matching correct module names. Overrides module-naming-
221
+ # style. If left empty, module names will be checked with the set naming style.
222
+ #module-rgx=
223
+
224
+ # Colon-delimited sets of names that determine each other's naming style when
225
+ # the name regexes allow several styles.
226
+ name-group=
227
+
228
+ # Regular expression which should only match function or class names that do
229
+ # not require a docstring.
230
+ no-docstring-rgx=^_
231
+
232
+ # List of decorators that produce properties, such as abc.abstractproperty. Add
233
+ # to this list to register other decorators that produce valid properties.
234
+ # These decorators are taken in consideration only for invalid-name.
235
+ property-classes=abc.abstractproperty
236
+
237
+ # Regular expression matching correct type alias names. If left empty, type
238
+ # alias names will be checked with the set naming style.
239
+ #typealias-rgx=
240
+
241
+ # Regular expression matching correct type variable names. If left empty, type
242
+ # variable names will be checked with the set naming style.
243
+ #typevar-rgx=
244
+
245
+ # Naming style matching correct variable names.
246
+ variable-naming-style=snake_case
247
+
248
+ # Regular expression matching correct variable names. Overrides variable-
249
+ # naming-style. If left empty, variable names will be checked with the set
250
+ # naming style.
251
+ variable-rgx=(_?[a-z][A-Za-z0-9]{0,30})|([A-Z0-9]{1,30})
252
+
253
+
254
+ [CLASSES]
255
+
256
+ # Warn about protected attribute access inside special methods
257
+ check-protected-access-in-special-methods=no
258
+
259
+ # List of method names used to declare (i.e. assign) instance attributes.
260
+ defining-attr-methods=__init__,
261
+ __new__,
262
+ setUp,
263
+ asyncSetUp,
264
+ __post_init__
265
+
266
+ # List of member names, which should be excluded from the protected access
267
+ # warning.
268
+ exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit
269
+
270
+ # List of valid names for the first argument in a class method.
271
+ valid-classmethod-first-arg=cls
272
+
273
+ # List of valid names for the first argument in a metaclass class method.
274
+ valid-metaclass-classmethod-first-arg=mcs
275
+
276
+
277
+ [DESIGN]
278
+
279
+ # List of regular expressions of class ancestor names to ignore when counting
280
+ # public methods (see R0903)
281
+ exclude-too-few-public-methods=
282
+
283
+ # List of qualified class names to ignore when counting class parents (see
284
+ # R0901)
285
+ ignored-parents=
286
+
287
+ # Maximum number of arguments for function / method.
288
+ max-args=7
289
+
290
+ # Maximum number of attributes for a class (see R0902).
291
+ max-attributes=20
292
+
293
+ # Maximum number of boolean expressions in an if statement (see R0916).
294
+ max-bool-expr=5
295
+
296
+ # Maximum number of branches for function / method body.
297
+ max-branches=12
298
+
299
+ # Maximum number of locals for function / method body.
300
+ max-locals=15
301
+
302
+ # Maximum number of parents for a class (see R0901).
303
+ max-parents=7
304
+
305
+ # Maximum number of public methods for a class (see R0904).
306
+ max-public-methods=20
307
+
308
+ # Maximum number of return / yield for function / method body.
309
+ max-returns=6
310
+
311
+ # Maximum number of statements in function / method body.
312
+ max-statements=300
313
+
314
+ # Minimum number of public methods for a class (see R0903).
315
+ min-public-methods=1
316
+
317
+
318
+ [EXCEPTIONS]
319
+
320
+ # Exceptions that will emit a warning when caught.
321
+ overgeneral-exceptions=builtins.BaseException,builtins.Exception
322
+
323
+
324
+ [FORMAT]
325
+
326
+ # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
327
+ expected-line-ending-format=
328
+
329
+ # Regexp for a line that is allowed to be longer than the limit.
330
+ ignore-long-lines=^\s*(# )?<?https?://\S+>?$
331
+
332
+ # Number of spaces of indent required inside a hanging or continued line.
333
+ indent-after-paren=4
334
+
335
+ # String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
336
+ # tab).
337
+ indent-string='    '
338
+
339
+ # Maximum number of characters on a single line.
340
+ max-line-length=150
341
+
342
+ # Maximum number of lines in a module.
343
+ max-module-lines=2000
344
+
345
+ # Allow the body of a class to be on the same line as the declaration if body
346
+ # contains single statement.
347
+ single-line-class-stmt=no
348
+
349
+ # Allow the body of an if to be on the same line as the test if there is no
350
+ # else.
351
+ single-line-if-stmt=no
352
+
353
+
354
+ [IMPORTS]
355
+
356
+ # List of modules that can be imported at any level, not just the top level
357
+ # one.
358
+ allow-any-import-level=
359
+
360
+ # Allow explicit reexports by alias from a package __init__.
361
+ allow-reexport-from-package=no
362
+
363
+ # Allow wildcard imports from modules that define __all__.
364
+ allow-wildcard-with-all=no
365
+
366
+ # Deprecated modules which should not be used, separated by a comma.
367
+ deprecated-modules=
368
+
369
+ # Output a graph (.gv or any supported image format) of external dependencies
370
+ # to the given file (report RP0402 must not be disabled).
371
+ ext-import-graph=
372
+
373
+ # Output a graph (.gv or any supported image format) of all (i.e. internal and
374
+ # external) dependencies to the given file (report RP0402 must not be
375
+ # disabled).
376
+ import-graph=
377
+
378
+ # Output a graph (.gv or any supported image format) of internal dependencies
379
+ # to the given file (report RP0402 must not be disabled).
380
+ int-import-graph=
381
+
382
+ # Force import order to recognize a module as part of the standard
383
+ # compatibility libraries.
384
+ known-standard-library=
385
+
386
+ # Force import order to recognize a module as part of a third party library.
387
+ known-third-party=enchant
388
+
389
+ # Couples of modules and preferred modules, separated by a comma.
390
+ preferred-modules=
391
+
392
+
393
+ [LOGGING]
394
+
395
+ # The type of string formatting that logging methods do. `old` means using %
396
+ # formatting, `new` is for `{}` formatting.
397
+ logging-format-style=old
398
+
399
+ # Logging modules to check that the string format arguments are in logging
400
+ # function parameter format.
401
+ logging-modules=logging
402
+
403
+
404
+ [MESSAGES CONTROL]
405
+
406
+ # Only show warnings with the listed confidence levels. Leave empty to show
407
+ # all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE,
408
+ # UNDEFINED.
409
+ confidence=HIGH,
410
+ CONTROL_FLOW,
411
+ INFERENCE,
412
+ INFERENCE_FAILURE,
413
+ UNDEFINED
414
+
415
+ # Disable the message, report, category or checker with the given id(s). You
416
+ # can either give multiple identifiers separated by comma (,) or put this
417
+ # option multiple times (only on the command line, not in the configuration
418
+ # file where it should appear only once). You can also use "--disable=all" to
419
+ # disable everything first and then re-enable specific checks. For example, if
420
+ # you want to run only the similarities checker, you can use "--disable=all
421
+ # --enable=similarities". If you want to run only the classes checker, but have
422
+ # no Warning level messages displayed, use "--disable=all --enable=classes
423
+ # --disable=W".
424
+ disable=too-many-arguments,
425
+ too-many-locals,
426
+ too-many-branches,
427
+ protected-access
428
+
429
+
430
+ # Enable the message, report, category or checker with the given id(s). You can
431
+ # either give multiple identifiers separated by comma (,) or put this option
432
+ # multiple times (only on the command line, not in the configuration file where
433
+ # it should appear only once). See also the "--disable" option for examples.
434
+ enable=
435
+
436
+
437
+ [METHOD_ARGS]
438
+
439
+ # List of qualified names (i.e., library.method) which require a timeout
440
+ # parameter e.g. 'requests.api.get,requests.api.post'
441
+ timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request
442
+
443
+
444
+ [MISCELLANEOUS]
445
+
446
+ # List of note tags to take in consideration, separated by a comma.
447
+ notes=FIXME,
448
+ XXX
449
+
450
+ # Regular expression of note tags to take in consideration.
451
+ notes-rgx=
452
+
453
+
454
+ [REFACTORING]
455
+
456
+ # Maximum number of nested blocks for function / method body
457
+ max-nested-blocks=5
458
+
459
+ # Complete names of functions that never return. When checking for
460
+ # inconsistent-return-statements if a never returning function is called then
461
+ # it will be considered as an explicit return statement and no message will be
462
+ # printed.
463
+ never-returning-functions=sys.exit,argparse.parse_error
464
+
465
+ # Let 'consider-using-join' be raised when the separator to join on would be
466
+ # non-empty (resulting in expected fixes of the type: ``"- " + " -
467
+ # ".join(items)``)
468
+ # suggest-join-with-non-empty-separator=yes
469
+
470
+
471
+ [REPORTS]
472
+
473
+ # Python expression which should return a score less than or equal to 10. You
474
+ # have access to the variables 'fatal', 'error', 'warning', 'refactor',
475
+ # 'convention', and 'info' which contain the number of messages in each
476
+ # category, as well as 'statement' which is the total number of statements
477
+ # analyzed. This score is used by the global evaluation report (RP0004).
478
+ evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10))
479
+
480
+ # Template used to display messages. This is a python new-style format string
481
+ # used to format the message information. See doc for all details.
482
+ msg-template=
483
+
484
+ # Set the output format. Available formats are: text, parseable, colorized,
485
+ # json2 (improved json format), json (old json format) and msvs (visual
486
+ # studio). You can also give a reporter class, e.g.
487
+ # mypackage.mymodule.MyReporterClass.
488
+ #output-format=
489
+
490
+ # Tells whether to display a full report or only the messages.
491
+ reports=no
492
+
493
+ # Activate the evaluation score.
494
+ score=yes
495
+
496
+
497
+ [SIMILARITIES]
498
+
499
+ # Comments are removed from the similarity computation
500
+ ignore-comments=yes
501
+
502
+ # Docstrings are removed from the similarity computation
503
+ ignore-docstrings=yes
504
+
505
+ # Imports are removed from the similarity computation
506
+ ignore-imports=yes
507
+
508
+ # Signatures are removed from the similarity computation
509
+ ignore-signatures=yes
510
+
511
+ # Minimum lines number of a similarity.
512
+ min-similarity-lines=4
513
+
514
+
515
+ [SPELLING]
516
+
517
+ # Limits count of emitted suggestions for spelling mistakes.
518
+ max-spelling-suggestions=4
519
+
520
+ # Spelling dictionary name. No available dictionaries: you need to install
521
+ # both the python package and the system dependency for enchant to work.
522
+ spelling-dict=
523
+
524
+ # List of comma separated words that should be considered directives if they
525
+ # appear at the beginning of a comment and should not be checked.
526
+ spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:
527
+
528
+ # List of comma separated words that should not be checked.
529
+ spelling-ignore-words=
530
+
531
+ # A path to a file that contains the private dictionary; one word per line.
532
+ spelling-private-dict-file=
533
+
534
+ # Tells whether to store unknown words to the private dictionary (see the
535
+ # --spelling-private-dict-file option) instead of raising a message.
536
+ spelling-store-unknown-words=no
537
+
538
+
539
+ [STRING]
540
+
541
+ # This flag controls whether inconsistent-quotes generates a warning when the
542
+ # character used as a quote delimiter is used inconsistently within a module.
543
+ check-quote-consistency=no
544
+
545
+ # This flag controls whether the implicit-str-concat should generate a warning
546
+ # on implicit string concatenation in sequences defined over several lines.
547
+ check-str-concat-over-line-jumps=no
548
+
549
+
550
+ [TYPECHECK]
551
+
552
+ # List of decorators that produce context managers, such as
553
+ # contextlib.contextmanager. Add to this list to register other decorators that
554
+ # produce valid context managers.
555
+ contextmanager-decorators=contextlib.contextmanager
556
+
557
+ # List of members which are set dynamically and missed by pylint inference
558
+ # system, and so shouldn't trigger E1101 when accessed. Python regular
559
+ # expressions are accepted.
560
+ generated-members=
561
+
562
+ # Tells whether to warn about missing members when the owner of the attribute
563
+ # is inferred to be None.
564
+ ignore-none=yes
565
+
566
+ # This flag controls whether pylint should warn about no-member and similar
567
+ # checks whenever an opaque object is returned when inferring. The inference
568
+ # can return multiple potential results while evaluating a Python object, but
569
+ # some branches might not be evaluated, which results in partial inference. In
570
+ # that case, it might be useful to still emit no-member and other checks for
571
+ # the rest of the inferred objects.
572
+ ignore-on-opaque-inference=yes
573
+
574
+ # List of symbolic message names to ignore for Mixin members.
575
+ ignored-checks-for-mixins=no-member,
576
+ not-async-context-manager,
577
+ not-context-manager,
578
+ attribute-defined-outside-init
579
+
580
+ # List of class names for which member attributes should not be checked (useful
581
+ # for classes with dynamically set attributes). This supports the use of
582
+ # qualified names.
583
+ ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace
584
+
585
+ # Show a hint with possible names when a member name was not found. The aspect
586
+ # of finding the hint is based on edit distance.
587
+ missing-member-hint=yes
588
+
589
+ # The minimum edit distance a name should have in order to be considered a
590
+ # similar match for a missing member name.
591
+ missing-member-hint-distance=1
592
+
593
+ # The total number of similar names that should be taken in consideration when
594
+ # showing a hint for a missing member.
595
+ missing-member-max-choices=1
596
+
597
+ # Regex pattern to define which classes are considered mixins.
598
+ mixin-class-rgx=.*[Mm]ixin
599
+
600
+ # List of decorators that change the signature of a decorated function.
601
+ signature-mutators=
602
+
603
+
604
+ [VARIABLES]
605
+
606
+ # List of additional names supposed to be defined in builtins. Remember that
607
+ # you should avoid defining new builtins when possible.
608
+ additional-builtins=
609
+
610
+ # Tells whether unused global variables should be treated as a violation.
611
+ allow-global-unused-variables=yes
612
+
613
+ # List of names allowed to shadow builtins
614
+ allowed-redefined-builtins=
615
+
616
+ # List of strings which can identify a callback function by name. A callback
617
+ # name must start or end with one of those strings.
618
+ callbacks=cb_,
619
+ _cb
620
+
621
+ # A regular expression matching the name of dummy variables (i.e. expected to
622
+ # not be used).
623
+ dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
624
+
625
+ # Argument names that match this expression will be ignored.
626
+ ignored-argument-names=_.*|^ignored_|^unused_
627
+
628
+ # Tells whether we should check for unused import in __init__ files.
629
+ init-import=no
630
+
631
+ # List of qualified module names which can have objects that can redefine
632
+ # builtins.
633
+ redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Fusion Lab: Generative Vision Lab of Fudan University
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,10 +1,400 @@
1
- ---
2
- title: Aiavatar2
3
- emoji: 🏢
4
- colorFrom: gray
5
- colorTo: blue
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <h1 align='center'>Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation</h1>
2
+
3
+ <div align='center'>
4
+ <a href='https://github.com/xumingw' target='_blank'>Mingwang Xu</a><sup>1*</sup>&emsp;
5
+ <a href='https://github.com/crystallee-ai' target='_blank'>Hui Li</a><sup>1*</sup>&emsp;
6
+ <a href='https://github.com/subazinga' target='_blank'>Qingkun Su</a><sup>1*</sup>&emsp;
7
+ <a href='https://github.com/NinoNeumann' target='_blank'>Hanlin Shang</a><sup>1</sup>&emsp;
8
+ <a href='https://github.com/AricGamma' target='_blank'>Liwei Zhang</a><sup>1</sup>&emsp;
9
+ <a href='https://github.com/cnexah' target='_blank'>Ce Liu</a><sup>3</sup>&emsp;
10
+ </div>
11
+ <div align='center'>
12
+ <a href='https://jingdongwang2017.github.io/' target='_blank'>Jingdong Wang</a><sup>2</sup>&emsp;
13
+ <a href='https://yoyo000.github.io/' target='_blank'>Yao Yao</a><sup>4</sup>&emsp;
14
+ <a href='https://sites.google.com/site/zhusiyucs/home' target='_blank'>Siyu Zhu</a><sup>1</sup>&emsp;
15
+ </div>
16
+
17
+ <div align='center'>
18
+ <sup>1</sup>Fudan University&emsp; <sup>2</sup>Baidu Inc&emsp; <sup>3</sup>ETH Zurich&emsp; <sup>4</sup>Nanjing University
19
+ </div>
20
+
21
+ <br>
22
+ <div align='center'>
23
+ <a href='https://github.com/fudan-generative-vision/hallo'><img src='https://img.shields.io/github/stars/fudan-generative-vision/hallo?style=social'></a>
24
+ <a href='https://fudan-generative-vision.github.io/hallo/#/'><img src='https://img.shields.io/badge/Project-HomePage-Green'></a>
25
+ <a href='https://arxiv.org/pdf/2406.08801'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a>
26
+ <a href='https://huggingface.co/fudan-generative-ai/hallo'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20HuggingFace-Model-yellow'></a>
27
+ <a href='https://huggingface.co/spaces/fffiloni/tts-hallo-talking-portrait'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20HuggingFace-Demo-yellow'></a>
28
+ <a href='https://www.modelscope.cn/models/fudan-generative-vision/Hallo/summary'><img src='https://img.shields.io/badge/Modelscope-Model-purple'></a>
29
+ <a href='assets/wechat.jpeg'><img src='https://badges.aleen42.com/src/wechat.svg'></a>
30
+ </div>
31
+
32
+ <br>
33
+
34
+ ## 📸 Showcase
35
+
36
+
37
+ https://github.com/fudan-generative-vision/hallo/assets/17402682/9d1a0de4-3470-4d38-9e4f-412f517f834c
38
+
39
+ ### 🎬 Honoring Classic Films
40
+
41
+ <table class="center">
42
+ <tr>
43
+ <td style="text-align: center"><b>Devil Wears Prada</b></td>
44
+ <td style="text-align: center"><b>Green Book</b></td>
45
+ <td style="text-align: center"><b>Infernal Affairs</b></td>
46
+ </tr>
47
+ <tr>
48
+ <td style="text-align: center"><a target="_blank" href="https://cdn.aondata.work/video/short_movie/Devil_Wears_Prada-480p.mp4"><img src="https://cdn.aondata.work/img/short_movie/Devil_Wears_Prada_GIF.gif"></a></td>
49
+ <td style="text-align: center"><a target="_blank" href="https://cdn.aondata.work/video/short_movie/Green_Book-480p.mp4"><img src="https://cdn.aondata.work/img/short_movie/Green_Book_GIF.gif"></a></td>
50
+ <td style="text-align: center"><a target="_blank" href="https://cdn.aondata.work/video/short_movie/无间道-480p.mp4"><img src="https://cdn.aondata.work/img/short_movie/Infernal_Affairs_GIF.gif"></a></td>
51
+ </tr>
52
+ <tr>
53
+ <td style="text-align: center"><b>Patch Adams</b></td>
54
+ <td style="text-align: center"><b>Tough Love</b></td>
55
+ <td style="text-align: center"><b>Shawshank Redemption</b></td>
56
+ </tr>
57
+ <tr>
58
+ <td style="text-align: center"><a target="_blank" href="https://cdn.aondata.work/video/short_movie/Patch_Adams-480p.mp4"><img src="https://cdn.aondata.work/img/short_movie/Patch_Adams_GIF.gif"></a></td>
59
+ <td style="text-align: center"><a target="_blank" href="https://cdn.aondata.work/video/short_movie/Tough_Love-480p.mp4"><img src="https://cdn.aondata.work/img/short_movie/Tough_Love_GIF.gif"></a></td>
60
+ <td style="text-align: center"><a target="_blank" href="https://cdn.aondata.work/video/short_movie/Shawshank-480p.mp4"><img src="https://cdn.aondata.work/img/short_movie/Shawshank_GIF.gif"></a></td>
61
+ </tr>
62
+ </table>
63
+
64
+ Explore [more examples](https://fudan-generative-vision.github.io/hallo).
65
+
66
+ ## 📰 News
67
+
68
+ - **`2024/06/28`**: 🎉🎉🎉 We are proud to announce the release of our model training code. Try it with your own training data. Here is the [tutorial](#training).
69
+ - **`2024/06/21`**: 🚀🚀🚀 Cloned a Gradio demo on [🤗Huggingface space](https://huggingface.co/spaces/fudan-generative-ai/hallo).
70
+ - **`2024/06/20`**: 🌟🌟🌟 Received numerous contributions from the community, including a [Windows version](https://github.com/sdbds/hallo-for-windows), [ComfyUI](https://github.com/AIFSH/ComfyUI-Hallo), [WebUI](https://github.com/fudan-generative-vision/hallo/pull/51), and [Docker template](https://github.com/ashleykleynhans/hallo-docker).
71
+ - **`2024/06/15`**: ✨✨✨ Released some images and audios for inference testing on [🤗Huggingface](https://huggingface.co/datasets/fudan-generative-ai/hallo_inference_samples).
72
+ - **`2024/06/15`**: 🎉🎉🎉 Launched the first version on 🫡[GitHub](https://github.com/fudan-generative-vision/hallo).
73
+
74
+ ## 🤝 Community Resources
75
+
76
+ Explore the resources developed by our community to enhance your experience with Hallo:
77
+
78
+ - [TTS x Hallo Talking Portrait Generator](https://huggingface.co/spaces/fffiloni/tts-hallo-talking-portrait) - Check out this awesome Gradio demo by [@Sylvain Filoni](https://huggingface.co/fffiloni)! With this tool, you can conveniently prepare portrait image and audio for Hallo.
79
+ - [Demo on Huggingface](https://huggingface.co/spaces/multimodalart/hallo) - Check out this easy-to-use Gradio demo by [@multimodalart](https://huggingface.co/multimodalart).
80
+ - [hallo-webui](https://github.com/daswer123/hallo-webui) - Explore the WebUI created by [@daswer123](https://github.com/daswer123).
81
+ - [hallo-for-windows](https://github.com/sdbds/hallo-for-windows) - Utilize Hallo on Windows with the guide by [@sdbds](https://github.com/sdbds).
82
+ - [ComfyUI-Hallo](https://github.com/AIFSH/ComfyUI-Hallo) - Integrate Hallo with the ComfyUI tool by [@AIFSH](https://github.com/AIFSH).
83
+ - [hallo-docker](https://github.com/ashleykleynhans/hallo-docker) - Docker image for Hallo by [@ashleykleynhans](https://github.com/ashleykleynhans).
84
+ - [RunPod Template](https://runpod.io/console/deploy?template=aeyibwyvzy&ref=2xxro4syy) - Deploy Hallo to RunPod by [@ashleykleynhans](https://github.com/ashleykleynhans).
85
+ - [JoyHallo](https://jdh-algo.github.io/JoyHallo/) - JoyHallo extends the capabilities of Hallo, enabling it to support Mandarin.
86
+
87
+ Thanks to all of them.
88
+
89
+ Join our community and explore these amazing resources to make the most out of Hallo. Enjoy, and elevate your creative projects!
90
+
91
+ ## 🔧️ Framework
92
+
93
+ ![abstract](assets/framework_1.jpg)
94
+ ![framework](assets/framework_2.jpg)
95
+
96
+ ## ⚙️ Installation
97
+
98
+ - System requirements: Ubuntu 20.04/Ubuntu 22.04, CUDA 12.1
99
+ - Tested GPUs: A100
100
+
101
+ Create conda environment:
102
+
103
+ ```bash
104
+ conda create -n hallo python=3.10
105
+ conda activate hallo
106
+ ```
107
+
108
+ Install packages with `pip`
109
+
110
+ ```bash
111
+ pip install -r requirements.txt
112
+ pip install .
113
+ ```
114
+
115
+ In addition, `ffmpeg` is required:
116
+ ```bash
117
+ apt-get install ffmpeg
118
+ ```
119
+
120
+ ## 🗝️️ Usage
121
+
122
+ The entry point for inference is `scripts/inference.py`. To test your own cases, complete the following steps:
123
+
124
+ 1. [Download all required pretrained models](#download-pretrained-models).
125
+ 2. [Prepare source image and driving audio pairs](#prepare-inference-data).
126
+ 3. [Run inference](#run-inference).
127
+
128
+ ### 📥 Download Pretrained Models
129
+
130
+ You can easily get all pretrained models required by inference from our [HuggingFace repo](https://huggingface.co/fudan-generative-ai/hallo).
131
+
132
+ Clone the pretrained models into the `${PROJECT_ROOT}/pretrained_models` directory with the commands below:
133
+
134
+ ```shell
135
+ git lfs install
136
+ git clone https://huggingface.co/fudan-generative-ai/hallo pretrained_models
137
+ ```
138
+
139
+ Or you can download them separately from their source repo:
140
+
141
+ - [hallo](https://huggingface.co/fudan-generative-ai/hallo/tree/main/hallo): Our checkpoints consist of denoising UNet, face locator, image & audio proj.
142
+ - [audio_separator](https://huggingface.co/huangjackson/Kim_Vocal_2): Kim\_Vocal\_2 MDX-Net vocal removal model. (_Thanks to [KimberleyJensen](https://github.com/KimberleyJensen)_)
143
+ - [insightface](https://github.com/deepinsight/insightface/tree/master/python-package#model-zoo): 2D and 3D Face Analysis placed into `pretrained_models/face_analysis/models/`. (_Thanks to deepinsight_)
144
+ - [face landmarker](https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task): Face detection & mesh model from [mediapipe](https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker#models) placed into `pretrained_models/face_analysis/models`.
145
+ - [motion module](https://github.com/guoyww/AnimateDiff/blob/main/README.md#202309-animatediff-v2): motion module from [AnimateDiff](https://github.com/guoyww/AnimateDiff). (_Thanks to [guoyww](https://github.com/guoyww)_).
146
+ - [sd-vae-ft-mse](https://huggingface.co/stabilityai/sd-vae-ft-mse): Weights are intended to be used with the diffusers library. (_Thanks to [stabilityai](https://huggingface.co/stabilityai)_)
147
+ - [StableDiffusion V1.5](https://huggingface.co/runwayml/stable-diffusion-v1-5): Initialized and fine-tuned from Stable-Diffusion-v1-2. (_Thanks to [runwayml](https://huggingface.co/runwayml)_)
148
+ - [wav2vec](https://huggingface.co/facebook/wav2vec2-base-960h): wav audio to vector model from [Facebook](https://huggingface.co/facebook/wav2vec2-base-960h).
149
+
150
+ Finally, these pretrained models should be organized as follows:
151
+
152
+ ```text
153
+ ./pretrained_models/
154
+ |-- audio_separator/
155
+ | |-- download_checks.json
156
+ | |-- mdx_model_data.json
157
+ | |-- vr_model_data.json
158
+ | `-- Kim_Vocal_2.onnx
159
+ |-- face_analysis/
160
+ | `-- models/
161
+ | |-- face_landmarker_v2_with_blendshapes.task # face landmarker model from mediapipe
162
+ | |-- 1k3d68.onnx
163
+ | |-- 2d106det.onnx
164
+ | |-- genderage.onnx
165
+ | |-- glintr100.onnx
166
+ | `-- scrfd_10g_bnkps.onnx
167
+ |-- motion_module/
168
+ | `-- mm_sd_v15_v2.ckpt
169
+ |-- sd-vae-ft-mse/
170
+ | |-- config.json
171
+ | `-- diffusion_pytorch_model.safetensors
172
+ |-- stable-diffusion-v1-5/
173
+ | `-- unet/
174
+ | |-- config.json
175
+ | `-- diffusion_pytorch_model.safetensors
176
+ `-- wav2vec/
177
+ `-- wav2vec2-base-960h/
178
+ |-- config.json
179
+ |-- feature_extractor_config.json
180
+ |-- model.safetensors
181
+ |-- preprocessor_config.json
182
+ |-- special_tokens_map.json
183
+ |-- tokenizer_config.json
184
+ `-- vocab.json
185
+ ```
186
+
187
+ ### 🛠️ Prepare Inference Data
188
+
189
+ Hallo has a few simple requirements for input data:
190
+
191
+ For the source image:
192
+
193
+ 1. It should be cropped into squares.
194
+ 2. The face should be the main focus, making up 50%-70% of the image.
195
+ 3. The face should be facing forward, with a rotation angle of less than 30° (no side profiles).
196
+
197
+ For the driving audio:
198
+
199
+ 1. It must be in WAV format.
200
+ 2. It must be in English since our training datasets are only in this language.
201
+ 3. Ensure the vocals are clear; background music is acceptable.
202
+
203
+ We have provided [some samples](examples/) for your reference.
204
+
205
+ ### 🎮 Run Inference
206
+
207
+ Simply run `scripts/inference.py` and pass `source_image` and `driving_audio` as input:
208
+
209
+ ```bash
210
+ python scripts/inference.py --source_image examples/reference_images/1.jpg --driving_audio examples/driving_audios/1.wav
211
+ ```
212
+
213
+ Animation results will be saved as `${PROJECT_ROOT}/.cache/output.mp4` by default. You can pass `--output` to specify the output file name. You can find more examples for inference at [examples folder](https://github.com/fudan-generative-vision/hallo/tree/main/examples).
214
+
215
+ For more options:
216
+
217
+ ```shell
218
+ usage: inference.py [-h] [-c CONFIG] [--source_image SOURCE_IMAGE] [--driving_audio DRIVING_AUDIO] [--output OUTPUT] [--pose_weight POSE_WEIGHT]
219
+ [--face_weight FACE_WEIGHT] [--lip_weight LIP_WEIGHT] [--face_expand_ratio FACE_EXPAND_RATIO]
220
+
221
+ options:
222
+ -h, --help show this help message and exit
223
+ -c CONFIG, --config CONFIG
224
+ --source_image SOURCE_IMAGE
225
+ source image
226
+ --driving_audio DRIVING_AUDIO
227
+ driving audio
228
+ --output OUTPUT output video file name
229
+ --pose_weight POSE_WEIGHT
230
+ weight of pose
231
+ --face_weight FACE_WEIGHT
232
+ weight of face
233
+ --lip_weight LIP_WEIGHT
234
+ weight of lip
235
+ --face_expand_ratio FACE_EXPAND_RATIO
236
+ face region
237
+ ```
238
+
239
+ ## Training
240
+
241
+ ### Prepare Data for Training
242
+
243
+ The training data consists of talking-face videos which, like the source images used for inference, need to meet the following requirements:
244
+
245
+ 1. It should be cropped into squares.
246
+ 2. The face should be the main focus, making up 50%-70% of the image.
247
+ 3. The face should be facing forward, with a rotation angle of less than 30° (no side profiles).
248
+
249
+ Organize your raw videos into the following directory structure:
250
+
251
+
252
+ ```text
253
+ dataset_name/
254
+ |-- videos/
255
+ | |-- 0001.mp4
256
+ | |-- 0002.mp4
257
+ | |-- 0003.mp4
258
+ | `-- 0004.mp4
259
+ ```
260
+
261
+ You can use any `dataset_name`, but ensure the `videos` directory is named as shown above.
262
+
263
+ Next, process the videos with the following commands:
264
+
265
+ ```bash
266
+ python -m scripts.data_preprocess --input_dir dataset_name/videos --step 1
267
+ python -m scripts.data_preprocess --input_dir dataset_name/videos --step 2
268
+ ```
269
+
270
+ **Note:** Execute steps 1 and 2 sequentially as they perform different tasks. Step 1 converts videos into frames, extracts audio from each video, and generates the necessary masks. Step 2 generates face embeddings using InsightFace and audio embeddings using Wav2Vec, and requires a GPU. For parallel processing, use the `-p` and `-r` arguments. The `-p` argument specifies the total number of instances to launch, dividing the data into `p` parts. The `-r` argument specifies which part the current process should handle. You need to manually launch multiple instances with different values for `-r`.
271
+
272
+ Generate the metadata JSON files with the following commands:
273
+
274
+ ```bash
275
+ python scripts/extract_meta_info_stage1.py -r path/to/dataset -n dataset_name
276
+ python scripts/extract_meta_info_stage2.py -r path/to/dataset -n dataset_name
277
+ ```
278
+
279
+ Replace `path/to/dataset` with the path to the parent directory of `videos`, such as `dataset_name` in the example above. This will generate `dataset_name_stage1.json` and `dataset_name_stage2.json` in the `./data` directory.
280
+
281
+ ### Training
282
+
283
+ Update the data meta path settings in the configuration YAML files, `configs/train/stage1.yaml` and `configs/train/stage2.yaml`:
284
+
285
+
286
+ ```yaml
287
+ #stage1.yaml
288
+ data:
289
+ meta_paths:
290
+ - ./data/dataset_name_stage1.json
291
+
292
+ #stage2.yaml
293
+ data:
294
+ meta_paths:
295
+ - ./data/dataset_name_stage2.json
296
+ ```
297
+
298
+ Start training with the following command:
299
+
300
+ ```shell
301
+ accelerate launch -m \
302
+ --config_file accelerate_config.yaml \
303
+ --machine_rank 0 \
304
+ --main_process_ip 0.0.0.0 \
305
+ --main_process_port 20055 \
306
+ --num_machines 1 \
307
+ --num_processes 8 \
308
+ scripts.train_stage1 --config ./configs/train/stage1.yaml
309
+ ```
310
+
311
+ #### Accelerate Usage Explanation
312
+
313
+ The `accelerate launch` command is used to start the training process with distributed settings.
314
+
315
+ ```shell
316
+ accelerate launch [arguments] {training_script} --{training_script-argument-1} --{training_script-argument-2} ...
317
+ ```
318
+
319
+ **Arguments for Accelerate:**
320
+
321
+ - `-m, --module`: Interpret the launch script as a Python module.
322
+ - `--config_file`: Configuration file for Hugging Face Accelerate.
323
+ - `--machine_rank`: Rank of the current machine in a multi-node setup.
324
+ - `--main_process_ip`: IP address of the master node.
325
+ - `--main_process_port`: Port of the master node.
326
+ - `--num_machines`: Total number of nodes participating in the training.
327
+ - `--num_processes`: Total number of processes for training, matching the total number of GPUs across all machines.
328
+
329
+ **Arguments for Training:**
330
+
331
+ - `{training_script}`: The training script, such as `scripts.train_stage1` or `scripts.train_stage2`.
332
+ - `--{training_script-argument-1}`: Arguments specific to the training script. Our training scripts accept one argument, `--config`, to specify the training configuration file.
333
+
334
+ For multi-node training, you need to manually run the command with different `machine_rank` on each node separately.
335
+
336
+ For more settings, refer to the [Accelerate documentation](https://huggingface.co/docs/accelerate/en/index).
337
+
338
+ ## 📅️ Roadmap
339
+
340
+ | Status | Milestone | ETA |
341
+ | :----: | :---------------------------------------------------------------------------------------------------- | :--------: |
342
+ | ✅ | **[Inference source code released on GitHub](https://github.com/fudan-generative-vision/hallo)**      | 2024-06-15 |
343
+ | ✅ | **[Pretrained models on Huggingface](https://huggingface.co/fudan-generative-ai/hallo)** | 2024-06-15 |
344
+ | ✅ | **[Releasing data preparation and training scripts](#training)** | 2024-06-28 |
345
+ | 🚀 | **Improving the model's performance on Mandarin Chinese**                                             | TBD |
346
+
347
+ <details>
348
+ <summary>Other Enhancements</summary>
349
+
350
+ - [x] Enhancement: Test and ensure compatibility with Windows operating system. [#39](https://github.com/fudan-generative-vision/hallo/issues/39)
351
+ - [x] Bug: Output video may lose several frames. [#41](https://github.com/fudan-generative-vision/hallo/issues/41)
352
+ - [ ] Bug: Sound volume affecting inference results (audio normalization).
353
+ - [ ] ~~Enhancement: Inference code logic optimization~~. This solution doesn't show significant performance improvements. Trying other approaches.
354
+
355
+ </details>
356
+
357
+
358
+ ## 📝 Citation
359
+
360
+ If you find our work useful for your research, please consider citing the paper:
361
+
362
+ ```
363
+ @misc{xu2024hallo,
364
+ title={Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation},
365
+ author={Mingwang Xu and Hui Li and Qingkun Su and Hanlin Shang and Liwei Zhang and Ce Liu and Jingdong Wang and Yao Yao and Siyu Zhu},
366
+ year={2024},
367
+ eprint={2406.08801},
368
+ archivePrefix={arXiv},
369
+ primaryClass={cs.CV}
370
+ }
371
+ ```
372
+
373
+ ## 🌟 Opportunities Available
374
+
375
+ Multiple research positions are open at the **Generative Vision Lab, Fudan University**! These include:
376
+
377
+ - Research assistant
378
+ - Postdoctoral researcher
379
+ - PhD candidate
380
+ - Master's students
381
+
382
+ Interested individuals are encouraged to contact us at [[email protected]](mailto:[email protected]) for further information.
383
+
384
+ ## ⚠️ Social Risks and Mitigations
385
+
386
+ The development of portrait image animation technologies driven by audio inputs poses social risks, such as the ethical implications of creating realistic portraits that could be misused for deepfakes. To mitigate these risks, it is crucial to establish ethical guidelines and responsible use practices. Privacy and consent concerns also arise from using individuals' images and voices. Addressing these involves transparent data usage policies, informed consent, and safeguarding privacy rights. By addressing these risks and implementing mitigations, the research aims to ensure the responsible and ethical development of this technology.
387
+
388
+ ## 🤗 Acknowledgements
389
+
390
+ We would like to thank the contributors to the [magic-animate](https://github.com/magic-research/magic-animate), [AnimateDiff](https://github.com/guoyww/AnimateDiff), [ultimatevocalremovergui](https://github.com/Anjok07/ultimatevocalremovergui), [AniPortrait](https://github.com/Zejun-Yang/AniPortrait) and [Moore-AnimateAnyone](https://github.com/MooreThreads/Moore-AnimateAnyone) repositories, for their open research and exploration.
391
+
392
+ If we have missed any open-source projects or related articles, please let us know and we will add them to the acknowledgements right away.
393
+
394
+ ## 👏 Community Contributors
395
+
396
+ Thank you to all the contributors who have helped to make this project better!
397
+
398
+ <a href="https://github.com/fudan-generative-vision/hallo/graphs/contributors">
399
+ <img src="https://contrib.rocks/image?repo=fudan-generative-vision/hallo" />
400
+ </a>
accelerate_config.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ compute_environment: LOCAL_MACHINE
2
+ debug: true
3
+ deepspeed_config:
4
+ deepspeed_multinode_launcher: standard
5
+ gradient_accumulation_steps: 1
6
+ offload_optimizer_device: none
7
+ offload_param_device: none
8
+ zero3_init_flag: false
9
+ zero_stage: 2
10
+ distributed_type: DEEPSPEED
11
+ downcast_bf16: "no"
12
+ main_training_function: main
13
+ mixed_precision: "fp16"
14
+ num_machines: 1
15
+ num_processes: 8
16
+ rdzv_backend: static
17
+ same_network: true
18
+ tpu_env: []
19
+ tpu_use_cluster: false
20
+ tpu_use_sudo: false
21
+ use_cpu: false
requirements.txt ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --find-links https://download.pytorch.org/whl/torch_stable.html
2
+
3
+ accelerate==0.28.0
4
+ audio-separator==0.17.2
5
+ av==12.1.0
6
+ bitsandbytes==0.43.1
7
+ decord==0.6.0
8
+ diffusers==0.27.2
9
+ einops==0.8.0
10
+ insightface==0.7.3
11
+ librosa==0.10.2.post1
12
+ mediapipe[vision]==0.10.14
13
+ mlflow==2.13.1
14
+ moviepy==1.0.3
15
+ numpy==1.26.4
16
+ omegaconf==2.3.0
17
+ onnx2torch==1.5.14
18
+ onnx==1.16.1
19
+ onnxruntime-gpu==1.18.0
20
+ opencv-contrib-python==4.9.0.80
21
+ opencv-python-headless==4.9.0.80
22
+ opencv-python==4.9.0.80
23
+ pillow==10.3.0
24
+ setuptools==70.0.0
25
+ torch==2.2.2+cu121
26
+ torchvision==0.17.2+cu121
27
+ tqdm==4.66.4
28
+ transformers==4.39.2
29
+ xformers==0.0.25.post1
30
+ isort==5.13.2
31
+ pylint==3.2.2
32
+ pre-commit==3.7.1
33
+ gradio==4.36.1
setup.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-
"""
setup.py
----
This is the main setup file for the hallo face animation project. It defines the package
metadata, required dependencies, and provides the entry point for installing the package.

Executing this file (for example through ``pip install .``) calls
``setuptools.setup`` with the keyword arguments assembled in ``setup_kwargs``.
"""

from setuptools import setup

# Python packages shipped in the distribution.
packages = [
    'hallo',
    'hallo.datasets',
    'hallo.models',
    'hallo.animate',
    'hallo.utils',
]

# The '' key applies to every package listed above; '*' is a glob over the
# files located directly inside each package directory.
package_data = {'': ['*']}

# Runtime dependencies. Some entries are pinned to an exact version (``==``),
# the others accept a bounded compatible range.
install_requires = [
    'accelerate==0.28.0',
    'audio-separator>=0.17.2,<0.18.0',
    'av==12.1.0',
    'bitsandbytes==0.43.1',
    'decord==0.6.0',
    'diffusers==0.27.2',
    'einops>=0.8.0,<0.9.0',
    'insightface>=0.7.3,<0.8.0',
    'mediapipe[vision]>=0.10.14,<0.11.0',
    'mlflow==2.13.1',
    'moviepy>=1.0.3,<2.0.0',
    'omegaconf>=2.3.0,<3.0.0',
    'opencv-python>=4.9.0.80,<5.0.0.0',
    'pillow>=10.3.0,<11.0.0',
    'torch==2.2.2',
    'torchvision==0.17.2',
    'transformers==4.39.2',
    'xformers==0.0.25.post1',
]

setup_kwargs = {
    'name': 'hallo',
    'version': '0.1.0',
    'description': (
        'Hallo: Hierarchical Audio-Driven Visual Synthesis '
        'for Portrait Image Animation'
    ),
    'long_description': '# Hallo face animation',
    # The long description above is a Markdown heading.
    'long_description_content_type': 'text/markdown',
    'author': 'Your Name',
    'author_email': '[email protected]',
    # NOTE: the maintainer fields previously held the literal string 'None',
    # which would be published verbatim as the maintainer's name and e-mail.
    # They are omitted until real values are available.
    'url': 'https://github.com/fudan-generative-vision/hallo',
    'packages': packages,
    'package_data': package_data,
    'install_requires': install_requires,
    'python_requires': '>=3.10,<4.0',
}


setup(**setup_kwargs)