방재호 committed on
Commit 5212b84 · 1 parent: 5ce6560
This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .DS_Store +0 -0
  2. CHANGELOG.md +0 -352
  3. CODEOWNERS +0 -12
  4. LICENSE.txt +0 -663
  5. README.md +0 -173
  6. __pycache__/launch.cpython-310.pyc +0 -0
  7. __pycache__/webui.cpython-310.pyc +0 -0
  8. cache.json +0 -8
  9. configs/alt-diffusion-inference.yaml +0 -72
  10. configs/instruct-pix2pix.yaml +0 -98
  11. configs/v1-inference.yaml +0 -70
  12. configs/v1-inpainting-inference.yaml +0 -70
  13. environment-wsl2.yaml +0 -11
  14. extensions-builtin/LDSR/__pycache__/ldsr_model_arch.cpython-310.pyc +0 -0
  15. extensions-builtin/LDSR/__pycache__/preload.cpython-310.pyc +0 -0
  16. extensions-builtin/LDSR/__pycache__/sd_hijack_autoencoder.cpython-310.pyc +0 -0
  17. extensions-builtin/LDSR/__pycache__/sd_hijack_ddpm_v1.cpython-310.pyc +0 -0
  18. extensions-builtin/LDSR/__pycache__/vqvae_quantize.cpython-310.pyc +0 -0
  19. extensions-builtin/LDSR/ldsr_model_arch.py +0 -250
  20. extensions-builtin/LDSR/preload.py +0 -6
  21. extensions-builtin/LDSR/scripts/__pycache__/ldsr_model.cpython-310.pyc +0 -0
  22. extensions-builtin/LDSR/scripts/ldsr_model.py +0 -68
  23. extensions-builtin/LDSR/sd_hijack_autoencoder.py +0 -293
  24. extensions-builtin/LDSR/sd_hijack_ddpm_v1.py +0 -1443
  25. extensions-builtin/LDSR/vqvae_quantize.py +0 -147
  26. extensions-builtin/Lora/__pycache__/extra_networks_lora.cpython-310.pyc +0 -0
  27. extensions-builtin/Lora/__pycache__/lora.cpython-310.pyc +0 -0
  28. extensions-builtin/Lora/__pycache__/lyco_helpers.cpython-310.pyc +0 -0
  29. extensions-builtin/Lora/__pycache__/network.cpython-310.pyc +0 -0
  30. extensions-builtin/Lora/__pycache__/network_full.cpython-310.pyc +0 -0
  31. extensions-builtin/Lora/__pycache__/network_hada.cpython-310.pyc +0 -0
  32. extensions-builtin/Lora/__pycache__/network_ia3.cpython-310.pyc +0 -0
  33. extensions-builtin/Lora/__pycache__/network_lokr.cpython-310.pyc +0 -0
  34. extensions-builtin/Lora/__pycache__/network_lora.cpython-310.pyc +0 -0
  35. extensions-builtin/Lora/__pycache__/networks.cpython-310.pyc +0 -0
  36. extensions-builtin/Lora/__pycache__/preload.cpython-310.pyc +0 -0
  37. extensions-builtin/Lora/__pycache__/ui_edit_user_metadata.cpython-310.pyc +0 -0
  38. extensions-builtin/Lora/__pycache__/ui_extra_networks_lora.cpython-310.pyc +0 -0
  39. extensions-builtin/Lora/extra_networks_lora.py +0 -59
  40. extensions-builtin/Lora/lora.py +0 -9
  41. extensions-builtin/Lora/lyco_helpers.py +0 -21
  42. extensions-builtin/Lora/network.py +0 -155
  43. extensions-builtin/Lora/network_full.py +0 -22
  44. extensions-builtin/Lora/network_hada.py +0 -55
  45. extensions-builtin/Lora/network_ia3.py +0 -30
  46. extensions-builtin/Lora/network_lokr.py +0 -64
  47. extensions-builtin/Lora/network_lora.py +0 -86
  48. extensions-builtin/Lora/networks.py +0 -468
  49. extensions-builtin/Lora/preload.py +0 -7
  50. extensions-builtin/Lora/scripts/__pycache__/lora_script.cpython-310.pyc +0 -0
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
CHANGELOG.md DELETED
@@ -1,352 +0,0 @@
- ## 1.5.1
-
- ### Minor:
- * support parsing text encoder blocks in some new LoRAs
- * delete scale checker script due to user demand
-
- ### Extensions and API:
- * add postprocess_batch_list script callback
-
- ### Bug Fixes:
- * fix TI training for SD1
- * fix reload altclip model error
- * prepend the pythonpath instead of overriding it
- * fix typo in SD_WEBUI_RESTARTING
- * if txt2img/img2img raises an exception, finally call state.end()
- * fix composable diffusion weight parsing
- * restyle Startup profile for black users
- * fix webui not launching with --nowebui
- * catch exception for non git extensions
- * fix some options missing from /sdapi/v1/options
- * fix for extension update status always saying "unknown"
- * fix display of extra network cards that have `<>` in the name
- * update lora extension to work with python 3.8
-
-
- ## 1.5.0
-
- ### Features:
- * SD XL support
- * user metadata system for custom networks
- * extended Lora metadata editor: set activation text, default weight, view tags, training info
- * Lora extension rework to include other types of networks (all that were previously handled by LyCORIS extension)
- * show github stars for extensions
- * img2img batch mode can read extra stuff from png info
- * img2img batch works with subdirectories
- * hotkeys to move prompt elements: alt+left/right
- * restyle time taken/VRAM display
- * add textual inversion hashes to infotext
- * optimization: cache git extension repo information
- * move generate button next to the generated picture for mobile clients
- * hide cards for networks of incompatible Stable Diffusion version in Lora extra networks interface
- * skip installing packages with pip if they all are already installed - startup speedup of about 2 seconds
-
- ### Minor:
- * checkbox to check/uncheck all extensions in the Installed tab
- * add gradio user to infotext and to filename patterns
- * allow gif for extra network previews
- * add options to change colors in grid
- * use natural sort for items in extra networks
- * Mac: use empty_cache() from torch 2 to clear VRAM
- * added automatic support for installing the right libraries for Navi3 (AMD)
- * add option SWIN_torch_compile to accelerate SwinIR upscale
- * suppress printing TI embedding info at start to console by default
- * speedup extra networks listing
- * added `[none]` filename token.
- * removed thumbs extra networks view mode (use settings tab to change width/height/scale to get thumbs)
- * add always_discard_next_to_last_sigma option to XYZ plot
- * automatically switch to 32-bit float VAE if the generated picture has NaNs without the need for `--no-half-vae` commandline flag.
-
- ### Extensions and API:
- * api endpoints: /sdapi/v1/server-kill, /sdapi/v1/server-restart, /sdapi/v1/server-stop
- * allow Script to have custom metaclass
- * add model exists status check /sdapi/v1/options
- * rename --add-stop-route to --api-server-stop
- * add `before_hr` script callback
- * add callback `after_extra_networks_activate`
- * disable rich exception output in console for API by default, use WEBUI_RICH_EXCEPTIONS env var to enable
- * return http 404 when thumb file not found
- * allow replacing extensions index with environment variable
-
- ### Bug Fixes:
- * fix for catch errors when retrieving extension index #11290
- * fix very slow loading speed of .safetensors files when reading from network drives
- * API cache cleanup
- * fix UnicodeEncodeError when writing to file CLIP Interrogator batch mode
- * fix warning of 'has_mps' deprecated from PyTorch
- * fix problem with extra network saving images as previews losing generation info
- * fix throwing exception when trying to resize image with I;16 mode
- * fix for #11534: canvas zoom and pan extension hijacking shortcut keys
- * fixed launch script to be runnable from any directory
- * don't add "Seed Resize: -1x-1" to API image metadata
- * correctly remove end parenthesis with ctrl+up/down
- * fixing --subpath on newer gradio version
- * fix: check fill size none zero when resize (fixes #11425)
- * use submit and blur for quick settings textbox
- * save img2img batch with images.save_image()
- * prevent running preload.py for disabled extensions
- * fix: previously, model name was added together with directory name to infotext and to [model_name] filename pattern; directory name is now not included
-
-
- ## 1.4.1
-
- ### Bug Fixes:
- * add queue lock for refresh-checkpoints
-
- ## 1.4.0
-
- ### Features:
- * zoom controls for inpainting
- * run basic torch calculation at startup in parallel to reduce the performance impact of first generation
- * option to pad prompt/neg prompt to be same length
- * remove taming_transformers dependency
- * custom k-diffusion scheduler settings
- * add an option to show selected settings in main txt2img/img2img UI
- * sysinfo tab in settings
- * infer styles from prompts when pasting params into the UI
- * an option to control the behavior of the above
-
- ### Minor:
- * bump Gradio to 3.32.0
- * bump xformers to 0.0.20
- * Add option to disable token counters
- * tooltip fixes & optimizations
- * make it possible to configure filename for the zip download
- * `[vae_filename]` pattern for filenames
- * Revert discarding penultimate sigma for DPM-Solver++(2M) SDE
- * change UI reorder setting to multiselect
- * read version info from CHANGELOG.md if git version info is not available
- * link footer API to Wiki when API is not active
- * persistent conds cache (opt-in optimization)
-
- ### Extensions:
- * After installing extensions, webui properly restarts the process rather than reloads the UI
- * Added VAE listing to web API. Via: /sdapi/v1/sd-vae
- * custom unet support
- * Add onAfterUiUpdate callback
- * refactor EmbeddingDatabase.register_embedding() to allow unregistering
- * add before_process callback for scripts
- * add ability for alwayson scripts to specify section and let user reorder those sections
-
- ### Bug Fixes:
- * Fix dragging text to prompt
- * fix incorrect quoting for infotext values with colon in them
- * fix "hires. fix" prompt sharing same labels with txt2img_prompt
- * Fix s_min_uncond default type int
- * Fix for #10643 (Inpainting mask sometimes not working)
- * fix bad styling for thumbs view in extra networks #10639
- * fix for empty list of optimizations #10605
- * small fixes to prepare_tcmalloc for Debian/Ubuntu compatibility
- * fix --ui-debug-mode exit
- * patch GitPython to not use leaky persistent processes
- * fix duplicate Cross attention optimization after UI reload
- * torch.cuda.is_available() check for SdOptimizationXformers
- * fix hires fix using wrong conds in second pass if using Loras.
- * handle exception when parsing generation parameters from png info
- * fix upcast attention dtype error
- * forcing Torch Version to 1.13.1 for RX 5000 series GPUs
- * split mask blur into X and Y components, patch Outpainting MK2 accordingly
- * don't die when a LoRA is a broken symlink
- * allow activation of Generate Forever during generation
-
-
- ## 1.3.2
-
- ### Bug Fixes:
- * fix files served out of tmp directory even if they are saved to disk
- * fix postprocessing overwriting parameters
-
- ## 1.3.1
-
- ### Features:
- * revert default cross attention optimization to Doggettx
-
- ### Bug Fixes:
- * fix bug: LoRA don't apply on dropdown list sd_lora
- * fix png info always added even if setting is not enabled
- * fix some fields not applying in xyz plot
- * fix "hires. fix" prompt sharing same labels with txt2img_prompt
- * fix lora hashes not being added properly to infotext if there is only one lora
- * fix --use-cpu failing to work properly at startup
- * make --disable-opt-split-attention command line option work again
-
- ## 1.3.0
-
- ### Features:
- * add UI to edit defaults
- * token merging (via dbolya/tomesd)
- * settings tab rework: add a lot of additional explanations and links
- * load extensions' Git metadata in parallel to loading the main program to save a ton of time during startup
- * update extensions table: show branch, show date in separate column, and show version from tags if available
- * TAESD - another option for cheap live previews
- * allow choosing sampler and prompts for second pass of hires fix - hidden by default, enabled in settings
- * calculate hashes for Lora
- * add lora hashes to infotext
- * when pasting infotext, use infotext's lora hashes to find local loras for `<lora:xxx:1>` entries whose hashes match loras the user has
- * select cross attention optimization from UI
-
- ### Minor:
- * bump Gradio to 3.31.0
- * bump PyTorch to 2.0.1 for macOS and Linux AMD
- * allow setting defaults for elements in extensions' tabs
- * allow selecting file type for live previews
- * show "Loading..." for extra networks when displaying for the first time
- * suppress ENSD infotext for samplers that don't use it
- * clientside optimizations
- * add options to show/hide hidden files and dirs in extra networks, and to not list models/files in hidden directories
- * allow whitespace in styles.csv
- * add option to reorder tabs
- * move some functionality (swap resolution and set seed to -1) to client
- * option to specify editor height for img2img
- * button to copy image resolution into img2img width/height sliders
- * switch from pyngrok to ngrok-py
- * lazy-load images in extra networks UI
- * set "Navigate image viewer with gamepad" option to false by default, by request
- * change upscalers to download models into user-specified directory (from commandline args) rather than the default models/<...>
- * allow hiding buttons in ui-config.json
-
- ### Extensions:
- * add /sdapi/v1/script-info api
- * use Ruff to lint Python code
- * use ESLint to lint JavaScript code
- * add/modify CFG callbacks for Self-Attention Guidance extension
- * add command and endpoint for graceful server stopping
- * add some locals (prompts/seeds/etc) from processing function into the Processing class as fields
- * rework quoting for infotext items that have commas in them to use JSON (should be backwards compatible except for cases where it didn't work previously)
- * add /sdapi/v1/refresh-loras api checkpoint post request
- * tests overhaul
-
- ### Bug Fixes:
- * fix an issue preventing the program from starting if the user specifies a bad Gradio theme
- * fix broken prompts from file script
- * fix symlink scanning for extra networks
- * fix --data-dir ignored when launching via webui-user.bat COMMANDLINE_ARGS
- * allow web UI to be run fully offline
- * fix inability to run with --freeze-settings
- * fix inability to merge checkpoint without adding metadata
- * fix extra networks' save preview image not adding infotext for jpeg/webm
- * remove blinking effect from text in hires fix and scale resolution preview
- * make links to `http://<...>.git` extensions work in the extension tab
- * fix bug with webui hanging at startup due to hanging git process
-
-
- ## 1.2.1
-
- ### Features:
- * add an option to always refer to LoRA by filenames
-
- ### Bug Fixes:
- * never refer to LoRA by an alias if multiple LoRAs have same alias or the alias is called none
- * fix upscalers disappearing after the user reloads UI
- * allow bf16 in safe unpickler (resolves problems with loading some LoRAs)
- * allow web UI to be run fully offline
- * fix localizations not working
- * fix error for LoRAs: `'LatentDiffusion' object has no attribute 'lora_layer_mapping'`
-
- ## 1.2.0
-
- ### Features:
- * do not wait for Stable Diffusion model to load at startup
- * add filename patterns: `[denoising]`
- * directory hiding for extra networks: dirs starting with `.` will hide their cards on extra network tabs unless specifically searched for
- * LoRA: for the `<...>` text in prompt, use name of LoRA that is in the metadata of the file, if present, instead of filename (both can be used to activate LoRA)
- * LoRA: read infotext params from kohya-ss's extension parameters if they are present and if his extension is not active
- * LoRA: fix some LoRAs not working (ones that have 3x3 convolution layer)
- * LoRA: add an option to use old method of applying LoRAs (producing same results as with kohya-ss)
- * add version to infotext, footer and console output when starting
- * add links to wiki for filename pattern settings
- * add extended info for quicksettings setting and use multiselect input instead of a text field
-
- ### Minor:
- * bump Gradio to 3.29.0
- * bump PyTorch to 2.0.1
- * `--subpath` option for gradio for use with reverse proxy
- * Linux/macOS: use existing virtualenv if already active (the VIRTUAL_ENV environment variable)
- * do not apply localizations if there are none (possible frontend optimization)
- * add extra `None` option for VAE in XYZ plot
- * print error to console when batch processing in img2img fails
- * create HTML for extra network pages only on demand
- * allow directories starting with `.` to still list their models for LoRA, checkpoints, etc
- * put infotext options into their own category in settings tab
- * do not show licenses page when user selects Show all pages in settings
-
- ### Extensions:
- * tooltip localization support
- * add API method to get LoRA models with prompt
-
- ### Bug Fixes:
- * re-add `/docs` endpoint
- * fix gamepad navigation
- * make the lightbox fullscreen image function properly
- * fix squished thumbnails in extras tab
- * keep "search" filter for extra networks when user refreshes the tab (previously it showed everything after you refreshed)
- * fix webui showing the same image if you configure the generation to always save results into same file
- * fix bug with upscalers not working properly
- * fix MPS on PyTorch 2.0.1, Intel Macs
- * make it so that custom context menu from contextMenu.js only disappears after user's click, ignoring non-user click events
- * prevent Reload UI button/link from reloading the page when it's not yet ready
- * fix prompts from file script failing to read contents from a drag/drop file
-
-
- ## 1.1.1
- ### Bug Fixes:
- * fix an error that prevents running webui on PyTorch<2.0 without --disable-safe-unpickle
-
- ## 1.1.0
- ### Features:
- * switch to PyTorch 2.0.0 (except for AMD GPUs)
- * visual improvements to custom code scripts
- * add filename patterns: `[clip_skip]`, `[hasprompt<>]`, `[batch_number]`, `[generation_number]`
- * add support for saving init images in img2img, and record their hashes in infotext for reproducibility
- * automatically select current word when adjusting weight with ctrl+up/down
- * add dropdowns for X/Y/Z plot
- * add setting: Stable Diffusion/Random number generator source: makes it possible to make images generated from a given manual seed consistent across different GPUs
- * support Gradio's theme API
- * use TCMalloc on Linux by default; possible fix for memory leaks
- * add optimization option to remove negative conditioning at low sigma values #9177
- * embed model merge metadata in .safetensors file
- * extension settings backup/restore feature #9169
- * add "resize by" and "resize to" tabs to img2img
- * add option "keep original size" to textual inversion images preprocess
- * image viewer scrolling via analog stick
- * button to restore the progress from session lost / tab reload
-
- ### Minor:
- * bump Gradio to 3.28.1
- * change "scale to" to sliders in Extras tab
- * add labels to tool buttons to make it possible to hide them
- * add tiled inference support for ScuNET
- * add branch support for extension installation
- * change Linux installation script to install into current directory rather than `/home/username`
- * sort textual inversion embeddings by name (case-insensitive)
- * allow styles.csv to be symlinked or mounted in docker
- * remove the "do not add watermark to images" option
- * make selected tab configurable with UI config
- * make the extra networks UI fixed height and scrollable
- * add `disable_tls_verify` arg for use with self-signed certs
-
- ### Extensions:
- * add reload callback
- * add `is_hr_pass` field for processing
-
- ### Bug Fixes:
- * fix broken batch image processing on 'Extras/Batch Process' tab
- * add "None" option to extra networks dropdowns
- * fix FileExistsError for CLIP Interrogator
- * fix /sdapi/v1/txt2img endpoint not working on Linux #9319
- * fix disappearing live previews and progressbar during slow tasks
- * fix fullscreen image view not working properly in some cases
- * prevent alwayson_scripts args param resizing script_arg list when they are inserted in it
- * fix prompt schedule for second order samplers
- * fix image mask/composite for weird resolutions #9628
- * use correct images for previews when using AND (see #9491)
- * one broken image in img2img batch won't stop all processing
- * fix image orientation bug in train/preprocess
- * fix Ngrok recreating tunnels every reload
- * fix `--realesrgan-models-path` and `--ldsr-models-path` not working
- * fix `--skip-install` not working
- * use SAMPLE file format in Outpainting Mk2 & Poorman
- * do not fail all LoRAs if some have failed to load when making a picture
-
- ## 1.0.0
- * everything
 
CODEOWNERS DELETED
@@ -1,12 +0,0 @@
- * @AUTOMATIC1111
-
- # if you were managing a localization and were removed from this file, this is because
- # the intended way to do localizations now is via extensions. See:
- # https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Developing-extensions
- # Make a repo with your localization and since you are still listed as a collaborator
- # you can add it to the wiki page yourself. This change is because some people complained
- # the git commit log is cluttered with things unrelated to almost everyone and
- # because I believe this is the best overall for the project to handle localizations almost
- # entirely without my oversight.
-
-
 
LICENSE.txt DELETED
@@ -1,663 +0,0 @@
- GNU AFFERO GENERAL PUBLIC LICENSE
- Version 3, 19 November 2007
-
- Copyright (c) 2023 AUTOMATIC1111
-
- Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The GNU Affero General Public License is a free, copyleft license for
- software and other kinds of works, specifically designed to ensure
- cooperation with the community in the case of network server software.
-
- The licenses for most software and other practical works are designed
- to take away your freedom to share and change the works. By contrast,
- our General Public Licenses are intended to guarantee your freedom to
- share and change all versions of a program--to make sure it remains free
- software for all its users.
-
- When we speak of free software, we are referring to freedom, not
- price. Our General Public Licenses are designed to make sure that you
- have the freedom to distribute copies of free software (and charge for
- them if you wish), that you receive source code or can get it if you
- want it, that you can change the software or use pieces of it in new
- free programs, and that you know you can do these things.
-
- Developers that use our General Public Licenses protect your rights
- with two steps: (1) assert copyright on the software, and (2) offer
- you this License which gives you legal permission to copy, distribute
- and/or modify the software.
-
- A secondary benefit of defending all users' freedom is that
- improvements made in alternate versions of the program, if they
- receive widespread use, become available for other developers to
- incorporate. Many developers of free software are heartened and
- encouraged by the resulting cooperation. However, in the case of
- software used on network servers, this result may fail to come about.
- The GNU General Public License permits making a modified version and
- letting the public access it on a server without ever releasing its
- source code to the public.
-
- The GNU Affero General Public License is designed specifically to
- ensure that, in such cases, the modified source code becomes available
- to the community. It requires the operator of a network server to
- provide the source code of the modified version running there to the
- users of that server. Therefore, public use of a modified version, on
- a publicly accessible server, gives the public access to the source
- code of the modified version.
-
- An older license, called the Affero General Public License and
- published by Affero, was designed to accomplish similar goals. This is
- a different license, not a version of the Affero GPL, but Affero has
- released a new version of the Affero GPL which permits relicensing under
- this license.
-
- The precise terms and conditions for copying, distribution and
- modification follow.
-
- TERMS AND CONDITIONS
-
- 0. Definitions.
-
- "This License" refers to version 3 of the GNU Affero General Public License.
-
- "Copyright" also means copyright-like laws that apply to other kinds of
- works, such as semiconductor masks.
-
- "The Program" refers to any copyrightable work licensed under this
- License. Each licensee is addressed as "you". "Licensees" and
- "recipients" may be individuals or organizations.
-
- To "modify" a work means to copy from or adapt all or part of the work
- in a fashion requiring copyright permission, other than the making of an
- exact copy. The resulting work is called a "modified version" of the
- earlier work or a work "based on" the earlier work.
-
- A "covered work" means either the unmodified Program or a work based
- on the Program.
-
- To "propagate" a work means to do anything with it that, without
- permission, would make you directly or secondarily liable for
- infringement under applicable copyright law, except executing it on a
- computer or modifying a private copy. Propagation includes copying,
- distribution (with or without modification), making available to the
- public, and in some countries other activities as well.
-
- To "convey" a work means any kind of propagation that enables other
- parties to make or receive copies. Mere interaction with a user through
- a computer network, with no transfer of a copy, is not conveying.
-
- An interactive user interface displays "Appropriate Legal Notices"
- to the extent that it includes a convenient and prominently visible
- feature that (1) displays an appropriate copyright notice, and (2)
- tells the user that there is no warranty for the work (except to the
- extent that warranties are provided), that licensees may convey the
- work under this License, and how to view a copy of this License. If
- the interface presents a list of user commands or options, such as a
- menu, a prominent item in the list meets this criterion.
-
- 1. Source Code.
-
- The "source code" for a work means the preferred form of the work
- for making modifications to it. "Object code" means any non-source
- form of a work.
-
- A "Standard Interface" means an interface that either is an official
- standard defined by a recognized standards body, or, in the case of
- interfaces specified for a particular programming language, one that
- is widely used among developers working in that language.
-
- The "System Libraries" of an executable work include anything, other
- than the work as a whole, that (a) is included in the normal form of
- packaging a Major Component, but which is not part of that Major
- Component, and (b) serves only to enable use of the work with that
- Major Component, or to implement a Standard Interface for which an
- implementation is available to the public in source code form. A
- "Major Component", in this context, means a major essential component
- (kernel, window system, and so on) of the specific operating system
- (if any) on which the executable work runs, or a compiler used to
- produce the work, or an object code interpreter used to run it.
-
- The "Corresponding Source" for a work in object code form means all
- the source code needed to generate, install, and (for an executable
- work) run the object code and to modify the work, including scripts to
- control those activities. However, it does not include the work's
- System Libraries, or general-purpose tools or generally available free
- programs which are used unmodified in performing those activities but
- which are not part of the work. For example, Corresponding Source
- includes interface definition files associated with source files for
- the work, and the source code for shared libraries and dynamically
- linked subprograms that the work is specifically designed to require,
- such as by intimate data communication or control flow between those
- subprograms and other parts of the work.
-
- The Corresponding Source need not include anything that users
- can regenerate automatically from other parts of the Corresponding
- Source.
-
- The Corresponding Source for a work in source code form is that
- same work.
-
- 2. Basic Permissions.
-
- All rights granted under this License are granted for the term of
- copyright on the Program, and are irrevocable provided the stated
- conditions are met. This License explicitly affirms your unlimited
- permission to run the unmodified Program. The output from running a
- covered work is covered by this License only if the output, given its
- content, constitutes a covered work. This License acknowledges your
- rights of fair use or other equivalent, as provided by copyright law.
-
- You may make, run and propagate covered works that you do not
- convey, without conditions so long as your license otherwise remains
- in force. You may convey covered works to others for the sole purpose
- of having them make modifications exclusively for you, or provide you
- with facilities for running those works, provided that you comply with
- the terms of this License in conveying all material for which you do
- not control copyright. Those thus making or running the covered works
- for you must do so exclusively on your behalf, under your direction
- and control, on terms that prohibit them from making any copies of
- your copyrighted material outside their relationship with you.
-
- Conveying under any other circumstances is permitted solely under
- the conditions stated below. Sublicensing is not allowed; section 10
- makes it unnecessary.
-
- 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
- No covered work shall be deemed part of an effective technological
- measure under any applicable law fulfilling obligations under article
- 11 of the WIPO copyright treaty adopted on 20 December 1996, or
- similar laws prohibiting or restricting circumvention of such
- measures.
-
- When you convey a covered work, you waive any legal power to forbid
- circumvention of technological measures to the extent such circumvention
- is effected by exercising rights under this License with respect to
- the covered work, and you disclaim any intention to limit operation or
- modification of the work as a means of enforcing, against the work's
- users, your or third parties' legal rights to forbid circumvention of
- technological measures.
-
- 4. Conveying Verbatim Copies.
-
- You may convey verbatim copies of the Program's source code as you
- receive it, in any medium, provided that you conspicuously and
- appropriately publish on each copy an appropriate copyright notice;
- keep intact all notices stating that this License and any
- non-permissive terms added in accord with section 7 apply to the code;
- keep intact all notices of the absence of any warranty; and give all
- recipients a copy of this License along with the Program.
-
- You may charge any price or no price for each copy that you convey,
- and you may offer support or warranty protection for a fee.
-
- 5. Conveying Modified Source Versions.
-
- You may convey a work based on the Program, or the modifications to
- produce it from the Program, in the form of source code under the
- terms of section 4, provided that you also meet all of these conditions:
-
- a) The work must carry prominent notices stating that you modified
- it, and giving a relevant date.
-
- b) The work must carry prominent notices stating that it is
- released under this License and any conditions added under section
- 7. This requirement modifies the requirement in section 4 to
- "keep intact all notices".
-
- c) You must license the entire work, as a whole, under this
- License to anyone who comes into possession of a copy. This
- License will therefore apply, along with any applicable section 7
- additional terms, to the whole of the work, and all its parts,
- regardless of how they are packaged. This License gives no
- permission to license the work in any other way, but it does not
- invalidate such permission if you have separately received it.
-
- d) If the work has interactive user interfaces, each must display
- Appropriate Legal Notices; however, if the Program has interactive
- interfaces that do not display Appropriate Legal Notices, your
- work need not make them do so.
-
- A compilation of a covered work with other separate and independent
- works, which are not by their nature extensions of the covered work,
- and which are not combined with it such as to form a larger program,
- in or on a volume of a storage or distribution medium, is called an
- "aggregate" if the compilation and its resulting copyright are not
- used to limit the access or legal rights of the compilation's users
- beyond what the individual works permit. Inclusion of a covered work
- in an aggregate does not cause this License to apply to the other
- parts of the aggregate.
-
- 6. Conveying Non-Source Forms.
-
- You may convey a covered work in object code form under the terms
- of sections 4 and 5, provided that you also convey the
- machine-readable Corresponding Source under the terms of this License,
- in one of these ways:
-
- a) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by the
- Corresponding Source fixed on a durable physical medium
- customarily used for software interchange.
-
- b) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by a
- written offer, valid for at least three years and valid for as
- long as you offer spare parts or customer support for that product
- model, to give anyone who possesses the object code either (1) a
- copy of the Corresponding Source for all the software in the
- product that is covered by this License, on a durable physical
- medium customarily used for software interchange, for a price no
- more than your reasonable cost of physically performing this
- conveying of source, or (2) access to copy the
- Corresponding Source from a network server at no charge.
-
- c) Convey individual copies of the object code with a copy of the
- written offer to provide the Corresponding Source. This
- alternative is allowed only occasionally and noncommercially, and
- only if you received the object code with such an offer, in accord
- with subsection 6b.
-
- d) Convey the object code by offering access from a designated
- place (gratis or for a charge), and offer equivalent access to the
- Corresponding Source in the same way through the same place at no
- further charge. You need not require recipients to copy the
- Corresponding Source along with the object code. If the place to
- copy the object code is a network server, the Corresponding Source
- may be on a different server (operated by you or a third party)
- that supports equivalent copying facilities, provided you maintain
- clear directions next to the object code saying where to find the
- Corresponding Source. Regardless of what server hosts the
- Corresponding Source, you remain obligated to ensure that it is
- available for as long as needed to satisfy these requirements.
-
- e) Convey the object code using peer-to-peer transmission, provided
- you inform other peers where the object code and Corresponding
- Source of the work are being offered to the general public at no
- charge under subsection 6d.
-
- A separable portion of the object code, whose source code is excluded
- from the Corresponding Source as a System Library, need not be
- included in conveying the object code work.
-
- A "User Product" is either (1) a "consumer product", which means any
- tangible personal property which is normally used for personal, family,
- or household purposes, or (2) anything designed or sold for incorporation
- into a dwelling. In determining whether a product is a consumer product,
- doubtful cases shall be resolved in favor of coverage. For a particular
- product received by a particular user, "normally used" refers to a
- typical or common use of that class of product, regardless of the status
- of the particular user or of the way in which the particular user
- actually uses, or expects or is expected to use, the product. A product
- is a consumer product regardless of whether the product has substantial
- commercial, industrial or non-consumer uses, unless such uses represent
- the only significant mode of use of the product.
-
- "Installation Information" for a User Product means any methods,
- procedures, authorization keys, or other information required to install
- and execute modified versions of a covered work in that User Product from
- a modified version of its Corresponding Source. The information must
- suffice to ensure that the continued functioning of the modified object
- code is in no case prevented or interfered with solely because
- modification has been made.
-
- If you convey an object code work under this section in, or with, or
- specifically for use in, a User Product, and the conveying occurs as
- part of a transaction in which the right of possession and use of the
- User Product is transferred to the recipient in perpetuity or for a
- fixed term (regardless of how the transaction is characterized), the
- Corresponding Source conveyed under this section must be accompanied
- by the Installation Information. But this requirement does not apply
- if neither you nor any third party retains the ability to install
- modified object code on the User Product (for example, the work has
- been installed in ROM).
-
- The requirement to provide Installation Information does not include a
- requirement to continue to provide support service, warranty, or updates
- for a work that has been modified or installed by the recipient, or for
- the User Product in which it has been modified or installed. Access to a
- network may be denied when the modification itself materially and
- adversely affects the operation of the network or violates the rules and
- protocols for communication across the network.
-
- Corresponding Source conveyed, and Installation Information provided,
- in accord with this section must be in a format that is publicly
- documented (and with an implementation available to the public in
- source code form), and must require no special password or key for
- unpacking, reading or copying.
-
- 7. Additional Terms.
-
- "Additional permissions" are terms that supplement the terms of this
- License by making exceptions from one or more of its conditions.
- Additional permissions that are applicable to the entire Program shall
- be treated as though they were included in this License, to the extent
- that they are valid under applicable law. If additional permissions
- apply only to part of the Program, that part may be used separately
- under those permissions, but the entire Program remains governed by
- this License without regard to the additional permissions.
-
- When you convey a copy of a covered work, you may at your option
- remove any additional permissions from that copy, or from any part of
- it. (Additional permissions may be written to require their own
- removal in certain cases when you modify the work.) You may place
- additional permissions on material, added by you to a covered work,
- for which you have or can give appropriate copyright permission.
-
- Notwithstanding any other provision of this License, for material you
- add to a covered work, you may (if authorized by the copyright holders of
- that material) supplement the terms of this License with terms:
-
- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-
- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-
- c) Prohibiting misrepresentation of the origin of that material, or
- requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-
- d) Limiting the use for publicity purposes of names of licensors or
- authors of the material; or
-
- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-
- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions of
- it) with contractual assumptions of liability to the recipient, for
- any liability that these contractual assumptions directly impose on
- those licensors and authors.
-
- All other non-permissive additional terms are considered "further
- restrictions" within the meaning of section 10. If the Program as you
- received it, or any part of it, contains a notice stating that it is
- governed by this License along with a term that is a further
- restriction, you may remove that term. If a license document contains
- a further restriction but permits relicensing or conveying under this
- License, you may add to a covered work material governed by the terms
- of that license document, provided that the further restriction does
- not survive such relicensing or conveying.
-
- If you add terms to a covered work in accord with this section, you
- must place, in the relevant source files, a statement of the
- additional terms that apply to those files, or a notice indicating
- where to find the applicable terms.
-
- Additional terms, permissive or non-permissive, may be stated in the
- form of a separately written license, or stated as exceptions;
- the above requirements apply either way.
-
- 8. Termination.
-
- You may not propagate or modify a covered work except as expressly
- provided under this License. Any attempt otherwise to propagate or
- modify it is void, and will automatically terminate your rights under
- this License (including any patent licenses granted under the third
- paragraph of section 11).
-
- However, if you cease all violation of this License, then your
- license from a particular copyright holder is reinstated (a)
- provisionally, unless and until the copyright holder explicitly and
- finally terminates your license, and (b) permanently, if the copyright
- holder fails to notify you of the violation by some reasonable means
- prior to 60 days after the cessation.
-
- Moreover, your license from a particular copyright holder is
- reinstated permanently if the copyright holder notifies you of the
- violation by some reasonable means, this is the first time you have
- received notice of violation of this License (for any work) from that
- copyright holder, and you cure the violation prior to 30 days after
- your receipt of the notice.
-
- Termination of your rights under this section does not terminate the
- licenses of parties who have received copies or rights from you under
- this License. If your rights have been terminated and not permanently
- reinstated, you do not qualify to receive new licenses for the same
- material under section 10.
-
- 9. Acceptance Not Required for Having Copies.
-
- You are not required to accept this License in order to receive or
- run a copy of the Program. Ancillary propagation of a covered work
- occurring solely as a consequence of using peer-to-peer transmission
- to receive a copy likewise does not require acceptance. However,
- nothing other than this License grants you permission to propagate or
- modify any covered work. These actions infringe copyright if you do
- not accept this License. Therefore, by modifying or propagating a
- covered work, you indicate your acceptance of this License to do so.
-
- 10. Automatic Licensing of Downstream Recipients.
-
- Each time you convey a covered work, the recipient automatically
- receives a license from the original licensors, to run, modify and
- propagate that work, subject to this License. You are not responsible
- for enforcing compliance by third parties with this License.
-
- An "entity transaction" is a transaction transferring control of an
- organization, or substantially all assets of one, or subdividing an
- organization, or merging organizations. If propagation of a covered
- work results from an entity transaction, each party to that
- transaction who receives a copy of the work also receives whatever
- licenses to the work the party's predecessor in interest had or could
- give under the previous paragraph, plus a right to possession of the
- Corresponding Source of the work from the predecessor in interest, if
- the predecessor has it or can get it with reasonable efforts.
-
- You may not impose any further restrictions on the exercise of the
- rights granted or affirmed under this License. For example, you may
- not impose a license fee, royalty, or other charge for exercise of
- rights granted under this License, and you may not initiate litigation
- (including a cross-claim or counterclaim in a lawsuit) alleging that
- any patent claim is infringed by making, using, selling, offering for
- sale, or importing the Program or any portion of it.
-
- 11. Patents.
-
- A "contributor" is a copyright holder who authorizes use under this
- License of the Program or a work on which the Program is based. The
- work thus licensed is called the contributor's "contributor version".
-
- A contributor's "essential patent claims" are all patent claims
- owned or controlled by the contributor, whether already acquired or
- hereafter acquired, that would be infringed by some manner, permitted
- by this License, of making, using, or selling its contributor version,
- but do not include claims that would be infringed only as a
- consequence of further modification of the contributor version. For
- purposes of this definition, "control" includes the right to grant
- patent sublicenses in a manner consistent with the requirements of
- this License.
-
- Each contributor grants you a non-exclusive, worldwide, royalty-free
- patent license under the contributor's essential patent claims, to
- make, use, sell, offer for sale, import and otherwise run, modify and
- propagate the contents of its contributor version.
-
- In the following three paragraphs, a "patent license" is any express
- agreement or commitment, however denominated, not to enforce a patent
- (such as an express permission to practice a patent or covenant not to
- sue for patent infringement). To "grant" such a patent license to a
- party means to make such an agreement or commitment not to enforce a
- patent against the party.
-
- If you convey a covered work, knowingly relying on a patent license,
- and the Corresponding Source of the work is not available for anyone
- to copy, free of charge and under the terms of this License, through a
- publicly available network server or other readily accessible means,
- then you must either (1) cause the Corresponding Source to be so
- available, or (2) arrange to deprive yourself of the benefit of the
- patent license for this particular work, or (3) arrange, in a manner
- consistent with the requirements of this License, to extend the patent
- license to downstream recipients. "Knowingly relying" means you have
- actual knowledge that, but for the patent license, your conveying the
- covered work in a country, or your recipient's use of the covered work
- in a country, would infringe one or more identifiable patents in that
- country that you have reason to believe are valid.
-
- If, pursuant to or in connection with a single transaction or
- arrangement, you convey, or propagate by procuring conveyance of, a
- covered work, and grant a patent license to some of the parties
- receiving the covered work authorizing them to use, propagate, modify
- or convey a specific copy of the covered work, then the patent license
- you grant is automatically extended to all recipients of the covered
- work and works based on it.
-
- A patent license is "discriminatory" if it does not include within
- the scope of its coverage, prohibits the exercise of, or is
- conditioned on the non-exercise of one or more of the rights that are
- specifically granted under this License. You may not convey a covered
- work if you are a party to an arrangement with a third party that is
- in the business of distributing software, under which you make payment
- to the third party based on the extent of your activity of conveying
- the work, and under which the third party grants, to any of the
- parties who would receive the covered work from you, a discriminatory
- patent license (a) in connection with copies of the covered work
- conveyed by you (or copies made from those copies), or (b) primarily
- for and in connection with specific products or compilations that
- contain the covered work, unless you entered into that arrangement,
- or that patent license was granted, prior to 28 March 2007.
-
- Nothing in this License shall be construed as excluding or limiting
- any implied license or other defenses to infringement that may
- otherwise be available to you under applicable patent law.
-
- 12. No Surrender of Others' Freedom.
-
- If conditions are imposed on you (whether by court order, agreement or
- otherwise) that contradict the conditions of this License, they do not
- excuse you from the conditions of this License. If you cannot convey a
- covered work so as to satisfy simultaneously your obligations under this
- License and any other pertinent obligations, then as a consequence you may
- not convey it at all. For example, if you agree to terms that obligate you
- to collect a royalty for further conveying from those to whom you convey
- the Program, the only way you could satisfy both those terms and this
- License would be to refrain entirely from conveying the Program.
-
- 13. Remote Network Interaction; Use with the GNU General Public License.
-
- Notwithstanding any other provision of this License, if you modify the
- Program, your modified version must prominently offer all users
- interacting with it remotely through a computer network (if your version
- supports such interaction) an opportunity to receive the Corresponding
- Source of your version by providing access to the Corresponding Source
- from a network server at no charge, through some standard or customary
- means of facilitating copying of software. This Corresponding Source
- shall include the Corresponding Source for any work covered by version 3
- of the GNU General Public License that is incorporated pursuant to the
- following paragraph.
-
- Notwithstanding any other provision of this License, you have
- permission to link or combine any covered work with a work licensed
- under version 3 of the GNU General Public License into a single
- combined work, and to convey the resulting work. The terms of this
- License will continue to apply to the part which is the covered work,
- but the work with which it is combined will remain governed by version
- 3 of the GNU General Public License.
-
- 14. Revised Versions of this License.
-
- The Free Software Foundation may publish revised and/or new versions of
- the GNU Affero General Public License from time to time. Such new versions
- will be similar in spirit to the present version, but may differ in detail to
- address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
- Program specifies that a certain numbered version of the GNU Affero General
- Public License "or any later version" applies to it, you have the
- option of following the terms and conditions either of that numbered
- version or of any later version published by the Free Software
- Foundation. If the Program does not specify a version number of the
- GNU Affero General Public License, you may choose any version ever published
- by the Free Software Foundation.
-
- If the Program specifies that a proxy can decide which future
- versions of the GNU Affero General Public License can be used, that proxy's
- public statement of acceptance of a version permanently authorizes you
- to choose that version for the Program.
-
- Later license versions may give you additional or different
- permissions. However, no additional obligations are imposed on any
- author or copyright holder as a result of your choosing to follow a
- later version.
-
- 15. Disclaimer of Warranty.
-
- THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
- APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
- HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
- OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
- THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
- IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
- ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. Limitation of Liability.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
- WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
- THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
- GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
- USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
- DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
- PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
- EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
- SUCH DAMAGES.
-
- 17. Interpretation of Sections 15 and 16.
-
- If the disclaimer of warranty and limitation of liability provided
- above cannot be given local legal effect according to their terms,
- reviewing courts shall apply local law that most closely approximates
- an absolute waiver of all civil liability in connection with the
- Program, unless a warranty or assumption of liability accompanies a
- copy of the Program in return for a fee.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
- possible use to the public, the best way to achieve this is to make it
- free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
- to attach them to the start of each source file to most effectively
- state the exclusion of warranty; and each file should have at least
- the "copyright" line and a pointer to where the full notice is found.
-
- <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>.
-
- Also add information on how to contact you by electronic and paper mail.
-
- If your software can interact with users remotely through a computer
- network, you should also make sure that it provides a way for users to
- get its source. For example, if your program is a web application, its
- interface could display a "Source" link that leads users to an archive
- of the code. There are many ways you could offer source, and different
- solutions will be better for different programs; see section 13 for the
- specific requirements.
-
- You should also get your employer (if you work as a programmer) or school,
661
- if any, to sign a "copyright disclaimer" for the program, if necessary.
662
- For more information on this, and how to apply and follow the GNU AGPL, see
663
- <https://www.gnu.org/licenses/>.
 
README.md DELETED
@@ -1,173 +0,0 @@
1
- # Stable Diffusion web UI
2
- A browser interface based on the Gradio library for Stable Diffusion.
3
-
4
- ![](screenshot.png)
5
-
6
- ## Features
7
- [Detailed feature showcase with images](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features):
8
- - Original txt2img and img2img modes
9
- - One click install and run script (but you still must install python and git)
10
- - Outpainting
11
- - Inpainting
12
- - Color Sketch
13
- - Prompt Matrix
14
- - Stable Diffusion Upscale
15
- - Attention, specify parts of text that the model should pay more attention to
16
- - a man in a `((tuxedo))` - will pay more attention to tuxedo
17
- - a man in a `(tuxedo:1.21)` - alternative syntax
18
- select text and press `Ctrl+Up` or `Ctrl+Down` (or `Command+Up` or `Command+Down` if you're on macOS) to automatically adjust attention to selected text (code contributed by anonymous user)
19
- - Loopback, run img2img processing multiple times
20
- X/Y/Z plot, a way to draw a 3-dimensional plot of images with different parameters
21
- - Textual Inversion
22
- - have as many embeddings as you want and use any names you like for them
23
- - use multiple embeddings with different numbers of vectors per token
24
- - works with half precision floating point numbers
25
- - train embeddings on 8GB (also reports of 6GB working)
26
- - Extras tab with:
27
- - GFPGAN, neural network that fixes faces
28
- - CodeFormer, face restoration tool as an alternative to GFPGAN
29
- - RealESRGAN, neural network upscaler
30
- - ESRGAN, neural network upscaler with a lot of third party models
31
- - SwinIR and Swin2SR ([see here](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/2092)), neural network upscalers
32
- - LDSR, Latent diffusion super resolution upscaling
33
- - Resizing aspect ratio options
34
- - Sampling method selection
35
- - Adjust sampler eta values (noise multiplier)
36
- - More advanced noise setting options
37
- - Interrupt processing at any time
38
- - 4GB video card support (also reports of 2GB working)
39
- - Correct seeds for batches
40
- - Live prompt token length validation
41
- - Generation parameters
42
- - parameters you used to generate images are saved with that image
43
- - in PNG chunks for PNG, in EXIF for JPEG
44
- - can drag the image to PNG info tab to restore generation parameters and automatically copy them into UI
45
- - can be disabled in settings
46
- - drag and drop an image/text-parameters to promptbox
47
- - Read Generation Parameters Button, loads parameters in promptbox to UI
48
- - Settings page
49
- - Running arbitrary python code from UI (must run with `--allow-code` to enable)
50
- - Mouseover hints for most UI elements
51
- Possible to change defaults/min/max/step values for UI elements via text config
52
- - Tiling support, a checkbox to create images that can be tiled like textures
53
- - Progress bar and live image generation preview
54
- Can use a separate neural network to produce previews with almost no VRAM or compute requirement
55
- Negative prompt, an extra text field that allows you to list what you don't want to see in the generated image
56
- Styles, a way to save parts of the prompt and easily apply them via dropdown later
57
- Variations, a way to generate the same image but with tiny differences
58
- Seed resizing, a way to generate the same image but at slightly different resolution
59
- - CLIP interrogator, a button that tries to guess prompt from an image
60
- Prompt Editing, a way to change the prompt mid-generation, say to start making a watermelon and switch to an anime girl midway
61
- - Batch Processing, process a group of files using img2img
62
- - Img2img Alternative, reverse Euler method of cross attention control
63
- - Highres Fix, a convenience option to produce high resolution pictures in one click without usual distortions
64
- - Reloading checkpoints on the fly
65
- - Checkpoint Merger, a tab that allows you to merge up to 3 checkpoints into one
66
- - [Custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Custom-Scripts) with many extensions from community
67
- - [Composable-Diffusion](https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/), a way to use multiple prompts at once
68
- - separate prompts using uppercase `AND`
69
- - also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2`
70
- - No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
71
- - DeepDanbooru integration, creates danbooru style tags for anime prompts
72
- - [xformers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), major speed increase for select cards: (add `--xformers` to commandline args)
73
- - via extension: [History tab](https://github.com/yfszzx/stable-diffusion-webui-images-browser): view, direct and delete images conveniently within the UI
74
- - Generate forever option
75
- - Training tab
76
- - hypernetworks and embeddings options
77
- - Preprocessing images: cropping, mirroring, autotagging using BLIP or deepdanbooru (for anime)
78
- - Clip skip
79
- - Hypernetworks
80
- - Loras (same as Hypernetworks but more pretty)
81
- A separate UI where you can choose, with preview, which embeddings, hypernetworks or Loras to add to your prompt
82
- - Can select to load a different VAE from settings screen
83
- - Estimated completion time in progress bar
84
- - API
85
- - Support for dedicated [inpainting model](https://github.com/runwayml/stable-diffusion#inpainting-with-stable-diffusion) by RunwayML
86
- - via extension: [Aesthetic Gradients](https://github.com/AUTOMATIC1111/stable-diffusion-webui-aesthetic-gradients), a way to generate images with a specific aesthetic by using clip images embeds (implementation of [https://github.com/vicgalle/stable-diffusion-aesthetic-gradients](https://github.com/vicgalle/stable-diffusion-aesthetic-gradients))
87
- - [Stable Diffusion 2.0](https://github.com/Stability-AI/stablediffusion) support - see [wiki](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#stable-diffusion-20) for instructions
88
- - [Alt-Diffusion](https://arxiv.org/abs/2211.06679) support - see [wiki](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#alt-diffusion) for instructions
89
- - Now without any bad letters!
90
- - Load checkpoints in safetensors format
91
- Eased resolution restriction: the generated image's dimensions must be multiples of 8 rather than 64
92
- - Now with a license!
93
- - Reorder elements in the UI from settings screen
94
-
95
- ## Installation and Running
96
- Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for both [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended) and [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs.
97
-
98
- Alternatively, use online services (like Google Colab):
99
-
100
- - [List of Online Services](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Online-Services)
101
-
102
- ### Installation on Windows 10/11 with NVidia-GPUs using release package
103
- 1. Download `sd.webui.zip` from [v1.0.0-pre](https://github.com/AUTOMATIC1111/stable-diffusion-webui/releases/tag/v1.0.0-pre) and extract its contents.
104
- 2. Run `update.bat`.
105
- 3. Run `run.bat`.
106
- > For more details see [Install-and-Run-on-NVidia-GPUs](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs)
107
-
108
- ### Automatic Installation on Windows
109
- 1. Install [Python 3.10.6](https://www.python.org/downloads/release/python-3106/) (newer versions of Python do not support torch), checking "Add Python to PATH".
110
- 2. Install [git](https://git-scm.com/download/win).
111
- 3. Download the stable-diffusion-webui repository, for example by running `git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git`.
112
- 4. Run `webui-user.bat` from Windows Explorer as normal, non-administrator, user.
113
-
114
- ### Automatic Installation on Linux
115
- 1. Install the dependencies:
116
- ```bash
117
- # Debian-based:
118
- sudo apt install wget git python3 python3-venv
119
- # Red Hat-based:
120
- sudo dnf install wget git python3
121
- # Arch-based:
122
- sudo pacman -S wget git python3
123
- ```
124
- 2. Navigate to the directory you would like the webui to be installed in and execute the following command:
125
- ```bash
126
- bash <(wget -qO- https://raw.githubusercontent.com/AUTOMATIC1111/stable-diffusion-webui/master/webui.sh)
127
- ```
128
- 3. Run `webui.sh`.
129
- 4. Check `webui-user.sh` for options.
130
- ### Installation on Apple Silicon
131
-
132
- Find the instructions [here](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Installation-on-Apple-Silicon).
133
-
134
- ## Contributing
135
- Here's how to add code to this repo: [Contributing](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Contributing)
136
-
137
- ## Documentation
138
-
139
- The documentation was moved from this README over to the project's [wiki](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki).
140
-
141
- For the purposes of getting Google and other search engines to crawl the wiki, here's a link to the (not for humans) [crawlable wiki](https://github-wiki-see.page/m/AUTOMATIC1111/stable-diffusion-webui/wiki).
142
-
143
- ## Credits
144
- Licenses for borrowed code can be found in `Settings -> Licenses` screen, and also in `html/licenses.html` file.
145
-
146
- - Stable Diffusion - https://github.com/CompVis/stable-diffusion, https://github.com/CompVis/taming-transformers
147
- - k-diffusion - https://github.com/crowsonkb/k-diffusion.git
148
- - GFPGAN - https://github.com/TencentARC/GFPGAN.git
149
- - CodeFormer - https://github.com/sczhou/CodeFormer
150
- - ESRGAN - https://github.com/xinntao/ESRGAN
151
- - SwinIR - https://github.com/JingyunLiang/SwinIR
152
- - Swin2SR - https://github.com/mv-lab/swin2sr
153
- - LDSR - https://github.com/Hafiidz/latent-diffusion
154
- - MiDaS - https://github.com/isl-org/MiDaS
155
- - Ideas for optimizations - https://github.com/basujindal/stable-diffusion
156
- - Cross Attention layer optimization - Doggettx - https://github.com/Doggettx/stable-diffusion, original idea for prompt editing.
157
- - Cross Attention layer optimization - InvokeAI, lstein - https://github.com/invoke-ai/InvokeAI (originally http://github.com/lstein/stable-diffusion)
158
- - Sub-quadratic Cross Attention layer optimization - Alex Birch (https://github.com/Birch-san/diffusers/pull/1), Amin Rezaei (https://github.com/AminRezaei0x443/memory-efficient-attention)
159
- - Textual Inversion - Rinon Gal - https://github.com/rinongal/textual_inversion (we're not using his code, but we are using his ideas).
160
- - Idea for SD upscale - https://github.com/jquesnelle/txt2imghd
161
- - Noise generation for outpainting mk2 - https://github.com/parlance-zz/g-diffuser-bot
162
- - CLIP interrogator idea and borrowing some code - https://github.com/pharmapsychotic/clip-interrogator
163
- - Idea for Composable Diffusion - https://github.com/energy-based-model/Compositional-Visual-Generation-with-Composable-Diffusion-Models-PyTorch
164
- - xformers - https://github.com/facebookresearch/xformers
165
- - DeepDanbooru - interrogator for anime diffusers https://github.com/KichangKim/DeepDanbooru
166
- - Sampling in float32 precision from a float16 UNet - marunine for the idea, Birch-san for the example Diffusers implementation (https://github.com/Birch-san/diffusers-play/tree/92feee6)
167
- - Instruct pix2pix - Tim Brooks (star), Aleksander Holynski (star), Alexei A. Efros (no star) - https://github.com/timothybrooks/instruct-pix2pix
168
- - Security advice - RyotaK
169
- - UniPC sampler - Wenliang Zhao - https://github.com/wl-zhao/UniPC
170
- - TAESD - Ollin Boer Bohan - https://github.com/madebyollin/taesd
171
- - LyCORIS - KohakuBlueleaf
172
- - Initial Gradio script - posted on 4chan by an Anonymous user. Thank you Anonymous user.
173
- - (You)
 
__pycache__/launch.cpython-310.pyc DELETED
Binary file (832 Bytes)
 
__pycache__/webui.cpython-310.pyc DELETED
Binary file (15 kB)
 
cache.json DELETED
@@ -1,8 +0,0 @@
1
- {
2
- "hashes": {
3
- "checkpoint/absolutereality_v16.safetensors": {
4
- "mtime": 1690338820.1232889,
5
- "sha256": "be1d90c4abb7bb0183f267f899f38b44112ad6ef9a757a6723514ea4e9be15dc"
6
- }
7
- }
8
- }
 
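The `cache.json` entry above pairs a checkpoint path with its `mtime` and `sha256`. As a rough illustration (a hypothetical helper sketched here, not the webui's own code), such an entry can be maintained by recomputing the hash only when the file's modification time no longer matches the cached value:

```python
import hashlib
import os

def checkpoint_hash_entry(path, cache):
    """Return a {"mtime", "sha256"} entry for `path`, reusing `cache` when it is still fresh."""
    mtime = os.path.getmtime(path)
    entry = cache.get(path)
    if entry and entry["mtime"] == mtime:
        return entry  # unchanged file: skip the expensive hash
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
            h.update(chunk)
    entry = {"mtime": mtime, "sha256": h.hexdigest()}
    cache[path] = entry
    return entry
```
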
configs/alt-diffusion-inference.yaml DELETED
@@ -1,72 +0,0 @@
1
- model:
2
- base_learning_rate: 1.0e-04
3
- target: ldm.models.diffusion.ddpm.LatentDiffusion
4
- params:
5
- linear_start: 0.00085
6
- linear_end: 0.0120
7
- num_timesteps_cond: 1
8
- log_every_t: 200
9
- timesteps: 1000
10
- first_stage_key: "jpg"
11
- cond_stage_key: "txt"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false # Note: different from the one we trained before
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
-
20
- scheduler_config: # 10000 warmup steps
21
- target: ldm.lr_scheduler.LambdaLinearScheduler
22
- params:
23
- warm_up_steps: [ 10000 ]
24
- cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
25
- f_start: [ 1.e-6 ]
26
- f_max: [ 1. ]
27
- f_min: [ 1. ]
28
-
29
- unet_config:
30
- target: ldm.modules.diffusionmodules.openaimodel.UNetModel
31
- params:
32
- image_size: 32 # unused
33
- in_channels: 4
34
- out_channels: 4
35
- model_channels: 320
36
- attention_resolutions: [ 4, 2, 1 ]
37
- num_res_blocks: 2
38
- channel_mult: [ 1, 2, 4, 4 ]
39
- num_heads: 8
40
- use_spatial_transformer: True
41
- transformer_depth: 1
42
- context_dim: 768
43
- use_checkpoint: True
44
- legacy: False
45
-
46
- first_stage_config:
47
- target: ldm.models.autoencoder.AutoencoderKL
48
- params:
49
- embed_dim: 4
50
- monitor: val/rec_loss
51
- ddconfig:
52
- double_z: true
53
- z_channels: 4
54
- resolution: 256
55
- in_channels: 3
56
- out_ch: 3
57
- ch: 128
58
- ch_mult:
59
- - 1
60
- - 2
61
- - 4
62
- - 4
63
- num_res_blocks: 2
64
- attn_resolutions: []
65
- dropout: 0.0
66
- lossconfig:
67
- target: torch.nn.Identity
68
-
69
- cond_stage_config:
70
- target: modules.xlmr.BertSeriesModelWithTransformation
71
- params:
72
- name: "XLMR-Large"
 
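Inference YAMLs like the one above are consumed through `OmegaConf` and `instantiate_from_config`, the same pattern visible in `ldsr_model_arch.py` further down in this diff. A minimal sketch of that loading pattern (the checkpoint filename is a placeholder):

```python
import torch
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config

config = OmegaConf.load("configs/alt-diffusion-inference.yaml")  # parse the YAML tree
model = instantiate_from_config(config.model)                    # instantiate the class named in `target` with `params`
pl_sd = torch.load("model.ckpt", map_location="cpu")             # placeholder checkpoint path
sd = pl_sd["state_dict"] if "state_dict" in pl_sd else pl_sd
model.load_state_dict(sd, strict=False)
model.eval()
```
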
configs/instruct-pix2pix.yaml DELETED
@@ -1,98 +0,0 @@
1
- # File modified by authors of InstructPix2Pix from original (https://github.com/CompVis/stable-diffusion).
2
- # See more details in LICENSE.
3
-
4
- model:
5
- base_learning_rate: 1.0e-04
6
- target: modules.models.diffusion.ddpm_edit.LatentDiffusion
7
- params:
8
- linear_start: 0.00085
9
- linear_end: 0.0120
10
- num_timesteps_cond: 1
11
- log_every_t: 200
12
- timesteps: 1000
13
- first_stage_key: edited
14
- cond_stage_key: edit
15
- # image_size: 64
16
- # image_size: 32
17
- image_size: 16
18
- channels: 4
19
- cond_stage_trainable: false # Note: different from the one we trained before
20
- conditioning_key: hybrid
21
- monitor: val/loss_simple_ema
22
- scale_factor: 0.18215
23
- use_ema: false
24
-
25
- scheduler_config: # 10000 warmup steps
26
- target: ldm.lr_scheduler.LambdaLinearScheduler
27
- params:
28
- warm_up_steps: [ 0 ]
29
- cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
30
- f_start: [ 1.e-6 ]
31
- f_max: [ 1. ]
32
- f_min: [ 1. ]
33
-
34
- unet_config:
35
- target: ldm.modules.diffusionmodules.openaimodel.UNetModel
36
- params:
37
- image_size: 32 # unused
38
- in_channels: 8
39
- out_channels: 4
40
- model_channels: 320
41
- attention_resolutions: [ 4, 2, 1 ]
42
- num_res_blocks: 2
43
- channel_mult: [ 1, 2, 4, 4 ]
44
- num_heads: 8
45
- use_spatial_transformer: True
46
- transformer_depth: 1
47
- context_dim: 768
48
- use_checkpoint: True
49
- legacy: False
50
-
51
- first_stage_config:
52
- target: ldm.models.autoencoder.AutoencoderKL
53
- params:
54
- embed_dim: 4
55
- monitor: val/rec_loss
56
- ddconfig:
57
- double_z: true
58
- z_channels: 4
59
- resolution: 256
60
- in_channels: 3
61
- out_ch: 3
62
- ch: 128
63
- ch_mult:
64
- - 1
65
- - 2
66
- - 4
67
- - 4
68
- num_res_blocks: 2
69
- attn_resolutions: []
70
- dropout: 0.0
71
- lossconfig:
72
- target: torch.nn.Identity
73
-
74
- cond_stage_config:
75
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
76
-
77
- data:
78
- target: main.DataModuleFromConfig
79
- params:
80
- batch_size: 128
81
- num_workers: 1
82
- wrap: false
83
- validation:
84
- target: edit_dataset.EditDataset
85
- params:
86
- path: data/clip-filtered-dataset
87
- cache_dir: data/
88
- cache_name: data_10k
89
- split: val
90
- min_text_sim: 0.2
91
- min_image_sim: 0.75
92
- min_direction_sim: 0.2
93
- max_samples_per_prompt: 1
94
- min_resize_res: 512
95
- max_resize_res: 512
96
- crop_res: 512
97
- output_as_edit: False
98
- real_input: True
 
configs/v1-inference.yaml DELETED
@@ -1,70 +0,0 @@
1
- model:
2
- base_learning_rate: 1.0e-04
3
- target: ldm.models.diffusion.ddpm.LatentDiffusion
4
- params:
5
- linear_start: 0.00085
6
- linear_end: 0.0120
7
- num_timesteps_cond: 1
8
- log_every_t: 200
9
- timesteps: 1000
10
- first_stage_key: "jpg"
11
- cond_stage_key: "txt"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false # Note: different from the one we trained before
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
-
20
- scheduler_config: # 10000 warmup steps
21
- target: ldm.lr_scheduler.LambdaLinearScheduler
22
- params:
23
- warm_up_steps: [ 10000 ]
24
- cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
25
- f_start: [ 1.e-6 ]
26
- f_max: [ 1. ]
27
- f_min: [ 1. ]
28
-
29
- unet_config:
30
- target: ldm.modules.diffusionmodules.openaimodel.UNetModel
31
- params:
32
- image_size: 32 # unused
33
- in_channels: 4
34
- out_channels: 4
35
- model_channels: 320
36
- attention_resolutions: [ 4, 2, 1 ]
37
- num_res_blocks: 2
38
- channel_mult: [ 1, 2, 4, 4 ]
39
- num_heads: 8
40
- use_spatial_transformer: True
41
- transformer_depth: 1
42
- context_dim: 768
43
- use_checkpoint: True
44
- legacy: False
45
-
46
- first_stage_config:
47
- target: ldm.models.autoencoder.AutoencoderKL
48
- params:
49
- embed_dim: 4
50
- monitor: val/rec_loss
51
- ddconfig:
52
- double_z: true
53
- z_channels: 4
54
- resolution: 256
55
- in_channels: 3
56
- out_ch: 3
57
- ch: 128
58
- ch_mult:
59
- - 1
60
- - 2
61
- - 4
62
- - 4
63
- num_res_blocks: 2
64
- attn_resolutions: []
65
- dropout: 0.0
66
- lossconfig:
67
- target: torch.nn.Identity
68
-
69
- cond_stage_config:
70
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
configs/v1-inpainting-inference.yaml DELETED
@@ -1,70 +0,0 @@
1
- model:
2
- base_learning_rate: 7.5e-05
3
- target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
4
- params:
5
- linear_start: 0.00085
6
- linear_end: 0.0120
7
- num_timesteps_cond: 1
8
- log_every_t: 200
9
- timesteps: 1000
10
- first_stage_key: "jpg"
11
- cond_stage_key: "txt"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false # Note: different from the one we trained before
15
- conditioning_key: hybrid # important
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- finetune_keys: null
19
-
20
- scheduler_config: # 10000 warmup steps
21
- target: ldm.lr_scheduler.LambdaLinearScheduler
22
- params:
23
- warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch
24
- cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
25
- f_start: [ 1.e-6 ]
26
- f_max: [ 1. ]
27
- f_min: [ 1. ]
28
-
29
- unet_config:
30
- target: ldm.modules.diffusionmodules.openaimodel.UNetModel
31
- params:
32
- image_size: 32 # unused
33
- in_channels: 9 # 4 data + 4 downscaled image + 1 mask
34
- out_channels: 4
35
- model_channels: 320
36
- attention_resolutions: [ 4, 2, 1 ]
37
- num_res_blocks: 2
38
- channel_mult: [ 1, 2, 4, 4 ]
39
- num_heads: 8
40
- use_spatial_transformer: True
41
- transformer_depth: 1
42
- context_dim: 768
43
- use_checkpoint: True
44
- legacy: False
45
-
46
- first_stage_config:
47
- target: ldm.models.autoencoder.AutoencoderKL
48
- params:
49
- embed_dim: 4
50
- monitor: val/rec_loss
51
- ddconfig:
52
- double_z: true
53
- z_channels: 4
54
- resolution: 256
55
- in_channels: 3
56
- out_ch: 3
57
- ch: 128
58
- ch_mult:
59
- - 1
60
- - 2
61
- - 4
62
- - 4
63
- num_res_blocks: 2
64
- attn_resolutions: []
65
- dropout: 0.0
66
- lossconfig:
67
- target: torch.nn.Identity
68
-
69
- cond_stage_config:
70
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
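The `in_channels: 9` comment above (4 data + 4 downscaled image + 1 mask) describes how the inpainting UNet input is assembled under `conditioning_key: hybrid`. A minimal sketch of that concatenation follows; the shapes and channel order are an assumption based on the comment, not the repository's exact wiring:

```python
import torch
import torch.nn.functional as F

def build_inpaint_unet_input(noisy_latent, masked_image_latent, mask):
    # noisy_latent:        [B, 4, H/8, W/8]  latent currently being denoised
    # masked_image_latent: [B, 4, H/8, W/8]  VAE encoding of the image with the hole zeroed out
    # mask:                [B, 1, H, W]      1 where new content should be generated
    mask_small = F.interpolate(mask, size=noisy_latent.shape[-2:], mode="nearest")
    return torch.cat([noisy_latent, masked_image_latent, mask_small], dim=1)  # [B, 9, H/8, W/8]
```
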
environment-wsl2.yaml DELETED
@@ -1,11 +0,0 @@
1
- name: automatic
2
- channels:
3
- - pytorch
4
- - defaults
5
- dependencies:
6
- - python=3.10
7
- - pip=23.0
8
- - cudatoolkit=11.8
9
- - pytorch=2.0
10
- - torchvision=0.15
11
- - numpy=1.23
 
extensions-builtin/LDSR/__pycache__/ldsr_model_arch.cpython-310.pyc DELETED
Binary file (6.71 kB)
 
extensions-builtin/LDSR/__pycache__/preload.cpython-310.pyc DELETED
Binary file (515 Bytes)
 
extensions-builtin/LDSR/__pycache__/sd_hijack_autoencoder.cpython-310.pyc DELETED
Binary file (8.95 kB)
 
extensions-builtin/LDSR/__pycache__/sd_hijack_ddpm_v1.cpython-310.pyc DELETED
Binary file (42.4 kB)
 
extensions-builtin/LDSR/__pycache__/vqvae_quantize.cpython-310.pyc DELETED
Binary file (3.67 kB)
 
extensions-builtin/LDSR/ldsr_model_arch.py DELETED
@@ -1,250 +0,0 @@
1
- import os
2
- import gc
3
- import time
4
-
5
- import numpy as np
6
- import torch
7
- import torchvision
8
- from PIL import Image
9
- from einops import rearrange, repeat
10
- from omegaconf import OmegaConf
11
- import safetensors.torch
12
-
13
- from ldm.models.diffusion.ddim import DDIMSampler
14
- from ldm.util import instantiate_from_config, ismap
15
- from modules import shared, sd_hijack, devices
16
-
17
- cached_ldsr_model: torch.nn.Module = None
18
-
19
-
20
- # Create LDSR Class
21
- class LDSR:
22
- def load_model_from_config(self, half_attention):
23
- global cached_ldsr_model
24
-
25
- if shared.opts.ldsr_cached and cached_ldsr_model is not None:
26
- print("Loading model from cache")
27
- model: torch.nn.Module = cached_ldsr_model
28
- else:
29
- print(f"Loading model from {self.modelPath}")
30
- _, extension = os.path.splitext(self.modelPath)
31
- if extension.lower() == ".safetensors":
32
- pl_sd = safetensors.torch.load_file(self.modelPath, device="cpu")
33
- else:
34
- pl_sd = torch.load(self.modelPath, map_location="cpu")
35
- sd = pl_sd["state_dict"] if "state_dict" in pl_sd else pl_sd
36
- config = OmegaConf.load(self.yamlPath)
37
- config.model.target = "ldm.models.diffusion.ddpm.LatentDiffusionV1"
38
- model: torch.nn.Module = instantiate_from_config(config.model)
39
- model.load_state_dict(sd, strict=False)
40
- model = model.to(shared.device)
41
- if half_attention:
42
- model = model.half()
43
- if shared.cmd_opts.opt_channelslast:
44
- model = model.to(memory_format=torch.channels_last)
45
-
46
- sd_hijack.model_hijack.hijack(model) # apply optimization
47
- model.eval()
48
-
49
- if shared.opts.ldsr_cached:
50
- cached_ldsr_model = model
51
-
52
- return {"model": model}
53
-
54
- def __init__(self, model_path, yaml_path):
55
- self.modelPath = model_path
56
- self.yamlPath = yaml_path
57
-
58
- @staticmethod
59
- def run(model, selected_path, custom_steps, eta):
60
- example = get_cond(selected_path)
61
-
62
- n_runs = 1
63
- guider = None
64
- ckwargs = None
65
- ddim_use_x0_pred = False
66
- temperature = 1.
67
- eta = eta
68
- custom_shape = None
69
-
70
- height, width = example["image"].shape[1:3]
71
- split_input = height >= 128 and width >= 128
72
-
73
- if split_input:
74
- ks = 128
75
- stride = 64
76
- vqf = 4 #
77
- model.split_input_params = {"ks": (ks, ks), "stride": (stride, stride),
78
- "vqf": vqf,
79
- "patch_distributed_vq": True,
80
- "tie_braker": False,
81
- "clip_max_weight": 0.5,
82
- "clip_min_weight": 0.01,
83
- "clip_max_tie_weight": 0.5,
84
- "clip_min_tie_weight": 0.01}
85
- else:
86
- if hasattr(model, "split_input_params"):
87
- delattr(model, "split_input_params")
88
-
89
- x_t = None
90
- logs = None
91
- for _ in range(n_runs):
92
- if custom_shape is not None:
93
- x_t = torch.randn(1, custom_shape[1], custom_shape[2], custom_shape[3]).to(model.device)
94
- x_t = repeat(x_t, '1 c h w -> b c h w', b=custom_shape[0])
95
-
96
- logs = make_convolutional_sample(example, model,
97
- custom_steps=custom_steps,
98
- eta=eta, quantize_x0=False,
99
- custom_shape=custom_shape,
100
- temperature=temperature, noise_dropout=0.,
101
- corrector=guider, corrector_kwargs=ckwargs, x_T=x_t,
102
- ddim_use_x0_pred=ddim_use_x0_pred
103
- )
104
- return logs
105
-
106
- def super_resolution(self, image, steps=100, target_scale=2, half_attention=False):
107
- model = self.load_model_from_config(half_attention)
108
-
109
- # Run settings
110
- diffusion_steps = int(steps)
111
- eta = 1.0
112
-
113
-
114
- gc.collect()
115
- devices.torch_gc()
116
-
117
- im_og = image
118
- width_og, height_og = im_og.size
119
- # If we can adjust the max upscale size, then the 4 below should be our variable
120
- down_sample_rate = target_scale / 4
121
- wd = width_og * down_sample_rate
122
- hd = height_og * down_sample_rate
123
- width_downsampled_pre = int(np.ceil(wd))
124
- height_downsampled_pre = int(np.ceil(hd))
125
-
126
- if down_sample_rate != 1:
127
- print(
128
- f'Downsampling from [{width_og}, {height_og}] to [{width_downsampled_pre}, {height_downsampled_pre}]')
129
- im_og = im_og.resize((width_downsampled_pre, height_downsampled_pre), Image.LANCZOS)
130
- else:
131
- print(f"Down sample rate is 1 from {target_scale} / 4 (Not downsampling)")
132
-
133
- # pad width and height to multiples of 64, pads with the edge values of image to avoid artifacts
134
- pad_w, pad_h = np.max(((2, 2), np.ceil(np.array(im_og.size) / 64).astype(int)), axis=0) * 64 - im_og.size
135
- im_padded = Image.fromarray(np.pad(np.array(im_og), ((0, pad_h), (0, pad_w), (0, 0)), mode='edge'))
136
-
137
- logs = self.run(model["model"], im_padded, diffusion_steps, eta)
138
-
139
- sample = logs["sample"]
140
- sample = sample.detach().cpu()
141
- sample = torch.clamp(sample, -1., 1.)
142
- sample = (sample + 1.) / 2. * 255
143
- sample = sample.numpy().astype(np.uint8)
144
- sample = np.transpose(sample, (0, 2, 3, 1))
145
- a = Image.fromarray(sample[0])
146
-
147
- # remove padding
148
- a = a.crop((0, 0) + tuple(np.array(im_og.size) * 4))
149
-
150
- del model
151
- gc.collect()
152
- devices.torch_gc()
153
-
154
- return a
155
-
156
-
157
- def get_cond(selected_path):
158
- example = {}
159
- up_f = 4
160
- c = selected_path.convert('RGB')
161
- c = torch.unsqueeze(torchvision.transforms.ToTensor()(c), 0)
162
- c_up = torchvision.transforms.functional.resize(c, size=[up_f * c.shape[2], up_f * c.shape[3]],
163
- antialias=True)
164
- c_up = rearrange(c_up, '1 c h w -> 1 h w c')
165
- c = rearrange(c, '1 c h w -> 1 h w c')
166
- c = 2. * c - 1.
167
-
168
- c = c.to(shared.device)
169
- example["LR_image"] = c
170
- example["image"] = c_up
171
-
172
- return example
173
-
174
-
175
- @torch.no_grad()
176
- def convsample_ddim(model, cond, steps, shape, eta=1.0, callback=None, normals_sequence=None,
177
- mask=None, x0=None, quantize_x0=False, temperature=1., score_corrector=None,
178
- corrector_kwargs=None, x_t=None
179
- ):
180
- ddim = DDIMSampler(model)
181
- bs = shape[0]
182
- shape = shape[1:]
183
- print(f"Sampling with eta = {eta}; steps: {steps}")
184
- samples, intermediates = ddim.sample(steps, batch_size=bs, shape=shape, conditioning=cond, callback=callback,
185
- normals_sequence=normals_sequence, quantize_x0=quantize_x0, eta=eta,
186
- mask=mask, x0=x0, temperature=temperature, verbose=False,
187
- score_corrector=score_corrector,
188
- corrector_kwargs=corrector_kwargs, x_t=x_t)
189
-
190
- return samples, intermediates
191
-
192
-
193
- @torch.no_grad()
194
- def make_convolutional_sample(batch, model, custom_steps=None, eta=1.0, quantize_x0=False, custom_shape=None, temperature=1., noise_dropout=0., corrector=None,
195
- corrector_kwargs=None, x_T=None, ddim_use_x0_pred=False):
196
- log = {}
197
-
198
- z, c, x, xrec, xc = model.get_input(batch, model.first_stage_key,
199
- return_first_stage_outputs=True,
200
- force_c_encode=not (hasattr(model, 'split_input_params')
201
- and model.cond_stage_key == 'coordinates_bbox'),
202
- return_original_cond=True)
203
-
204
- if custom_shape is not None:
205
- z = torch.randn(custom_shape)
206
- print(f"Generating {custom_shape[0]} samples of shape {custom_shape[1:]}")
207
-
208
- z0 = None
209
-
210
- log["input"] = x
211
- log["reconstruction"] = xrec
212
-
213
- if ismap(xc):
214
- log["original_conditioning"] = model.to_rgb(xc)
215
- if hasattr(model, 'cond_stage_key'):
216
- log[model.cond_stage_key] = model.to_rgb(xc)
217
-
218
- else:
219
- log["original_conditioning"] = xc if xc is not None else torch.zeros_like(x)
220
- if model.cond_stage_model:
221
- log[model.cond_stage_key] = xc if xc is not None else torch.zeros_like(x)
222
- if model.cond_stage_key == 'class_label':
223
- log[model.cond_stage_key] = xc[model.cond_stage_key]
224
-
225
- with model.ema_scope("Plotting"):
226
- t0 = time.time()
227
-
228
- sample, intermediates = convsample_ddim(model, c, steps=custom_steps, shape=z.shape,
229
- eta=eta,
230
- quantize_x0=quantize_x0, mask=None, x0=z0,
231
- temperature=temperature, score_corrector=corrector, corrector_kwargs=corrector_kwargs,
232
- x_t=x_T)
233
- t1 = time.time()
234
-
235
- if ddim_use_x0_pred:
236
- sample = intermediates['pred_x0'][-1]
237
-
238
- x_sample = model.decode_first_stage(sample)
239
-
240
- try:
241
- x_sample_noquant = model.decode_first_stage(sample, force_not_quantize=True)
242
- log["sample_noquant"] = x_sample_noquant
243
- log["sample_diff"] = torch.abs(x_sample_noquant - x_sample)
244
- except Exception:
245
- pass
246
-
247
- log["sample"] = x_sample
248
- log["time"] = t1 - t0
249
-
250
- return log
 
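For reference, a worked example of the padding arithmetic in `super_resolution` above: each side is padded up to the next multiple of 64, with a floor of 2 * 64 = 128 pixels per dimension (the input size below is illustrative):

```python
import numpy as np

size = np.array([516, 389])                                          # (width, height) of im_og
padded = np.max(((2, 2), np.ceil(size / 64).astype(int)), axis=0) * 64  # -> [576, 448]
pad_w, pad_h = padded - size                                          # -> 60, 59
```
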
extensions-builtin/LDSR/preload.py DELETED
@@ -1,6 +0,0 @@
1
- import os
2
- from modules import paths
3
-
4
-
5
- def preload(parser):
6
- parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(paths.models_path, 'LDSR'))
 
extensions-builtin/LDSR/scripts/__pycache__/ldsr_model.cpython-310.pyc DELETED
Binary file (3.22 kB)
 
extensions-builtin/LDSR/scripts/ldsr_model.py DELETED
@@ -1,68 +0,0 @@
1
- import os
2
-
3
- from modules.modelloader import load_file_from_url
4
- from modules.upscaler import Upscaler, UpscalerData
5
- from ldsr_model_arch import LDSR
6
- from modules import shared, script_callbacks, errors
7
- import sd_hijack_autoencoder # noqa: F401
8
- import sd_hijack_ddpm_v1 # noqa: F401
9
-
10
-
11
- class UpscalerLDSR(Upscaler):
12
- def __init__(self, user_path):
13
- self.name = "LDSR"
14
- self.user_path = user_path
15
- self.model_url = "https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1"
16
- self.yaml_url = "https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1"
17
- super().__init__()
18
- scaler_data = UpscalerData("LDSR", None, self)
19
- self.scalers = [scaler_data]
20
-
21
- def load_model(self, path: str):
22
- # Remove incorrect project.yaml file if too big
23
- yaml_path = os.path.join(self.model_path, "project.yaml")
24
- old_model_path = os.path.join(self.model_path, "model.pth")
25
- new_model_path = os.path.join(self.model_path, "model.ckpt")
26
-
27
- local_model_paths = self.find_models(ext_filter=[".ckpt", ".safetensors"])
28
- local_ckpt_path = next(iter([local_model for local_model in local_model_paths if local_model.endswith("model.ckpt")]), None)
29
- local_safetensors_path = next(iter([local_model for local_model in local_model_paths if local_model.endswith("model.safetensors")]), None)
30
- local_yaml_path = next(iter([local_model for local_model in local_model_paths if local_model.endswith("project.yaml")]), None)
31
-
32
- if os.path.exists(yaml_path):
33
- statinfo = os.stat(yaml_path)
34
- if statinfo.st_size >= 10485760:
35
- print("Removing invalid LDSR YAML file.")
36
- os.remove(yaml_path)
37
-
38
- if os.path.exists(old_model_path):
39
- print("Renaming model from model.pth to model.ckpt")
40
- os.rename(old_model_path, new_model_path)
41
-
42
- if local_safetensors_path is not None and os.path.exists(local_safetensors_path):
43
- model = local_safetensors_path
44
- else:
45
- model = local_ckpt_path or load_file_from_url(self.model_url, model_dir=self.model_download_path, file_name="model.ckpt")
46
-
47
- yaml = local_yaml_path or load_file_from_url(self.yaml_url, model_dir=self.model_download_path, file_name="project.yaml")
48
-
49
- return LDSR(model, yaml)
50
-
51
- def do_upscale(self, img, path):
52
- try:
53
- ldsr = self.load_model(path)
54
- except Exception:
55
- errors.report(f"Failed loading LDSR model {path}", exc_info=True)
56
- return img
57
- ddim_steps = shared.opts.ldsr_steps
58
- return ldsr.super_resolution(img, ddim_steps, self.scale)
59
-
60
-
61
- def on_ui_settings():
62
- import gradio as gr
63
-
64
- shared.opts.add_option("ldsr_steps", shared.OptionInfo(100, "LDSR processing steps. Lower = faster", gr.Slider, {"minimum": 1, "maximum": 200, "step": 1}, section=('upscaling', "Upscaling")))
65
- shared.opts.add_option("ldsr_cached", shared.OptionInfo(False, "Cache LDSR model in memory", gr.Checkbox, {"interactive": True}, section=('upscaling', "Upscaling")))
66
-
67
-
68
- script_callbacks.on_ui_settings(on_ui_settings)
 
extensions-builtin/LDSR/sd_hijack_autoencoder.py DELETED
@@ -1,293 +0,0 @@
1
- # The content of this file comes from the ldm/models/autoencoder.py file of the compvis/stable-diffusion repo
2
- # The VQModel & VQModelInterface were subsequently removed from ldm/models/autoencoder.py when we moved to the stability-ai/stablediffusion repo
3
- # As the LDSR upscaler relies on VQModel & VQModelInterface, the hijack aims to put them back into the ldm.models.autoencoder
4
- import numpy as np
5
- import torch
6
- import pytorch_lightning as pl
7
- import torch.nn.functional as F
8
- from contextlib import contextmanager
9
-
10
- from torch.optim.lr_scheduler import LambdaLR
11
-
12
- from ldm.modules.ema import LitEma
13
- from vqvae_quantize import VectorQuantizer2 as VectorQuantizer
14
- from ldm.modules.diffusionmodules.model import Encoder, Decoder
15
- from ldm.util import instantiate_from_config
16
-
17
- import ldm.models.autoencoder
18
- from packaging import version
19
-
20
- class VQModel(pl.LightningModule):
21
- def __init__(self,
22
- ddconfig,
23
- lossconfig,
24
- n_embed,
25
- embed_dim,
26
- ckpt_path=None,
27
- ignore_keys=None,
28
- image_key="image",
29
- colorize_nlabels=None,
30
- monitor=None,
31
- batch_resize_range=None,
32
- scheduler_config=None,
33
- lr_g_factor=1.0,
34
- remap=None,
35
- sane_index_shape=False, # tell vector quantizer to return indices as bhw
36
- use_ema=False
37
- ):
38
- super().__init__()
39
- self.embed_dim = embed_dim
40
- self.n_embed = n_embed
41
- self.image_key = image_key
42
- self.encoder = Encoder(**ddconfig)
43
- self.decoder = Decoder(**ddconfig)
44
- self.loss = instantiate_from_config(lossconfig)
45
- self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25,
46
- remap=remap,
47
- sane_index_shape=sane_index_shape)
48
- self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1)
49
- self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
50
- if colorize_nlabels is not None:
51
- assert type(colorize_nlabels)==int
52
- self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1))
53
- if monitor is not None:
54
- self.monitor = monitor
55
- self.batch_resize_range = batch_resize_range
56
- if self.batch_resize_range is not None:
57
- print(f"{self.__class__.__name__}: Using per-batch resizing in range {batch_resize_range}.")
58
-
59
- self.use_ema = use_ema
60
- if self.use_ema:
61
- self.model_ema = LitEma(self)
62
- print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.")
63
-
64
- if ckpt_path is not None:
65
- self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys or [])
66
- self.scheduler_config = scheduler_config
67
- self.lr_g_factor = lr_g_factor
68
-
69
- @contextmanager
70
- def ema_scope(self, context=None):
71
- if self.use_ema:
72
- self.model_ema.store(self.parameters())
73
- self.model_ema.copy_to(self)
74
- if context is not None:
75
- print(f"{context}: Switched to EMA weights")
76
- try:
77
- yield None
78
- finally:
79
- if self.use_ema:
80
- self.model_ema.restore(self.parameters())
81
- if context is not None:
82
- print(f"{context}: Restored training weights")
83
-
84
- def init_from_ckpt(self, path, ignore_keys=None):
85
- sd = torch.load(path, map_location="cpu")["state_dict"]
86
- keys = list(sd.keys())
87
- for k in keys:
88
- for ik in ignore_keys or []:
89
- if k.startswith(ik):
90
- print("Deleting key {} from state_dict.".format(k))
91
- del sd[k]
92
- missing, unexpected = self.load_state_dict(sd, strict=False)
93
- print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys")
94
- if missing:
95
- print(f"Missing Keys: {missing}")
96
- if unexpected:
97
- print(f"Unexpected Keys: {unexpected}")
98
-
99
- def on_train_batch_end(self, *args, **kwargs):
100
- if self.use_ema:
101
- self.model_ema(self)
102
-
103
- def encode(self, x):
104
- h = self.encoder(x)
105
- h = self.quant_conv(h)
106
- quant, emb_loss, info = self.quantize(h)
107
- return quant, emb_loss, info
108
-
109
- def encode_to_prequant(self, x):
110
- h = self.encoder(x)
111
- h = self.quant_conv(h)
112
- return h
113
-
114
- def decode(self, quant):
115
- quant = self.post_quant_conv(quant)
116
- dec = self.decoder(quant)
117
- return dec
118
-
119
- def decode_code(self, code_b):
120
- quant_b = self.quantize.embed_code(code_b)
121
- dec = self.decode(quant_b)
122
- return dec
123
-
124
- def forward(self, input, return_pred_indices=False):
125
- quant, diff, (_,_,ind) = self.encode(input)
126
- dec = self.decode(quant)
127
- if return_pred_indices:
128
- return dec, diff, ind
129
- return dec, diff
130
-
131
- def get_input(self, batch, k):
132
- x = batch[k]
133
- if len(x.shape) == 3:
134
- x = x[..., None]
135
- x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format).float()
136
- if self.batch_resize_range is not None:
137
- lower_size = self.batch_resize_range[0]
138
- upper_size = self.batch_resize_range[1]
139
- if self.global_step <= 4:
140
- # do the first few batches with max size to avoid later oom
141
- new_resize = upper_size
142
- else:
143
- new_resize = np.random.choice(np.arange(lower_size, upper_size+16, 16))
144
- if new_resize != x.shape[2]:
145
- x = F.interpolate(x, size=new_resize, mode="bicubic")
146
- x = x.detach()
147
- return x
148
-
149
- def training_step(self, batch, batch_idx, optimizer_idx):
150
- # https://github.com/pytorch/pytorch/issues/37142
151
- # try not to fool the heuristics
152
- x = self.get_input(batch, self.image_key)
153
- xrec, qloss, ind = self(x, return_pred_indices=True)
154
-
155
- if optimizer_idx == 0:
156
- # autoencode
157
- aeloss, log_dict_ae = self.loss(qloss, x, xrec, optimizer_idx, self.global_step,
158
- last_layer=self.get_last_layer(), split="train",
159
- predicted_indices=ind)
160
-
161
- self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=True)
162
- return aeloss
163
-
164
- if optimizer_idx == 1:
165
- # discriminator
166
- discloss, log_dict_disc = self.loss(qloss, x, xrec, optimizer_idx, self.global_step,
167
- last_layer=self.get_last_layer(), split="train")
168
- self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=True)
169
- return discloss
170
-
171
- def validation_step(self, batch, batch_idx):
172
- log_dict = self._validation_step(batch, batch_idx)
173
- with self.ema_scope():
174
- self._validation_step(batch, batch_idx, suffix="_ema")
175
- return log_dict
176
-
177
- def _validation_step(self, batch, batch_idx, suffix=""):
178
- x = self.get_input(batch, self.image_key)
179
- xrec, qloss, ind = self(x, return_pred_indices=True)
180
- aeloss, log_dict_ae = self.loss(qloss, x, xrec, 0,
181
- self.global_step,
182
- last_layer=self.get_last_layer(),
183
- split="val"+suffix,
184
- predicted_indices=ind
185
- )
186
-
187
- discloss, log_dict_disc = self.loss(qloss, x, xrec, 1,
188
- self.global_step,
189
- last_layer=self.get_last_layer(),
190
- split="val"+suffix,
191
- predicted_indices=ind
192
- )
193
- rec_loss = log_dict_ae[f"val{suffix}/rec_loss"]
194
- self.log(f"val{suffix}/rec_loss", rec_loss,
195
- prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True)
196
- self.log(f"val{suffix}/aeloss", aeloss,
197
- prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True)
198
- if version.parse(pl.__version__) >= version.parse('1.4.0'):
199
- del log_dict_ae[f"val{suffix}/rec_loss"]
200
- self.log_dict(log_dict_ae)
201
- self.log_dict(log_dict_disc)
202
- return self.log_dict
203
-
204
- def configure_optimizers(self):
205
- lr_d = self.learning_rate
206
- lr_g = self.lr_g_factor*self.learning_rate
207
- print("lr_d", lr_d)
208
- print("lr_g", lr_g)
209
- opt_ae = torch.optim.Adam(list(self.encoder.parameters())+
210
- list(self.decoder.parameters())+
211
- list(self.quantize.parameters())+
212
- list(self.quant_conv.parameters())+
213
- list(self.post_quant_conv.parameters()),
214
- lr=lr_g, betas=(0.5, 0.9))
215
- opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(),
216
- lr=lr_d, betas=(0.5, 0.9))
217
-
218
- if self.scheduler_config is not None:
219
- scheduler = instantiate_from_config(self.scheduler_config)
220
-
221
- print("Setting up LambdaLR scheduler...")
222
- scheduler = [
223
- {
224
- 'scheduler': LambdaLR(opt_ae, lr_lambda=scheduler.schedule),
225
- 'interval': 'step',
226
- 'frequency': 1
227
- },
228
- {
229
- 'scheduler': LambdaLR(opt_disc, lr_lambda=scheduler.schedule),
230
- 'interval': 'step',
231
- 'frequency': 1
232
- },
233
- ]
234
- return [opt_ae, opt_disc], scheduler
235
- return [opt_ae, opt_disc], []
236
-
237
- def get_last_layer(self):
238
- return self.decoder.conv_out.weight
239
-
240
- def log_images(self, batch, only_inputs=False, plot_ema=False, **kwargs):
241
- log = {}
242
- x = self.get_input(batch, self.image_key)
243
- x = x.to(self.device)
244
- if only_inputs:
245
- log["inputs"] = x
246
- return log
247
- xrec, _ = self(x)
248
- if x.shape[1] > 3:
249
- # colorize with random projection
250
- assert xrec.shape[1] > 3
251
- x = self.to_rgb(x)
252
- xrec = self.to_rgb(xrec)
253
- log["inputs"] = x
254
- log["reconstructions"] = xrec
255
- if plot_ema:
256
- with self.ema_scope():
257
- xrec_ema, _ = self(x)
258
- if x.shape[1] > 3:
259
- xrec_ema = self.to_rgb(xrec_ema)
260
- log["reconstructions_ema"] = xrec_ema
261
- return log
262
-
263
- def to_rgb(self, x):
264
- assert self.image_key == "segmentation"
265
- if not hasattr(self, "colorize"):
266
- self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x))
267
- x = F.conv2d(x, weight=self.colorize)
268
- x = 2.*(x-x.min())/(x.max()-x.min()) - 1.
269
- return x
270
-
271
-
272
- class VQModelInterface(VQModel):
273
- def __init__(self, embed_dim, *args, **kwargs):
274
- super().__init__(*args, embed_dim=embed_dim, **kwargs)
275
- self.embed_dim = embed_dim
276
-
277
- def encode(self, x):
278
- h = self.encoder(x)
279
- h = self.quant_conv(h)
280
- return h
281
-
282
- def decode(self, h, force_not_quantize=False):
283
- # also go through quantization layer
284
- if not force_not_quantize:
285
- quant, emb_loss, info = self.quantize(h)
286
- else:
287
- quant = h
288
- quant = self.post_quant_conv(quant)
289
- dec = self.decoder(quant)
290
- return dec
291
-
292
- ldm.models.autoencoder.VQModel = VQModel
293
- ldm.models.autoencoder.VQModelInterface = VQModelInterface
 
extensions-builtin/LDSR/sd_hijack_ddpm_v1.py DELETED
@@ -1,1443 +0,0 @@
1
- # This script is copied from the compvis/stable-diffusion repo (aka the SD V1 repo)
2
- # Original filename: ldm/models/diffusion/ddpm.py
3
- # The purpose to reinstate the old DDPM logic which works with VQ, whereas the V2 one doesn't
4
- # Some models such as LDSR require VQ to work correctly
5
- # The classes are suffixed with "V1" and added back to the "ldm.models.diffusion.ddpm" module
6
-
7
- import torch
8
- import torch.nn as nn
9
- import numpy as np
10
- import pytorch_lightning as pl
11
- from torch.optim.lr_scheduler import LambdaLR
12
- from einops import rearrange, repeat
13
- from contextlib import contextmanager
14
- from functools import partial
15
- from tqdm import tqdm
16
- from torchvision.utils import make_grid
17
- from pytorch_lightning.utilities.distributed import rank_zero_only
18
-
19
- from ldm.util import log_txt_as_img, exists, default, ismap, isimage, mean_flat, count_params, instantiate_from_config
20
- from ldm.modules.ema import LitEma
21
- from ldm.modules.distributions.distributions import normal_kl, DiagonalGaussianDistribution
22
- from ldm.models.autoencoder import VQModelInterface, IdentityFirstStage, AutoencoderKL
23
- from ldm.modules.diffusionmodules.util import make_beta_schedule, extract_into_tensor, noise_like
24
- from ldm.models.diffusion.ddim import DDIMSampler
25
-
26
- import ldm.models.diffusion.ddpm
27
-
28
- __conditioning_keys__ = {'concat': 'c_concat',
29
- 'crossattn': 'c_crossattn',
30
- 'adm': 'y'}
31
-
32
-
33
- def disabled_train(self, mode=True):
34
- """Overwrite model.train with this function to make sure train/eval mode
35
- does not change anymore."""
36
- return self
37
-
38
-
39
- def uniform_on_device(r1, r2, shape, device):
40
- return (r1 - r2) * torch.rand(*shape, device=device) + r2
41
-
42
-
43
- class DDPMV1(pl.LightningModule):
44
- # classic DDPM with Gaussian diffusion, in image space
45
- def __init__(self,
46
- unet_config,
47
- timesteps=1000,
48
- beta_schedule="linear",
49
- loss_type="l2",
50
- ckpt_path=None,
51
- ignore_keys=None,
52
- load_only_unet=False,
53
- monitor="val/loss",
54
- use_ema=True,
55
- first_stage_key="image",
56
- image_size=256,
57
- channels=3,
58
- log_every_t=100,
59
- clip_denoised=True,
60
- linear_start=1e-4,
61
- linear_end=2e-2,
62
- cosine_s=8e-3,
63
- given_betas=None,
64
- original_elbo_weight=0.,
65
- v_posterior=0., # weight for choosing posterior variance as sigma = (1-v) * beta_tilde + v * beta
66
- l_simple_weight=1.,
67
- conditioning_key=None,
68
- parameterization="eps", # all assuming fixed variance schedules
69
- scheduler_config=None,
70
- use_positional_encodings=False,
71
- learn_logvar=False,
72
- logvar_init=0.,
73
- ):
74
- super().__init__()
75
- assert parameterization in ["eps", "x0"], 'currently only supporting "eps" and "x0"'
76
- self.parameterization = parameterization
77
- print(f"{self.__class__.__name__}: Running in {self.parameterization}-prediction mode")
78
- self.cond_stage_model = None
79
- self.clip_denoised = clip_denoised
80
- self.log_every_t = log_every_t
81
- self.first_stage_key = first_stage_key
82
- self.image_size = image_size # try conv?
83
- self.channels = channels
84
- self.use_positional_encodings = use_positional_encodings
85
- self.model = DiffusionWrapperV1(unet_config, conditioning_key)
86
- count_params(self.model, verbose=True)
87
- self.use_ema = use_ema
88
- if self.use_ema:
89
- self.model_ema = LitEma(self.model)
90
- print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.")
91
-
92
- self.use_scheduler = scheduler_config is not None
93
- if self.use_scheduler:
94
- self.scheduler_config = scheduler_config
95
-
96
- self.v_posterior = v_posterior
97
- self.original_elbo_weight = original_elbo_weight
98
- self.l_simple_weight = l_simple_weight
99
-
100
- if monitor is not None:
101
- self.monitor = monitor
102
- if ckpt_path is not None:
103
- self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys or [], only_model=load_only_unet)
104
-
105
- self.register_schedule(given_betas=given_betas, beta_schedule=beta_schedule, timesteps=timesteps,
106
- linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s)
107
-
108
- self.loss_type = loss_type
109
-
110
- self.learn_logvar = learn_logvar
111
- self.logvar = torch.full(fill_value=logvar_init, size=(self.num_timesteps,))
112
- if self.learn_logvar:
113
- self.logvar = nn.Parameter(self.logvar, requires_grad=True)
114
-
115
-
116
- def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000,
117
- linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
118
- if exists(given_betas):
119
- betas = given_betas
120
- else:
121
- betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end,
122
- cosine_s=cosine_s)
123
- alphas = 1. - betas
124
- alphas_cumprod = np.cumprod(alphas, axis=0)
125
- alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1])
126
-
127
- timesteps, = betas.shape
128
- self.num_timesteps = int(timesteps)
129
- self.linear_start = linear_start
130
- self.linear_end = linear_end
131
- assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep'
132
-
133
- to_torch = partial(torch.tensor, dtype=torch.float32)
134
-
135
- self.register_buffer('betas', to_torch(betas))
136
- self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod))
137
- self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev))
138
-
139
- # calculations for diffusion q(x_t | x_{t-1}) and others
140
- self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod)))
141
- self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod)))
142
- self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod)))
143
- self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod)))
144
- self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1)))
145
-
146
- # calculations for posterior q(x_{t-1} | x_t, x_0)
147
- posterior_variance = (1 - self.v_posterior) * betas * (1. - alphas_cumprod_prev) / (
148
- 1. - alphas_cumprod) + self.v_posterior * betas
149
- # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t)
150
- self.register_buffer('posterior_variance', to_torch(posterior_variance))
151
- # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain
152
- self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20))))
153
- self.register_buffer('posterior_mean_coef1', to_torch(
154
- betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod)))
155
- self.register_buffer('posterior_mean_coef2', to_torch(
156
- (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod)))
157
-
158
- if self.parameterization == "eps":
159
- lvlb_weights = self.betas ** 2 / (
160
- 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod))
161
- elif self.parameterization == "x0":
162
- lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2. * 1 - torch.Tensor(alphas_cumprod))
163
- else:
164
- raise NotImplementedError("mu not supported")
165
- # TODO how to choose this term
166
- lvlb_weights[0] = lvlb_weights[1]
167
- self.register_buffer('lvlb_weights', lvlb_weights, persistent=False)
168
- assert not torch.isnan(self.lvlb_weights).all()
169
-
170
- @contextmanager
171
- def ema_scope(self, context=None):
172
- if self.use_ema:
173
- self.model_ema.store(self.model.parameters())
174
- self.model_ema.copy_to(self.model)
175
- if context is not None:
176
- print(f"{context}: Switched to EMA weights")
177
- try:
178
- yield None
179
- finally:
180
- if self.use_ema:
181
- self.model_ema.restore(self.model.parameters())
182
- if context is not None:
183
- print(f"{context}: Restored training weights")
184
-
185
- def init_from_ckpt(self, path, ignore_keys=None, only_model=False):
186
- sd = torch.load(path, map_location="cpu")
187
- if "state_dict" in list(sd.keys()):
188
- sd = sd["state_dict"]
189
- keys = list(sd.keys())
190
- for k in keys:
191
- for ik in ignore_keys or []:
192
- if k.startswith(ik):
193
- print("Deleting key {} from state_dict.".format(k))
194
- del sd[k]
195
- missing, unexpected = self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict(
196
- sd, strict=False)
197
- print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys")
198
- if missing:
199
- print(f"Missing Keys: {missing}")
200
- if unexpected:
201
- print(f"Unexpected Keys: {unexpected}")
202
-
203
- def q_mean_variance(self, x_start, t):
204
- """
205
- Get the distribution q(x_t | x_0).
206
- :param x_start: the [N x C x ...] tensor of noiseless inputs.
207
- :param t: the number of diffusion steps (minus 1). Here, 0 means one step.
208
- :return: A tuple (mean, variance, log_variance), all of x_start's shape.
209
- """
210
- mean = (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start)
211
- variance = extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape)
212
- log_variance = extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape)
213
- return mean, variance, log_variance
214
-
215
- def predict_start_from_noise(self, x_t, t, noise):
216
- return (
217
- extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t -
218
- extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise
219
- )
220
-
221
- def q_posterior(self, x_start, x_t, t):
222
- posterior_mean = (
223
- extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start +
224
- extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t
225
- )
226
- posterior_variance = extract_into_tensor(self.posterior_variance, t, x_t.shape)
227
- posterior_log_variance_clipped = extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape)
228
- return posterior_mean, posterior_variance, posterior_log_variance_clipped
229
-
230
- def p_mean_variance(self, x, t, clip_denoised: bool):
231
- model_out = self.model(x, t)
232
- if self.parameterization == "eps":
233
- x_recon = self.predict_start_from_noise(x, t=t, noise=model_out)
234
- elif self.parameterization == "x0":
235
- x_recon = model_out
236
- if clip_denoised:
237
- x_recon.clamp_(-1., 1.)
238
-
239
- model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t)
240
- return model_mean, posterior_variance, posterior_log_variance
241
-
242
- @torch.no_grad()
243
- def p_sample(self, x, t, clip_denoised=True, repeat_noise=False):
244
- b, *_, device = *x.shape, x.device
245
- model_mean, _, model_log_variance = self.p_mean_variance(x=x, t=t, clip_denoised=clip_denoised)
246
- noise = noise_like(x.shape, device, repeat_noise)
247
- # no noise when t == 0
248
- nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1)))
249
- return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise
250
-
251
- @torch.no_grad()
252
- def p_sample_loop(self, shape, return_intermediates=False):
253
- device = self.betas.device
254
- b = shape[0]
255
- img = torch.randn(shape, device=device)
256
- intermediates = [img]
257
- for i in tqdm(reversed(range(0, self.num_timesteps)), desc='Sampling t', total=self.num_timesteps):
258
- img = self.p_sample(img, torch.full((b,), i, device=device, dtype=torch.long),
259
- clip_denoised=self.clip_denoised)
260
- if i % self.log_every_t == 0 or i == self.num_timesteps - 1:
261
- intermediates.append(img)
262
- if return_intermediates:
263
- return img, intermediates
264
- return img
265
-
266
- @torch.no_grad()
267
- def sample(self, batch_size=16, return_intermediates=False):
268
- image_size = self.image_size
269
- channels = self.channels
270
- return self.p_sample_loop((batch_size, channels, image_size, image_size),
271
- return_intermediates=return_intermediates)
272
-
273
- def q_sample(self, x_start, t, noise=None):
274
- noise = default(noise, lambda: torch.randn_like(x_start))
275
- return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start +
276
- extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise)
277
-
278
- def get_loss(self, pred, target, mean=True):
279
- if self.loss_type == 'l1':
280
- loss = (target - pred).abs()
281
- if mean:
282
- loss = loss.mean()
283
- elif self.loss_type == 'l2':
284
- if mean:
285
- loss = torch.nn.functional.mse_loss(target, pred)
286
- else:
287
- loss = torch.nn.functional.mse_loss(target, pred, reduction='none')
288
- else:
289
- raise NotImplementedError("unknown loss type '{loss_type}'")
290
-
291
- return loss
292
-
293
- def p_losses(self, x_start, t, noise=None):
294
- noise = default(noise, lambda: torch.randn_like(x_start))
295
- x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise)
296
- model_out = self.model(x_noisy, t)
297
-
298
- loss_dict = {}
299
- if self.parameterization == "eps":
300
- target = noise
301
- elif self.parameterization == "x0":
302
- target = x_start
303
- else:
304
- raise NotImplementedError(f"Paramterization {self.parameterization} not yet supported")
305
-
306
- loss = self.get_loss(model_out, target, mean=False).mean(dim=[1, 2, 3])
307
-
308
- log_prefix = 'train' if self.training else 'val'
309
-
310
- loss_dict.update({f'{log_prefix}/loss_simple': loss.mean()})
311
- loss_simple = loss.mean() * self.l_simple_weight
312
-
313
- loss_vlb = (self.lvlb_weights[t] * loss).mean()
314
- loss_dict.update({f'{log_prefix}/loss_vlb': loss_vlb})
315
-
316
- loss = loss_simple + self.original_elbo_weight * loss_vlb
317
-
318
- loss_dict.update({f'{log_prefix}/loss': loss})
319
-
320
- return loss, loss_dict
321
-
322
- def forward(self, x, *args, **kwargs):
323
- # b, c, h, w, device, img_size, = *x.shape, x.device, self.image_size
324
- # assert h == img_size and w == img_size, f'height and width of image must be {img_size}'
325
- t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=self.device).long()
326
- return self.p_losses(x, t, *args, **kwargs)
327
-
328
- def get_input(self, batch, k):
329
- x = batch[k]
330
- if len(x.shape) == 3:
331
- x = x[..., None]
332
- x = rearrange(x, 'b h w c -> b c h w')
333
- x = x.to(memory_format=torch.contiguous_format).float()
334
- return x
335
-
336
- def shared_step(self, batch):
337
- x = self.get_input(batch, self.first_stage_key)
338
- loss, loss_dict = self(x)
339
- return loss, loss_dict
340
-
341
- def training_step(self, batch, batch_idx):
342
- loss, loss_dict = self.shared_step(batch)
343
-
344
- self.log_dict(loss_dict, prog_bar=True,
345
- logger=True, on_step=True, on_epoch=True)
346
-
347
- self.log("global_step", self.global_step,
348
- prog_bar=True, logger=True, on_step=True, on_epoch=False)
349
-
350
- if self.use_scheduler:
351
- lr = self.optimizers().param_groups[0]['lr']
352
- self.log('lr_abs', lr, prog_bar=True, logger=True, on_step=True, on_epoch=False)
353
-
354
- return loss
355
-
356
- @torch.no_grad()
357
- def validation_step(self, batch, batch_idx):
358
- _, loss_dict_no_ema = self.shared_step(batch)
359
- with self.ema_scope():
360
- _, loss_dict_ema = self.shared_step(batch)
361
- loss_dict_ema = {key + '_ema': loss_dict_ema[key] for key in loss_dict_ema}
362
- self.log_dict(loss_dict_no_ema, prog_bar=False, logger=True, on_step=False, on_epoch=True)
363
- self.log_dict(loss_dict_ema, prog_bar=False, logger=True, on_step=False, on_epoch=True)
364
-
365
- def on_train_batch_end(self, *args, **kwargs):
366
- if self.use_ema:
367
- self.model_ema(self.model)
368
-
369
- def _get_rows_from_list(self, samples):
370
- n_imgs_per_row = len(samples)
371
- denoise_grid = rearrange(samples, 'n b c h w -> b n c h w')
372
- denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w')
373
- denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row)
374
- return denoise_grid
375
-
376
- @torch.no_grad()
377
- def log_images(self, batch, N=8, n_row=2, sample=True, return_keys=None, **kwargs):
378
- log = {}
379
- x = self.get_input(batch, self.first_stage_key)
380
- N = min(x.shape[0], N)
381
- n_row = min(x.shape[0], n_row)
382
- x = x.to(self.device)[:N]
383
- log["inputs"] = x
384
-
385
- # get diffusion row
386
- diffusion_row = []
387
- x_start = x[:n_row]
388
-
389
- for t in range(self.num_timesteps):
390
- if t % self.log_every_t == 0 or t == self.num_timesteps - 1:
391
- t = repeat(torch.tensor([t]), '1 -> b', b=n_row)
392
- t = t.to(self.device).long()
393
- noise = torch.randn_like(x_start)
394
- x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise)
395
- diffusion_row.append(x_noisy)
396
-
397
- log["diffusion_row"] = self._get_rows_from_list(diffusion_row)
398
-
399
- if sample:
400
- # get denoise row
401
- with self.ema_scope("Plotting"):
402
- samples, denoise_row = self.sample(batch_size=N, return_intermediates=True)
403
-
404
- log["samples"] = samples
405
- log["denoise_row"] = self._get_rows_from_list(denoise_row)
406
-
407
- if return_keys:
408
- if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0:
409
- return log
410
- else:
411
- return {key: log[key] for key in return_keys}
412
- return log
413
-
414
- def configure_optimizers(self):
415
- lr = self.learning_rate
416
- params = list(self.model.parameters())
417
- if self.learn_logvar:
418
- params = params + [self.logvar]
419
- opt = torch.optim.AdamW(params, lr=lr)
420
- return opt
421
-
422
-
423
- class LatentDiffusionV1(DDPMV1):
424
- """main class"""
425
- def __init__(self,
426
- first_stage_config,
427
- cond_stage_config,
428
- num_timesteps_cond=None,
429
- cond_stage_key="image",
430
- cond_stage_trainable=False,
431
- concat_mode=True,
432
- cond_stage_forward=None,
433
- conditioning_key=None,
434
- scale_factor=1.0,
435
- scale_by_std=False,
436
- *args, **kwargs):
437
- self.num_timesteps_cond = default(num_timesteps_cond, 1)
438
- self.scale_by_std = scale_by_std
439
- assert self.num_timesteps_cond <= kwargs['timesteps']
440
- # for backwards compatibility after implementation of DiffusionWrapper
441
- if conditioning_key is None:
442
- conditioning_key = 'concat' if concat_mode else 'crossattn'
443
- if cond_stage_config == '__is_unconditional__':
444
- conditioning_key = None
445
- ckpt_path = kwargs.pop("ckpt_path", None)
446
- ignore_keys = kwargs.pop("ignore_keys", [])
447
- super().__init__(*args, conditioning_key=conditioning_key, **kwargs)
448
- self.concat_mode = concat_mode
449
- self.cond_stage_trainable = cond_stage_trainable
450
- self.cond_stage_key = cond_stage_key
451
- try:
452
- self.num_downs = len(first_stage_config.params.ddconfig.ch_mult) - 1
453
- except Exception:
454
- self.num_downs = 0
455
- if not scale_by_std:
456
- self.scale_factor = scale_factor
457
- else:
458
- self.register_buffer('scale_factor', torch.tensor(scale_factor))
459
- self.instantiate_first_stage(first_stage_config)
460
- self.instantiate_cond_stage(cond_stage_config)
461
- self.cond_stage_forward = cond_stage_forward
462
- self.clip_denoised = False
463
- self.bbox_tokenizer = None
464
-
465
- self.restarted_from_ckpt = False
466
- if ckpt_path is not None:
467
- self.init_from_ckpt(ckpt_path, ignore_keys)
468
- self.restarted_from_ckpt = True
469
-
470
- def make_cond_schedule(self, ):
471
- self.cond_ids = torch.full(size=(self.num_timesteps,), fill_value=self.num_timesteps - 1, dtype=torch.long)
472
- ids = torch.round(torch.linspace(0, self.num_timesteps - 1, self.num_timesteps_cond)).long()
473
- self.cond_ids[:self.num_timesteps_cond] = ids
474
-
475
- @rank_zero_only
476
- @torch.no_grad()
477
- def on_train_batch_start(self, batch, batch_idx, dataloader_idx):
478
- # only for very first batch
479
- if self.scale_by_std and self.current_epoch == 0 and self.global_step == 0 and batch_idx == 0 and not self.restarted_from_ckpt:
480
- assert self.scale_factor == 1., 'rather not use custom rescaling and std-rescaling simultaneously'
481
- # set rescale weight to 1./std of encodings
482
- print("### USING STD-RESCALING ###")
483
- x = super().get_input(batch, self.first_stage_key)
484
- x = x.to(self.device)
485
- encoder_posterior = self.encode_first_stage(x)
486
- z = self.get_first_stage_encoding(encoder_posterior).detach()
487
- del self.scale_factor
488
- self.register_buffer('scale_factor', 1. / z.flatten().std())
489
- print(f"setting self.scale_factor to {self.scale_factor}")
490
- print("### USING STD-RESCALING ###")
491
-
492
- def register_schedule(self,
493
- given_betas=None, beta_schedule="linear", timesteps=1000,
494
- linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
495
- super().register_schedule(given_betas, beta_schedule, timesteps, linear_start, linear_end, cosine_s)
496
-
497
- self.shorten_cond_schedule = self.num_timesteps_cond > 1
498
- if self.shorten_cond_schedule:
499
- self.make_cond_schedule()
500
-
501
- def instantiate_first_stage(self, config):
502
- model = instantiate_from_config(config)
503
- self.first_stage_model = model.eval()
504
- self.first_stage_model.train = disabled_train
505
- for param in self.first_stage_model.parameters():
506
- param.requires_grad = False
507
-
508
- def instantiate_cond_stage(self, config):
509
- if not self.cond_stage_trainable:
510
- if config == "__is_first_stage__":
511
- print("Using first stage also as cond stage.")
512
- self.cond_stage_model = self.first_stage_model
513
- elif config == "__is_unconditional__":
514
- print(f"Training {self.__class__.__name__} as an unconditional model.")
515
- self.cond_stage_model = None
516
- # self.be_unconditional = True
517
- else:
518
- model = instantiate_from_config(config)
519
- self.cond_stage_model = model.eval()
520
- self.cond_stage_model.train = disabled_train
521
- for param in self.cond_stage_model.parameters():
522
- param.requires_grad = False
523
- else:
524
- assert config != '__is_first_stage__'
525
- assert config != '__is_unconditional__'
526
- model = instantiate_from_config(config)
527
- self.cond_stage_model = model
528
-
529
- def _get_denoise_row_from_list(self, samples, desc='', force_no_decoder_quantization=False):
530
- denoise_row = []
531
- for zd in tqdm(samples, desc=desc):
532
- denoise_row.append(self.decode_first_stage(zd.to(self.device),
533
- force_not_quantize=force_no_decoder_quantization))
534
- n_imgs_per_row = len(denoise_row)
535
- denoise_row = torch.stack(denoise_row) # n_log_step, n_row, C, H, W
536
- denoise_grid = rearrange(denoise_row, 'n b c h w -> b n c h w')
537
- denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w')
538
- denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row)
539
- return denoise_grid
540
-
541
- def get_first_stage_encoding(self, encoder_posterior):
542
- if isinstance(encoder_posterior, DiagonalGaussianDistribution):
543
- z = encoder_posterior.sample()
544
- elif isinstance(encoder_posterior, torch.Tensor):
545
- z = encoder_posterior
546
- else:
547
- raise NotImplementedError(f"encoder_posterior of type '{type(encoder_posterior)}' not yet implemented")
548
- return self.scale_factor * z
549
-
550
- def get_learned_conditioning(self, c):
551
- if self.cond_stage_forward is None:
552
- if hasattr(self.cond_stage_model, 'encode') and callable(self.cond_stage_model.encode):
553
- c = self.cond_stage_model.encode(c)
554
- if isinstance(c, DiagonalGaussianDistribution):
555
- c = c.mode()
556
- else:
557
- c = self.cond_stage_model(c)
558
- else:
559
- assert hasattr(self.cond_stage_model, self.cond_stage_forward)
560
- c = getattr(self.cond_stage_model, self.cond_stage_forward)(c)
561
- return c
562
-
563
- def meshgrid(self, h, w):
564
- y = torch.arange(0, h).view(h, 1, 1).repeat(1, w, 1)
565
- x = torch.arange(0, w).view(1, w, 1).repeat(h, 1, 1)
566
-
567
- arr = torch.cat([y, x], dim=-1)
568
- return arr
569
-
570
- def delta_border(self, h, w):
571
- """
572
- :param h: height
573
- :param w: width
574
- :return: normalized distance to image border,
575
- with min distance = 0 at border and max dist = 0.5 at image center
576
- """
577
- lower_right_corner = torch.tensor([h - 1, w - 1]).view(1, 1, 2)
578
- arr = self.meshgrid(h, w) / lower_right_corner
579
- dist_left_up = torch.min(arr, dim=-1, keepdims=True)[0]
580
- dist_right_down = torch.min(1 - arr, dim=-1, keepdims=True)[0]
581
- edge_dist = torch.min(torch.cat([dist_left_up, dist_right_down], dim=-1), dim=-1)[0]
582
- return edge_dist
583
-
584
- def get_weighting(self, h, w, Ly, Lx, device):
585
- weighting = self.delta_border(h, w)
586
- weighting = torch.clip(weighting, self.split_input_params["clip_min_weight"],
587
- self.split_input_params["clip_max_weight"], )
588
- weighting = weighting.view(1, h * w, 1).repeat(1, 1, Ly * Lx).to(device)
589
-
590
- if self.split_input_params["tie_braker"]:
591
- L_weighting = self.delta_border(Ly, Lx)
592
- L_weighting = torch.clip(L_weighting,
593
- self.split_input_params["clip_min_tie_weight"],
594
- self.split_input_params["clip_max_tie_weight"])
595
-
596
- L_weighting = L_weighting.view(1, 1, Ly * Lx).to(device)
597
- weighting = weighting * L_weighting
598
- return weighting
599
-
600
- def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo load once not every time, shorten code
601
- """
602
- :param x: img of size (bs, c, h, w)
603
- :return: n img crops of size (n, bs, c, kernel_size[0], kernel_size[1])
604
- """
605
- bs, nc, h, w = x.shape
606
-
607
- # number of crops in image
608
- Ly = (h - kernel_size[0]) // stride[0] + 1
609
- Lx = (w - kernel_size[1]) // stride[1] + 1
610
-
611
- if uf == 1 and df == 1:
612
- fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride)
613
- unfold = torch.nn.Unfold(**fold_params)
614
-
615
- fold = torch.nn.Fold(output_size=x.shape[2:], **fold_params)
616
-
617
- weighting = self.get_weighting(kernel_size[0], kernel_size[1], Ly, Lx, x.device).to(x.dtype)
618
- normalization = fold(weighting).view(1, 1, h, w) # normalizes the overlap
619
- weighting = weighting.view((1, 1, kernel_size[0], kernel_size[1], Ly * Lx))
620
-
621
- elif uf > 1 and df == 1:
622
- fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride)
623
- unfold = torch.nn.Unfold(**fold_params)
624
-
625
- fold_params2 = dict(kernel_size=(kernel_size[0] * uf, kernel_size[0] * uf),
626
- dilation=1, padding=0,
627
- stride=(stride[0] * uf, stride[1] * uf))
628
- fold = torch.nn.Fold(output_size=(x.shape[2] * uf, x.shape[3] * uf), **fold_params2)
629
-
630
- weighting = self.get_weighting(kernel_size[0] * uf, kernel_size[1] * uf, Ly, Lx, x.device).to(x.dtype)
631
- normalization = fold(weighting).view(1, 1, h * uf, w * uf) # normalizes the overlap
632
- weighting = weighting.view((1, 1, kernel_size[0] * uf, kernel_size[1] * uf, Ly * Lx))
633
-
634
- elif df > 1 and uf == 1:
635
- fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride)
636
- unfold = torch.nn.Unfold(**fold_params)
637
-
638
- fold_params2 = dict(kernel_size=(kernel_size[0] // df, kernel_size[0] // df),
639
- dilation=1, padding=0,
640
- stride=(stride[0] // df, stride[1] // df))
641
- fold = torch.nn.Fold(output_size=(x.shape[2] // df, x.shape[3] // df), **fold_params2)
642
-
643
- weighting = self.get_weighting(kernel_size[0] // df, kernel_size[1] // df, Ly, Lx, x.device).to(x.dtype)
644
- normalization = fold(weighting).view(1, 1, h // df, w // df) # normalizes the overlap
645
- weighting = weighting.view((1, 1, kernel_size[0] // df, kernel_size[1] // df, Ly * Lx))
646
-
647
- else:
648
- raise NotImplementedError
649
-
650
- return fold, unfold, normalization, weighting
651
-
652
- @torch.no_grad()
653
- def get_input(self, batch, k, return_first_stage_outputs=False, force_c_encode=False,
654
- cond_key=None, return_original_cond=False, bs=None):
655
- x = super().get_input(batch, k)
656
- if bs is not None:
657
- x = x[:bs]
658
- x = x.to(self.device)
659
- encoder_posterior = self.encode_first_stage(x)
660
- z = self.get_first_stage_encoding(encoder_posterior).detach()
661
-
662
- if self.model.conditioning_key is not None:
663
- if cond_key is None:
664
- cond_key = self.cond_stage_key
665
- if cond_key != self.first_stage_key:
666
- if cond_key in ['caption', 'coordinates_bbox']:
667
- xc = batch[cond_key]
668
- elif cond_key == 'class_label':
669
- xc = batch
670
- else:
671
- xc = super().get_input(batch, cond_key).to(self.device)
672
- else:
673
- xc = x
674
- if not self.cond_stage_trainable or force_c_encode:
675
- if isinstance(xc, dict) or isinstance(xc, list):
676
- # import pudb; pudb.set_trace()
677
- c = self.get_learned_conditioning(xc)
678
- else:
679
- c = self.get_learned_conditioning(xc.to(self.device))
680
- else:
681
- c = xc
682
- if bs is not None:
683
- c = c[:bs]
684
-
685
- if self.use_positional_encodings:
686
- pos_x, pos_y = self.compute_latent_shifts(batch)
687
- ckey = __conditioning_keys__[self.model.conditioning_key]
688
- c = {ckey: c, 'pos_x': pos_x, 'pos_y': pos_y}
689
-
690
- else:
691
- c = None
692
- xc = None
693
- if self.use_positional_encodings:
694
- pos_x, pos_y = self.compute_latent_shifts(batch)
695
- c = {'pos_x': pos_x, 'pos_y': pos_y}
696
- out = [z, c]
697
- if return_first_stage_outputs:
698
- xrec = self.decode_first_stage(z)
699
- out.extend([x, xrec])
700
- if return_original_cond:
701
- out.append(xc)
702
- return out
703
-
704
- @torch.no_grad()
705
- def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False):
706
- if predict_cids:
707
- if z.dim() == 4:
708
- z = torch.argmax(z.exp(), dim=1).long()
709
- z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None)
710
- z = rearrange(z, 'b h w c -> b c h w').contiguous()
711
-
712
- z = 1. / self.scale_factor * z
713
-
714
- if hasattr(self, "split_input_params"):
715
- if self.split_input_params["patch_distributed_vq"]:
716
- ks = self.split_input_params["ks"] # eg. (128, 128)
717
- stride = self.split_input_params["stride"] # eg. (64, 64)
718
- uf = self.split_input_params["vqf"]
719
- bs, nc, h, w = z.shape
720
- if ks[0] > h or ks[1] > w:
721
- ks = (min(ks[0], h), min(ks[1], w))
722
- print("reducing Kernel")
723
-
724
- if stride[0] > h or stride[1] > w:
725
- stride = (min(stride[0], h), min(stride[1], w))
726
- print("reducing stride")
727
-
728
- fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf)
729
-
730
- z = unfold(z) # (bn, nc * prod(**ks), L)
731
- # 1. Reshape to img shape
732
- z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L )
733
-
734
- # 2. apply model loop over last dim
735
- if isinstance(self.first_stage_model, VQModelInterface):
736
- output_list = [self.first_stage_model.decode(z[:, :, :, :, i],
737
- force_not_quantize=predict_cids or force_not_quantize)
738
- for i in range(z.shape[-1])]
739
- else:
740
-
741
- output_list = [self.first_stage_model.decode(z[:, :, :, :, i])
742
- for i in range(z.shape[-1])]
743
-
744
- o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L)
745
- o = o * weighting
746
- # Reverse 1. reshape to img shape
747
- o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L)
748
- # stitch crops together
749
- decoded = fold(o)
750
- decoded = decoded / normalization # norm is shape (1, 1, h, w)
751
- return decoded
752
- else:
753
- if isinstance(self.first_stage_model, VQModelInterface):
754
- return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize)
755
- else:
756
- return self.first_stage_model.decode(z)
757
-
758
- else:
759
- if isinstance(self.first_stage_model, VQModelInterface):
760
- return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize)
761
- else:
762
- return self.first_stage_model.decode(z)
763
-
764
- # same as above but without decorator
765
- def differentiable_decode_first_stage(self, z, predict_cids=False, force_not_quantize=False):
766
- if predict_cids:
767
- if z.dim() == 4:
768
- z = torch.argmax(z.exp(), dim=1).long()
769
- z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None)
770
- z = rearrange(z, 'b h w c -> b c h w').contiguous()
771
-
772
- z = 1. / self.scale_factor * z
773
-
774
- if hasattr(self, "split_input_params"):
775
- if self.split_input_params["patch_distributed_vq"]:
776
- ks = self.split_input_params["ks"] # eg. (128, 128)
777
- stride = self.split_input_params["stride"] # eg. (64, 64)
778
- uf = self.split_input_params["vqf"]
779
- bs, nc, h, w = z.shape
780
- if ks[0] > h or ks[1] > w:
781
- ks = (min(ks[0], h), min(ks[1], w))
782
- print("reducing Kernel")
783
-
784
- if stride[0] > h or stride[1] > w:
785
- stride = (min(stride[0], h), min(stride[1], w))
786
- print("reducing stride")
787
-
788
- fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf)
789
-
790
- z = unfold(z) # (bn, nc * prod(**ks), L)
791
- # 1. Reshape to img shape
792
- z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L )
793
-
794
- # 2. apply model loop over last dim
795
- if isinstance(self.first_stage_model, VQModelInterface):
796
- output_list = [self.first_stage_model.decode(z[:, :, :, :, i],
797
- force_not_quantize=predict_cids or force_not_quantize)
798
- for i in range(z.shape[-1])]
799
- else:
800
-
801
- output_list = [self.first_stage_model.decode(z[:, :, :, :, i])
802
- for i in range(z.shape[-1])]
803
-
804
- o = torch.stack(output_list, axis=-1) # # (bn, nc, ks[0], ks[1], L)
805
- o = o * weighting
806
- # Reverse 1. reshape to img shape
807
- o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L)
808
- # stitch crops together
809
- decoded = fold(o)
810
- decoded = decoded / normalization # norm is shape (1, 1, h, w)
811
- return decoded
812
- else:
813
- if isinstance(self.first_stage_model, VQModelInterface):
814
- return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize)
815
- else:
816
- return self.first_stage_model.decode(z)
817
-
818
- else:
819
- if isinstance(self.first_stage_model, VQModelInterface):
820
- return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize)
821
- else:
822
- return self.first_stage_model.decode(z)
823
-
824
- @torch.no_grad()
825
- def encode_first_stage(self, x):
826
- if hasattr(self, "split_input_params"):
827
- if self.split_input_params["patch_distributed_vq"]:
828
- ks = self.split_input_params["ks"] # eg. (128, 128)
829
- stride = self.split_input_params["stride"] # eg. (64, 64)
830
- df = self.split_input_params["vqf"]
831
- self.split_input_params['original_image_size'] = x.shape[-2:]
832
- bs, nc, h, w = x.shape
833
- if ks[0] > h or ks[1] > w:
834
- ks = (min(ks[0], h), min(ks[1], w))
835
- print("reducing Kernel")
836
-
837
- if stride[0] > h or stride[1] > w:
838
- stride = (min(stride[0], h), min(stride[1], w))
839
- print("reducing stride")
840
-
841
- fold, unfold, normalization, weighting = self.get_fold_unfold(x, ks, stride, df=df)
842
- z = unfold(x) # (bn, nc * prod(**ks), L)
843
- # Reshape to img shape
844
- z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L )
845
-
846
- output_list = [self.first_stage_model.encode(z[:, :, :, :, i])
847
- for i in range(z.shape[-1])]
848
-
849
- o = torch.stack(output_list, axis=-1)
850
- o = o * weighting
851
-
852
- # Reverse reshape to img shape
853
- o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L)
854
- # stitch crops together
855
- decoded = fold(o)
856
- decoded = decoded / normalization
857
- return decoded
858
-
859
- else:
860
- return self.first_stage_model.encode(x)
861
- else:
862
- return self.first_stage_model.encode(x)
863
-
864
- def shared_step(self, batch, **kwargs):
865
- x, c = self.get_input(batch, self.first_stage_key)
866
- loss = self(x, c)
867
- return loss
868
-
869
- def forward(self, x, c, *args, **kwargs):
870
- t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=self.device).long()
871
- if self.model.conditioning_key is not None:
872
- assert c is not None
873
- if self.cond_stage_trainable:
874
- c = self.get_learned_conditioning(c)
875
- if self.shorten_cond_schedule: # TODO: drop this option
876
- tc = self.cond_ids[t].to(self.device)
877
- c = self.q_sample(x_start=c, t=tc, noise=torch.randn_like(c.float()))
878
- return self.p_losses(x, c, t, *args, **kwargs)
879
-
880
- def apply_model(self, x_noisy, t, cond, return_ids=False):
881
-
882
- if isinstance(cond, dict):
883
- # hybrid case, cond is expected to be a dict
884
- pass
885
- else:
886
- if not isinstance(cond, list):
887
- cond = [cond]
888
- key = 'c_concat' if self.model.conditioning_key == 'concat' else 'c_crossattn'
889
- cond = {key: cond}
890
-
891
- if hasattr(self, "split_input_params"):
892
- assert len(cond) == 1 # todo can only deal with one conditioning atm
893
- assert not return_ids
894
- ks = self.split_input_params["ks"] # eg. (128, 128)
895
- stride = self.split_input_params["stride"] # eg. (64, 64)
896
-
897
- h, w = x_noisy.shape[-2:]
898
-
899
- fold, unfold, normalization, weighting = self.get_fold_unfold(x_noisy, ks, stride)
900
-
901
- z = unfold(x_noisy) # (bn, nc * prod(**ks), L)
902
- # Reshape to img shape
903
- z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L )
904
- z_list = [z[:, :, :, :, i] for i in range(z.shape[-1])]
905
-
906
- if self.cond_stage_key in ["image", "LR_image", "segmentation",
907
- 'bbox_img'] and self.model.conditioning_key: # todo check for completeness
908
- c_key = next(iter(cond.keys())) # get key
909
- c = next(iter(cond.values())) # get value
910
- assert (len(c) == 1) # todo extend to list with more than one elem
911
- c = c[0] # get element
912
-
913
- c = unfold(c)
914
- c = c.view((c.shape[0], -1, ks[0], ks[1], c.shape[-1])) # (bn, nc, ks[0], ks[1], L )
915
-
916
- cond_list = [{c_key: [c[:, :, :, :, i]]} for i in range(c.shape[-1])]
917
-
918
- elif self.cond_stage_key == 'coordinates_bbox':
919
- assert 'original_image_size' in self.split_input_params, 'BoundingBoxRescaling is missing original_image_size'
920
-
921
- # assuming padding of unfold is always 0 and its dilation is always 1
922
- n_patches_per_row = int((w - ks[0]) / stride[0] + 1)
923
- full_img_h, full_img_w = self.split_input_params['original_image_size']
924
- # as we are operating on latents, we need the factor from the original image size to the
925
- # spatial latent size to properly rescale the crops for regenerating the bbox annotations
926
- num_downs = self.first_stage_model.encoder.num_resolutions - 1
927
- rescale_latent = 2 ** (num_downs)
928
-
929
- # get top left positions of patches as conforming for the bbox tokenizer, therefore we
930
- # need to rescale the tl patch coordinates to be in between (0,1)
931
- tl_patch_coordinates = [(rescale_latent * stride[0] * (patch_nr % n_patches_per_row) / full_img_w,
932
- rescale_latent * stride[1] * (patch_nr // n_patches_per_row) / full_img_h)
933
- for patch_nr in range(z.shape[-1])]
934
-
935
- # patch_limits are tl_coord, width and height coordinates as (x_tl, y_tl, h, w)
936
- patch_limits = [(x_tl, y_tl,
937
- rescale_latent * ks[0] / full_img_w,
938
- rescale_latent * ks[1] / full_img_h) for x_tl, y_tl in tl_patch_coordinates]
939
- # patch_values = [(np.arange(x_tl,min(x_tl+ks, 1.)),np.arange(y_tl,min(y_tl+ks, 1.))) for x_tl, y_tl in tl_patch_coordinates]
940
-
941
- # tokenize crop coordinates for the bounding boxes of the respective patches
942
- patch_limits_tknzd = [torch.LongTensor(self.bbox_tokenizer._crop_encoder(bbox))[None].to(self.device)
943
- for bbox in patch_limits] # list of length l with tensors of shape (1, 2)
944
- print(patch_limits_tknzd[0].shape)
945
- # cut tknzd crop position from conditioning
946
- assert isinstance(cond, dict), 'cond must be dict to be fed into model'
947
- cut_cond = cond['c_crossattn'][0][..., :-2].to(self.device)
948
- print(cut_cond.shape)
949
-
950
- adapted_cond = torch.stack([torch.cat([cut_cond, p], dim=1) for p in patch_limits_tknzd])
951
- adapted_cond = rearrange(adapted_cond, 'l b n -> (l b) n')
952
- print(adapted_cond.shape)
953
- adapted_cond = self.get_learned_conditioning(adapted_cond)
954
- print(adapted_cond.shape)
955
- adapted_cond = rearrange(adapted_cond, '(l b) n d -> l b n d', l=z.shape[-1])
956
- print(adapted_cond.shape)
957
-
958
- cond_list = [{'c_crossattn': [e]} for e in adapted_cond]
959
-
960
- else:
961
- cond_list = [cond for i in range(z.shape[-1])] # Todo make this more efficient
962
-
963
- # apply model by loop over crops
964
- output_list = [self.model(z_list[i], t, **cond_list[i]) for i in range(z.shape[-1])]
965
- assert not isinstance(output_list[0],
966
- tuple) # todo cant deal with multiple model outputs check this never happens
967
-
968
- o = torch.stack(output_list, axis=-1)
969
- o = o * weighting
970
- # Reverse reshape to img shape
971
- o = o.view((o.shape[0], -1, o.shape[-1])) # (bn, nc * ks[0] * ks[1], L)
972
- # stitch crops together
973
- x_recon = fold(o) / normalization
974
-
975
- else:
976
- x_recon = self.model(x_noisy, t, **cond)
977
-
978
- if isinstance(x_recon, tuple) and not return_ids:
979
- return x_recon[0]
980
- else:
981
- return x_recon
982
-
983
- def _predict_eps_from_xstart(self, x_t, t, pred_xstart):
984
- return (extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart) / \
985
- extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape)
986
-
987
- def _prior_bpd(self, x_start):
988
- """
989
- Get the prior KL term for the variational lower-bound, measured in
990
- bits-per-dim.
991
- This term can't be optimized, as it only depends on the encoder.
992
- :param x_start: the [N x C x ...] tensor of inputs.
993
- :return: a batch of [N] KL values (in bits), one per batch element.
994
- """
995
- batch_size = x_start.shape[0]
996
- t = torch.tensor([self.num_timesteps - 1] * batch_size, device=x_start.device)
997
- qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t)
998
- kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0)
999
- return mean_flat(kl_prior) / np.log(2.0)
1000
-
1001
- def p_losses(self, x_start, cond, t, noise=None):
1002
- noise = default(noise, lambda: torch.randn_like(x_start))
1003
- x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise)
1004
- model_output = self.apply_model(x_noisy, t, cond)
1005
-
1006
- loss_dict = {}
1007
- prefix = 'train' if self.training else 'val'
1008
-
1009
- if self.parameterization == "x0":
1010
- target = x_start
1011
- elif self.parameterization == "eps":
1012
- target = noise
1013
- else:
1014
- raise NotImplementedError()
1015
-
1016
- loss_simple = self.get_loss(model_output, target, mean=False).mean([1, 2, 3])
1017
- loss_dict.update({f'{prefix}/loss_simple': loss_simple.mean()})
1018
-
1019
- logvar_t = self.logvar[t].to(self.device)
1020
- loss = loss_simple / torch.exp(logvar_t) + logvar_t
1021
- # loss = loss_simple / torch.exp(self.logvar) + self.logvar
1022
- if self.learn_logvar:
1023
- loss_dict.update({f'{prefix}/loss_gamma': loss.mean()})
1024
- loss_dict.update({'logvar': self.logvar.data.mean()})
1025
-
1026
- loss = self.l_simple_weight * loss.mean()
1027
-
1028
- loss_vlb = self.get_loss(model_output, target, mean=False).mean(dim=(1, 2, 3))
1029
- loss_vlb = (self.lvlb_weights[t] * loss_vlb).mean()
1030
- loss_dict.update({f'{prefix}/loss_vlb': loss_vlb})
1031
- loss += (self.original_elbo_weight * loss_vlb)
1032
- loss_dict.update({f'{prefix}/loss': loss})
1033
-
1034
- return loss, loss_dict
1035
-
1036
- def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codebook_ids=False, quantize_denoised=False,
1037
- return_x0=False, score_corrector=None, corrector_kwargs=None):
1038
- t_in = t
1039
- model_out = self.apply_model(x, t_in, c, return_ids=return_codebook_ids)
1040
-
1041
- if score_corrector is not None:
1042
- assert self.parameterization == "eps"
1043
- model_out = score_corrector.modify_score(self, model_out, x, t, c, **corrector_kwargs)
1044
-
1045
- if return_codebook_ids:
1046
- model_out, logits = model_out
1047
-
1048
- if self.parameterization == "eps":
1049
- x_recon = self.predict_start_from_noise(x, t=t, noise=model_out)
1050
- elif self.parameterization == "x0":
1051
- x_recon = model_out
1052
- else:
1053
- raise NotImplementedError()
1054
-
1055
- if clip_denoised:
1056
- x_recon.clamp_(-1., 1.)
1057
- if quantize_denoised:
1058
- x_recon, _, [_, _, indices] = self.first_stage_model.quantize(x_recon)
1059
- model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t)
1060
- if return_codebook_ids:
1061
- return model_mean, posterior_variance, posterior_log_variance, logits
1062
- elif return_x0:
1063
- return model_mean, posterior_variance, posterior_log_variance, x_recon
1064
- else:
1065
- return model_mean, posterior_variance, posterior_log_variance
1066
-
1067
- @torch.no_grad()
1068
- def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False,
1069
- return_codebook_ids=False, quantize_denoised=False, return_x0=False,
1070
- temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None):
1071
- b, *_, device = *x.shape, x.device
1072
- outputs = self.p_mean_variance(x=x, c=c, t=t, clip_denoised=clip_denoised,
1073
- return_codebook_ids=return_codebook_ids,
1074
- quantize_denoised=quantize_denoised,
1075
- return_x0=return_x0,
1076
- score_corrector=score_corrector, corrector_kwargs=corrector_kwargs)
1077
- if return_codebook_ids:
1078
- raise DeprecationWarning("Support dropped.")
1079
- model_mean, _, model_log_variance, logits = outputs
1080
- elif return_x0:
1081
- model_mean, _, model_log_variance, x0 = outputs
1082
- else:
1083
- model_mean, _, model_log_variance = outputs
1084
-
1085
- noise = noise_like(x.shape, device, repeat_noise) * temperature
1086
- if noise_dropout > 0.:
1087
- noise = torch.nn.functional.dropout(noise, p=noise_dropout)
1088
- # no noise when t == 0
1089
- nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1)))
1090
-
1091
- if return_codebook_ids:
1092
- return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, logits.argmax(dim=1)
1093
- if return_x0:
1094
- return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, x0
1095
- else:
1096
- return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise
1097
-
1098
- @torch.no_grad()
1099
- def progressive_denoising(self, cond, shape, verbose=True, callback=None, quantize_denoised=False,
1100
- img_callback=None, mask=None, x0=None, temperature=1., noise_dropout=0.,
1101
- score_corrector=None, corrector_kwargs=None, batch_size=None, x_T=None, start_T=None,
1102
- log_every_t=None):
1103
- if not log_every_t:
1104
- log_every_t = self.log_every_t
1105
- timesteps = self.num_timesteps
1106
- if batch_size is not None:
1107
- b = batch_size if batch_size is not None else shape[0]
1108
- shape = [batch_size] + list(shape)
1109
- else:
1110
- b = batch_size = shape[0]
1111
- if x_T is None:
1112
- img = torch.randn(shape, device=self.device)
1113
- else:
1114
- img = x_T
1115
- intermediates = []
1116
- if cond is not None:
1117
- if isinstance(cond, dict):
1118
- cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else
1119
- [x[:batch_size] for x in cond[key]] for key in cond}
1120
- else:
1121
- cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size]
1122
-
1123
- if start_T is not None:
1124
- timesteps = min(timesteps, start_T)
1125
- iterator = tqdm(reversed(range(0, timesteps)), desc='Progressive Generation',
1126
- total=timesteps) if verbose else reversed(
1127
- range(0, timesteps))
1128
- if type(temperature) == float:
1129
- temperature = [temperature] * timesteps
1130
-
1131
- for i in iterator:
1132
- ts = torch.full((b,), i, device=self.device, dtype=torch.long)
1133
- if self.shorten_cond_schedule:
1134
- assert self.model.conditioning_key != 'hybrid'
1135
- tc = self.cond_ids[ts].to(cond.device)
1136
- cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond))
1137
-
1138
- img, x0_partial = self.p_sample(img, cond, ts,
1139
- clip_denoised=self.clip_denoised,
1140
- quantize_denoised=quantize_denoised, return_x0=True,
1141
- temperature=temperature[i], noise_dropout=noise_dropout,
1142
- score_corrector=score_corrector, corrector_kwargs=corrector_kwargs)
1143
- if mask is not None:
1144
- assert x0 is not None
1145
- img_orig = self.q_sample(x0, ts)
1146
- img = img_orig * mask + (1. - mask) * img
1147
-
1148
- if i % log_every_t == 0 or i == timesteps - 1:
1149
- intermediates.append(x0_partial)
1150
- if callback:
1151
- callback(i)
1152
- if img_callback:
1153
- img_callback(img, i)
1154
- return img, intermediates
1155
-
1156
- @torch.no_grad()
1157
- def p_sample_loop(self, cond, shape, return_intermediates=False,
1158
- x_T=None, verbose=True, callback=None, timesteps=None, quantize_denoised=False,
1159
- mask=None, x0=None, img_callback=None, start_T=None,
1160
- log_every_t=None):
1161
-
1162
- if not log_every_t:
1163
- log_every_t = self.log_every_t
1164
- device = self.betas.device
1165
- b = shape[0]
1166
- if x_T is None:
1167
- img = torch.randn(shape, device=device)
1168
- else:
1169
- img = x_T
1170
-
1171
- intermediates = [img]
1172
- if timesteps is None:
1173
- timesteps = self.num_timesteps
1174
-
1175
- if start_T is not None:
1176
- timesteps = min(timesteps, start_T)
1177
- iterator = tqdm(reversed(range(0, timesteps)), desc='Sampling t', total=timesteps) if verbose else reversed(
1178
- range(0, timesteps))
1179
-
1180
- if mask is not None:
1181
- assert x0 is not None
1182
- assert x0.shape[2:3] == mask.shape[2:3] # spatial size has to match
1183
-
1184
- for i in iterator:
1185
- ts = torch.full((b,), i, device=device, dtype=torch.long)
1186
- if self.shorten_cond_schedule:
1187
- assert self.model.conditioning_key != 'hybrid'
1188
- tc = self.cond_ids[ts].to(cond.device)
1189
- cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond))
1190
-
1191
- img = self.p_sample(img, cond, ts,
1192
- clip_denoised=self.clip_denoised,
1193
- quantize_denoised=quantize_denoised)
1194
- if mask is not None:
1195
- img_orig = self.q_sample(x0, ts)
1196
- img = img_orig * mask + (1. - mask) * img
1197
-
1198
- if i % log_every_t == 0 or i == timesteps - 1:
1199
- intermediates.append(img)
1200
- if callback:
1201
- callback(i)
1202
- if img_callback:
1203
- img_callback(img, i)
1204
-
1205
- if return_intermediates:
1206
- return img, intermediates
1207
- return img
1208
-
1209
- @torch.no_grad()
1210
- def sample(self, cond, batch_size=16, return_intermediates=False, x_T=None,
1211
- verbose=True, timesteps=None, quantize_denoised=False,
1212
- mask=None, x0=None, shape=None,**kwargs):
1213
- if shape is None:
1214
- shape = (batch_size, self.channels, self.image_size, self.image_size)
1215
- if cond is not None:
1216
- if isinstance(cond, dict):
1217
- cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else
1218
- [x[:batch_size] for x in cond[key]] for key in cond}
1219
- else:
1220
- cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size]
1221
- return self.p_sample_loop(cond,
1222
- shape,
1223
- return_intermediates=return_intermediates, x_T=x_T,
1224
- verbose=verbose, timesteps=timesteps, quantize_denoised=quantize_denoised,
1225
- mask=mask, x0=x0)
1226
-
1227
- @torch.no_grad()
1228
- def sample_log(self,cond,batch_size,ddim, ddim_steps,**kwargs):
1229
-
1230
- if ddim:
1231
- ddim_sampler = DDIMSampler(self)
1232
- shape = (self.channels, self.image_size, self.image_size)
1233
- samples, intermediates =ddim_sampler.sample(ddim_steps,batch_size,
1234
- shape,cond,verbose=False,**kwargs)
1235
-
1236
- else:
1237
- samples, intermediates = self.sample(cond=cond, batch_size=batch_size,
1238
- return_intermediates=True,**kwargs)
1239
-
1240
- return samples, intermediates
1241
-
1242
-
1243
- @torch.no_grad()
1244
- def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta=1., return_keys=None,
1245
- quantize_denoised=True, inpaint=True, plot_denoise_rows=False, plot_progressive_rows=True,
1246
- plot_diffusion_rows=True, **kwargs):
1247
-
1248
- use_ddim = ddim_steps is not None
1249
-
1250
- log = {}
1251
- z, c, x, xrec, xc = self.get_input(batch, self.first_stage_key,
1252
- return_first_stage_outputs=True,
1253
- force_c_encode=True,
1254
- return_original_cond=True,
1255
- bs=N)
1256
- N = min(x.shape[0], N)
1257
- n_row = min(x.shape[0], n_row)
1258
- log["inputs"] = x
1259
- log["reconstruction"] = xrec
1260
- if self.model.conditioning_key is not None:
1261
- if hasattr(self.cond_stage_model, "decode"):
1262
- xc = self.cond_stage_model.decode(c)
1263
- log["conditioning"] = xc
1264
- elif self.cond_stage_key in ["caption"]:
1265
- xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["caption"])
1266
- log["conditioning"] = xc
1267
- elif self.cond_stage_key == 'class_label':
1268
- xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["human_label"])
1269
- log['conditioning'] = xc
1270
- elif isimage(xc):
1271
- log["conditioning"] = xc
1272
- if ismap(xc):
1273
- log["original_conditioning"] = self.to_rgb(xc)
1274
-
1275
- if plot_diffusion_rows:
1276
- # get diffusion row
1277
- diffusion_row = []
1278
- z_start = z[:n_row]
1279
- for t in range(self.num_timesteps):
1280
- if t % self.log_every_t == 0 or t == self.num_timesteps - 1:
1281
- t = repeat(torch.tensor([t]), '1 -> b', b=n_row)
1282
- t = t.to(self.device).long()
1283
- noise = torch.randn_like(z_start)
1284
- z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise)
1285
- diffusion_row.append(self.decode_first_stage(z_noisy))
1286
-
1287
- diffusion_row = torch.stack(diffusion_row) # n_log_step, n_row, C, H, W
1288
- diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w')
1289
- diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w')
1290
- diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0])
1291
- log["diffusion_row"] = diffusion_grid
1292
-
1293
- if sample:
1294
- # get denoise row
1295
- with self.ema_scope("Plotting"):
1296
- samples, z_denoise_row = self.sample_log(cond=c,batch_size=N,ddim=use_ddim,
1297
- ddim_steps=ddim_steps,eta=ddim_eta)
1298
- # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True)
1299
- x_samples = self.decode_first_stage(samples)
1300
- log["samples"] = x_samples
1301
- if plot_denoise_rows:
1302
- denoise_grid = self._get_denoise_row_from_list(z_denoise_row)
1303
- log["denoise_row"] = denoise_grid
1304
-
1305
- if quantize_denoised and not isinstance(self.first_stage_model, AutoencoderKL) and not isinstance(
1306
- self.first_stage_model, IdentityFirstStage):
1307
- # also display when quantizing x0 while sampling
1308
- with self.ema_scope("Plotting Quantized Denoised"):
1309
- samples, z_denoise_row = self.sample_log(cond=c,batch_size=N,ddim=use_ddim,
1310
- ddim_steps=ddim_steps,eta=ddim_eta,
1311
- quantize_denoised=True)
1312
- # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True,
1313
- # quantize_denoised=True)
1314
- x_samples = self.decode_first_stage(samples.to(self.device))
1315
- log["samples_x0_quantized"] = x_samples
1316
-
1317
- if inpaint:
1318
- # make a simple center square
1319
- h, w = z.shape[2], z.shape[3]
1320
- mask = torch.ones(N, h, w).to(self.device)
1321
- # zeros will be filled in
1322
- mask[:, h // 4:3 * h // 4, w // 4:3 * w // 4] = 0.
1323
- mask = mask[:, None, ...]
1324
- with self.ema_scope("Plotting Inpaint"):
1325
-
1326
- samples, _ = self.sample_log(cond=c,batch_size=N,ddim=use_ddim, eta=ddim_eta,
1327
- ddim_steps=ddim_steps, x0=z[:N], mask=mask)
1328
- x_samples = self.decode_first_stage(samples.to(self.device))
1329
- log["samples_inpainting"] = x_samples
1330
- log["mask"] = mask
1331
-
1332
- # outpaint
1333
- with self.ema_scope("Plotting Outpaint"):
1334
- samples, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim,eta=ddim_eta,
1335
- ddim_steps=ddim_steps, x0=z[:N], mask=mask)
1336
- x_samples = self.decode_first_stage(samples.to(self.device))
1337
- log["samples_outpainting"] = x_samples
1338
-
1339
- if plot_progressive_rows:
1340
- with self.ema_scope("Plotting Progressives"):
1341
- img, progressives = self.progressive_denoising(c,
1342
- shape=(self.channels, self.image_size, self.image_size),
1343
- batch_size=N)
1344
- prog_row = self._get_denoise_row_from_list(progressives, desc="Progressive Generation")
1345
- log["progressive_row"] = prog_row
1346
-
1347
- if return_keys:
1348
- if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0:
1349
- return log
1350
- else:
1351
- return {key: log[key] for key in return_keys}
1352
- return log
1353
-
1354
- def configure_optimizers(self):
1355
- lr = self.learning_rate
1356
- params = list(self.model.parameters())
1357
- if self.cond_stage_trainable:
1358
- print(f"{self.__class__.__name__}: Also optimizing conditioner params!")
1359
- params = params + list(self.cond_stage_model.parameters())
1360
- if self.learn_logvar:
1361
- print('Diffusion model optimizing logvar')
1362
- params.append(self.logvar)
1363
- opt = torch.optim.AdamW(params, lr=lr)
1364
- if self.use_scheduler:
1365
- assert 'target' in self.scheduler_config
1366
- scheduler = instantiate_from_config(self.scheduler_config)
1367
-
1368
- print("Setting up LambdaLR scheduler...")
1369
- scheduler = [
1370
- {
1371
- 'scheduler': LambdaLR(opt, lr_lambda=scheduler.schedule),
1372
- 'interval': 'step',
1373
- 'frequency': 1
1374
- }]
1375
- return [opt], scheduler
1376
- return opt
1377
-
1378
- @torch.no_grad()
1379
- def to_rgb(self, x):
1380
- x = x.float()
1381
- if not hasattr(self, "colorize"):
1382
- self.colorize = torch.randn(3, x.shape[1], 1, 1).to(x)
1383
- x = nn.functional.conv2d(x, weight=self.colorize)
1384
- x = 2. * (x - x.min()) / (x.max() - x.min()) - 1.
1385
- return x
1386
-
1387
-
1388
- class DiffusionWrapperV1(pl.LightningModule):
1389
- def __init__(self, diff_model_config, conditioning_key):
1390
- super().__init__()
1391
- self.diffusion_model = instantiate_from_config(diff_model_config)
1392
- self.conditioning_key = conditioning_key
1393
- assert self.conditioning_key in [None, 'concat', 'crossattn', 'hybrid', 'adm']
1394
-
1395
- def forward(self, x, t, c_concat: list = None, c_crossattn: list = None):
1396
- if self.conditioning_key is None:
1397
- out = self.diffusion_model(x, t)
1398
- elif self.conditioning_key == 'concat':
1399
- xc = torch.cat([x] + c_concat, dim=1)
1400
- out = self.diffusion_model(xc, t)
1401
- elif self.conditioning_key == 'crossattn':
1402
- cc = torch.cat(c_crossattn, 1)
1403
- out = self.diffusion_model(x, t, context=cc)
1404
- elif self.conditioning_key == 'hybrid':
1405
- xc = torch.cat([x] + c_concat, dim=1)
1406
- cc = torch.cat(c_crossattn, 1)
1407
- out = self.diffusion_model(xc, t, context=cc)
1408
- elif self.conditioning_key == 'adm':
1409
- cc = c_crossattn[0]
1410
- out = self.diffusion_model(x, t, y=cc)
1411
- else:
1412
- raise NotImplementedError()
1413
-
1414
- return out
1415
-
1416
-
1417
- class Layout2ImgDiffusionV1(LatentDiffusionV1):
1418
- # TODO: move all layout-specific hacks to this class
1419
- def __init__(self, cond_stage_key, *args, **kwargs):
1420
- assert cond_stage_key == 'coordinates_bbox', 'Layout2ImgDiffusion only for cond_stage_key="coordinates_bbox"'
1421
- super().__init__(*args, cond_stage_key=cond_stage_key, **kwargs)
1422
-
1423
- def log_images(self, batch, N=8, *args, **kwargs):
1424
- logs = super().log_images(*args, batch=batch, N=N, **kwargs)
1425
-
1426
- key = 'train' if self.training else 'validation'
1427
- dset = self.trainer.datamodule.datasets[key]
1428
- mapper = dset.conditional_builders[self.cond_stage_key]
1429
-
1430
- bbox_imgs = []
1431
- map_fn = lambda catno: dset.get_textual_label(dset.get_category_id(catno))
1432
- for tknzd_bbox in batch[self.cond_stage_key][:N]:
1433
- bboximg = mapper.plot(tknzd_bbox.detach().cpu(), map_fn, (256, 256))
1434
- bbox_imgs.append(bboximg)
1435
-
1436
- cond_img = torch.stack(bbox_imgs, dim=0)
1437
- logs['bbox_image'] = cond_img
1438
- return logs
1439
-
1440
- ldm.models.diffusion.ddpm.DDPMV1 = DDPMV1
1441
- ldm.models.diffusion.ddpm.LatentDiffusionV1 = LatentDiffusionV1
1442
- ldm.models.diffusion.ddpm.DiffusionWrapperV1 = DiffusionWrapperV1
1443
- ldm.models.diffusion.ddpm.Layout2ImgDiffusionV1 = Layout2ImgDiffusionV1
extensions-builtin/LDSR/vqvae_quantize.py DELETED
@@ -1,147 +0,0 @@
1
- # Vendored from https://raw.githubusercontent.com/CompVis/taming-transformers/24268930bf1dce879235a7fddd0b2355b84d7ea6/taming/modules/vqvae/quantize.py,
2
- # where the license is as follows:
3
- #
4
- # Copyright (c) 2020 Patrick Esser and Robin Rombach and Björn Ommer
5
- #
6
- # Permission is hereby granted, free of charge, to any person obtaining a copy
7
- # of this software and associated documentation files (the "Software"), to deal
8
- # in the Software without restriction, including without limitation the rights
9
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
- # copies of the Software, and to permit persons to whom the Software is
11
- # furnished to do so, subject to the following conditions:
12
- #
13
- # The above copyright notice and this permission notice shall be included in all
14
- # copies or substantial portions of the Software.
15
- #
16
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
- # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
20
- # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21
- # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
22
- # OR OTHER DEALINGS IN THE SOFTWARE./
23
-
24
- import torch
25
- import torch.nn as nn
26
- import numpy as np
27
- from einops import rearrange
28
-
29
-
30
- class VectorQuantizer2(nn.Module):
31
- """
32
- Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly
33
- avoids costly matrix multiplications and allows for post-hoc remapping of indices.
34
- """
35
-
36
- # NOTE: due to a bug the beta term was applied to the wrong term. for
37
- # backwards compatibility we use the buggy version by default, but you can
38
- # specify legacy=False to fix it.
39
- def __init__(self, n_e, e_dim, beta, remap=None, unknown_index="random",
40
- sane_index_shape=False, legacy=True):
41
- super().__init__()
42
- self.n_e = n_e
43
- self.e_dim = e_dim
44
- self.beta = beta
45
- self.legacy = legacy
46
-
47
- self.embedding = nn.Embedding(self.n_e, self.e_dim)
48
- self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e)
49
-
50
- self.remap = remap
51
- if self.remap is not None:
52
- self.register_buffer("used", torch.tensor(np.load(self.remap)))
53
- self.re_embed = self.used.shape[0]
54
- self.unknown_index = unknown_index # "random" or "extra" or integer
55
- if self.unknown_index == "extra":
56
- self.unknown_index = self.re_embed
57
- self.re_embed = self.re_embed + 1
58
- print(f"Remapping {self.n_e} indices to {self.re_embed} indices. "
59
- f"Using {self.unknown_index} for unknown indices.")
60
- else:
61
- self.re_embed = n_e
62
-
63
- self.sane_index_shape = sane_index_shape
64
-
65
- def remap_to_used(self, inds):
66
- ishape = inds.shape
67
- assert len(ishape) > 1
68
- inds = inds.reshape(ishape[0], -1)
69
- used = self.used.to(inds)
70
- match = (inds[:, :, None] == used[None, None, ...]).long()
71
- new = match.argmax(-1)
72
- unknown = match.sum(2) < 1
73
- if self.unknown_index == "random":
74
- new[unknown] = torch.randint(0, self.re_embed, size=new[unknown].shape).to(device=new.device)
75
- else:
76
- new[unknown] = self.unknown_index
77
- return new.reshape(ishape)
78
-
79
- def unmap_to_all(self, inds):
80
- ishape = inds.shape
81
- assert len(ishape) > 1
82
- inds = inds.reshape(ishape[0], -1)
83
- used = self.used.to(inds)
84
- if self.re_embed > self.used.shape[0]: # extra token
85
- inds[inds >= self.used.shape[0]] = 0 # simply set to zero
86
- back = torch.gather(used[None, :][inds.shape[0] * [0], :], 1, inds)
87
- return back.reshape(ishape)
88
-
89
- def forward(self, z, temp=None, rescale_logits=False, return_logits=False):
90
- assert temp is None or temp == 1.0, "Only for interface compatible with Gumbel"
91
- assert rescale_logits is False, "Only for interface compatible with Gumbel"
92
- assert return_logits is False, "Only for interface compatible with Gumbel"
93
- # reshape z -> (batch, height, width, channel) and flatten
94
- z = rearrange(z, 'b c h w -> b h w c').contiguous()
95
- z_flattened = z.view(-1, self.e_dim)
96
- # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z
97
-
98
- d = torch.sum(z_flattened ** 2, dim=1, keepdim=True) + \
99
- torch.sum(self.embedding.weight ** 2, dim=1) - 2 * \
100
- torch.einsum('bd,dn->bn', z_flattened, rearrange(self.embedding.weight, 'n d -> d n'))
101
-
102
- min_encoding_indices = torch.argmin(d, dim=1)
103
- z_q = self.embedding(min_encoding_indices).view(z.shape)
104
- perplexity = None
105
- min_encodings = None
106
-
107
- # compute loss for embedding
108
- if not self.legacy:
109
- loss = self.beta * torch.mean((z_q.detach() - z) ** 2) + \
110
- torch.mean((z_q - z.detach()) ** 2)
111
- else:
112
- loss = torch.mean((z_q.detach() - z) ** 2) + self.beta * \
113
- torch.mean((z_q - z.detach()) ** 2)
114
-
115
- # preserve gradients
116
- z_q = z + (z_q - z).detach()
117
-
118
- # reshape back to match original input shape
119
- z_q = rearrange(z_q, 'b h w c -> b c h w').contiguous()
120
-
121
- if self.remap is not None:
122
- min_encoding_indices = min_encoding_indices.reshape(z.shape[0], -1) # add batch axis
123
- min_encoding_indices = self.remap_to_used(min_encoding_indices)
124
- min_encoding_indices = min_encoding_indices.reshape(-1, 1) # flatten
125
-
126
- if self.sane_index_shape:
127
- min_encoding_indices = min_encoding_indices.reshape(
128
- z_q.shape[0], z_q.shape[2], z_q.shape[3])
129
-
130
- return z_q, loss, (perplexity, min_encodings, min_encoding_indices)
131
-
132
- def get_codebook_entry(self, indices, shape):
133
- # shape specifying (batch, height, width, channel)
134
- if self.remap is not None:
135
- indices = indices.reshape(shape[0], -1) # add batch axis
136
- indices = self.unmap_to_all(indices)
137
- indices = indices.reshape(-1) # flatten again
138
-
139
- # get quantized latent vectors
140
- z_q = self.embedding(indices)
141
-
142
- if shape is not None:
143
- z_q = z_q.view(shape)
144
- # reshape back to match original input shape
145
- z_q = z_q.permute(0, 3, 1, 2).contiguous()
146
-
147
- return z_q
 
extensions-builtin/Lora/__pycache__/extra_networks_lora.cpython-310.pyc DELETED
Binary file (2.38 kB)
 
extensions-builtin/Lora/__pycache__/lora.cpython-310.pyc DELETED
Binary file (524 Bytes)
 
extensions-builtin/Lora/__pycache__/lyco_helpers.cpython-310.pyc DELETED
Binary file (923 Bytes)
 
extensions-builtin/Lora/__pycache__/network.cpython-310.pyc DELETED
Binary file (5.63 kB)
 
extensions-builtin/Lora/__pycache__/network_full.cpython-310.pyc DELETED
Binary file (1.48 kB)
 
extensions-builtin/Lora/__pycache__/network_hada.cpython-310.pyc DELETED
Binary file (2.21 kB)
 
extensions-builtin/Lora/__pycache__/network_ia3.cpython-310.pyc DELETED
Binary file (1.6 kB)
 
extensions-builtin/Lora/__pycache__/network_lokr.cpython-310.pyc DELETED
Binary file (2.41 kB)
 
extensions-builtin/Lora/__pycache__/network_lora.cpython-310.pyc DELETED
Binary file (3.48 kB)
 
extensions-builtin/Lora/__pycache__/networks.cpython-310.pyc DELETED
Binary file (12.7 kB)
 
extensions-builtin/Lora/__pycache__/preload.cpython-310.pyc DELETED
Binary file (664 Bytes)
 
extensions-builtin/Lora/__pycache__/ui_edit_user_metadata.cpython-310.pyc DELETED
Binary file (7.51 kB)
 
extensions-builtin/Lora/__pycache__/ui_extra_networks_lora.cpython-310.pyc DELETED
Binary file (3.05 kB)
 
extensions-builtin/Lora/extra_networks_lora.py DELETED
@@ -1,59 +0,0 @@
- from modules import extra_networks, shared
- import networks
-
-
- class ExtraNetworkLora(extra_networks.ExtraNetwork):
-     def __init__(self):
-         super().__init__('lora')
-
-     def activate(self, p, params_list):
-         additional = shared.opts.sd_lora
-
-         if additional != "None" and additional in networks.available_networks and not any(x for x in params_list if x.items[0] == additional):
-             p.all_prompts = [x + f"<lora:{additional}:{shared.opts.extra_networks_default_multiplier}>" for x in p.all_prompts]
-             params_list.append(extra_networks.ExtraNetworkParams(items=[additional, shared.opts.extra_networks_default_multiplier]))
-
-         names = []
-         te_multipliers = []
-         unet_multipliers = []
-         dyn_dims = []
-         for params in params_list:
-             assert params.items
-
-             names.append(params.positional[0])
-
-             te_multiplier = float(params.positional[1]) if len(params.positional) > 1 else 1.0
-             te_multiplier = float(params.named.get("te", te_multiplier))
-
-             unet_multiplier = float(params.positional[2]) if len(params.positional) > 2 else te_multiplier
-             unet_multiplier = float(params.named.get("unet", unet_multiplier))
-
-             dyn_dim = int(params.positional[3]) if len(params.positional) > 3 else None
-             dyn_dim = int(params.named["dyn"]) if "dyn" in params.named else dyn_dim
-
-             te_multipliers.append(te_multiplier)
-             unet_multipliers.append(unet_multiplier)
-             dyn_dims.append(dyn_dim)
-
-         networks.load_networks(names, te_multipliers, unet_multipliers, dyn_dims)
-
-         if shared.opts.lora_add_hashes_to_infotext:
-             network_hashes = []
-             for item in networks.loaded_networks:
-                 shorthash = item.network_on_disk.shorthash
-                 if not shorthash:
-                     continue
-
-                 alias = item.mentioned_name
-                 if not alias:
-                     continue
-
-                 alias = alias.replace(":", "").replace(",", "")
-
-                 network_hashes.append(f"{alias}: {shorthash}")
-
-             if network_hashes:
-                 p.extra_generation_params["Lora hashes"] = ", ".join(network_hashes)
-
-     def deactivate(self, p):
-         pass
 
extensions-builtin/Lora/lora.py DELETED
@@ -1,9 +0,0 @@
- import networks
-
- list_available_loras = networks.list_available_networks
-
- available_loras = networks.available_networks
- available_lora_aliases = networks.available_network_aliases
- available_lora_hash_lookup = networks.available_network_hash_lookup
- forbidden_lora_aliases = networks.forbidden_network_aliases
- loaded_loras = networks.loaded_networks
 
extensions-builtin/Lora/lyco_helpers.py DELETED
@@ -1,21 +0,0 @@
- import torch
-
-
- def make_weight_cp(t, wa, wb):
-     temp = torch.einsum('i j k l, j r -> i r k l', t, wb)
-     return torch.einsum('i j k l, i r -> r j k l', temp, wa)
-
-
- def rebuild_conventional(up, down, shape, dyn_dim=None):
-     up = up.reshape(up.size(0), -1)
-     down = down.reshape(down.size(0), -1)
-     if dyn_dim is not None:
-         up = up[:, :dyn_dim]
-         down = down[:dyn_dim, :]
-     return (up @ down).reshape(shape)
-
-
- def rebuild_cp_decomposition(up, down, mid):
-     up = up.reshape(up.size(0), -1)
-     down = down.reshape(down.size(0), -1)
-     return torch.einsum('n m k l, i n, m j -> i j k l', mid, up, down)
 
extensions-builtin/Lora/network.py DELETED
@@ -1,155 +0,0 @@
- from __future__ import annotations
- import os
- from collections import namedtuple
- import enum
-
- from modules import sd_models, cache, errors, hashes, shared
-
- NetworkWeights = namedtuple('NetworkWeights', ['network_key', 'sd_key', 'w', 'sd_module'])
-
- metadata_tags_order = {"ss_sd_model_name": 1, "ss_resolution": 2, "ss_clip_skip": 3, "ss_num_train_images": 10, "ss_tag_frequency": 20}
-
-
- class SdVersion(enum.Enum):
-     Unknown = 1
-     SD1 = 2
-     SD2 = 3
-     SDXL = 4
-
-
- class NetworkOnDisk:
-     def __init__(self, name, filename):
-         self.name = name
-         self.filename = filename
-         self.metadata = {}
-         self.is_safetensors = os.path.splitext(filename)[1].lower() == ".safetensors"
-
-         def read_metadata():
-             metadata = sd_models.read_metadata_from_safetensors(filename)
-             metadata.pop('ssmd_cover_images', None)  # those are cover images, and they are too big to display in UI as text
-
-             return metadata
-
-         if self.is_safetensors:
-             try:
-                 self.metadata = cache.cached_data_for_file('safetensors-metadata', "lora/" + self.name, filename, read_metadata)
-             except Exception as e:
-                 errors.display(e, f"reading lora {filename}")
-
-         if self.metadata:
-             m = {}
-             for k, v in sorted(self.metadata.items(), key=lambda x: metadata_tags_order.get(x[0], 999)):
-                 m[k] = v
-
-             self.metadata = m
-
-         self.alias = self.metadata.get('ss_output_name', self.name)
-
-         self.hash = None
-         self.shorthash = None
-         self.set_hash(
-             self.metadata.get('sshs_model_hash') or
-             hashes.sha256_from_cache(self.filename, "lora/" + self.name, use_addnet_hash=self.is_safetensors) or
-             ''
-         )
-
-         self.sd_version = self.detect_version()
-
-     def detect_version(self):
-         if str(self.metadata.get('ss_base_model_version', "")).startswith("sdxl_"):
-             return SdVersion.SDXL
-         elif str(self.metadata.get('ss_v2', "")) == "True":
-             return SdVersion.SD2
-         elif len(self.metadata):
-             return SdVersion.SD1
-
-         return SdVersion.Unknown
-
-     def set_hash(self, v):
-         self.hash = v
-         self.shorthash = self.hash[0:12]
-
-         if self.shorthash:
-             import networks
-             networks.available_network_hash_lookup[self.shorthash] = self
-
-     def read_hash(self):
-         if not self.hash:
-             self.set_hash(hashes.sha256(self.filename, "lora/" + self.name, use_addnet_hash=self.is_safetensors) or '')
-
-     def get_alias(self):
-         import networks
-         if shared.opts.lora_preferred_name == "Filename" or self.alias.lower() in networks.forbidden_network_aliases:
-             return self.name
-         else:
-             return self.alias
-
-
- class Network:  # LoraModule
-     def __init__(self, name, network_on_disk: NetworkOnDisk):
-         self.name = name
-         self.network_on_disk = network_on_disk
-         self.te_multiplier = 1.0
-         self.unet_multiplier = 1.0
-         self.dyn_dim = None
-         self.modules = {}
-         self.mtime = None
-
-         self.mentioned_name = None
-         """the text that was used to add the network to prompt - can be either name or an alias"""
-
-
- class ModuleType:
-     def create_module(self, net: Network, weights: NetworkWeights) -> Network | None:
-         return None
-
-
- class NetworkModule:
-     def __init__(self, net: Network, weights: NetworkWeights):
-         self.network = net
-         self.network_key = weights.network_key
-         self.sd_key = weights.sd_key
-         self.sd_module = weights.sd_module
-
-         if hasattr(self.sd_module, 'weight'):
-             self.shape = self.sd_module.weight.shape
-
-         self.dim = None
-         self.bias = weights.w.get("bias")
-         self.alpha = weights.w["alpha"].item() if "alpha" in weights.w else None
-         self.scale = weights.w["scale"].item() if "scale" in weights.w else None
-
-     def multiplier(self):
-         if 'transformer' in self.sd_key[:20]:
-             return self.network.te_multiplier
-         else:
-             return self.network.unet_multiplier
-
-     def calc_scale(self):
-         if self.scale is not None:
-             return self.scale
-         if self.dim is not None and self.alpha is not None:
-             return self.alpha / self.dim
-
-         return 1.0
-
-     def finalize_updown(self, updown, orig_weight, output_shape):
-         if self.bias is not None:
-             updown = updown.reshape(self.bias.shape)
-             updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype)
-             updown = updown.reshape(output_shape)
-
-         if len(output_shape) == 4:
-             updown = updown.reshape(output_shape)
-
-         if orig_weight.size().numel() == updown.size().numel():
-             updown = updown.reshape(orig_weight.shape)
-
-         return updown * self.calc_scale() * self.multiplier()
-
-     def calc_updown(self, target):
-         raise NotImplementedError()
-
-     def forward(self, x, y):
-         raise NotImplementedError()
-
 
extensions-builtin/Lora/network_full.py DELETED
@@ -1,22 +0,0 @@
- import network
-
-
- class ModuleTypeFull(network.ModuleType):
-     def create_module(self, net: network.Network, weights: network.NetworkWeights):
-         if all(x in weights.w for x in ["diff"]):
-             return NetworkModuleFull(net, weights)
-
-         return None
-
-
- class NetworkModuleFull(network.NetworkModule):
-     def __init__(self, net: network.Network, weights: network.NetworkWeights):
-         super().__init__(net, weights)
-
-         self.weight = weights.w.get("diff")
-
-     def calc_updown(self, orig_weight):
-         output_shape = self.weight.shape
-         updown = self.weight.to(orig_weight.device, dtype=orig_weight.dtype)
-
-         return self.finalize_updown(updown, orig_weight, output_shape)
 
extensions-builtin/Lora/network_hada.py DELETED
@@ -1,55 +0,0 @@
- import lyco_helpers
- import network
-
-
- class ModuleTypeHada(network.ModuleType):
-     def create_module(self, net: network.Network, weights: network.NetworkWeights):
-         if all(x in weights.w for x in ["hada_w1_a", "hada_w1_b", "hada_w2_a", "hada_w2_b"]):
-             return NetworkModuleHada(net, weights)
-
-         return None
-
-
- class NetworkModuleHada(network.NetworkModule):
-     def __init__(self, net: network.Network, weights: network.NetworkWeights):
-         super().__init__(net, weights)
-
-         if hasattr(self.sd_module, 'weight'):
-             self.shape = self.sd_module.weight.shape
-
-         self.w1a = weights.w["hada_w1_a"]
-         self.w1b = weights.w["hada_w1_b"]
-         self.dim = self.w1b.shape[0]
-         self.w2a = weights.w["hada_w2_a"]
-         self.w2b = weights.w["hada_w2_b"]
-
-         self.t1 = weights.w.get("hada_t1")
-         self.t2 = weights.w.get("hada_t2")
-
-     def calc_updown(self, orig_weight):
-         w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype)
-         w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype)
-         w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
-         w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
-
-         output_shape = [w1a.size(0), w1b.size(1)]
-
-         if self.t1 is not None:
-             output_shape = [w1a.size(1), w1b.size(1)]
-             t1 = self.t1.to(orig_weight.device, dtype=orig_weight.dtype)
-             updown1 = lyco_helpers.make_weight_cp(t1, w1a, w1b)
-             output_shape += t1.shape[2:]
-         else:
-             if len(w1b.shape) == 4:
-                 output_shape += w1b.shape[2:]
-             updown1 = lyco_helpers.rebuild_conventional(w1a, w1b, output_shape)
-
-         if self.t2 is not None:
-             t2 = self.t2.to(orig_weight.device, dtype=orig_weight.dtype)
-             updown2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)
-         else:
-             updown2 = lyco_helpers.rebuild_conventional(w2a, w2b, output_shape)
-
-         updown = updown1 * updown2
-
-         return self.finalize_updown(updown, orig_weight, output_shape)
 
extensions-builtin/Lora/network_ia3.py DELETED
@@ -1,30 +0,0 @@
- import network
-
-
- class ModuleTypeIa3(network.ModuleType):
-     def create_module(self, net: network.Network, weights: network.NetworkWeights):
-         if all(x in weights.w for x in ["weight"]):
-             return NetworkModuleIa3(net, weights)
-
-         return None
-
-
- class NetworkModuleIa3(network.NetworkModule):
-     def __init__(self, net: network.Network, weights: network.NetworkWeights):
-         super().__init__(net, weights)
-
-         self.w = weights.w["weight"]
-         self.on_input = weights.w["on_input"].item()
-
-     def calc_updown(self, orig_weight):
-         w = self.w.to(orig_weight.device, dtype=orig_weight.dtype)
-
-         output_shape = [w.size(0), orig_weight.size(1)]
-         if self.on_input:
-             output_shape.reverse()
-         else:
-             w = w.reshape(-1, 1)
-
-         updown = orig_weight * w
-
-         return self.finalize_updown(updown, orig_weight, output_shape)
 
extensions-builtin/Lora/network_lokr.py DELETED
@@ -1,64 +0,0 @@
- import torch
-
- import lyco_helpers
- import network
-
-
- class ModuleTypeLokr(network.ModuleType):
-     def create_module(self, net: network.Network, weights: network.NetworkWeights):
-         has_1 = "lokr_w1" in weights.w or ("lokr_w1_a" in weights.w and "lokr_w1_b" in weights.w)
-         has_2 = "lokr_w2" in weights.w or ("lokr_w2_a" in weights.w and "lokr_w2_b" in weights.w)
-         if has_1 and has_2:
-             return NetworkModuleLokr(net, weights)
-
-         return None
-
-
- def make_kron(orig_shape, w1, w2):
-     if len(w2.shape) == 4:
-         w1 = w1.unsqueeze(2).unsqueeze(2)
-     w2 = w2.contiguous()
-     return torch.kron(w1, w2).reshape(orig_shape)
-
-
- class NetworkModuleLokr(network.NetworkModule):
-     def __init__(self, net: network.Network, weights: network.NetworkWeights):
-         super().__init__(net, weights)
-
-         self.w1 = weights.w.get("lokr_w1")
-         self.w1a = weights.w.get("lokr_w1_a")
-         self.w1b = weights.w.get("lokr_w1_b")
-         self.dim = self.w1b.shape[0] if self.w1b is not None else self.dim
-         self.w2 = weights.w.get("lokr_w2")
-         self.w2a = weights.w.get("lokr_w2_a")
-         self.w2b = weights.w.get("lokr_w2_b")
-         self.dim = self.w2b.shape[0] if self.w2b is not None else self.dim
-         self.t2 = weights.w.get("lokr_t2")
-
-     def calc_updown(self, orig_weight):
-         if self.w1 is not None:
-             w1 = self.w1.to(orig_weight.device, dtype=orig_weight.dtype)
-         else:
-             w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype)
-             w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype)
-             w1 = w1a @ w1b
-
-         if self.w2 is not None:
-             w2 = self.w2.to(orig_weight.device, dtype=orig_weight.dtype)
-         elif self.t2 is None:
-             w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
-             w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
-             w2 = w2a @ w2b
-         else:
-             t2 = self.t2.to(orig_weight.device, dtype=orig_weight.dtype)
-             w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
-             w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
-             w2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)
-
-         output_shape = [w1.size(0) * w2.size(0), w1.size(1) * w2.size(1)]
-         if len(orig_weight.shape) == 4:
-             output_shape = orig_weight.shape
-
-         updown = make_kron(output_shape, w1, w2)
-
-         return self.finalize_updown(updown, orig_weight, output_shape)
 
extensions-builtin/Lora/network_lora.py DELETED
@@ -1,86 +0,0 @@
- import torch
-
- import lyco_helpers
- import network
- from modules import devices
-
-
- class ModuleTypeLora(network.ModuleType):
-     def create_module(self, net: network.Network, weights: network.NetworkWeights):
-         if all(x in weights.w for x in ["lora_up.weight", "lora_down.weight"]):
-             return NetworkModuleLora(net, weights)
-
-         return None
-
-
- class NetworkModuleLora(network.NetworkModule):
-     def __init__(self, net: network.Network, weights: network.NetworkWeights):
-         super().__init__(net, weights)
-
-         self.up_model = self.create_module(weights.w, "lora_up.weight")
-         self.down_model = self.create_module(weights.w, "lora_down.weight")
-         self.mid_model = self.create_module(weights.w, "lora_mid.weight", none_ok=True)
-
-         self.dim = weights.w["lora_down.weight"].shape[0]
-
-     def create_module(self, weights, key, none_ok=False):
-         weight = weights.get(key)
-
-         if weight is None and none_ok:
-             return None
-
-         is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, torch.nn.MultiheadAttention]
-         is_conv = type(self.sd_module) in [torch.nn.Conv2d]
-
-         if is_linear:
-             weight = weight.reshape(weight.shape[0], -1)
-             module = torch.nn.Linear(weight.shape[1], weight.shape[0], bias=False)
-         elif is_conv and key == "lora_down.weight" or key == "dyn_up":
-             if len(weight.shape) == 2:
-                 weight = weight.reshape(weight.shape[0], -1, 1, 1)
-
-             if weight.shape[2] != 1 or weight.shape[3] != 1:
-                 module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], self.sd_module.kernel_size, self.sd_module.stride, self.sd_module.padding, bias=False)
-             else:
-                 module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False)
-         elif is_conv and key == "lora_mid.weight":
-             module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], self.sd_module.kernel_size, self.sd_module.stride, self.sd_module.padding, bias=False)
-         elif is_conv and key == "lora_up.weight" or key == "dyn_down":
-             module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False)
-         else:
-             raise AssertionError(f'Lora layer {self.network_key} matched a layer with unsupported type: {type(self.sd_module).__name__}')
-
-         with torch.no_grad():
-             if weight.shape != module.weight.shape:
-                 weight = weight.reshape(module.weight.shape)
-             module.weight.copy_(weight)
-
-         module.to(device=devices.cpu, dtype=devices.dtype)
-         module.weight.requires_grad_(False)
-
-         return module
-
-     def calc_updown(self, orig_weight):
-         up = self.up_model.weight.to(orig_weight.device, dtype=orig_weight.dtype)
-         down = self.down_model.weight.to(orig_weight.device, dtype=orig_weight.dtype)
-
-         output_shape = [up.size(0), down.size(1)]
-         if self.mid_model is not None:
-             # cp-decomposition
-             mid = self.mid_model.weight.to(orig_weight.device, dtype=orig_weight.dtype)
-             updown = lyco_helpers.rebuild_cp_decomposition(up, down, mid)
-             output_shape += mid.shape[2:]
-         else:
-             if len(down.shape) == 4:
-                 output_shape += down.shape[2:]
-             updown = lyco_helpers.rebuild_conventional(up, down, output_shape, self.network.dyn_dim)
-
-         return self.finalize_updown(updown, orig_weight, output_shape)
-
-     def forward(self, x, y):
-         self.up_model.to(device=devices.device)
-         self.down_model.to(device=devices.device)
-
-         return y + self.up_model(self.down_model(x)) * self.multiplier() * self.calc_scale()
-
-
 
extensions-builtin/Lora/networks.py DELETED
@@ -1,468 +0,0 @@
1
- import os
2
- import re
3
-
4
- import network
5
- import network_lora
6
- import network_hada
7
- import network_ia3
8
- import network_lokr
9
- import network_full
10
-
11
- import torch
12
- from typing import Union
13
-
14
- from modules import shared, devices, sd_models, errors, scripts, sd_hijack
15
-
16
- module_types = [
17
- network_lora.ModuleTypeLora(),
18
- network_hada.ModuleTypeHada(),
19
- network_ia3.ModuleTypeIa3(),
20
- network_lokr.ModuleTypeLokr(),
21
- network_full.ModuleTypeFull(),
22
- ]
23
-
24
-
25
- re_digits = re.compile(r"\d+")
26
- re_x_proj = re.compile(r"(.*)_([qkv]_proj)$")
27
- re_compiled = {}
28
-
29
- suffix_conversion = {
30
- "attentions": {},
31
- "resnets": {
32
- "conv1": "in_layers_2",
33
- "conv2": "out_layers_3",
34
- "time_emb_proj": "emb_layers_1",
35
- "conv_shortcut": "skip_connection",
36
- }
37
- }
38
-
39
-
40
- def convert_diffusers_name_to_compvis(key, is_sd2):
41
- def match(match_list, regex_text):
42
- regex = re_compiled.get(regex_text)
43
- if regex is None:
44
- regex = re.compile(regex_text)
45
- re_compiled[regex_text] = regex
46
-
47
- r = re.match(regex, key)
48
- if not r:
49
- return False
50
-
51
- match_list.clear()
52
- match_list.extend([int(x) if re.match(re_digits, x) else x for x in r.groups()])
53
- return True
54
-
55
- m = []
56
-
57
- if match(m, r"lora_unet_conv_in(.*)"):
58
- return f'diffusion_model_input_blocks_0_0{m[0]}'
59
-
60
- if match(m, r"lora_unet_conv_out(.*)"):
61
- return f'diffusion_model_out_2{m[0]}'
62
-
63
- if match(m, r"lora_unet_time_embedding_linear_(\d+)(.*)"):
64
- return f"diffusion_model_time_embed_{m[0] * 2 - 2}{m[1]}"
65
-
66
- if match(m, r"lora_unet_down_blocks_(\d+)_(attentions|resnets)_(\d+)_(.+)"):
67
- suffix = suffix_conversion.get(m[1], {}).get(m[3], m[3])
68
- return f"diffusion_model_input_blocks_{1 + m[0] * 3 + m[2]}_{1 if m[1] == 'attentions' else 0}_{suffix}"
69
-
70
- if match(m, r"lora_unet_mid_block_(attentions|resnets)_(\d+)_(.+)"):
71
- suffix = suffix_conversion.get(m[0], {}).get(m[2], m[2])
72
- return f"diffusion_model_middle_block_{1 if m[0] == 'attentions' else m[1] * 2}_{suffix}"
73
-
74
- if match(m, r"lora_unet_up_blocks_(\d+)_(attentions|resnets)_(\d+)_(.+)"):
75
- suffix = suffix_conversion.get(m[1], {}).get(m[3], m[3])
76
- return f"diffusion_model_output_blocks_{m[0] * 3 + m[2]}_{1 if m[1] == 'attentions' else 0}_{suffix}"
77
-
78
- if match(m, r"lora_unet_down_blocks_(\d+)_downsamplers_0_conv"):
79
- return f"diffusion_model_input_blocks_{3 + m[0] * 3}_0_op"
80
-
81
- if match(m, r"lora_unet_up_blocks_(\d+)_upsamplers_0_conv"):
82
- return f"diffusion_model_output_blocks_{2 + m[0] * 3}_{2 if m[0]>0 else 1}_conv"
83
-
84
- if match(m, r"lora_te_text_model_encoder_layers_(\d+)_(.+)"):
85
- if is_sd2:
86
- if 'mlp_fc1' in m[1]:
87
- return f"model_transformer_resblocks_{m[0]}_{m[1].replace('mlp_fc1', 'mlp_c_fc')}"
88
- elif 'mlp_fc2' in m[1]:
89
- return f"model_transformer_resblocks_{m[0]}_{m[1].replace('mlp_fc2', 'mlp_c_proj')}"
90
- else:
91
- return f"model_transformer_resblocks_{m[0]}_{m[1].replace('self_attn', 'attn')}"
92
-
93
- return f"transformer_text_model_encoder_layers_{m[0]}_{m[1]}"
94
-
95
- if match(m, r"lora_te2_text_model_encoder_layers_(\d+)_(.+)"):
96
- if 'mlp_fc1' in m[1]:
97
- return f"1_model_transformer_resblocks_{m[0]}_{m[1].replace('mlp_fc1', 'mlp_c_fc')}"
98
- elif 'mlp_fc2' in m[1]:
99
- return f"1_model_transformer_resblocks_{m[0]}_{m[1].replace('mlp_fc2', 'mlp_c_proj')}"
100
- else:
101
- return f"1_model_transformer_resblocks_{m[0]}_{m[1].replace('self_attn', 'attn')}"
102
-
103
- return key
104
-
105
-
106
- def assign_network_names_to_compvis_modules(sd_model):
107
- network_layer_mapping = {}
108
-
109
- if shared.sd_model.is_sdxl:
110
- for i, embedder in enumerate(shared.sd_model.conditioner.embedders):
111
- if not hasattr(embedder, 'wrapped'):
112
- continue
113
-
114
- for name, module in embedder.wrapped.named_modules():
115
- network_name = f'{i}_{name.replace(".", "_")}'
116
- network_layer_mapping[network_name] = module
117
- module.network_layer_name = network_name
118
- else:
119
- for name, module in shared.sd_model.cond_stage_model.wrapped.named_modules():
120
- network_name = name.replace(".", "_")
121
- network_layer_mapping[network_name] = module
122
- module.network_layer_name = network_name
123
-
124
- for name, module in shared.sd_model.model.named_modules():
125
- network_name = name.replace(".", "_")
126
- network_layer_mapping[network_name] = module
127
- module.network_layer_name = network_name
128
-
129
- sd_model.network_layer_mapping = network_layer_mapping
130
-
131
-
132
- def load_network(name, network_on_disk):
133
- net = network.Network(name, network_on_disk)
134
- net.mtime = os.path.getmtime(network_on_disk.filename)
135
-
136
- sd = sd_models.read_state_dict(network_on_disk.filename)
137
-
138
- # this should not be needed but is here as an emergency fix for an unknown error people are experiencing in 1.2.0
139
- if not hasattr(shared.sd_model, 'network_layer_mapping'):
140
- assign_network_names_to_compvis_modules(shared.sd_model)
141
-
142
- keys_failed_to_match = {}
143
- is_sd2 = 'model_transformer_resblocks' in shared.sd_model.network_layer_mapping
144
-
145
- matched_networks = {}
146
-
147
- for key_network, weight in sd.items():
148
- key_network_without_network_parts, network_part = key_network.split(".", 1)
149
-
150
- key = convert_diffusers_name_to_compvis(key_network_without_network_parts, is_sd2)
151
- sd_module = shared.sd_model.network_layer_mapping.get(key, None)
152
-
153
- if sd_module is None:
154
- m = re_x_proj.match(key)
155
- if m:
156
- sd_module = shared.sd_model.network_layer_mapping.get(m.group(1), None)
157
-
158
- # SDXL loras seem to already have correct compvis keys, so only need to replace "lora_unet" with "diffusion_model"
159
- if sd_module is None and "lora_unet" in key_network_without_network_parts:
160
- key = key_network_without_network_parts.replace("lora_unet", "diffusion_model")
161
- sd_module = shared.sd_model.network_layer_mapping.get(key, None)
162
- elif sd_module is None and "lora_te1_text_model" in key_network_without_network_parts:
163
- key = key_network_without_network_parts.replace("lora_te1_text_model", "0_transformer_text_model")
164
- sd_module = shared.sd_model.network_layer_mapping.get(key, None)
165
-
166
- # some SD1 Loras also have correct compvis keys
167
- if sd_module is None:
168
- key = key_network_without_network_parts.replace("lora_te1_text_model", "transformer_text_model")
169
- sd_module = shared.sd_model.network_layer_mapping.get(key, None)
170
-
171
- if sd_module is None:
172
- keys_failed_to_match[key_network] = key
173
- continue
174
-
175
- if key not in matched_networks:
176
- matched_networks[key] = network.NetworkWeights(network_key=key_network, sd_key=key, w={}, sd_module=sd_module)
177
-
178
- matched_networks[key].w[network_part] = weight
179
-
180
- for key, weights in matched_networks.items():
181
- net_module = None
182
- for nettype in module_types:
183
- net_module = nettype.create_module(net, weights)
184
- if net_module is not None:
185
- break
186
-
187
- if net_module is None:
188
- raise AssertionError(f"Could not find a module type (out of {', '.join([x.__class__.__name__ for x in module_types])}) that would accept those keys: {', '.join(weights.w)}")
189
-
190
- net.modules[key] = net_module
191
-
192
- if keys_failed_to_match:
193
- print(f"Failed to match keys when loading network {network_on_disk.filename}: {keys_failed_to_match}")
194
-
195
- return net
196
-
197
-
198
- def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
199
- already_loaded = {}
200
-
201
- for net in loaded_networks:
202
- if net.name in names:
203
- already_loaded[net.name] = net
204
-
205
- loaded_networks.clear()
206
-
207
- networks_on_disk = [available_network_aliases.get(name, None) for name in names]
208
- if any(x is None for x in networks_on_disk):
209
- list_available_networks()
210
-
211
- networks_on_disk = [available_network_aliases.get(name, None) for name in names]
212
-
213
- failed_to_load_networks = []
214
-
215
- for i, name in enumerate(names):
216
- net = already_loaded.get(name, None)
217
-
218
- network_on_disk = networks_on_disk[i]
219
-
220
- if network_on_disk is not None:
221
- if net is None or os.path.getmtime(network_on_disk.filename) > net.mtime:
222
- try:
223
- net = load_network(name, network_on_disk)
224
- except Exception as e:
225
- errors.display(e, f"loading network {network_on_disk.filename}")
226
- continue
227
-
228
- net.mentioned_name = name
229
-
230
- network_on_disk.read_hash()
231
-
232
- if net is None:
233
- failed_to_load_networks.append(name)
234
- print(f"Couldn't find network with name {name}")
235
- continue
236
-
237
- net.te_multiplier = te_multipliers[i] if te_multipliers else 1.0
238
- net.unet_multiplier = unet_multipliers[i] if unet_multipliers else 1.0
239
- net.dyn_dim = dyn_dims[i] if dyn_dims else 1.0
240
- loaded_networks.append(net)
241
-
242
- if failed_to_load_networks:
243
- sd_hijack.model_hijack.comments.append("Failed to find networks: " + ", ".join(failed_to_load_networks))
244
-
245
-
246
- def network_restore_weights_from_backup(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.MultiheadAttention]):
247
- weights_backup = getattr(self, "network_weights_backup", None)
248
-
249
- if weights_backup is None:
250
- return
251
-
252
- if isinstance(self, torch.nn.MultiheadAttention):
253
- self.in_proj_weight.copy_(weights_backup[0])
254
- self.out_proj.weight.copy_(weights_backup[1])
255
- else:
256
- self.weight.copy_(weights_backup)
257
-
258
-
259
- def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.MultiheadAttention]):
260
- """
261
- Applies the currently selected set of networks to the weights of torch layer self.
262
- If weights already have this particular set of networks applied, does nothing.
263
- If not, restores orginal weights from backup and alters weights according to networks.
264
- """
265
-
266
- network_layer_name = getattr(self, 'network_layer_name', None)
267
- if network_layer_name is None:
268
- return
269
-
270
- current_names = getattr(self, "network_current_names", ())
271
- wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks)
272
-
273
- weights_backup = getattr(self, "network_weights_backup", None)
274
- if weights_backup is None:
275
- if isinstance(self, torch.nn.MultiheadAttention):
276
- weights_backup = (self.in_proj_weight.to(devices.cpu, copy=True), self.out_proj.weight.to(devices.cpu, copy=True))
277
- else:
278
- weights_backup = self.weight.to(devices.cpu, copy=True)
279
-
280
- self.network_weights_backup = weights_backup
281
-
282
- if current_names != wanted_names:
283
- network_restore_weights_from_backup(self)
284
-
285
- for net in loaded_networks:
286
- module = net.modules.get(network_layer_name, None)
287
- if module is not None and hasattr(self, 'weight'):
288
- with torch.no_grad():
289
- updown = module.calc_updown(self.weight)
290
-
291
- if len(self.weight.shape) == 4 and self.weight.shape[1] == 9:
292
- # inpainting model. zero pad updown to make channel[1] 4 to 9
293
- updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5))
294
-
295
- self.weight += updown
296
- continue
297
-
298
- module_q = net.modules.get(network_layer_name + "_q_proj", None)
299
- module_k = net.modules.get(network_layer_name + "_k_proj", None)
300
- module_v = net.modules.get(network_layer_name + "_v_proj", None)
301
- module_out = net.modules.get(network_layer_name + "_out_proj", None)
302
-
303
- if isinstance(self, torch.nn.MultiheadAttention) and module_q and module_k and module_v and module_out:
304
- with torch.no_grad():
305
- updown_q = module_q.calc_updown(self.in_proj_weight)
306
- updown_k = module_k.calc_updown(self.in_proj_weight)
307
- updown_v = module_v.calc_updown(self.in_proj_weight)
308
- updown_qkv = torch.vstack([updown_q, updown_k, updown_v])
309
- updown_out = module_out.calc_updown(self.out_proj.weight)
310
-
311
- self.in_proj_weight += updown_qkv
312
- self.out_proj.weight += updown_out
313
- continue
314
-
315
- if module is None:
316
- continue
317
-
318
- print(f'failed to calculate network weights for layer {network_layer_name}')
319
-
320
- self.network_current_names = wanted_names
321
-
322
-
323
- def network_forward(module, input, original_forward):
324
- """
325
- Old way of applying Lora by executing operations during layer's forward.
326
- Stacking many loras this way results in big performance degradation.
327
- """
328
-
329
- if len(loaded_networks) == 0:
330
- return original_forward(module, input)
331
-
332
- input = devices.cond_cast_unet(input)
333
-
334
- network_restore_weights_from_backup(module)
335
- network_reset_cached_weight(module)
336
-
337
- y = original_forward(module, input)
338
-
339
- network_layer_name = getattr(module, 'network_layer_name', None)
340
- for lora in loaded_networks:
341
- module = lora.modules.get(network_layer_name, None)
342
- if module is None:
343
- continue
344
-
345
- y = module.forward(y, input)
346
-
347
- return y
348
-
349
-
350
- def network_reset_cached_weight(self: Union[torch.nn.Conv2d, torch.nn.Linear]):
351
- self.network_current_names = ()
352
- self.network_weights_backup = None
353
-
354
-
355
- def network_Linear_forward(self, input):
356
- if shared.opts.lora_functional:
357
- return network_forward(self, input, torch.nn.Linear_forward_before_network)
358
-
359
- network_apply_weights(self)
360
-
361
- return torch.nn.Linear_forward_before_network(self, input)
362
-
363
-
364
- def network_Linear_load_state_dict(self, *args, **kwargs):
365
- network_reset_cached_weight(self)
366
-
367
- return torch.nn.Linear_load_state_dict_before_network(self, *args, **kwargs)
368
-
369
-
370
- def network_Conv2d_forward(self, input):
371
- if shared.opts.lora_functional:
372
- return network_forward(self, input, torch.nn.Conv2d_forward_before_network)
373
-
374
- network_apply_weights(self)
375
-
376
- return torch.nn.Conv2d_forward_before_network(self, input)
377
-
378
-
379
- def network_Conv2d_load_state_dict(self, *args, **kwargs):
380
- network_reset_cached_weight(self)
381
-
382
- return torch.nn.Conv2d_load_state_dict_before_network(self, *args, **kwargs)
383
-
384
-
385
- def network_MultiheadAttention_forward(self, *args, **kwargs):
386
- network_apply_weights(self)
387
-
388
- return torch.nn.MultiheadAttention_forward_before_network(self, *args, **kwargs)
389
-
390
-
391
- def network_MultiheadAttention_load_state_dict(self, *args, **kwargs):
392
- network_reset_cached_weight(self)
393
-
394
- return torch.nn.MultiheadAttention_load_state_dict_before_network(self, *args, **kwargs)
395
-
396
-
397
- def list_available_networks():
398
- available_networks.clear()
399
- available_network_aliases.clear()
400
- forbidden_network_aliases.clear()
401
- available_network_hash_lookup.clear()
402
- forbidden_network_aliases.update({"none": 1, "Addams": 1})
403
-
404
- os.makedirs(shared.cmd_opts.lora_dir, exist_ok=True)
405
-
406
- candidates = list(shared.walk_files(shared.cmd_opts.lora_dir, allowed_extensions=[".pt", ".ckpt", ".safetensors"]))
407
- candidates += list(shared.walk_files(shared.cmd_opts.lyco_dir_backcompat, allowed_extensions=[".pt", ".ckpt", ".safetensors"]))
408
- for filename in candidates:
409
- if os.path.isdir(filename):
410
- continue
411
-
412
- name = os.path.splitext(os.path.basename(filename))[0]
413
- try:
414
- entry = network.NetworkOnDisk(name, filename)
415
- except OSError: # should catch FileNotFoundError and PermissionError etc.
416
- errors.report(f"Failed to load network {name} from {filename}", exc_info=True)
417
- continue
418
-
419
- available_networks[name] = entry
420
-
421
- if entry.alias in available_network_aliases:
422
- forbidden_network_aliases[entry.alias.lower()] = 1
423
-
424
- available_network_aliases[name] = entry
425
- available_network_aliases[entry.alias] = entry
426
-
427
-
428
- re_network_name = re.compile(r"(.*)\s*\([0-9a-fA-F]+\)")
429
-
430
-
431
- def infotext_pasted(infotext, params):
432
- if "AddNet Module 1" in [x[1] for x in scripts.scripts_txt2img.infotext_fields]:
433
- return # if the other extension is active, it will handle those fields, no need to do anything
434
-
435
- added = []
436
-
437
- for k in params:
438
- if not k.startswith("AddNet Model "):
439
- continue
440
-
441
- num = k[13:]
442
-
443
- if params.get("AddNet Module " + num) != "LoRA":
444
- continue
445
-
446
- name = params.get("AddNet Model " + num)
447
- if name is None:
448
- continue
449
-
450
- m = re_network_name.match(name)
451
- if m:
452
- name = m.group(1)
453
-
454
- multiplier = params.get("AddNet Weight A " + num, "1.0")
455
-
456
- added.append(f"<lora:{name}:{multiplier}>")
457
-
458
- if added:
459
- params["Prompt"] += "\n" + "".join(added)
460
-
461
-
462
- available_networks = {}
463
- available_network_aliases = {}
464
- loaded_networks = []
465
- available_network_hash_lookup = {}
466
- forbidden_network_aliases = {}
467
-
468
- list_available_networks()
 
extensions-builtin/Lora/preload.py DELETED
@@ -1,7 +0,0 @@
- import os
- from modules import paths
-
-
- def preload(parser):
-     parser.add_argument("--lora-dir", type=str, help="Path to directory with Lora networks.", default=os.path.join(paths.models_path, 'Lora'))
-     parser.add_argument("--lyco-dir-backcompat", type=str, help="Path to directory with LyCORIS networks (for backawards compatibility; can also use --lyco-dir).", default=os.path.join(paths.models_path, 'LyCORIS'))
 
extensions-builtin/Lora/scripts/__pycache__/lora_script.cpython-310.pyc DELETED
Binary file (5.14 kB)