ddoc committed on
Commit 4c53d64 · 1 Parent(s): a37ad19

Upload 171 files

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50)
  1. .github/ISSUE_TEMPLATE/bug_report.yml +111 -0
  2. .github/ISSUE_TEMPLATE/config.yml +8 -0
  3. .github/ISSUE_TEMPLATE/feature_request.yml +46 -0
  4. .github/pull_request_template.md +5 -0
  5. .github/scripts/issue_checker.py +110 -0
  6. .github/workflows/issue_checker.yaml +23 -0
  7. .gitignore +12 -0
  8. AUTHORS.md +23 -0
  9. CONTRIBUTING.md +9 -0
  10. LICENSE +0 -0
  11. README.md +73 -3
  12. install.py +27 -0
  13. javascript/deforum-hints.js +233 -0
  14. javascript/deforum.js +34 -0
  15. requirements.txt +8 -0
  16. scripts/deforum.py +29 -0
  17. scripts/deforum_extend_paths.py +34 -0
  18. scripts/deforum_helpers/RAFT.py +45 -0
  19. scripts/deforum_helpers/animation.py +430 -0
  20. scripts/deforum_helpers/animation_key_frames.py +150 -0
  21. scripts/deforum_helpers/args.py +341 -0
  22. scripts/deforum_helpers/auto_navigation.py +89 -0
  23. scripts/deforum_helpers/colors.py +39 -0
  24. scripts/deforum_helpers/composable_masks.py +213 -0
  25. scripts/deforum_helpers/consistency_check.py +151 -0
  26. scripts/deforum_helpers/defaults.py +218 -0
  27. scripts/deforum_helpers/deforum_controlnet.py +336 -0
  28. scripts/deforum_helpers/deforum_controlnet_gradio.py +88 -0
  29. scripts/deforum_helpers/deforum_tqdm.py +99 -0
  30. scripts/deforum_helpers/deprecation_utils.py +99 -0
  31. scripts/deforum_helpers/depth.py +160 -0
  32. scripts/deforum_helpers/depth_adabins.py +79 -0
  33. scripts/deforum_helpers/depth_leres.py +72 -0
  34. scripts/deforum_helpers/depth_midas.py +92 -0
  35. scripts/deforum_helpers/depth_zoe.py +47 -0
  36. scripts/deforum_helpers/frame_interpolation.py +239 -0
  37. scripts/deforum_helpers/general_utils.py +145 -0
  38. scripts/deforum_helpers/generate.py +324 -0
  39. scripts/deforum_helpers/gradio_funcs.py +214 -0
  40. scripts/deforum_helpers/human_masking.py +87 -0
  41. scripts/deforum_helpers/hybrid_video.py +611 -0
  42. scripts/deforum_helpers/image_sharpening.py +39 -0
  43. scripts/deforum_helpers/load_images.py +113 -0
  44. scripts/deforum_helpers/masks.py +57 -0
  45. scripts/deforum_helpers/noise.py +89 -0
  46. scripts/deforum_helpers/parseq_adapter.py +210 -0
  47. scripts/deforum_helpers/parseq_adapter_test.py +157 -0
  48. scripts/deforum_helpers/prompt.py +161 -0
  49. scripts/deforum_helpers/render.py +627 -0
  50. scripts/deforum_helpers/render_modes.py +175 -0
.github/ISSUE_TEMPLATE/bug_report.yml ADDED
@@ -0,0 +1,111 @@
1
+ name: Bug Report
2
+ description: Create a bug report for the Deforum extension
3
+ title: "[Bug]: "
4
+ labels: ["bug"]
5
+
6
+ body:
7
+ - type: checkboxes
8
+ attributes:
9
+ label: Have you read the latest version of the FAQ?
10
+ description: Please visit the page called FAQ & Troubleshooting on the Deforum wiki in this repository and see if your problem has already been described there.
11
+ options:
12
+ - label: I have visited the FAQ page right now and my issue is not present there
13
+ required: true
14
+ - type: checkboxes
15
+ attributes:
16
+ label: Is there an existing issue for this?
17
+ description: Please search to see if an issue already exists for the bug you encountered (including the closed issues).
18
+ options:
19
+ - label: I have searched the existing issues and checked the recent builds/commits of both this extension and the webui
20
+ required: true
21
+ - type: checkboxes
22
+ attributes:
23
+ label: Are you using the latest version of the Deforum extension?
24
+ description: Please check whether your Deforum is based on the latest repo commit (git log), or update it through the 'Extensions' tab and check whether the issue still persists. If it does, check this box.
25
+ options:
26
+ - label: I have Deforum updated to the latest version and I still have the issue.
27
+ required: true
28
+ - type: markdown
29
+ attributes:
30
+ value: |
31
+ **Please fill this form with as much information as possible; don't forget to fill in "What OS..." and "What browsers", and provide screenshots if possible.**
32
+ - type: textarea
33
+ id: what-did
34
+ attributes:
35
+ label: What happened?
36
+ description: Tell us what happened in a very clear and simple way
37
+ validations:
38
+ required: true
39
+ - type: textarea
40
+ id: steps
41
+ attributes:
42
+ label: Steps to reproduce the problem
43
+ description: Please provide us with precise step-by-step information on how to reproduce the bug
44
+ value: |
45
+ 1. Go to ....
46
+ 2. Press ....
47
+ 3. ...
48
+ validations:
49
+ required: true
50
+ - type: textarea
51
+ id: what-should
52
+ attributes:
53
+ label: What should have happened?
54
+ description: Tell us what you think the normal behavior should be
55
+ - type: textarea
56
+ id: commits
57
+ attributes:
58
+ label: WebUI and Deforum extension Commit IDs
59
+ description: Which commit of the webui/deforum extension are you running on? (Do not write *Latest version/repo/commit*, as this means nothing and will have changed by the time we read your issue. Rather, copy the **Commit** link at the bottom of the UI, or if you can't launch the webui at all, enter your cmd/terminal, CD into the main webui folder to get the webui commit id, and cd into the extensions/deforum folder to get the deforum commit id, both using the command 'git rev-parse HEAD'.)
60
+ value: |
61
+ webui commit id -
62
+ deforum exten commit id -
63
+ validations:
64
+ required: true
65
+ - type: textarea
66
+ id: what-torch
67
+ attributes:
68
+ label: Torch version
69
+ description: Which Torch version your WebUI is working with
70
+ validations:
71
+ required: true
72
+ - type: dropdown
73
+ id: where
74
+ attributes:
75
+ label: On which platform are you launching the webui with the extension?
76
+ multiple: true
77
+ options:
78
+ - Local PC setup (Windows)
79
+ - Local PC setup (Linux)
80
+ - Local PC setup (Mac)
81
+ - Google Colab (The Last Ben's)
82
+ - Google Colab (Other)
83
+ - Cloud server (Linux)
84
+ - Other (please specify in "additional information")
85
+ - type: textarea
86
+ id: deforumsettings
87
+ attributes:
88
+ label: Deforum settings
89
+ description: Send a link here to the settings file you used, or to the latest generated one in the 'outputs/img2img-images/Deforum/' folder (ideally, upload it to GitHub gists).
90
+ validations:
91
+ required: true
92
+ - type: textarea
93
+ id: customsettings
94
+ attributes:
95
+ label: Webui core settings
96
+ description: Send a link here to your ui-config.json file in the core 'stable-diffusion-webui' folder (ideally, upload it to GitHub gists). Friendly reminder - if you have 'With img2img, do exactly the amount of steps the slider specified' checked, your issue will be discarded immediately. 😉
97
+ validations:
98
+ required: true
99
+ - type: textarea
100
+ id: logs
101
+ attributes:
102
+ label: Console logs
103
+ description: Please provide **FULL cmd/terminal logs FROM THE MOMENT YOU STARTED UI to the end of it**, after your bug happened. If it's very long, provide a link to GitHub gists or similar service.
104
+ render: Shell
105
+ validations:
106
+ required: true
107
+ - type: textarea
108
+ id: misc
109
+ attributes:
110
+ label: Additional information
111
+ description: Please provide us with any relevant additional info or context.
.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,8 @@
1
+ blank_issues_enabled: false
2
+ contact_links:
3
+ - name: Deforum Github discussions
4
+ url: https://github.com/deforum-art/deforum-for-automatic1111-webui/discussions
5
+ about: Please ask and answer questions here. If you want to complain about something, don't try to circumvent issue filling by starting a discussion here 🙃
6
+ - name: Deforum Discord
7
+ url: https://discord.gg/deforum
8
+ about: Here is our main community where we chat, discuss development and share experiments and results
.github/ISSUE_TEMPLATE/feature_request.yml ADDED
@@ -0,0 +1,46 @@
1
+ name: Feature request
2
+ description: Suggest an idea for the Deforum extension
3
+ title: "[Feature Request]: "
4
+ labels: ["enhancement"]
5
+
6
+ body:
7
+ - type: checkboxes
8
+ attributes:
9
+ label: Is there an existing issue for this?
10
+ description: Please search to see if an issue already exists for the feature you want, and check that it's not already implemented in a recent build/commit.
11
+ options:
12
+ - label: I have searched the existing issues and checked the recent builds/commits
13
+ required: true
14
+ - type: markdown
15
+ attributes:
16
+ value: |
17
+ *Please fill this form with as much information as possible, provide screenshots and/or illustrations of the feature if possible*
18
+ - type: textarea
19
+ id: feature
20
+ attributes:
21
+ label: What would your feature do?
22
+ description: Tell us about your feature in a very clear and simple way, and what problem it would solve
23
+ validations:
24
+ required: true
25
+ - type: textarea
26
+ id: workflow
27
+ attributes:
28
+ label: Proposed workflow
29
+ description: Please provide us with step by step information on how you'd like the feature to be accessed and used
30
+ value: |
31
+ 1. Go to ....
32
+ 2. Press ....
33
+ 3. ...
34
+ validations:
35
+ required: true
36
+ - type: textarea
37
+ id: misc
38
+ attributes:
39
+ label: Additional information
40
+ description: Add any other context or screenshots about the feature request here.
41
+ - type: textarea
42
+ attributes:
43
+ label: Are you going to help adding it?
44
+ description: Do you want to participate in Deforum development and bring the desired feature sooner? Let us know if you are willing to add it yourself; ideally, leave your Discord handle here so we can contact you for a less formal conversation. Our community is welcoming and ready to provide you with any information on the project structure or how the code works. If not, keep in mind that you will have to wait until the team picks up your issue.
45
+ validations:
46
+ required: true
.github/pull_request_template.md ADDED
@@ -0,0 +1,5 @@
1
+ ## ⚠ TEMPORARY POLICY ⚠
2
+
3
+ 🚧 As of 2023-05-17, due to planned changes, new pull requests are temporarily disallowed; all pending PRs have been closed 🚧
4
+
5
+ In case of questions, contact us at https://discord.gg/deforum
.github/scripts/issue_checker.py ADDED
@@ -0,0 +1,110 @@
1
+ import os
2
+ import re
3
+ from github import Github
4
+
5
+ # Get GitHub token from environment variables
6
+ token = os.environ['GITHUB_TOKEN']
7
+ g = Github(token)
8
+
9
+ # Get the current repository
10
+ print(f"Repo is {os.environ['GITHUB_REPOSITORY']}")
11
+ repo = g.get_repo(os.environ['GITHUB_REPOSITORY'])
12
+
13
+ # Get the issue number from the event payload
14
+ #issue_number = int(os.environ['ISSUE_NUMBER'])
15
+
16
+ for issue in repo.get_issues():
17
+ print(f"Processing issue №{issue.number}")
18
+ if issue.pull_request:
19
+ continue
20
+
21
+ # Get the issue object
22
+ #issue = repo.get_issue(issue_number)
23
+
24
+ # Define the keywords to search for in the issue
25
+ keywords = ['Python', 'Commit hash', 'Launching Web UI with arguments', 'Model loaded', 'deforum']
26
+
27
+ # Check if ALL of the keywords are present in the issue
28
+ def check_keywords(issue_body, keywords):
29
+ for keyword in keywords:
30
+ if not re.search(r'\b' + re.escape(keyword) + r'\b', issue_body, re.IGNORECASE):
31
+ return False
32
+ return True
33
+
34
+ # Check if the issue title has at least a specified number of words
35
+ def check_title_word_count(issue_title, min_word_count):
36
+ words = issue_title.replace("/", " ").replace("\\\\", " ").split()
37
+ return len(words) >= min_word_count
38
+
39
+ # Check if the issue title is concise
40
+ def check_title_concise(issue_title, max_word_count):
41
+ words = issue_title.replace("/", " ").replace("\\\\", " ").split()
42
+ return len(words) <= max_word_count
43
+
44
+ # Check if the commit ID is in the correct hash form
45
+ def check_commit_id_format(issue_body):
46
+ match = re.search(r'webui commit id - ([a-fA-F0-9]+|\[[a-fA-F0-9]+\])', issue_body)
47
+ if not match:
48
+ print('webui_commit_id not found')
49
+ return False
50
+ webui_commit_id = match.group(1)
51
+ print(f'webui_commit_id {webui_commit_id}')
52
+ webui_commit_id = webui_commit_id.replace("[", "").replace("]", "")
53
+ if not (7 <= len(webui_commit_id) <= 40):
54
+ print(f'invalid length!')
55
+ return False
56
+ match = re.search(r'deforum exten commit id - ([a-fA-F0-9]+|\[[a-fA-F0-9]+\])', issue_body)
57
+ if not match:
58
+ print('deforum commit id not found')
59
+ return False
60
+ t2v_commit_id = match.group(1)
61
+ print(f'deforum_commit_id {t2v_commit_id}')
62
+ t2v_commit_id = t2v_commit_id.replace("[", "").replace("]", "")
63
+ if not (7 <= len(t2v_commit_id) <= 40):
64
+ print(f'invalid length!')
65
+ return False
66
+ return True
67
+
68
+ # Only if a bug report
69
+ if '[Bug]' in issue.title and not '[Feature Request]' in issue.title:
70
+ print('The issue is eligible')
71
+ # Initialize an empty list to store error messages
72
+ error_messages = []
73
+
74
+ # Check for each condition and add the corresponding error message if the condition is not met
75
+ if not check_keywords(issue.body, keywords):
76
+ error_messages.append("Include **THE FULL LOG FROM THE START OF THE WEBUI** in the issue description.")
77
+
78
+ if not check_title_word_count(issue.title, 3):
79
+ error_messages.append("Make sure the issue title has at least 3 words.")
80
+
81
+ if not check_title_concise(issue.title, 13):
82
+ error_messages.append("The issue title should be concise and contain no more than 13 words.")
83
+
84
+ # if not check_commit_id_format(issue.body):
85
+ # error_messages.append("Provide a valid commit ID in the format 'commit id - [commit_hash]' **both** for the WebUI and the Extension.")
86
+
87
+ # If there are any error messages, close the issue and send a comment with the error messages
88
+ if error_messages:
89
+ print('Invalid issue, closing')
90
+ # Add the "not planned" label to the issue
91
+ not_planned_label = repo.get_label("wrong format")
92
+ issue.add_to_labels(not_planned_label)
93
+
94
+ # Close the issue
95
+ issue.edit(state='closed')
96
+
97
+ # Generate the comment by concatenating the error messages
98
+ comment = "This issue has been closed due to incorrect formatting. Please address the following mistakes and reopen the issue (click on the 'Reopen' button below):\n\n"
99
+ comment += "\n".join(f"- {error_message}" for error_message in error_messages)
100
+
101
+ # Add the comment to the issue
102
+ issue.create_comment(comment)
103
+ elif repo.get_label("wrong format") in issue.labels:
104
+ print('Issue is fine')
105
+ issue.edit(state='open')
106
+ issue.delete_labels()
107
+ bug_label = repo.get_label("bug")
108
+ issue.add_to_labels(bug_label)
109
+ comment = "Thanks for addressing your formatting mistakes. The issue has been reopened now."
110
+ issue.create_comment(comment)
.github/workflows/issue_checker.yaml ADDED
@@ -0,0 +1,23 @@
1
+ name: Issue Checker
2
+
3
+ on:
4
+ issues:
5
+ types: [opened, reopened, edited]
6
+
7
+ jobs:
8
+ check_issue:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - name: Checkout repository
12
+ uses: actions/checkout@v3
13
+ - name: Set up Python
14
+ uses: actions/setup-python@v3
15
+ with:
16
+ python-version: '3.x'
17
+ - name: Install dependencies
18
+ run: pip install PyGithub
19
+ - name: Check issue
20
+ env:
21
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
22
+ ISSUE_NUMBER: ${{ github.event.number }}
23
+ run: python .github/scripts/issue_checker.py
.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ # Unnecessary compiled python files.
2
+ __pycache__
3
+ *.pyc
4
+ *.pyo
5
+
6
+ # Output Images
7
+ outputs
8
+
9
+ # Log files for colab-convert
10
+ cc-outputs.log
11
+ *.safetensors
12
+ scripts/deforum_helpers/navigation.py
AUTHORS.md ADDED
@@ -0,0 +1,23 @@
1
+ # Authors
2
+
3
+ Note: this extension is a re-implementation of Deforum functionality on top of Automatic1111's Stable Diffusion WebUI. The authors who took part in its development are
4
+
5
+ kabachuha (Artem Khrapov)
6
+ hithereai
7
+ reallybigname (Forest Star Walz)
8
+ MatissesProjects (Matisse Tec)
9
+ rewbs (Robin Fernandes)
10
+ Funofabot
11
+ Kitchenn3
12
+ Zarxrax
13
+ Phobos97
14
+ yyahav
15
+ rwscarb (Ryan Scarbery)
16
+ phi-line
17
+ blackneoo
18
+ joet203 (Joe Toch)
19
+
20
+
21
+ ### Notes
22
+
23
+ For license inheritance from the Deforum notebook and the previous versions of the extension, see the LICENSE file.
CONTRIBUTING.md ADDED
@@ -0,0 +1,9 @@
1
+ # Contributing
2
+
3
+ ## Licensing notice
4
+
5
+ By contributing to this project you agree that your work will be licensed under the terms of the GNU Affero General Public License version 3.
6
+
7
+ ## Contact us
8
+
9
+ You may also want to inform the dev team about your work via Discord (https://discord.gg/deforum) to ensure that no one else is working on the same thing.
LICENSE ADDED
The diff for this file is too large to render. See raw diff
 
README.md CHANGED
@@ -1,3 +1,73 @@
1
- ---
2
- license: other
3
- ---
1
+
2
+ # Deforum Stable Diffusion — official extension for AUTOMATIC1111's webui
3
+
4
+ <p align="left">
5
+ <a href="https://github.com/deforum-art/sd-webui-deforum/commits"><img alt="Last Commit" src="https://img.shields.io/github/last-commit/deforum-art/deforum-for-automatic1111-webui"></a>
6
+ <a href="https://github.com/deforum-art/sd-webui-deforum/issues"><img alt="GitHub issues" src="https://img.shields.io/github/issues/deforum-art/deforum-for-automatic1111-webui"></a>
7
+ <a href="https://github.com/deforum-art/sd-webui-deforum/stargazers"><img alt="GitHub stars" src="https://img.shields.io/github/stars/deforum-art/deforum-for-automatic1111-webui"></a>
8
+ <a href="https://github.com/deforum-art/sd-webui-deforum/network"><img alt="GitHub forks" src="https://img.shields.io/github/forks/deforum-art/deforum-for-automatic1111-webui"></a>
9
+ </a>
10
+ </p>
11
+
12
+ ## Need help? See our [FAQ](https://github.com/deforum-art/sd-webui-deforum/wiki/FAQ-&-Troubleshooting)
13
+
14
+ ## Getting Started
15
+
16
+ 1. Install [AUTOMATIC1111's webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui/).
17
+
18
+ 2. There are two ways: either clone the repo into the `extensions` directory via the git command line, launched from within the `stable-diffusion-webui` folder
19
+
20
+ ```sh
21
+ git clone https://github.com/deforum-art/sd-webui-deforum extensions/deforum
22
+ ```
23
+
24
+ Or download this repository, locate the `extensions` folder within your WebUI installation, create a folder named `deforum` and put the contents of the downloaded directory inside of it. Then restart WebUI.
25
+
26
+ 3. Open the webui, find the Deforum tab at the top of the page.
27
+
28
+ 4. Enter the animation settings. Refer to [this general guide](https://docs.google.com/document/d/1pEobUknMFMkn8F5TMsv8qRzamXX_75BShMMXV8IFslI/edit) and [this guide to math keyframing functions in Deforum](https://docs.google.com/document/d/1pfW1PwbDIuW0cv-dnuyYj1UzPqe23BlSLTJsqazffXM/edit?usp=sharing). However, **in this version, prompt weights less than zero don't work like in the original Deforum!** Split the positive and the negative prompt in the JSON section using the --neg argument, like this: "apple:\`where(cos(t)>=0, cos(t), 0)\`, snow --neg strawberry:\`where(cos(t)<0, -cos(t), 0)\`"
29
+
30
+ 5. To view animation frames as they're being made, without waiting for the animation to complete, go to the 'Settings' tab and set the value of the setting shown below **above zero**. Warning: it may slow down the generation process.
31
+
32
+ ![adsdasunknown](https://user-images.githubusercontent.com/14872007/196064311-1b79866a-e55b-438a-84a7-004ff30829ad.png)
33
+
34
+
35
+ 6. Run the script and see if you got it working. **In 3D mode a large delay is expected at first** as the script loads the depth models. With the default settings, the whole thing should consume about 6.4 GB of VRAM at 3D mode peaks, and no more than 3.8 GB of VRAM in 3D mode if you launch the webui with the '--lowvram' command line argument.
36
+
37
+ 7. After the generation process is completed, click the button with the self-describing name to show the video or gif result right in the GUI!
38
+
39
+ 8. Join our Discord where you can post generated stuff, ask questions and more: https://discord.gg/deforum. <br>
40
+ * There's also the 'Issues' tab in the repo, for well... reporting issues ;)
41
+
42
+ 9. Profit!
43
+
44
+ ## Known issues
45
+
46
+ * This port is not fully backward-compatible with either the notebook or the local version, due both to changes in how AUTOMATIC1111's webui handles Stable Diffusion models and to changes made in this script to get it working in the new environment. *Expect* that you may not get exactly the same result, or that things may break with older settings.
47
+
48
+ ## Screenshots
49
+
50
+ Amazing raw Deforum animation by [Pxl.Pshr](https://www.instagram.com/pxl.pshr):
51
+ * Turn Audio ON!
52
+
53
+ (Audio credits: SKRILLEX, FRED AGAIN & FLOWDAN - RUMBLE (PHACE'S DNB FLIP))
54
+
55
+ https://user-images.githubusercontent.com/121192995/224450647-39529b28-be04-4871-bb7a-faf7afda2ef2.mp4
56
+
57
+ Setting file of that video: [here](https://github.com/deforum-art/sd-webui-deforum/files/11353167/PxlPshrWinningAnimationSettings.txt).
58
+
59
+ <br>
60
+
61
+ Main extension tab:
62
+
63
+ ![image](https://user-images.githubusercontent.com/121192995/226101131-43bf594a-3152-45dd-a5d1-2538d0bc221d.png)
64
+
65
+ Keyframes tab:
66
+
67
+ ![image](https://user-images.githubusercontent.com/121192995/226101140-bfe6cce7-9b78-4a1d-be9a-43e1fc78239e.png)
68
+
69
+ ## Licensing
70
+
71
+ The 'extension' part of this project is licensed under the GNU Affero General Public License version 3.
72
+
73
+ For license inheritance from the Deforum notebook and the previous versions of the extension and for the third party code used under permissive licenses, see the LICENSE file.
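A minimal sketch of the prompt-splitting syntax described in step 4 of the README above, assuming the usual Deforum mapping of keyframe number to prompt string; the keyframe numbers and the second prompt are illustrative only, not part of this commit:

```python
# Hedged sketch of the Prompts JSON/dict format from step 4 of the README above.
# Everything after --neg in a prompt becomes the negative prompt for that keyframe.
animation_prompts = {
    # weight expressions in backticks are evaluated per frame (t); the negative
    # weight is redirected into the --neg part instead of being applied directly
    "0": "apple:`where(cos(t)>=0, cos(t), 0)`, snow --neg strawberry:`where(cos(t)<0, -cos(t), 0)`",
    # a plain prompt with a simple negative part (hypothetical example)
    "60": "a snowy forest at dawn --neg blurry, low quality",
}
```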
install.py ADDED
@@ -0,0 +1,27 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import launch
19
+ import os
20
+
21
+ req_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "requirements.txt")
22
+
23
+ with open(req_file) as file:
24
+ for lib in file:
25
+ lib = lib.strip()
26
+ if not launch.is_installed(lib):
27
+ launch.run_pip(f"install {lib}", f"Deforum requirement: {lib}")
javascript/deforum-hints.js ADDED
@@ -0,0 +1,233 @@
1
+ /*
2
+ * 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
3
+ * Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
4
+ *
5
+ * This program is free software: you can redistribute it and/or modify
6
+ * it under the terms of the GNU Affero General Public License as published by
7
+ * the Free Software Foundation, version 3 of the License.
8
+ *
9
+ * This program is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ * GNU Affero General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Affero General Public License
15
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
+ *
17
+ * Contact the author (Artem Khrapov): https://github.com/kabachuha/
18
+ */
19
+
20
+ // mouseover tooltips for various UI elements
21
+
22
+ deforum_titles = {
23
+ //Run
24
+ "Override settings": "specify a custom settings file and ignore settings displayed in the interface",
25
+ "Custom settings file": "the path to a custom settings file",
26
+ "Width": "The width of the output images, in pixels (must be a multiple of 64)",
27
+ "Height": "The height of the output images, in pixels (must be a multiple of 64)",
28
+ "Restore faces": "Restore low quality faces using GFPGAN neural network",
29
+ "Tiling": "Produce an image that can be tiled.",
30
+ "Highres. fix": "Use a two step process to partially create an image at smaller resolution, upscale, and then improve details in it without changing composition",
31
+ "Seed": "A value that determines the output of random number generator - if you create an image with same parameters and seed as another image, you'll get the same result",
32
+ "Sampler": "Which algorithm to use to produce the image",
33
+ "Enable extras": "enable additional seed settings",
34
+ "Subseed": "Seed of a different picture to be mixed into the generation.",
35
+ "Subseed strength": "How strong of a variation to produce. At 0, there will be no effect. At 1, you will get the complete picture with variation seed (except for ancestral samplers, where you will just get something).",
36
+ "Resize seed from width": "Normally, changing the resolution will completely change an image, even when using the same seed. If you generated an image with a particular seed and then changed the resolution, put the original resolution here to get an image that more closely resemles the original",
37
+ "Resize seed from height": "Normally, changing the resolution will completely change an image, even when using the same seed. If you generated an image with a particular seed and then changed the resolution, put the original resolution here to get an image that more closely resemles the original",
38
+ "Steps": "How many times to improve the generated image iteratively; higher values take longer; very low values can produce bad results",
39
+ "Batch name": "output images will be placed in a folder with this name ({timestring} token will be replaced) inside the img2img output folder. Supports placeholders like {seed}, {w}, {h}, {prompts} and more",
40
+ "Pix2Pix img CFG schedule": "*Only in use with pix2pix checkpoints!*",
41
+ "Filename format": "specify the format of the filename for output images",
42
+ "Seed behavior": "defines the seed behavior that is used for animations",
43
+ "iter": "the seed value will increment by 1 for each subsequent frame of the animation",
44
+ "fixed": "the seed will remain fixed across all frames of animation. **NOT RECOMMENDED.** Unless you know what you are doing, it will *deep fry* the pictures over time",
45
+ "random": "a random seed will be used on each frame of the animation",
46
+ "schedule": "specify your own seed schedule",
47
+ "Seed iter N":"controls for how many frames the same seed should stick before iterating to the next one",
48
+ //Keyframes
49
+ "Animation mode": "selects the type of animation",
50
+ "2D": "only 2D motion parameters will be used, but this mode uses the least amount of VRAM. You can optionally enable flip_2d_perspective to enable some psuedo-3d animation parameters while in 2D mode.",
51
+ "3D": "enables all 3D motion parameters.",
52
+ "Video Input": "will ignore all motion parameters and attempt to reference a video loaded into the runtime, specified by the video_init_path. Max_frames is ignored during video_input mode, and instead, follows the number of frames pulled from the video’s length. Resume_from_timestring is NOT available with Video_Input mode.",
53
+ "Max frames": "the maximum number of output images to be created",
54
+ "Border": "controls handling method of pixels to be generated when the image is smaller than the frame.",
55
+ "wrap": "pulls pixels from the opposite edge of the image",
56
+ "replicate": "repeats the edge of the pixels, and extends them. Animations with quick motion may yield lines where this border function was attempting to populate pixels into the empty space created.",
57
+ "Zoom": "2D operator that scales the canvas size, multiplicatively. [static = 1.0]",
58
+ "Angle": "2D operator to rotate canvas clockwise/anticlockwise in degrees per frame",
59
+ "Transform Center X": "x center axis for 2D angle/zoom *only*",
60
+ "Transform Center Y": "y center axis for 2D angle/zoom *only*",
61
+ "Translation X": "2D & 3D operator to move canvas left/right in pixels per frame",
62
+ "Translation Y": "2D & 3D operator to move canvas up/down in pixels per frame",
63
+ "Translation Z": "3D operator to move canvas towards/away from view [speed set by FOV]",
64
+ "Rotation 3D X": "3D operator to tilt canvas up/down in degrees per frame",
65
+ "Rotation 3D Y": "3D operator to pan canvas left/right in degrees per frame",
66
+ "Rotation 3D Z": "3D operator to roll canvas clockwise/anticlockwise",
67
+ "Enable perspective flip": "enables 2D mode functions to simulate faux 3D movement",
68
+ "Perspective flip theta": "the roll effect angle",
69
+ "Perspective flip phi": "the tilt effect angle",
70
+ "Perspective flip gamma": "the pan effect angle",
71
+ "Perspective flip fv": "the 2D vanishing point of perspective (recommended range 30-160)",
72
+ "Noise schedule": "amount of graininess to add per frame for diffusion diversity",
73
+ "Strength schedule": "amount of presence of previous frame to influence next frame, also controls steps in the following formula [steps - (strength_schedule * steps)]",
74
+ "Sampler schedule": "controls which sampler to use at a specific scheduled frame",
75
+ "Contrast schedule": "adjusts the overall contrast per frame [default neutral at 1.0]",
76
+ "CFG scale schedule": "how closely the image should conform to the prompt. Lower values produce more creative results. (recommended range 5-15)",
77
+ "FOV schedule": "adjusts the scale at which the canvas is moved in 3D by the translation_z value. [maximum range -180 to +180, with 0 being undefined. Values closer to 180 will make the image have less depth, while values closer to 0 will allow more depth]",
78
+ "Aspect Ratio schedule": "adjusts the aspect ratio for the depth calculation (normally 1)",
79
+ //"near_schedule": "",
80
+ //"far_schedule": "",
81
+ "Seed schedule": "allows you to specify seeds at a specific schedule, if seed_behavior is set to schedule.",
82
+ "Color coherence": "The color coherence will attempt to sample the overall pixel color information, and trend those values analyzed in the first frame to be applied to future frames.",
83
+ // "None": "Disable color coherence",
84
+ "HSV": "HSV is a good method for balancing presence of vibrant colors, but may produce unrealistic results - (ie.blue apples)",
85
+ "LAB": "LAB is a more linear approach to mimic human perception of color space - a good default setting for most users.",
86
+ "RGB": "RGB is good for enforcing unbiased amounts of color in each red, green and blue channel - some images may yield colorized artifacts if sampling is too low.",
87
+ "Legacy colormatch": "applies the colormatch only before the video noising, resulting in graying the video over time, use it for backwards compatibility",
88
+ "Cadence": "A setting of 1 will cause every frame to receive diffusion in the sequence of image outputs. A setting of 2 will only diffuse on every other frame, yet motion will still be in effect. The output of images during the cadence sequence will be automatically blended, additively and saved to the specified drive. This may improve the illusion of coherence in some workflows as the content and context of an image will not change or diffuse during frames that were skipped. Higher values of 4-8 cadence will skip over a larger amount of frames and only diffuse the “Nth” frame as set by the diffusion_cadence value. This may produce more continuity in an animation, at the cost of little opportunity to add more diffused content. In extreme examples, motion within a frame will fail to produce diverse prompt context, and the space will be filled with lines or approximations of content - resulting in unexpected animation patterns and artifacts. Video Input & Interpolation modes are not affected by diffusion_cadence.",
89
+ "Optical flow cadence": "Optional method for optical flow used to blend frames during cadence in 3D animation mode (if cadence more than 1).",
90
+ "Optical flow redo generation": "This option takes twice as long because it generates twice in order to capture the optical flow from the previous image to the first generation, then warps the previous image and redoes the generation. Works in 2D/3D animation modes.",
91
+ "Redo": "Diffusion Redo. This option renders N times before the final render. It is suggested to lower your steps if you up your redo. Seed is randomized during redo generations and restored afterwards.",
92
+ "Noise type": "Selects the type of noise being added to each frame",
93
+ "uniform": "Uniform noise covers the entire frame. It somewhat flattens and sharpens the video over time, but may be good for cartoonish look. This is the old default setting.",
94
+ "perlin": "Perlin noise is a more natural looking noise. It is heterogeneous and less sharp than uniform noise, this way it is more likely that new details will appear in a more coherent way. This is the new default setting.",
95
+ "Perlin W": "The width of the Perlin sample. Lower values will make larger noise regions. Think of it as inverse brush stroke width. The greater this setting, the smaller details it will affect.",
96
+ "Perlin H": "The height of the Perlin sample. Lower values will make larger noise regions. Think of it as inverse brush stroke width. The greater this setting, the smaller details it will affect.",
97
+ "Perlin octaves": "The number of Perlin noise octaves, that is the count of P-noise iterations. Higher values will make the noise more soft and smoke-like, whereas lower values will make it look more organic and spotty. It is limited by 8 octaves as the resulting gain will run out of bounds.",
98
+ "Perlin persistence": "How much of noise from each octave is added on each iteration. Higher values will make it more straighter and sharper, while lower values will make it rounder and smoother. It is limited by 1.0 as the resulting gain fill the frame completely with noise.",
99
+ "Use depth warping": "enables instructions to warp an image dynamically in 3D mode only.",
100
+ "MiDaS weight": "sets a midpoint at which a depthmap is to be drawn: range [-1 to +1]",
101
+ "Padding mode": "instructs the handling of pixels outside the field of view as they come into the scene.",
102
+ //"border": "Border will attempt to use the edges of the canvas as the pixels to be drawn", //duplicate name as another property
103
+ "reflection": "reflection will attempt to approximate the image and tile/repeat pixels",
104
+ "zeros": "zeros will not add any new pixel information",
105
+ "Sampling Mode": "choose from Bicubic, Bilinear or Nearest modes. (Recommended: Bicubic)",
106
+ "Save depth maps": "will output a greyscale depth map image alongside the output images.",
107
+
108
+ // Prompts
109
+ "Prompts": "prompts for your animation in a JSON format. Use --neg words to add 'words' as negative prompt",
110
+ "Prompts positive": "positive prompt to be appended to *all* prompts",
111
+ "Prompts negative": "negative prompt to be appended to *all* prompts. DON'T use --neg here!",
112
+
113
+ //Init
114
+ "Use init": "Diffuse the first frame based on an image, similar to img2img.",
115
+ "Strength": "Controls the strength of the diffusion on the init image. 0 = disabled",
116
+ "Strength 0 no init": "Set the strength to 0 automatically when no init image is used",
117
+ "Init image": "the path to your init image",
118
+ "Use mask": "Use a grayscale image as a mask on your init image. Whiter areas of the mask are areas that change more.",
119
+ "Use alpha as mask": "use the alpha channel of the init image as the mask",
120
+ "Mask file": "the path to your mask image",
121
+ "Invert mask": "Inverts the colors of the mask",
122
+ "Mask brightness adjust": "adjust the brightness of the mask. Should be a positive number, with 1.0 meaning no adjustment.",
123
+ "Mask contrast adjust": "adjust the brightness of the mask. Should be a positive number, with 1.0 meaning no adjustment.",
124
+ "overlay mask": "Overlay the masked image at the end of the generation so it does not get degraded by encoding and decoding",
125
+ "Mask overlay blur": "Blur edges of final overlay mask, if used. Minimum = 0 (no blur)",
126
+ "Video init path": "the directory \/ URL at which your video file is located for Video Input mode only",
127
+ "Extract nth frame": "during the run sequence, only frames specified by this value will be extracted, saved, and diffused upon. A value of 1 indicates that every frame is to be accounted for. Values of 2 will use every other frame for the sequence. Higher values will skip that number of frames respectively.",
128
+ "Extract from frame":"start extracting the input video only from this frame number",
129
+ "Extract to frame": "stop the extraction of the video at this frame number. -1 for no limits",
130
+ "Overwrite extracted frames": "when enabled, will re-extract video frames each run. When using video_input mode, the run will be instructed to write video frames to the drive. If you’ve already populated the frames needed, uncheck this box to skip past redundant extraction, and immediately start the render. If you have not extracted frames, you must run at least once with this box checked to write the necessary frames.",
131
+ "Use mask video": "video_input mode only, enables the extraction and use of a separate video file intended for use as a mask. White areas of the extracted video frames will not be affected by diffusion, while black areas will be fully effected. Lighter/darker areas are affected dynamically.",
132
+ "Video mask path": "the directory in which your mask video is located.",
133
+ "Interpolate key frames": "selects whether to ignore prompt schedule or _x_frames.",
134
+ "Interpolate x frames": "the number of frames to transition thru between prompts (when interpolate_key_frames = true, then the numbers in front of the animation prompts will dynamically guide the images based on their value. If set to false, will ignore the prompt numbers and force interpole_x_frames value regardless of prompt number)",
135
+ "Resume from timestring": "instructs the run to start from a specified point",
136
+ "Resume timestring": "the required timestamp to reference when resuming. Currently only available in 2D & 3D mode, the timestamp is saved as the settings .txt file name as well as images produced during your previous run. The format follows: yyyymmddhhmmss - a timestamp of when the run was started to diffuse.",
137
+
138
+ //Video Output
139
+ "Skip video creation": "when checked, do not output a video",
140
+ "Make GIF": "create a gif in addition to .mp4 file. supports up to 30 fps, will self-disable at higher fps values",
141
+ "Upscale":"upscale the images of the next run once it's finished + make a video out of them",
142
+ "Upscale model":"model of the upscaler to use. 'realesr-animevideov3' is much faster but yields smoother, less detailed results. the other models only do x4",
143
+ "Upscale factor":"how many times to upscale, actual options depend on the chosen upscale model",
144
+ "FPS": "The frames per second that the video will run at",
145
+ "Output format": "select the type of video file to output",
146
+ "PIL gif": "create an animated GIF",
147
+ "FFMPEG mp4": "create an MP4 video file",
148
+ "FFmpeg location": "the path to where ffmpeg is located. Leave at default 'ffmpeg' if ffmpeg is in your PATH!",
149
+ "FFmpeg crf": "controls quality where lower is better, less compressed. values: 0 to 51, default 17",
150
+ "FFmpeg preset": "controls how good the compression is, and the operation speed. If you're not in a rush keep it at 'veryslow'",
151
+ "Add soundtrack": "when this box is checked, and FFMPEG mp4 is selected as the output format, an audio file will be multiplexed with the video.",
152
+ "Soundtrack path": "the path\/ URL to an audio file to accompany the video",
153
+ "Use manual settings": "when this is unchecked, the video will automatically be created in the same output folder as the images. Check this box to specify different settings for the creation of the video, specified by the following options",
154
+ "Render steps": "render each step of diffusion as a separate frame",
155
+ "Max video frames": "the maximum number of frames to include in the video, when use_manual_settings is checked",
156
+ "Image path": "the location of images to create the video from, when use_manual_settings is checked",
157
+ "MP4 path": "the output location of the mp4 file, when use_manual_settings is checked",
158
+ "Delete Imgs": "if enabled, raw imgs will be deleted after a successful video/ videos (upsacling, interpolation, gif) creation",
159
+ "Engine": "choose the frame interpolation engine and version",
160
+ "Interp X":"how many times to interpolate the source video. e.g source video fps of 12 and a value of x2 will yield a 24fps interpolated video",
161
+ "Slow-Mo X":"how many times to slow-down the video. *Naturally affects output fps as well",
162
+ "Keep Imgs": "delete or keep raw affected (interpolated/ upscaled depending on the UI section) png imgs",
163
+ "Interpolate an existing video":"This feature allows you to interpolate any video with a dedicated button. Video could be completly unrelated to deforum",
164
+ "In Frame Count": "uploaded video total frame count",
165
+ "In FPS":"uploaded video FPS",
166
+ "Interpolated Vid FPS":"calculated output-interpolated video FPS",
167
+ "In Res":"uploaded video resolution",
168
+ "Out Res":"output video resolution",
169
+
170
+ // Looper Args
171
+ // "use_looper": "",
172
+ "Enable guided images mode": "check this box to enable guided images mode",
173
+ "Images to use for keyframe guidance": "images you iterate over, you can do local or web paths (no single backslashes!)",
174
+ "Image strength schedule": "how much the image should look like the previou one and new image frame init. strength schedule might be better if this is higher, around .75 during the keyfames you want to switch on",
175
+ "Blend factor max": "blendFactor = blendFactorMax - blendFactorSlope * cos((frame % tweening_frames_schedule) / (tweening_frames_schedule / 2))",
176
+ "Blend factor slope": "blendFactor = blendFactorMax - blendFactorSlope * cos((frame % tweening_frames_schedule) / (tweening_frames_schedule / 2))",
177
+ "Tweening frames schedule": "number of the frames that we will blend between current imagined image and input frame image",
178
+ "Color correction factor": "how close to get to the colors of the input frame image/ the amount each frame during a tweening step to use the new images colors",
179
+ // deforum.py / right side of the ui:
180
+ "Settings File": "Path to settings file you want to load. Path can be relative to webui folder OR full - absolute",
181
+
182
+ // Hybrid Video
183
+ "Generate inputframes": "Initiates extraction of video frames from your video_init_path to the inputframes folder. You only need to do this once and then you can change it to False and re-render",
184
+ "Hybrid composite": "Engages hybrid compositing of video into animation in various ways with comp alpha as a master mix control.",
185
+ "Use init image as video": "Use init image instead of video. Doesn't require generation of inputframes.",
186
+ "First Frame as init image": "If True, uses the first frame of the video as the init_image. False can create interesting transition effects into the video, depending on settings.",
187
+ "Motion use prev img": "If enabled, changes the behavior or hybrid_motion to captures motion by comparing the current video frame to the previous rendered image, instead of the previous video frame.",
188
+ "Hybrid motion": "Analyzes video frames for camera motion and applies movement to render.",
189
+ "Flow method": "Selects the type of Optical Flow to use if Optical Flow is selected in Hybrid motion.",
190
+ "Comp mask type": "You don't need a mask to composite video. But, Mask types can control the way that video is composited with the previous image each frame.",
191
+ "Comp mask equalize": "Equalizes the mask for the composite before or after autocontrast operation (or both)",
192
+ "Comp mask auto contrast": "Auto-contrasts the mask for the composite. If enabled, uses the low/high autocontrast cutoff schedules.",
193
+ "Comp mask inverse": "Inverts the composite mask.",
194
+ "Comp save extra frames": "If this option is selected, many extra frames will be output for the various processes into the hybridframes folder.",
195
+ "Comp alpha schedule": "Schedule controls how much the composite video is mixed in, whether set to mask is None or using a mask. This is the master mix.",
196
+ "Flow factor schedule": "Affects optical flow hybrid motion. 1 is normal flow. -1 is negative flow. 0.5 is half flow, etc...",
197
+ "Comp mask blend alpha schedule": "If using a blend mask, this controls the blend amount of the video and render for the composite mask.",
198
+ "Comp mask contrast schedule": "Controls the contrast of the composite mask. 0.5 if half, 1 is normal contrast, 2 is double, etc.",
199
+ "Comp mask auto contrast cutoff high schedule": "If using autocontrast option, this is the high cutoff for the operation.",
200
+ "Comp mask auto contrast cutoff low schedule": "If using autocontrast option, this is the low cutoff for the operation.",
201
+ "Generate human masks": "This will generate masks of all the humans in a video. Created at generation of hybrid video. Not yet integrated for auto-masking, but it will create the masks, and you can then use the mask video manually.",
202
+ }
203
+
204
+ onUiUpdate(function(){
205
+ gradioApp().querySelectorAll('span, button, select, p').forEach(function(span){
206
+ tooltip = deforum_titles[span.textContent];
207
+
208
+ if(!tooltip){
209
+ tooltip = deforum_titles[span.value];
210
+ }
211
+
212
+ if(!tooltip){
213
+ for (const c of span.classList) {
214
+ if (c in deforum_titles) {
215
+ tooltip = deforum_titles[c];
216
+ break;
217
+ }
218
+ }
219
+ }
220
+
221
+ if(tooltip){
222
+ span.title = tooltip;
223
+ }
224
+ })
225
+
226
+ gradioApp().querySelectorAll('select').forEach(function(select){
227
+ if (select.onchange != null) return;
228
+
229
+ select.onchange = function(){
230
+ select.title = deforum_titles[select.value] || "";
231
+ }
232
+ })
233
+ })
javascript/deforum.js ADDED
@@ -0,0 +1,34 @@
1
+ /*
2
+ * 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
3
+ * Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
4
+ *
5
+ * This program is free software: you can redistribute it and/or modify
6
+ * it under the terms of the GNU Affero General Public License as published by
7
+ * the Free Software Foundation, version 3 of the License.
8
+ *
9
+ * This program is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ * GNU Affero General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Affero General Public License
15
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
+ *
17
+ * Contact the dev team: https://discord.gg/deforum
18
+ */
19
+
20
+ function submit_deforum(){
21
+ rememberGallerySelection('deforum_gallery')
22
+ showSubmitButtons('deforum', false)
23
+
24
+ var id = randomId()
25
+ requestProgress(id, gradioApp().getElementById('deforum_gallery_container'), gradioApp().getElementById('deforum_gallery'), function(){
26
+ showSubmitButtons('deforum', true)
27
+ })
28
+
29
+ var res = create_submit_args(arguments)
30
+
31
+ res[0] = id
32
+
33
+ return res
34
+ }
requirements.txt ADDED
@@ -0,0 +1,8 @@
1
+ numexpr
2
+ matplotlib
3
+ pandas
4
+ av
5
+ pims
6
+ imageio_ffmpeg
7
+ rich
8
+ gdown
scripts/deforum.py ADDED
@@ -0,0 +1,29 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ from modules import script_callbacks
19
+ from scripts.deforum_extend_paths import deforum_sys_extend
20
+
21
+ def init_deforum():
22
+ deforum_sys_extend()
23
+
24
+ from deforum_helpers.ui_right import on_ui_tabs
25
+ script_callbacks.on_ui_tabs(on_ui_tabs)
26
+ from deforum_helpers.ui_settings import on_ui_settings
27
+ script_callbacks.on_ui_settings(on_ui_settings)
28
+
29
+ init_deforum()
scripts/deforum_extend_paths.py ADDED
@@ -0,0 +1,34 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ import sys
20
+
21
+ def deforum_sys_extend():
22
+ deforum_folder_name = os.path.sep.join(os.path.abspath(__file__).split(os.path.sep)[:-2])
23
+
24
+ basedirs = [os.getcwd()]
25
+ if 'google.colab' in sys.modules:
26
+ basedirs.append('/content/gdrive/MyDrive/sd/stable-diffusion-webui') # for TheLastBen's colab
27
+ for basedir in basedirs:
28
+ deforum_paths_to_ensure = [
29
+ os.path.join(deforum_folder_name, 'scripts'),
30
+ os.path.join(deforum_folder_name, 'scripts', 'deforum_helpers', 'src')
31
+ ]
32
+ for deforum_scripts_path_fix in deforum_paths_to_ensure:
33
+ if not deforum_scripts_path_fix in sys.path:
34
+ sys.path.extend([deforum_scripts_path_fix])
scripts/deforum_helpers/RAFT.py ADDED
@@ -0,0 +1,45 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import torch
19
+ import numpy as np
20
+ import torchvision.transforms.functional as F
21
+ from torchvision.models.optical_flow import Raft_Large_Weights, raft_large
22
+
23
+ class RAFT:
24
+ def __init__(self):
25
+ weights = Raft_Large_Weights.DEFAULT
26
+ self.transforms = weights.transforms()
27
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
28
+ self.model = raft_large(weights=weights, progress=False).to(self.device).eval()
29
+
30
+ def predict(self, image1, image2, num_flow_updates:int = 50):
31
+ img1 = F.to_tensor(image1)
32
+ img2 = F.to_tensor(image2)
33
+ img1_batch, img2_batch = img1.unsqueeze(0), img2.unsqueeze(0)
34
+ img1_batch, img2_batch = self.transforms(img1_batch, img2_batch)
35
+
36
+ with torch.no_grad():
37
+ flow = self.model(image1=img1_batch.to(self.device), image2=img2_batch.to(self.device), num_flow_updates=num_flow_updates)[-1].cpu().numpy()[0]
38
+
39
+ # align the flow array to have the shape (h, w, 2) so it's compatible with the rest of CV2's flow methods
40
+ flow = np.transpose(flow, (1, 2, 0))
41
+
42
+ return flow
43
+
44
+ def delete_model(self):
45
+ del self.model
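A hedged usage sketch for the class above: the flow returned by `RAFT.predict` has shape (H, W, 2), so it can be consumed like a cv2-computed flow field. The helper below is illustrative only (not part of this commit), and the warp direction (forward vs. backward) depends on how the flow is used downstream; it simply resamples the image at positions displaced by the flow.

```python
# Hedged sketch: applying the (H, W, 2) flow returned by RAFT.predict with OpenCV.
import cv2
import numpy as np

def warp_with_flow(image: np.ndarray, flow: np.ndarray) -> np.ndarray:
    h, w = flow.shape[:2]
    # Build absolute sampling coordinates from the relative flow vectors.
    grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
    map_x = (grid_x + flow[..., 0]).astype(np.float32)
    map_y = (grid_y + flow[..., 1]).astype(np.float32)
    return cv2.remap(image, map_x, map_y, interpolation=cv2.INTER_LINEAR)

# raft = RAFT()
# flow = raft.predict(frame1, frame2)   # frame1/frame2: HxWx3 uint8 arrays (PIL images also work)
# warped = warp_with_flow(frame2, flow)
```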
scripts/deforum_helpers/animation.py ADDED
@@ -0,0 +1,430 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import numpy as np
19
+ import cv2
20
+ import py3d_tools as p3d # this is actually a file in our /src folder!
21
+ from functools import reduce
22
+ import math
23
+ import torch
24
+ from einops import rearrange
25
+ from modules.shared import state, opts
26
+ from .prompt import check_is_number
27
+ from .general_utils import debug_print
28
+
29
+ def sample_from_cv2(sample: np.ndarray) -> torch.Tensor:
30
+ sample = ((sample.astype(float) / 255.0) * 2) - 1
31
+ sample = sample[None].transpose(0, 3, 1, 2).astype(np.float16)
32
+ sample = torch.from_numpy(sample)
33
+ return sample
34
+
35
+ def sample_to_cv2(sample: torch.Tensor, type=np.uint8) -> np.ndarray:
36
+ sample_f32 = rearrange(sample.squeeze().cpu().numpy(), "c h w -> h w c").astype(np.float32)
37
+ sample_f32 = ((sample_f32 * 0.5) + 0.5).clip(0, 1)
38
+ sample_int8 = (sample_f32 * 255)
39
+ return sample_int8.astype(type)
40
+
41
+ def construct_RotationMatrixHomogenous(rotation_angles):
42
+ assert(type(rotation_angles)==list and len(rotation_angles)==3)
43
+ RH = np.eye(4,4)
44
+ cv2.Rodrigues(np.array(rotation_angles), RH[0:3, 0:3])
45
+ return RH
46
+
47
+ # https://en.wikipedia.org/wiki/Rotation_matrix
48
+ def getRotationMatrixManual(rotation_angles):
49
+
50
+ rotation_angles = [np.deg2rad(x) for x in rotation_angles]
51
+
52
+ phi = rotation_angles[0] # around x
53
+ gamma = rotation_angles[1] # around y
54
+ theta = rotation_angles[2] # around z
55
+
56
+ # X rotation
57
+ Rphi = np.eye(4,4)
58
+ sp = np.sin(phi)
59
+ cp = np.cos(phi)
60
+ Rphi[1,1] = cp
61
+ Rphi[2,2] = Rphi[1,1]
62
+ Rphi[1,2] = -sp
63
+ Rphi[2,1] = sp
64
+
65
+ # Y rotation
66
+ Rgamma = np.eye(4,4)
67
+ sg = np.sin(gamma)
68
+ cg = np.cos(gamma)
69
+ Rgamma[0,0] = cg
70
+ Rgamma[2,2] = Rgamma[0,0]
71
+ Rgamma[0,2] = sg
72
+ Rgamma[2,0] = -sg
73
+
74
+ # Z rotation (in-image-plane)
75
+ Rtheta = np.eye(4,4)
76
+ st = np.sin(theta)
77
+ ct = np.cos(theta)
78
+ Rtheta[0,0] = ct
79
+ Rtheta[1,1] = Rtheta[0,0]
80
+ Rtheta[0,1] = -st
81
+ Rtheta[1,0] = st
82
+
83
+ R = reduce(lambda x,y : np.matmul(x,y), [Rphi, Rgamma, Rtheta])
84
+
85
+ return R
86
+
87
+ def getPoints_for_PerspectiveTranformEstimation(ptsIn, ptsOut, W, H, sidelength):
88
+
89
+ ptsIn2D = ptsIn[0,:]
90
+ ptsOut2D = ptsOut[0,:]
91
+ ptsOut2Dlist = []
92
+ ptsIn2Dlist = []
93
+
94
+ for i in range(0,4):
95
+ ptsOut2Dlist.append([ptsOut2D[i,0], ptsOut2D[i,1]])
96
+ ptsIn2Dlist.append([ptsIn2D[i,0], ptsIn2D[i,1]])
97
+
98
+ pin = np.array(ptsIn2Dlist) + [W/2.,H/2.]
99
+ pout = (np.array(ptsOut2Dlist) + [1.,1.]) * (0.5*sidelength)
100
+ pin = pin.astype(np.float32)
101
+ pout = pout.astype(np.float32)
102
+
103
+ return pin, pout
104
+
105
+
106
+ def warpMatrix(W, H, theta, phi, gamma, scale, fV):
107
+
108
+ # M is to be estimated
109
+ M = np.eye(4, 4)
110
+
111
+ fVhalf = np.deg2rad(fV/2.)
112
+ d = np.sqrt(W*W+H*H)
113
+ sideLength = scale*d/np.cos(fVhalf)
114
+ h = d/(2.0*np.sin(fVhalf))
115
+ n = h-(d/2.0)
116
+ f = h+(d/2.0)
117
+
118
+ # Translation along Z-axis by -h
119
+ T = np.eye(4,4)
120
+ T[2,3] = -h
121
+
122
+ # Rotation matrices around x,y,z
123
+ R = getRotationMatrixManual([phi, gamma, theta])
124
+
125
+
126
+ # Projection Matrix
127
+ P = np.eye(4,4)
128
+ P[0,0] = 1.0/np.tan(fVhalf)
129
+ P[1,1] = P[0,0]
130
+ P[2,2] = -(f+n)/(f-n)
131
+ P[2,3] = -(2.0*f*n)/(f-n)
132
+ P[3,2] = -1.0
133
+
134
+ # pythonic matrix multiplication
135
+ F = reduce(lambda x,y : np.matmul(x,y), [P, T, R])
136
+
137
+ # shape should be 1,4,3 for ptsIn and ptsOut since perspectiveTransform() expects data in this way.
138
+ # In C++, this can be achieved by Mat ptsIn(1,4,CV_64FC3);
139
+ ptsIn = np.array([[
140
+ [-W/2., H/2., 0.],[ W/2., H/2., 0.],[ W/2.,-H/2., 0.],[-W/2.,-H/2., 0.]
141
+ ]])
142
+ ptsOut = np.array(np.zeros((ptsIn.shape), dtype=ptsIn.dtype))
143
+ ptsOut = cv2.perspectiveTransform(ptsIn, F)
144
+
145
+ ptsInPt2f, ptsOutPt2f = getPoints_for_PerspectiveTranformEstimation(ptsIn, ptsOut, W, H, sideLength)
146
+
147
+ # check float32 otherwise OpenCV throws an error
148
+ assert(ptsInPt2f.dtype == np.float32)
149
+ assert(ptsOutPt2f.dtype == np.float32)
150
+ M33 = cv2.getPerspectiveTransform(ptsInPt2f,ptsOutPt2f)
151
+
152
+ return M33, sideLength
153
+
154
+ def get_flip_perspective_matrix(W, H, keys, frame_idx):
155
+ perspective_flip_theta = keys.perspective_flip_theta_series[frame_idx]
156
+ perspective_flip_phi = keys.perspective_flip_phi_series[frame_idx]
157
+ perspective_flip_gamma = keys.perspective_flip_gamma_series[frame_idx]
158
+ perspective_flip_fv = keys.perspective_flip_fv_series[frame_idx]
159
+ M, sl = warpMatrix(W, H, perspective_flip_theta, perspective_flip_phi, perspective_flip_gamma, 1., perspective_flip_fv)
160
+ post_trans_mat = np.float32([[1, 0, (W-sl)/2], [0, 1, (H-sl)/2]])
161
+ post_trans_mat = np.vstack([post_trans_mat, [0,0,1]])
162
+ bM = np.matmul(M, post_trans_mat)
163
+ return bM
164
+
165
+ def flip_3d_perspective(anim_args, prev_img_cv2, keys, frame_idx):
166
+ W, H = (prev_img_cv2.shape[1], prev_img_cv2.shape[0])
167
+ return cv2.warpPerspective(
168
+ prev_img_cv2,
169
+ get_flip_perspective_matrix(W, H, keys, frame_idx),
170
+ (W, H),
171
+ borderMode=cv2.BORDER_WRAP if anim_args.border == 'wrap' else cv2.BORDER_REPLICATE
172
+ )
173
+
174
+ def anim_frame_warp(prev_img_cv2, args, anim_args, keys, frame_idx, depth_model=None, depth=None, device='cuda', half_precision = False):
175
+
176
+ if anim_args.use_depth_warping:
177
+ if depth is None and depth_model is not None:
178
+ depth = depth_model.predict(prev_img_cv2, anim_args.midas_weight, half_precision)
179
+
180
+ else:
181
+ depth = None
182
+
183
+ if anim_args.animation_mode == '2D':
184
+ prev_img = anim_frame_warp_2d(prev_img_cv2, args, anim_args, keys, frame_idx)
185
+ else: # '3D'
186
+ prev_img = anim_frame_warp_3d(device, prev_img_cv2, depth, anim_args, keys, frame_idx)
187
+
188
+ return prev_img, depth
189
+
190
+ def anim_frame_warp_2d(prev_img_cv2, args, anim_args, keys, frame_idx):
191
+ angle = keys.angle_series[frame_idx]
192
+ zoom = keys.zoom_series[frame_idx]
193
+ translation_x = keys.translation_x_series[frame_idx]
194
+ translation_y = keys.translation_y_series[frame_idx]
195
+ transform_center_x = keys.transform_center_x_series[frame_idx]
196
+ transform_center_y = keys.transform_center_y_series[frame_idx]
197
+ center_point = (args.W * transform_center_x, args.H * transform_center_y)
198
+ rot_mat = cv2.getRotationMatrix2D(center_point, angle, zoom)
199
+ trans_mat = np.float32([[1, 0, translation_x], [0, 1, translation_y]])
200
+ trans_mat = np.vstack([trans_mat, [0,0,1]])
201
+ rot_mat = np.vstack([rot_mat, [0,0,1]])
202
+ if anim_args.enable_perspective_flip:
203
+ bM = get_flip_perspective_matrix(args.W, args.H, keys, frame_idx)
204
+ rot_mat = np.matmul(bM, rot_mat, trans_mat)
205
+ else:
206
+ rot_mat = np.matmul(rot_mat, trans_mat)
207
+ return cv2.warpPerspective(
208
+ prev_img_cv2,
209
+ rot_mat,
210
+ (prev_img_cv2.shape[1], prev_img_cv2.shape[0]),
211
+ borderMode=cv2.BORDER_WRAP if anim_args.border == 'wrap' else cv2.BORDER_REPLICATE
212
+ )
213
+
214
+ def anim_frame_warp_3d(device, prev_img_cv2, depth, anim_args, keys, frame_idx):
215
+ TRANSLATION_SCALE = 1.0/200.0 # matches Disco
216
+ translate_xyz = [
217
+ -keys.translation_x_series[frame_idx] * TRANSLATION_SCALE,
218
+ keys.translation_y_series[frame_idx] * TRANSLATION_SCALE,
219
+ -keys.translation_z_series[frame_idx] * TRANSLATION_SCALE
220
+ ]
221
+ rotate_xyz = [
222
+ math.radians(keys.rotation_3d_x_series[frame_idx]),
223
+ math.radians(keys.rotation_3d_y_series[frame_idx]),
224
+ math.radians(keys.rotation_3d_z_series[frame_idx])
225
+ ]
226
+ if anim_args.enable_perspective_flip:
227
+ prev_img_cv2 = flip_3d_perspective(anim_args, prev_img_cv2, keys, frame_idx)
228
+ rot_mat = p3d.euler_angles_to_matrix(torch.tensor(rotate_xyz, device=device), "XYZ").unsqueeze(0)
229
+ result = transform_image_3d_switcher(device if not device.type.startswith('mps') else torch.device('cpu'), prev_img_cv2, depth, rot_mat, translate_xyz, anim_args, keys, frame_idx)
230
+ torch.cuda.empty_cache()
231
+ return result
232
+
233
+ def transform_image_3d_switcher(device, prev_img_cv2, depth_tensor, rot_mat, translate, anim_args, keys, frame_idx):
234
+ if anim_args.depth_algorithm.lower() in ['midas+adabins (old)', 'zoe+adabins (old)']:
235
+ return transform_image_3d_legacy(device, prev_img_cv2, depth_tensor, rot_mat, translate, anim_args, keys, frame_idx)
236
+ else:
237
+ return transform_image_3d_new(device, prev_img_cv2, depth_tensor, rot_mat, translate, anim_args, keys, frame_idx)
238
+
239
+ def transform_image_3d_legacy(device, prev_img_cv2, depth_tensor, rot_mat, translate, anim_args, keys, frame_idx):
240
+ # adapted and optimized version of transform_image_3d from Disco Diffusion https://github.com/alembics/disco-diffusion
241
+ w, h = prev_img_cv2.shape[1], prev_img_cv2.shape[0]
242
+
243
+ if anim_args.aspect_ratio_use_old_formula:
244
+ aspect_ratio = float(w)/float(h)
245
+ else:
246
+ aspect_ratio = keys.aspect_ratio_series[frame_idx]
247
+
248
+ near = keys.near_series[frame_idx]
249
+ far = keys.far_series[frame_idx]
250
+ fov_deg = keys.fov_series[frame_idx]
251
+ persp_cam_old = p3d.FoVPerspectiveCameras(near, far, aspect_ratio, fov=fov_deg, degrees=True, device=device)
252
+ persp_cam_new = p3d.FoVPerspectiveCameras(near, far, aspect_ratio, fov=fov_deg, degrees=True, R=rot_mat, T=torch.tensor([translate]), device=device)
253
+
254
+ # range of [-1,1] is important to torch grid_sample's padding handling
255
+ y,x = torch.meshgrid(torch.linspace(-1.,1.,h,dtype=torch.float32,device=device),torch.linspace(-1.,1.,w,dtype=torch.float32,device=device))
256
+ if depth_tensor is None:
257
+ z = torch.ones_like(x)
258
+ else:
259
+ z = torch.as_tensor(depth_tensor, dtype=torch.float32, device=device)
260
+ xyz_old_world = torch.stack((x.flatten(), y.flatten(), z.flatten()), dim=1)
261
+
262
+ xyz_old_cam_xy = persp_cam_old.get_full_projection_transform().transform_points(xyz_old_world)[:,0:2]
263
+ xyz_new_cam_xy = persp_cam_new.get_full_projection_transform().transform_points(xyz_old_world)[:,0:2]
264
+
265
+ offset_xy = xyz_new_cam_xy - xyz_old_cam_xy
266
+ # affine_grid theta param expects a batch of 2D mats. Each is 2x3 to do rotation+translation.
267
+ identity_2d_batch = torch.tensor([[1.,0.,0.],[0.,1.,0.]], device=device).unsqueeze(0)
268
+ # coords_2d will have shape (N,H,W,2).. which is also what grid_sample needs.
269
+ coords_2d = torch.nn.functional.affine_grid(identity_2d_batch, [1,1,h,w], align_corners=False)
270
+ offset_coords_2d = coords_2d - torch.reshape(offset_xy, (h,w,2)).unsqueeze(0)
271
+
272
+ image_tensor = rearrange(torch.from_numpy(prev_img_cv2.astype(np.float32)), 'h w c -> c h w').to(device)
273
+ new_image = torch.nn.functional.grid_sample(
274
+ image_tensor.add(1/512 - 0.0001).unsqueeze(0),
275
+ offset_coords_2d,
276
+ mode=anim_args.sampling_mode,
277
+ padding_mode=anim_args.padding_mode,
278
+ align_corners=False
279
+ )
280
+
281
+ # convert back to cv2 style numpy array
282
+ result = rearrange(
283
+ new_image.squeeze().clamp(0,255),
284
+ 'c h w -> h w c'
285
+ ).cpu().numpy().astype(prev_img_cv2.dtype)
286
+ return result
287
+
288
+ def transform_image_3d_new(device, prev_img_cv2, depth_tensor, rot_mat, translate, anim_args, keys, frame_idx):
289
+ '''
290
+ originally an adapted and optimized version of transform_image_3d from Disco Diffusion https://github.com/alembics/disco-diffusion
291
+ modified by reallybigname to control various incoming tensors
292
+ '''
293
+ if anim_args.depth_algorithm.lower().startswith('midas'): # 'Midas-3-Hybrid' or 'Midas-3.1-BeitLarge'
294
+ depth = 1
295
+ depth_factor = -1
296
+ depth_offset = -2
297
+ elif anim_args.depth_algorithm.lower() == "adabins":
298
+ depth = 1
299
+ depth_factor = 1
300
+ depth_offset = 1
301
+ elif anim_args.depth_algorithm.lower() == "leres":
302
+ depth = 1
303
+ depth_factor = 1
304
+ depth_offset = 1
305
+ elif anim_args.depth_algorithm.lower() == "zoe":
306
+ depth = 1
307
+ depth_factor = 1
308
+ depth_offset = 1
309
+ else:
310
+ raise Exception(f"Unknown depth_algorithm passed to transform_image_3d function: {anim_args.depth_algorithm}")
311
+
312
+ w, h = prev_img_cv2.shape[1], prev_img_cv2.shape[0]
313
+
314
+ # depth stretching aspect ratio (has nothing to do with image dimensions - which is why the old formula was flawed)
315
+ aspect_ratio = float(w)/float(h) if anim_args.aspect_ratio_use_old_formula else keys.aspect_ratio_series[frame_idx]
316
+
317
+ # get projection keys
318
+ near = keys.near_series[frame_idx]
319
+ far = keys.far_series[frame_idx]
320
+ fov_deg = keys.fov_series[frame_idx]
321
+
322
+ # get perspective cams old (still) and new (transformed)
323
+ persp_cam_old = p3d.FoVPerspectiveCameras(near, far, aspect_ratio, fov=fov_deg, degrees=True, device=device)
324
+ persp_cam_new = p3d.FoVPerspectiveCameras(near, far, aspect_ratio, fov=fov_deg, degrees=True, R=rot_mat, T=torch.tensor([translate]), device=device)
325
+
326
+ # make xy meshgrid - range of [-1,1] is important to torch grid_sample's padding handling
327
+ y,x = torch.meshgrid(torch.linspace(-1.,1.,h,dtype=torch.float32,device=device),torch.linspace(-1.,1.,w,dtype=torch.float32,device=device))
328
+
329
+ # test tensor for validity (some are corrupted for some reason)
330
+ depth_tensor_invalid = depth_tensor is None or torch.isnan(depth_tensor).any() or torch.isinf(depth_tensor).any() or depth_tensor.min() == depth_tensor.max()
331
+
332
+ if depth_tensor is not None:
333
+ debug_print(f"Depth_T.min: {depth_tensor.min()}, Depth_T.max: {depth_tensor.max()}")
334
+ # if invalid, create flat z for this frame
335
+ if depth_tensor_invalid:
336
+ # if none, then 3D depth is turned off, so no warning is needed.
337
+ if depth_tensor is not None:
338
+ print("Depth tensor invalid. Generating a Flat depth for this frame.")
339
+ # create flat depth
340
+ z = torch.ones_like(x)
341
+ # create z from depth tensor
342
+ else:
343
+ # prepare tensor between 0 and 1 with optional equalization and autocontrast
344
+ depth_normalized = prepare_depth_tensor(depth_tensor)
345
+
346
+ # Rescale the normalized depth values by 'depth' and shift by 'depth_offset' (e.g. depth 2 and offset -1 maps the 0..1 range to -1..+1)
347
+ depth_final = depth_normalized * depth + depth_offset
348
+
349
+ # depth factor (1 is normal. -1 is inverted)
350
+ if depth_factor != 1:
351
+ depth_final *= depth_factor
352
+
353
+ # console reporting of depth normalization, min, max, diff
354
+ # will *only* print to console if Dev mode is enabled in general settings of Deforum
355
+ txt_depth_min, txt_depth_max = '{:.2f}'.format(float(depth_tensor.min())), '{:.2f}'.format(float(depth_tensor.max()))
356
+ diff = '{:.2f}'.format(float(depth_tensor.max()) - float(depth_tensor.min()))
357
+ console_txt = f"\033[36mDepth normalized to {depth_final.min()}/{depth_final.max()} from"
358
+ debug_print(f"{console_txt} {txt_depth_min}/{txt_depth_max} diff {diff}\033[0m")
359
+
360
+ # add z from depth
361
+ z = torch.as_tensor(depth_final, dtype=torch.float32, device=device)
362
+
363
+ # calculate offset_xy
364
+ xyz_old_world = torch.stack((x.flatten(), y.flatten(), z.flatten()), dim=1)
365
+ xyz_old_cam_xy = persp_cam_old.get_full_projection_transform().transform_points(xyz_old_world)[:,0:2]
366
+ xyz_new_cam_xy = persp_cam_new.get_full_projection_transform().transform_points(xyz_old_world)[:,0:2]
367
+ offset_xy = xyz_new_cam_xy - xyz_old_cam_xy
368
+
369
+ # affine_grid theta param expects a batch of 2D mats. Each is 2x3 to do rotation+translation.
370
+ identity_2d_batch = torch.tensor([[1.,0.,0.],[0.,1.,0.]], device=device).unsqueeze(0)
371
+
372
+ # coords_2d will have shape (N,H,W,2).. which is also what grid_sample needs.
373
+ coords_2d = torch.nn.functional.affine_grid(identity_2d_batch, [1,1,h,w], align_corners=False)
374
+ offset_coords_2d = coords_2d - torch.reshape(offset_xy, (h,w,2)).unsqueeze(0)
375
+
376
+ # do the hyperdimensional remap
377
+ image_tensor = rearrange(torch.from_numpy(prev_img_cv2.astype(np.float32)), 'h w c -> c h w').to(device)
378
+ new_image = torch.nn.functional.grid_sample(
379
+ image_tensor.unsqueeze(0), # image_tensor.add(1/512 - 0.0001).unsqueeze(0),
380
+ offset_coords_2d,
381
+ mode=anim_args.sampling_mode,
382
+ padding_mode=anim_args.padding_mode,
383
+ align_corners=False
384
+ )
385
+
386
+ # convert back to cv2 style numpy array
387
+ result = rearrange(
388
+ new_image.squeeze().clamp(0,255),
389
+ 'c h w -> h w c'
390
+ ).cpu().numpy().astype(prev_img_cv2.dtype)
391
+ return result
392
+
393
+ def prepare_depth_tensor(depth_tensor=None):
394
+ # Prepares a depth tensor with normalization & equalization between 0 and 1
395
+ depth_range = depth_tensor.max() - depth_tensor.min()
396
+ depth_tensor = (depth_tensor - depth_tensor.min()) / depth_range
397
+ depth_tensor = depth_equalization(depth_tensor=depth_tensor)
398
+ return depth_tensor
399
+
400
+ def depth_equalization(depth_tensor):
401
+ """
402
+ Perform histogram equalization on a single-channel depth tensor.
403
+
404
+ Args:
405
+ depth_tensor (torch.Tensor): A 2D depth tensor (H, W).
406
+
407
+ Returns:
408
+ torch.Tensor: Equalized depth tensor (2D).
409
+ """
410
+
411
+ # Convert the depth tensor to a NumPy array for processing
412
+ depth_array = depth_tensor.cpu().numpy()
413
+
414
+ # Calculate the histogram of the depth values using a specified number of bins
415
+ # Increase the number of bins for higher precision depth tensors
416
+ hist, bin_edges = np.histogram(depth_array, bins=1024, range=(0, 1))
417
+
418
+ # Calculate the cumulative distribution function (CDF) of the histogram
419
+ cdf = hist.cumsum()
420
+
421
+ # Normalize the CDF so that the maximum value is 1
422
+ cdf = cdf / float(cdf[-1])
423
+
424
+ # Perform histogram equalization by mapping the original depth values to the CDF values
425
+ equalized_depth_array = np.interp(depth_array, bin_edges[:-1], cdf)
426
+
427
+ # Convert the equalized depth array back to a PyTorch tensor and return it
428
+ equalized_depth_tensor = torch.from_numpy(equalized_depth_array).to(depth_tensor.device)
429
+
430
+ return equalized_depth_tensor
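
For reference, a self-contained sketch (not part of the diff) of what prepare_depth_tensor and depth_equalization above do to a depth map, using synthetic placeholder values:

import numpy as np
import torch

depth = torch.rand(480, 640) ** 3                                    # synthetic, skewed depth distribution
depth_norm = (depth - depth.min()) / (depth.max() - depth.min())     # normalize to 0..1, as in prepare_depth_tensor
hist, bin_edges = np.histogram(depth_norm.numpy(), bins=1024, range=(0, 1))
cdf = hist.cumsum() / float(hist.cumsum()[-1])                       # normalized CDF, maximum value 1
equalized = np.interp(depth_norm.numpy(), bin_edges[:-1], cdf)       # the same mapping depth_equalization applies
print(float(equalized.min()), float(equalized.max()))                # values spread towards a roughly uniform 0..1 range
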
scripts/deforum_helpers/animation_key_frames.py ADDED
@@ -0,0 +1,150 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import re
19
+ import numpy as np
20
+ import numexpr
21
+ import pandas as pd
22
+ from .prompt import check_is_number
23
+
24
+ class DeformAnimKeys():
25
+ def __init__(self, anim_args, seed=-1):
26
+ self.fi = FrameInterpolater(anim_args.max_frames, seed)
27
+ self.angle_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.angle))
28
+ self.transform_center_x_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.transform_center_x))
29
+ self.transform_center_y_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.transform_center_y))
30
+ self.zoom_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.zoom))
31
+ self.translation_x_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.translation_x))
32
+ self.translation_y_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.translation_y))
33
+ self.translation_z_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.translation_z))
34
+ self.rotation_3d_x_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.rotation_3d_x))
35
+ self.rotation_3d_y_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.rotation_3d_y))
36
+ self.rotation_3d_z_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.rotation_3d_z))
37
+ self.perspective_flip_theta_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.perspective_flip_theta))
38
+ self.perspective_flip_phi_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.perspective_flip_phi))
39
+ self.perspective_flip_gamma_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.perspective_flip_gamma))
40
+ self.perspective_flip_fv_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.perspective_flip_fv))
41
+ self.noise_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.noise_schedule))
42
+ self.strength_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.strength_schedule))
43
+ self.contrast_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.contrast_schedule))
44
+ self.cfg_scale_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.cfg_scale_schedule))
45
+ self.ddim_eta_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.ddim_eta_schedule))
46
+ self.ancestral_eta_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.ancestral_eta_schedule))
47
+ self.pix2pix_img_cfg_scale_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.pix2pix_img_cfg_scale_schedule))
48
+ self.subseed_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.subseed_schedule))
49
+ self.subseed_strength_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.subseed_strength_schedule))
50
+ self.checkpoint_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.checkpoint_schedule), is_single_string = True)
51
+ self.steps_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.steps_schedule))
52
+ self.seed_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.seed_schedule))
53
+ self.sampler_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.sampler_schedule), is_single_string = True)
54
+ self.clipskip_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.clipskip_schedule))
55
+ self.noise_multiplier_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.noise_multiplier_schedule))
56
+ self.mask_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.mask_schedule), is_single_string = True)
57
+ self.noise_mask_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.noise_mask_schedule), is_single_string = True)
58
+ self.kernel_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.kernel_schedule))
59
+ self.sigma_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.sigma_schedule))
60
+ self.amount_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.amount_schedule))
61
+ self.threshold_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.threshold_schedule))
62
+ self.aspect_ratio_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.aspect_ratio_schedule))
63
+ self.fov_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.fov_schedule))
64
+ self.near_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.near_schedule))
65
+ self.cadence_flow_factor_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.cadence_flow_factor_schedule))
66
+ self.redo_flow_factor_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.redo_flow_factor_schedule))
67
+ self.far_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.far_schedule))
68
+ self.hybrid_comp_alpha_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.hybrid_comp_alpha_schedule))
69
+ self.hybrid_comp_mask_blend_alpha_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.hybrid_comp_mask_blend_alpha_schedule))
70
+ self.hybrid_comp_mask_contrast_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.hybrid_comp_mask_contrast_schedule))
71
+ self.hybrid_comp_mask_auto_contrast_cutoff_high_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.hybrid_comp_mask_auto_contrast_cutoff_high_schedule))
72
+ self.hybrid_comp_mask_auto_contrast_cutoff_low_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.hybrid_comp_mask_auto_contrast_cutoff_low_schedule))
73
+ self.hybrid_flow_factor_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.hybrid_flow_factor_schedule))
74
+
75
+ class ControlNetKeys():
76
+ def __init__(self, anim_args, controlnet_args):
77
+ self.fi = FrameInterpolater(max_frames=anim_args.max_frames)
78
+ self.schedules = {}
79
+ for i in range(1, 6): # 5 CN models in total
80
+ for suffix in ['weight', 'guidance_start', 'guidance_end']:
81
+ prefix = f"cn_{i}"
82
+ key = f"{prefix}_{suffix}_schedule_series"
83
+ self.schedules[key] = self.fi.get_inbetweens(self.fi.parse_key_frames(getattr(controlnet_args, f"{prefix}_{suffix}")))
84
+ setattr(self, key, self.schedules[key])
85
+
86
+ class LooperAnimKeys():
87
+ def __init__(self, loop_args, anim_args, seed):
88
+ self.fi = FrameInterpolater(anim_args.max_frames, seed)
89
+ self.use_looper = loop_args.use_looper
90
+ self.imagesToKeyframe = loop_args.init_images
91
+ self.image_strength_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(loop_args.image_strength_schedule))
92
+ self.blendFactorMax_series = self.fi.get_inbetweens(self.fi.parse_key_frames(loop_args.blendFactorMax))
93
+ self.blendFactorSlope_series = self.fi.get_inbetweens(self.fi.parse_key_frames(loop_args.blendFactorSlope))
94
+ self.tweening_frames_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(loop_args.tweening_frames_schedule))
95
+ self.color_correction_factor_series = self.fi.get_inbetweens(self.fi.parse_key_frames(loop_args.color_correction_factor))
96
+
97
+ class FrameInterpolater():
98
+ def __init__(self, max_frames=0, seed=-1) -> None:
99
+ self.max_frames = max_frames
100
+ self.seed = seed
101
+
102
+ def sanitize_value(self, value):
103
+ return value.replace("'","").replace('"',"").replace('(',"").replace(')',"")
104
+
105
+ def get_inbetweens(self, key_frames, integer=False, interp_method='Linear', is_single_string = False):
106
+ key_frame_series = pd.Series([np.nan for a in range(self.max_frames)])
107
+ # get our ui variables set for numexpr.evaluate
108
+ max_f = self.max_frames -1
109
+ s = self.seed
110
+ for i in range(0, self.max_frames):
111
+ if i in key_frames:
112
+ value = key_frames[i]
113
+ value_is_number = check_is_number(self.sanitize_value(value))
114
+ if value_is_number: # if it's only a number, leave the rest for the default interpolation
115
+ key_frame_series[i] = self.sanitize_value(value)
116
+ if not value_is_number:
117
+ t = i
118
+ # workaround for values formatted like 0:("I am test") //used for sampler schedules
119
+ key_frame_series[i] = numexpr.evaluate(value) if not is_single_string else self.sanitize_value(value)
120
+ elif is_single_string:# take previous string value and replicate it
121
+ key_frame_series[i] = key_frame_series[i-1]
122
+ key_frame_series = key_frame_series.astype(float) if not is_single_string else key_frame_series # as string
123
+
124
+ if interp_method == 'Cubic' and len(key_frames.items()) <= 3:
125
+ interp_method = 'Quadratic'
126
+ if interp_method == 'Quadratic' and len(key_frames.items()) <= 2:
127
+ interp_method = 'Linear'
128
+
129
+ key_frame_series[0] = key_frame_series[key_frame_series.first_valid_index()]
130
+ key_frame_series[self.max_frames-1] = key_frame_series[key_frame_series.last_valid_index()]
131
+ key_frame_series = key_frame_series.interpolate(method=interp_method.lower(), limit_direction='both')
132
+ if integer:
133
+ return key_frame_series.astype(int)
134
+ return key_frame_series
135
+
136
+ def parse_key_frames(self, string):
137
+ # because math expressions (e.g. sin(t)) can contain brackets themselves,
138
+ # each comma-separated entry is split on ':' into a frame number and a value,
139
+ # the value being whatever was enclosed in brackets, with a comma or the
140
+ # end of the line following the closing bracket
141
+ frames = dict()
142
+ for match_object in string.split(","):
143
+ frameParam = match_object.split(":")
144
+ max_f = self.max_frames -1
145
+ s = self.seed
146
+ frame = int(self.sanitize_value(frameParam[0])) if check_is_number(self.sanitize_value(frameParam[0].strip())) else int(numexpr.evaluate(frameParam[0].strip().replace("'","",1).replace('"',"",1)[::-1].replace("'","",1).replace('"',"",1)[::-1]))
147
+ frames[frame] = frameParam[1].strip()
148
+ if frames == {} and len(string) != 0:
149
+ raise RuntimeError('Key Frame string not correctly formatted')
150
+ return frames
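
To illustrate how the keyframe strings handled above turn into per-frame values, here is a stripped-down sketch (not part of the diff) that mirrors parse_key_frames and get_inbetweens; the schedule string and frame count are made up:

import numexpr
import numpy as np
import pandas as pd

schedule = "0:(0), 10:(5), 20:(2*t)"                  # illustrative keyframe string
max_frames = 30
keyframes = {}
for part in schedule.split(","):
    frame, value = part.split(":")
    keyframes[int(frame.strip())] = value.strip().strip("()")

series = pd.Series([np.nan] * max_frames)
for i in range(max_frames):
    if i in keyframes:
        t = i                                         # 't' is visible to numexpr expressions
        series[i] = float(numexpr.evaluate(keyframes[i]))
series[0] = series[series.first_valid_index()]        # pin the edges, as get_inbetweens does
series[max_frames - 1] = series[series.last_valid_index()]
series = series.interpolate(limit_direction="both")   # linear interpolation between keyframes
print(series[10], series[15], series[29])             # 5.0, 22.5, 40.0
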
scripts/deforum_helpers/args.py ADDED
@@ -0,0 +1,341 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import json
19
+ import os
20
+ import tempfile
21
+ import time
22
+ from types import SimpleNamespace
23
+ import modules.paths as ph
24
+ import modules.shared as sh
25
+ from modules.processing import get_fixed_seed
26
+ from modules.shared import cmd_opts
27
+ from .defaults import get_guided_imgs_default_json
28
+ from .deforum_controlnet import controlnet_component_names
29
+ from .general_utils import get_os, substitute_placeholders
30
+
31
+ def RootArgs():
32
+ device = sh.device
33
+ models_path = ph.models_path + '/Deforum'
34
+ half_precision = not cmd_opts.no_half
35
+ mask_preset_names = ['everywhere', 'video_mask']
36
+ frames_cache = []
37
+ raw_batch_name = None
38
+ raw_seed = None
39
+ initial_info = None
40
+ first_frame = None
41
+ animation_prompts = None
42
+ current_user_os = get_os()
43
+ tmp_deforum_run_duplicated_folder = os.path.join(tempfile.gettempdir(), 'tmp_run_deforum')
44
+ return locals()
45
+
46
+ def CoreArgs(): # TODO: change or do something with this ugliness
47
+ subseed = -1
48
+ subseed_strength = 0
49
+ timestring = ""
50
+ init_sample = None
51
+ noise_mask = None
52
+ seed_internal = 0
53
+ return locals()
54
+
55
+ def DeforumAnimArgs():
56
+ animation_mode = '2D' # ['None', '2D', '3D', 'Video Input', 'Interpolation']
57
+ max_frames = 120
58
+ border = 'replicate' # ['wrap', 'replicate']
59
+ angle = "0:(0)"
60
+ zoom = "0:(1.0025+0.002*sin(1.25*3.14*t/30))"
61
+ translation_x = "0:(0)"
62
+ translation_y = "0:(0)"
63
+ translation_z = "0:(1.75)"
64
+ transform_center_x = "0:(0.5)"
65
+ transform_center_y = "0:(0.5)"
66
+ rotation_3d_x = "0:(0)"
67
+ rotation_3d_y = "0:(0)"
68
+ rotation_3d_z = "0:(0)"
69
+ enable_perspective_flip = False
70
+ perspective_flip_theta = "0:(0)"
71
+ perspective_flip_phi = "0:(0)"
72
+ perspective_flip_gamma = "0:(0)"
73
+ perspective_flip_fv = "0:(53)"
74
+ noise_schedule = "0: (0.065)"
75
+ strength_schedule = "0: (0.65)"
76
+ contrast_schedule = "0: (1.0)"
77
+ cfg_scale_schedule = "0: (7)"
78
+ enable_steps_scheduling = False
79
+ steps_schedule = "0: (25)"
80
+ fov_schedule = "0: (70)"
81
+ aspect_ratio_schedule = "0: (1)"
82
+ aspect_ratio_use_old_formula = False
83
+ near_schedule = "0: (200)"
84
+ far_schedule = "0: (10000)"
85
+ seed_schedule = '0:(s), 1:(-1), "max_f-2":(-1), "max_f-1":(s)'
86
+ pix2pix_img_cfg_scale_schedule = "0:(1.5)"
87
+ enable_subseed_scheduling = False
88
+ subseed_schedule = "0:(1)"
89
+ subseed_strength_schedule = "0:(0)"
90
+ enable_sampler_scheduling = False # Sampler Scheduling
91
+ sampler_schedule = '0: ("Euler a")'
92
+ use_noise_mask = False # Composable mask scheduling
93
+ mask_schedule = '0: ("{video_mask}")'
94
+ noise_mask_schedule = '0: ("{video_mask}")'
95
+ enable_checkpoint_scheduling = False # Checkpoint Scheduling
96
+ checkpoint_schedule = '0: ("model1.ckpt"), 100: ("model2.safetensors")'
97
+ enable_clipskip_scheduling = False # CLIP skip Scheduling
98
+ clipskip_schedule = '0: (2)'
99
+ enable_noise_multiplier_scheduling = True # Noise Multiplier Scheduling
100
+ noise_multiplier_schedule = '0: (1.05)'
101
+ # resume params
102
+ resume_from_timestring = False
103
+ resume_timestring = "20230129210106"
104
+ # DDIM AND Ancestral ETA scheds
105
+ enable_ddim_eta_scheduling = False
106
+ ddim_eta_schedule = "0:(0)"
107
+ enable_ancestral_eta_scheduling = False
108
+ ancestral_eta_schedule = "0:(1)"
109
+ # Anti-blur
110
+ amount_schedule = "0: (0.1)"
111
+ kernel_schedule = "0: (5)"
112
+ sigma_schedule = "0: (1.0)"
113
+ threshold_schedule = "0: (0.0)"
114
+ # Coherence
115
+ color_coherence = 'LAB' # ['None', 'HSV', 'LAB', 'RGB', 'Video Input', 'Image']
116
+ color_coherence_image_path = ""
117
+ color_coherence_video_every_N_frames = 1
118
+ color_force_grayscale = False
119
+ legacy_colormatch = False
120
+ diffusion_cadence = '2' # ['1','2','3','4','5','6','7','8']
121
+ optical_flow_cadence = 'None' # ['None', 'RAFT','DIS Medium', 'DIS Fine', 'Farneback']
122
+ cadence_flow_factor_schedule = "0: (1)"
123
+ optical_flow_redo_generation = 'None' # ['None', 'RAFT', 'DIS Medium', 'DIS Fine', 'Farneback']
124
+ redo_flow_factor_schedule = "0: (1)"
125
+ diffusion_redo = '0'
126
+ # **Noise settings:**
127
+ noise_type = 'perlin' # ['uniform', 'perlin']
128
+ # Perlin params
129
+ perlin_w = 8
130
+ perlin_h = 8
131
+ perlin_octaves = 4
132
+ perlin_persistence = 0.5
133
+ # **3D Depth Warping:**
134
+ use_depth_warping = True
135
+ depth_algorithm = 'Midas-3-Hybrid' # ['Midas+AdaBins (old)','Zoe+AdaBins (old)', 'Midas-3-Hybrid','Midas-3.1-BeitLarge', 'AdaBins', 'Zoe', 'Leres'] Midas-3.1-BeitLarge is temporarily removed 04-05-23 until fixed
136
+ midas_weight = 0.2 # midas/ zoe weight - only relevant in old/ legacy depth_algorithm modes. see above ^
137
+ padding_mode = 'border' # ['border', 'reflection', 'zeros']
138
+ sampling_mode = 'bicubic' # ['bicubic', 'bilinear', 'nearest']
139
+ save_depth_maps = False
140
+ # **Video Input:**
141
+ video_init_path = 'https://deforum.github.io/a1/V1.mp4'
142
+ extract_nth_frame = 1
143
+ extract_from_frame = 0
144
+ extract_to_frame = -1 # minus 1 for unlimited frames
145
+ overwrite_extracted_frames = True
146
+ use_mask_video = False
147
+ video_mask_path = 'https://deforum.github.io/a1/VM1.mp4'
148
+ # **Hybrid Video for 2D/3D Animation Mode:**
149
+ hybrid_comp_alpha_schedule = "0:(0.5)"
150
+ hybrid_comp_mask_blend_alpha_schedule = "0:(0.5)"
151
+ hybrid_comp_mask_contrast_schedule = "0:(1)"
152
+ hybrid_comp_mask_auto_contrast_cutoff_high_schedule = "0:(100)"
153
+ hybrid_comp_mask_auto_contrast_cutoff_low_schedule = "0:(0)"
154
+ hybrid_flow_factor_schedule = "0:(1)"
155
+ hybrid_generate_inputframes = False
156
+ hybrid_generate_human_masks = "None" # ['None','PNGs','Video', 'Both']
157
+ hybrid_use_first_frame_as_init_image = True
158
+ hybrid_motion = "None" # ['None','Optical Flow','Perspective','Affine']
159
+ hybrid_motion_use_prev_img = False
160
+ hybrid_flow_consistency = False
161
+ hybrid_consistency_blur = 2
162
+ hybrid_flow_method = "RAFT" # ['RAFT', 'DIS Medium', 'DIS Fine', 'Farneback']
163
+ hybrid_composite = 'None' # ['None', 'Normal', 'Before Motion', 'After Generation']
164
+ hybrid_use_init_image = False
165
+ hybrid_comp_mask_type = "None" # ['None', 'Depth', 'Video Depth', 'Blend', 'Difference']
166
+ hybrid_comp_mask_inverse = False
167
+ hybrid_comp_mask_equalize = "None" # ['None','Before','After','Both']
168
+ hybrid_comp_mask_auto_contrast = False
169
+ hybrid_comp_save_extra_frames = False
170
+ return locals()
171
+
172
+ def DeforumArgs():
173
+ # set default image size and make sure to resize to multiples of 64 if needed
174
+ W, H = map(lambda x: x - x % 64, (512, 512))
175
+ # whether to show gradio's info section for all params in the ui. it's a realtime toggle
176
+ show_info_on_ui = True
177
+ # **Webui stuff**
178
+ tiling = False
179
+ restore_faces = False
180
+ seed_enable_extras = False
181
+ seed_resize_from_w = 0
182
+ seed_resize_from_h = 0
183
+ # **Sampling Settings**
184
+ seed = -1 #
185
+ sampler = 'euler_ancestral' # ["klms","dpm2","dpm2_ancestral","heun","euler","euler_ancestral","plms", "ddim"]
186
+ steps = 25 #
187
+ # **Batch Settings**
188
+ batch_name = "Deforum_{timestring}"
189
+ seed_behavior = "iter" # ["iter","fixed","random","ladder","alternate","schedule"]
190
+ seed_iter_N = 1
191
+ # **Init Settings**
192
+ use_init = False
193
+ strength = 0.8
194
+ strength_0_no_init = True # Set the strength to 0 automatically when no init image is used
195
+ init_image = "https://deforum.github.io/a1/I1.png"
196
+ # Whiter areas of the mask are areas that change more
197
+ use_mask = False
198
+ use_alpha_as_mask = False # use the alpha channel of the init image as the mask
199
+ mask_file = "https://deforum.github.io/a1/M1.jpg"
200
+ invert_mask = False
201
+ # Adjust mask image, 1.0 is no adjustment. Should be positive numbers.
202
+ mask_contrast_adjust = 1.0
203
+ mask_brightness_adjust = 1.0
204
+ # Overlay the masked image at the end of the generation so it does not get degraded by encoding and decoding
205
+ overlay_mask = True
206
+ # Blur edges of final overlay mask, if used. Minimum = 0 (no blur)
207
+ mask_overlay_blur = 4
208
+ fill = 1 # MASKARGSEXPANSION Todo : Rename and convert to same formatting as used in img2img masked content
209
+ full_res_mask = True
210
+ full_res_mask_padding = 4
211
+ reroll_blank_frames = 'reroll' # reroll, interrupt, or ignore
212
+ reroll_patience = 10
213
+ return locals()
214
+
215
+ def LoopArgs():
216
+ use_looper = False
217
+ init_images = get_guided_imgs_default_json()
218
+ image_strength_schedule = "0:(0.75)"
219
+ blendFactorMax = "0:(0.35)"
220
+ blendFactorSlope = "0:(0.25)"
221
+ tweening_frames_schedule = "0:(20)"
222
+ color_correction_factor = "0:(0.075)"
223
+ return locals()
224
+
225
+ def ParseqArgs():
226
+ parseq_manifest = None
227
+ parseq_use_deltas = True
228
+ return locals()
229
+
230
+ def DeforumOutputArgs():
231
+ skip_video_creation = False
232
+ fps = 15
233
+ make_gif = False
234
+ delete_imgs = False # True will delete all imgs after a successful mp4 creation
235
+ image_path = "C:/SD/20230124234916_%09d.png"
236
+ add_soundtrack = 'None' # ["File","Init Video"]
237
+ soundtrack_path = "https://deforum.github.io/a1/A1.mp3"
238
+ # End-Run upscaling
239
+ r_upscale_video = False
240
+ r_upscale_factor = 'x2' # ['2x', 'x3', 'x4']
241
+ r_upscale_model = 'realesr-animevideov3' # 'realesr-animevideov3' (default of realesrgan engine, does 2-4x), the rest do only 4x: 'realesrgan-x4plus', 'realesrgan-x4plus-anime'
242
+ r_upscale_keep_imgs = True
243
+ store_frames_in_ram = False
244
+ # **Interpolate Video Settings**
245
+ frame_interpolation_engine = "None" # ["None", "RIFE v4.6", "FILM"]
246
+ frame_interpolation_x_amount = 2 # [2 to 1000 depends on the engine]
247
+ frame_interpolation_slow_mo_enabled = False
248
+ frame_interpolation_slow_mo_amount = 2 # [2 to 10]
249
+ frame_interpolation_keep_imgs = False
250
+ return locals()
251
+
252
+ def get_component_names():
253
+ return ['override_settings_with_file', 'custom_settings_file', *DeforumAnimArgs().keys(), 'animation_prompts', 'animation_prompts_positive', 'animation_prompts_negative',
254
+ *DeforumArgs().keys(), *DeforumOutputArgs().keys(), *ParseqArgs().keys(), *LoopArgs().keys(), *controlnet_component_names()]
255
+
256
+ def get_settings_component_names():
257
+ return [name for name in get_component_names()]
258
+
259
+ def pack_args(args_dict):
260
+ args_dict = {name: args_dict[name] for name in DeforumArgs()}
261
+ args_dict.update({name: CoreArgs()[name] for name in CoreArgs()})
262
+ return args_dict
263
+
264
+ def pack_anim_args(args_dict):
265
+ return {name: args_dict[name] for name in DeforumAnimArgs()}
266
+
267
+ def pack_video_args(args_dict):
268
+ return {name: args_dict[name] for name in DeforumOutputArgs()}
269
+
270
+ def pack_parseq_args(args_dict):
271
+ return {name: args_dict[name] for name in ParseqArgs()}
272
+
273
+ def pack_loop_args(args_dict):
274
+ return {name: args_dict[name] for name in LoopArgs()}
275
+
276
+ def pack_controlnet_args(args_dict):
277
+ return {name: args_dict[name] for name in controlnet_component_names()}
278
+
279
+ def process_args(args_dict_main, run_id):
280
+ from .settings import load_args
281
+ override_settings_with_file = args_dict_main['override_settings_with_file']
282
+ custom_settings_file = args_dict_main['custom_settings_file']
283
+ args_dict = pack_args(args_dict_main)
284
+ anim_args_dict = pack_anim_args(args_dict_main)
285
+ video_args_dict = pack_video_args(args_dict_main)
286
+ parseq_args_dict = pack_parseq_args(args_dict_main)
287
+ loop_args_dict = pack_loop_args(args_dict_main)
288
+ controlnet_args_dict = pack_controlnet_args(args_dict_main)
289
+
290
+ root = SimpleNamespace(**RootArgs())
291
+ p = args_dict_main['p']
292
+ root.animation_prompts = json.loads(args_dict_main['animation_prompts'])
293
+
294
+ args_loaded_ok = True # can use this later to error cleanly upon wrong gen param in ui
295
+ if override_settings_with_file:
296
+ args_loaded_ok = load_args(args_dict_main, args_dict, anim_args_dict, parseq_args_dict, loop_args_dict, controlnet_args_dict, video_args_dict, custom_settings_file, root, run_id)
297
+
298
+ positive_prompts = args_dict_main['animation_prompts_positive']
299
+ negative_prompts = args_dict_main['animation_prompts_negative']
300
+ negative_prompts = negative_prompts.replace('--neg', '') # remove --neg from negative_prompts if received by mistake
301
+ for key in root.animation_prompts:
302
+ animationPromptCurr = root.animation_prompts[key]
303
+ root.animation_prompts[key] = f"{positive_prompts} {animationPromptCurr} {'' if '--neg' in animationPromptCurr else '--neg'} {negative_prompts}"
304
+
305
+ os.makedirs(root.models_path, exist_ok=True)
306
+
307
+ args = SimpleNamespace(**args_dict)
308
+ anim_args = SimpleNamespace(**anim_args_dict)
309
+ video_args = SimpleNamespace(**video_args_dict)
310
+ parseq_args = SimpleNamespace(**parseq_args_dict)
311
+ loop_args = SimpleNamespace(**loop_args_dict)
312
+ controlnet_args = SimpleNamespace(**controlnet_args_dict)
313
+
314
+ if args.seed == -1:
315
+ root.raw_seed = -1
316
+ args.seed = get_fixed_seed(args.seed)
317
+ if root.raw_seed != -1:
318
+ root.raw_seed = args.seed
319
+ args.timestring = time.strftime('%Y%m%d%H%M%S')
320
+ args.strength = max(0.0, min(1.0, args.strength))
321
+ args.prompts = json.loads(args_dict_main['animation_prompts'])
322
+ args.positive_prompts = args_dict_main['animation_prompts_positive']
323
+ args.negative_prompts = args_dict_main['animation_prompts_negative']
324
+
325
+ if not args.use_init and not anim_args.hybrid_use_init_image:
326
+ args.init_image = None
327
+
328
+ if anim_args.animation_mode == 'None':
329
+ anim_args.max_frames = 1
330
+ elif anim_args.animation_mode == 'Video Input':
331
+ args.use_init = True
332
+
333
+ current_arg_list = [args, anim_args, video_args, parseq_args]
334
+ full_base_folder_path = os.path.join(os.getcwd(), p.outpath_samples)
335
+ root.raw_batch_name = args.batch_name
336
+ args.batch_name = substitute_placeholders(args.batch_name, current_arg_list, full_base_folder_path)
337
+ args.outdir = os.path.join(p.outpath_samples, str(args.batch_name))
338
+ args.outdir = os.path.join(os.getcwd(), args.outdir)
339
+ os.makedirs(args.outdir, exist_ok=True)
340
+
341
+ return args_loaded_ok, root, args, anim_args, video_args, parseq_args, loop_args, controlnet_args
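
As a side note on the pattern used throughout this file: the *Args() functions collect their local defaults with locals(), and process_args later wraps the packed dicts in SimpleNamespace for attribute access. A toy sketch of that pattern (not part of the diff, names invented):

from types import SimpleNamespace

def ExampleArgs():               # same pattern as DeforumArgs() / DeforumAnimArgs() above
    steps = 25
    sampler = "euler_ancestral"
    return locals()              # the defaults are captured as a plain dict

defaults = ExampleArgs()         # {'steps': 25, 'sampler': 'euler_ancestral'}
args = SimpleNamespace(**defaults)
print(args.steps)                # 25
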
scripts/deforum_helpers/auto_navigation.py ADDED
@@ -0,0 +1,89 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import numpy as np
19
+ import torch
20
+
21
+ # reallybigname - auto-navigation functions in progress...
22
+ # usage:
23
+ # if auto_rotation:
24
+ # rot_mat = rotate_camera_towards_depth(depth_tensor, auto_rotation_steps, w, h, fov_deg, auto_rotation_depth_target)
25
+ def rotate_camera_towards_depth(depth_tensor, turn_weight, width, height, h_fov=60, target_depth=1):
26
+ # Compute the depth at the target depth
27
+ target_depth_index = int(target_depth * depth_tensor.shape[0])
28
+ target_depth_values = depth_tensor[target_depth_index]
29
+ max_depth_index = torch.argmax(target_depth_values).item()
30
+ max_depth_index = (max_depth_index, target_depth_index)
31
+ max_depth = target_depth_values[max_depth_index[0]].item()
32
+
33
+ # Compute the normalized x and y coordinates
34
+ x, y = max_depth_index
35
+ x_normalized = (x / (width - 1)) * 2 - 1
36
+ y_normalized = (y / (height - 1)) * 2 - 1
37
+
38
+ # Calculate horizontal and vertical field of view (in radians)
39
+ h_fov_rad = np.radians(h_fov)
40
+ aspect_ratio = width / height
41
+ v_fov_rad = h_fov_rad / aspect_ratio
42
+
43
+ # Calculate the world coordinates (x, y) at the target depth
44
+ x_world = np.tan(h_fov_rad / 2) * max_depth * x_normalized
45
+ y_world = np.tan(v_fov_rad / 2) * max_depth * y_normalized
46
+
47
+ # Compute the target position using the world coordinates and max_depth
48
+ target_position = np.array([x_world, y_world, max_depth])
49
+
50
+ # Assuming the camera is initially at the origin, and looking in the negative Z direction
51
+ cam_position = np.array([0, 0, 0])
52
+ current_direction = np.array([0, 0, -1])
53
+
54
+ # Compute the direction vector and normalize it
55
+ direction = target_position - cam_position
56
+ direction = direction / np.linalg.norm(direction)
57
+
58
+ # Compute the rotation angle based on the turn_weight (number of frames)
59
+ axis = np.cross(current_direction, direction)
60
+ axis = axis / np.linalg.norm(axis)
61
+ angle = np.arcsin(np.linalg.norm(axis))
62
+ max_angle = np.pi * (0.1 / turn_weight) # Limit the maximum rotation angle to half of the visible screen
63
+ rotation_angle = np.clip(np.sign(np.cross(current_direction, direction)) * angle / turn_weight, -max_angle, max_angle)
64
+
65
+ # Compute the rotation matrix
66
+ rotation_matrix = np.eye(3) + np.sin(rotation_angle) * np.array([
67
+ [0, -axis[2], axis[1]],
68
+ [axis[2], 0, -axis[0]],
69
+ [-axis[1], axis[0], 0]
70
+ ]) + (1 - np.cos(rotation_angle)) * np.outer(axis, axis)
71
+
72
+ # Convert the NumPy array to a PyTorch tensor
73
+ rotation_matrix_tensor = torch.from_numpy(rotation_matrix).float()
74
+
75
+ # Add an extra dimension to match the expected shape (1, 3, 3)
76
+ rotation_matrix_tensor = rotation_matrix_tensor.unsqueeze(0)
77
+
78
+ return rotation_matrix_tensor
79
+
80
+ def rotation_matrix(axis, angle):
81
+ axis = np.asarray(axis)
82
+ axis = axis / np.linalg.norm(axis)
83
+ a = np.cos(angle / 2.0)
84
+ b, c, d = -axis * np.sin(angle / 2.0)
85
+ aa, bb, cc, dd = a * a, b * b, c * c, d * d
86
+ bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d
87
+ return np.array([[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)],
88
+ [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)],
89
+ [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]])
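
A quick sanity check (not part of the diff) of the rotation_matrix helper above, which builds an Euler-Rodrigues rotation matrix from an axis and an angle; it assumes the function is in scope:

import numpy as np

R = rotation_matrix([0, 0, 1], np.pi / 2)             # 90 degrees about the z-axis
print(np.round(R @ np.array([1.0, 0.0, 0.0]), 3))     # approximately [0, 1, 0]: the x-axis is rotated onto the y-axis
print(np.round(R @ R.T, 3))                           # identity, since rotation matrices are orthonormal
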
scripts/deforum_helpers/colors.py ADDED
@@ -0,0 +1,39 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import cv2
19
+ import pkg_resources
20
+ from skimage.exposure import match_histograms
21
+
22
+ def maintain_colors(prev_img, color_match_sample, mode):
23
+ skimage_version = pkg_resources.get_distribution('scikit-image').version
24
+ is_skimage_v20_or_higher = pkg_resources.parse_version(skimage_version) >= pkg_resources.parse_version('0.20.0')
25
+
26
+ match_histograms_kwargs = {'channel_axis': -1} if is_skimage_v20_or_higher else {'multichannel': True}
27
+
28
+ if mode == 'RGB':
29
+ return match_histograms(prev_img, color_match_sample, **match_histograms_kwargs)
30
+ elif mode == 'HSV':
31
+ prev_img_hsv = cv2.cvtColor(prev_img, cv2.COLOR_RGB2HSV)
32
+ color_match_hsv = cv2.cvtColor(color_match_sample, cv2.COLOR_RGB2HSV)
33
+ matched_hsv = match_histograms(prev_img_hsv, color_match_hsv, **match_histograms_kwargs)
34
+ return cv2.cvtColor(matched_hsv, cv2.COLOR_HSV2RGB)
35
+ else: # LAB
36
+ prev_img_lab = cv2.cvtColor(prev_img, cv2.COLOR_RGB2LAB)
37
+ color_match_lab = cv2.cvtColor(color_match_sample, cv2.COLOR_RGB2LAB)
38
+ matched_lab = match_histograms(prev_img_lab, color_match_lab, **match_histograms_kwargs)
39
+ return cv2.cvtColor(matched_lab, cv2.COLOR_LAB2RGB)
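
A small usage sketch (not part of the diff) for the maintain_colors function above, using random placeholder frames; it assumes the function is in scope and that scikit-image is installed:

import numpy as np

prev_img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)    # current frame (placeholder)
reference = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)   # color reference sample (placeholder)
matched = maintain_colors(prev_img, reference, mode='RGB')           # 'HSV' and 'LAB' route through cv2 color conversions
print(matched.shape)                                                 # (64, 64, 3), histogram-matched to the reference
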
scripts/deforum_helpers/composable_masks.py ADDED
@@ -0,0 +1,213 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ # At the moment there are three types of masks: mask from variable, file mask and word mask
19
+ # Variable masks include video_mask (which can be set to auto-generated human masks) and everywhere
20
+ # They are put in {}-brackets
21
+ # Word masks are framed with <>-bracets, like: <cat>, <anime girl>
22
+ # File masks are put in []-brackes
23
+ # Empty strings are counted as the whole frame
24
+ # We want to put them all into a sequence of boolean operations
25
+
26
+ # Example:
27
+ # \ <armor>
28
+ # (({human_mask} & [mask1.png]) ^ <apple>)
29
+
30
+ # Writing the parser for the boolean sequence
31
+ # using regex and PIL operations
32
+ import re
33
+ from .load_images import get_mask_from_file, check_mask_for_errors, blank_if_none
34
+ from .word_masking import get_word_mask
35
+ from PIL import ImageChops
36
+ from modules.shared import opts
37
+
38
+ # val_masks: name, PIL Image mask
39
+ # Returns an image in mode '1' (needed for bool ops), convert to 'L' in the sender function
40
+ def compose_mask(root, args, mask_seq, val_masks, frame_image, inner_idx:int = 0):
41
+ # Compose_mask recursively: go to inner brackets, then b-op it and go upstack
42
+
43
+ # Step 1:
44
+ # recursive parenthesis pass
45
+ # regex is not powerful here
46
+
47
+ seq = ""
48
+ inner_seq = ""
49
+ parentheses_counter = 0
50
+
51
+ for c in mask_seq:
52
+ if c == ')':
53
+ parentheses_counter = parentheses_counter - 1
54
+ if parentheses_counter > 0:
55
+ inner_seq += c
56
+ if c == '(':
57
+ parentheses_counter = parentheses_counter + 1
58
+ if parentheses_counter == 0:
59
+ if len(inner_seq) > 0:
60
+ inner_idx += 1
61
+ seq += compose_mask(root, args, inner_seq, val_masks, frame_image, inner_idx)
62
+ inner_seq = ""
63
+ else:
64
+ seq += c
65
+
66
+ if parentheses_counter != 0:
67
+ raise Exception('Mismatched parentheses in {mask_seq}!')
68
+
69
+ mask_seq = seq
70
+
71
+ # Step 2:
72
+ # Load the word masks and file masks as vars
73
+
74
+ # File masks
75
+ pattern = r'\[(?P<inner>[\S\s]*?)\]'
76
+
77
+ def parse(match_object):
78
+ nonlocal inner_idx
79
+ inner_idx += 1
80
+ content = match_object.groupdict()['inner']
81
+ val_masks[str(inner_idx)] = get_mask_from_file(content, args).convert('1') # TODO: add caching
82
+ return f"{{{inner_idx}}}"
83
+
84
+ mask_seq = re.sub(pattern, parse, mask_seq)
85
+
86
+ # Word masks
87
+ pattern = r'<(?P<inner>[\S\s]*?)>'
88
+
89
+ def parse(match_object):
90
+ nonlocal inner_idx
91
+ inner_idx += 1
92
+ content = match_object.groupdict()['inner']
93
+ val_masks[str(inner_idx)] = get_word_mask(root, frame_image, content).convert('1')
94
+ return f"{{{inner_idx}}}"
95
+
96
+ mask_seq = re.sub(pattern, parse, mask_seq)
97
+
98
+ # Now that all inner parenthesis are eliminated we're left with a linear string
99
+
100
+ # Step 3:
101
+ # Boolean operations with masks
102
+ # Operators: invert !, and &, or |, xor ^, difference \
103
+
104
+ # Invert vars with '!'
105
+ pattern = r'![\S\s]*{(?P<inner>[\S\s]*?)}'
106
+ def parse(match_object):
107
+ nonlocal inner_idx
108
+ inner_idx += 1
109
+ content = match_object.groupdict()['inner']
110
+ savename = content
111
+ if content in root.mask_preset_names:
112
+ inner_idx += 1
113
+ savename = str(inner_idx)
114
+ val_masks[savename] = ImageChops.invert(val_masks[content])
115
+ return f"{{{savename}}}"
116
+
117
+ mask_seq = re.sub(pattern, parse, mask_seq)
118
+
119
+ # Multiply neighbouring vars with '&'
120
+ # Repeat until the substitutions reach a fixed point (no more replacements occur)
121
+ while True:
122
+ pattern = r'{(?P<inner1>[\S\s]*?)}[\s]*&[\s]*{(?P<inner2>[\S\s]*?)}'
123
+ def parse(match_object):
124
+ nonlocal inner_idx
125
+ inner_idx += 1
126
+ content = match_object.groupdict()['inner1']
127
+ content_second = match_object.groupdict()['inner2']
128
+ savename = content
129
+ if content in root.mask_preset_names:
130
+ inner_idx += 1
131
+ savename = str(inner_idx)
132
+ val_masks[savename] = ImageChops.logical_and(val_masks[content], val_masks[content_second])
133
+ return f"{{{savename}}}"
134
+
135
+ prev_mask_seq = mask_seq
136
+ mask_seq = re.sub(pattern, parse, mask_seq)
137
+ if mask_seq is prev_mask_seq:
138
+ break
139
+
140
+ # Add neighbouring vars with '|'
141
+ while True:
142
+ pattern = r'{(?P<inner1>[\S\s]*?)}[\s]*?\|[\s]*?{(?P<inner2>[\S\s]*?)}'
143
+ def parse(match_object):
144
+ nonlocal inner_idx
145
+ inner_idx += 1
146
+ content = match_object.groupdict()['inner1']
147
+ content_second = match_object.groupdict()['inner2']
148
+ savename = content
149
+ if content in root.mask_preset_names:
150
+ inner_idx += 1
151
+ savename = str(inner_idx)
152
+ val_masks[savename] = ImageChops.logical_or(val_masks[content], val_masks[content_second])
153
+ return f"{{{savename}}}"
154
+
155
+ prev_mask_seq = mask_seq
156
+ mask_seq = re.sub(pattern, parse, mask_seq)
157
+ if mask_seq is prev_mask_seq:
158
+ break
159
+
160
+ # Mutually exclude neighbouring vars with '^'
161
+ while True:
162
+ pattern = r'{(?P<inner1>[\S\s]*?)}[\s]*\^[\s]*{(?P<inner2>[\S\s]*?)}'
163
+ def parse(match_object):
164
+ nonlocal inner_idx
165
+ inner_idx += 1
166
+ content = match_object.groupdict()['inner1']
167
+ content_second = match_object.groupdict()['inner2']
168
+ savename = content
169
+ if content in root.mask_preset_names:
170
+ inner_idx += 1
171
+ savename = str(inner_idx)
172
+ val_masks[savename] = ImageChops.logical_xor(val_masks[content], val_masks[content_second])
173
+ return f"{{{savename}}}"
174
+
175
+ prev_mask_seq = mask_seq
176
+ mask_seq = re.sub(pattern, parse, mask_seq)
177
+ if mask_seq is prev_mask_seq:
178
+ break
179
+
180
+ # Set-difference the regions with '\'
181
+ while True:
182
+ pattern = r'{(?P<inner1>[\S\s]*?)}[\s]*\\[\s]*{(?P<inner2>[\S\s]*?)}'
183
+ def parse(match_object):
184
+ content = match_object.groupdict()['inner1']
185
+ content_second = match_object.groupdict()['inner2']
186
+ savename = content
187
+ if content in root.mask_preset_names:
188
+ nonlocal inner_idx
189
+ inner_idx += 1
190
+ savename = str(inner_idx)
191
+ val_masks[savename] = ImageChops.logical_and(val_masks[content], ImageChops.invert(val_masks[content_second]))
192
+ return f"{{{savename}}}"
193
+
194
+ prev_mask_seq = mask_seq
195
+ mask_seq = re.sub(pattern, parse, mask_seq)
196
+ if mask_seq is prev_mask_seq:
197
+ break
198
+
199
+ # Step 4:
200
+ # Output
201
+ # Now we should have a single var left to return. If not, raise an error message
202
+ pattern = r'{(?P<inner>[\S\s]*?)}'
203
+ matches = re.findall(pattern, mask_seq)
204
+
205
+ if len(matches) != 1:
206
+ raise Exception(f'Wrong composable mask expression format! Broken mask sequence: {mask_seq}')
207
+
208
+ return f"{{{matches[0]}}}"
209
+
210
+ def compose_mask_with_check(root, args, mask_seq, val_masks, frame_image):
211
+ for k, v in val_masks.items():
212
+ val_masks[k] = blank_if_none(v, args.W, args.H, '1').convert('1')
213
+ return check_mask_for_errors(val_masks[compose_mask(root, args, mask_seq, val_masks, frame_image, 0)[1:-1]].convert('L'))
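# --- Editor's illustrative sketch (not part of the committed file) ---
# A minimal example of how the composer above could be driven. `root`, `args`
# (with W/H set) and a PIL `frame_image` are assumed to already exist; the mask
# names and the file path are hypothetical.
#
#     from PIL import Image
#     val_masks = {
#         'everywhere': Image.new('1', (args.W, args.H), 1),  # preset: whole frame
#         'video_mask': current_video_mask,                    # preset: per-frame mask
#     }
#     mask = compose_mask_with_check(root, args,
#                                    "({video_mask} & [mask1.png]) ^ <apple>",
#                                    val_masks, frame_image)
#     # `mask` comes back as a PIL image in mode 'L', ready for the img2img pipeline.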
scripts/deforum_helpers/consistency_check.py ADDED
@@ -0,0 +1,151 @@
 
 
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+
19
+ # The code below is taken from https://github.com/Sxela/flow_tools/blob/main under GPL-3.0 license
20
+ # and modified to fit Deforum's purpose
21
+
22
+
23
+ # import argparse
24
+ # import PIL.Image
25
+ import numpy as np
26
+ # import scipy.ndimage
27
+ # import glob
28
+ # from tqdm import tqdm
29
+
30
+ def make_consistency(flow1, flow2, edges_unreliable=False):
31
+ # Awesome pythonic consistency check from [maua](https://github.com/maua-maua-maua/maua/blob/44485c745c65cf9d83cb1b1c792a177588e9c9fc/maua/flow/consistency.py) by Hans Brouwer and Henry Rachootin
32
+ # algorithm based on https://github.com/manuelruder/artistic-videos/blob/master/consistencyChecker/consistencyChecker.cpp
33
+ # reimplemented in numpy by Hans Brouwer
34
+ # // consistencyChecker
35
+ # // Check consistency of forward flow via backward flow.
36
+ # // (c) Manuel Ruder, Alexey Dosovitskiy, Thomas Brox 2016
37
+
38
+ flow1 = np.flip(flow1, axis=2)
39
+ flow2 = np.flip(flow2, axis=2)
40
+ h, w, _ = flow1.shape
41
+
42
+ # get grid of coordinates for each pixel
43
+ orig_coord = np.flip(np.mgrid[:w, :h], 0).T
44
+
45
+ # find where the flow1 maps each pixel
46
+ warp_coord = orig_coord + flow1
47
+
48
+ # clip the coordinates in bounds and round down
49
+ warp_coord_inbound = np.zeros_like(warp_coord)
50
+ warp_coord_inbound[..., 0] = np.clip(warp_coord[..., 0], 0, h - 2)
51
+ warp_coord_inbound[..., 1] = np.clip(warp_coord[..., 1], 0, w - 2)
52
+ warp_coord_floor = np.floor(warp_coord_inbound).astype(int)
53
+
54
+ # for each pixel: bilinear interpolation of the corresponding flow2 values around the point mapped to by flow1
55
+ alpha = warp_coord_inbound - warp_coord_floor
56
+ flow2_00 = flow2[warp_coord_floor[..., 0], warp_coord_floor[..., 1]]
57
+ flow2_01 = flow2[warp_coord_floor[..., 0], warp_coord_floor[..., 1] + 1]
58
+ flow2_10 = flow2[warp_coord_floor[..., 0] + 1, warp_coord_floor[..., 1]]
59
+ flow2_11 = flow2[warp_coord_floor[..., 0] + 1, warp_coord_floor[..., 1] + 1]
60
+ flow2_0_blend = (1 - alpha[..., 1, None]) * flow2_00 + alpha[..., 1, None] * flow2_01
61
+ flow2_1_blend = (1 - alpha[..., 1, None]) * flow2_10 + alpha[..., 1, None] * flow2_11
62
+ warp_coord_flow2 = (1 - alpha[..., 0, None]) * flow2_0_blend + alpha[..., 0, None] * flow2_1_blend
63
+
64
+ # coordinates that flow2 remaps each flow1-mapped pixel to
65
+ rewarp_coord = warp_coord + warp_coord_flow2
66
+
67
+ # where the difference in position after flow1 and flow2 are applied is larger than a threshold there is likely an
68
+ # occlusion. set values to -1 so the final gaussian blur will spread the value a couple pixels around this area
69
+ squared_diff = np.sum((rewarp_coord - orig_coord) ** 2, axis=2)
70
+ threshold = 0.01 * np.sum(warp_coord_flow2 ** 2 + flow1 ** 2, axis=2) + 0.5
71
+
72
+ reliable_flow = np.ones((squared_diff.shape[0], squared_diff.shape[1], 3))
73
+ reliable_flow[...,0] = np.where(squared_diff >= threshold, -0.75, 1)
74
+
75
+ # areas mapping outside of the frame are also occluded (don't need extra region around these though, so set 0)
76
+ if edges_unreliable:
77
+ reliable_flow[...,1] = np.where(
78
+ np.logical_or.reduce(
79
+ (
80
+ warp_coord[..., 0] < 0,
81
+ warp_coord[..., 1] < 0,
82
+ warp_coord[..., 0] >= h - 1,
83
+ warp_coord[..., 1] >= w - 1,
84
+ )
85
+ ),
86
+ 0,
87
+ reliable_flow[...,1],
88
+ )
89
+
90
+ # get derivative of flow, large changes in derivative => edge of moving object
91
+ dx = np.diff(flow1, axis=1, append=0)
92
+ dy = np.diff(flow1, axis=0, append=0)
93
+ motion_edge = np.sum(dx ** 2 + dy ** 2, axis=2)
94
+ motion_threshold = 0.01 * np.sum(flow1 ** 2, axis=2) + 0.002
95
+ reliable_flow[...,2] = np.where(np.logical_and(motion_edge > motion_threshold, reliable_flow[...,2] != -0.75), 0, reliable_flow[...,2])
96
+
97
+ return reliable_flow
98
+
99
+
100
+ # parser = argparse.ArgumentParser()
101
+ # parser.add_argument("--flow_fwd", type=str, required=True, help="Forward flow path or glob pattern")
102
+ # parser.add_argument("--flow_bwd", type=str, required=True, help="Backward flow path or glob pattern")
103
+ # parser.add_argument("--output", type=str, required=True, help="Output consistency map path")
104
+ # parser.add_argument("--output_postfix", type=str, default='_cc', help="Output consistency map name postfix")
105
+ # parser.add_argument("--image_output", action='store_true', help="Output consistency map as b\w image path")
106
+ # parser.add_argument("--skip_numpy_output", action='store_true', help="Don`t save numpy array")
107
+ # parser.add_argument("--blur", type=float, default=2., help="Gaussian blur kernel size (0 for no blur)")
108
+ # parser.add_argument("--bottom_clamp", type=float, default=0., help="Clamp lower values")
109
+ # parser.add_argument("--edges_reliable", action='store_true', help="Consider edges reliable")
110
+ # parser.add_argument("--save_separate_channels", action='store_true', help="Save consistency mask layers as separate channels")
111
+ # args = parser.parse_args()
112
+
113
+ # def run(args):
114
+ # flow_fwd_many = sorted(glob.glob(args.flow_fwd))
115
+ # flow_bwd_many = sorted(glob.glob(args.flow_bwd))
116
+ # if len(flow_fwd_many)!= len(flow_bwd_many):
117
+ # raise Exception('Forward and backward flow file numbers don`t match')
118
+ # return
119
+
120
+ # for flow_fwd,flow_bwd in tqdm(zip(flow_fwd_many, flow_bwd_many)):
121
+ # flow_fwd = flow_fwd.replace('\\','/')
122
+ # flow_bwd = flow_bwd.replace('\\','/')
123
+ # flow1 = np.load(flow_fwd)
124
+ # flow2 = np.load(flow_bwd)
125
+ # consistency_map_multilayer = make_consistency(flow1, flow2, edges_unreliable=not args.edges_reliable)
126
+
127
+ # if args.save_separate_channels:
128
+ # consistency_map = consistency_map_multilayer
129
+ # else:
130
+ # consistency_map = np.ones_like(consistency_map_multilayer[...,0])
131
+ # consistency_map*=consistency_map_multilayer[...,0]
132
+ # consistency_map*=consistency_map_multilayer[...,1]
133
+ # consistency_map*=consistency_map_multilayer[...,2]
134
+
135
+ # # blur
136
+ # if args.blur>0.:
137
+ # consistency_map = scipy.ndimage.gaussian_filter(consistency_map, [args.blur, args.blur])
138
+
139
+ # #clip values between bottom_clamp and 1
140
+ # bottom_clamp = min(max(args.bottom_clamp,0.), 0.999)
141
+ # consistency_map = consistency_map.clip(bottom_clamp, 1)
142
+ # out_fname = args.output+'/'+flow_fwd.split('/')[-1][:-4]+args.output_postfix
143
+
144
+ # if not args.skip_numpy_output:
145
+ # np.save(out_fname, consistency_map)
146
+
147
+ # #save as jpeg
148
+ # if args.image_output:
149
+ # PIL.Image.fromarray((consistency_map*255.).astype('uint8')).save(out_fname+'.jpg', quality=90)
150
+
151
+ # run(args)
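# --- Editor's illustrative sketch (not part of the committed file) ---
# Collapsing the three reliability layers returned by make_consistency() into a
# single [0..1] consistency mask, mirroring the commented-out CLI code above.
# `flow_fwd` and `flow_bwd` are assumed to be (H, W, 2) float arrays produced by
# any optical-flow estimator (e.g. RAFT).
#
#     reliable = make_consistency(flow_fwd, flow_bwd, edges_unreliable=True)
#     consistency = reliable[..., 0] * reliable[..., 1] * reliable[..., 2]
#     consistency = consistency.clip(0.0, 1.0)  # -0.75 occlusion markers floor to 0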
scripts/deforum_helpers/defaults.py ADDED
@@ -0,0 +1,218 @@
 
 
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ def get_samplers_list():
19
+ return {
20
+ 'euler a': 'Euler a',
21
+ 'euler': 'Euler',
22
+ 'lms': 'LMS',
23
+ 'heun': 'Heun',
24
+ 'dpm2': 'DPM2',
25
+ 'dpm2 a': 'DPM2 a',
26
+ 'dpm++ 2s a': 'DPM++ 2S a',
27
+ 'dpm++ 2m': 'DPM++ 2M',
28
+ 'dpm++ sde': 'DPM++ SDE',
29
+ 'dpm fast': 'DPM fast',
30
+ 'dpm adaptive': 'DPM adaptive',
31
+ 'lms karras': 'LMS Karras',
32
+ 'dpm2 karras': 'DPM2 Karras',
33
+ 'dpm2 a karras': 'DPM2 a Karras',
34
+ 'dpm++ 2s a karras': 'DPM++ 2S a Karras',
35
+ 'dpm++ 2m karras': 'DPM++ 2M Karras',
36
+ 'dpm++ sde karras': 'DPM++ SDE Karras'
37
+ }
38
+
39
+ def DeforumAnimPrompts():
40
+ return r"""{
41
+ "0": "tiny cute swamp bunny, highly detailed, intricate, ultra hd, sharp photo, crepuscular rays, in focus, by tomasz alen kopera",
42
+ "30": "anthropomorphic clean cat, surrounded by fractals, epic angle and pose, symmetrical, 3d, depth of field, ruan jia and fenghua zhong",
43
+ "60": "a beautiful coconut --neg photo, realistic",
44
+ "90": "a beautiful durian, trending on Artstation"
45
+ }
46
+ """
47
+
48
+ # Guided images defaults
49
+ def get_guided_imgs_default_json():
50
+ return '''{
51
+ "0": "https://deforum.github.io/a1/Gi1.png",
52
+ "max_f/4-5": "https://deforum.github.io/a1/Gi2.png",
53
+ "max_f/2-10": "https://deforum.github.io/a1/Gi3.png",
54
+ "3*max_f/4-15": "https://deforum.github.io/a1/Gi4.jpg",
55
+ "max_f-20": "https://deforum.github.io/a1/Gi1.png"
56
+ }'''
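# Editor's note (illustrative): the keys above are math expressions evaluated at
# runtime, so with max_f = 100 they resolve to frames 0, 20, 40, 60 and 80.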
57
+
58
+ def get_hybrid_info_html():
59
+ return """
60
+ <p style="padding-bottom:0">
61
+ <b style="text-shadow: blue -1px -1px;">Hybrid Video Compositing in 2D/3D Mode</b>
62
+ <span style="color:#DDD;font-size:0.7rem;text-shadow: black -1px -1px;margin-left:10px;">
63
+ by <a href="https://github.com/reallybigname">reallybigname</a>
64
+ </span>
65
+ </p>
66
+ <ul style="list-style-type:circle; margin-left:1em; margin-bottom:1em;">
67
+ <li>Composite video with previous frame init image in <b>2D or 3D animation_mode</b> <i>(not for Video Input mode)</i></li>
68
+ <li>Uses your <b>Init</b> settings for <b>video_init_path, extract_nth_frame, overwrite_extracted_frames</b></li>
69
+ <li>In Keyframes tab, you can also set <b>color_coherence</b> = '<b>Video Input</b>'</li>
70
+ <li><b>color_coherence_video_every_N_frames</b> lets you only match every N frames</li>
71
+ <li>Color coherence may be used with hybrid composite off, to just use video color.</li>
72
+ <li>Hybrid motion may be used with hybrid composite off, to just use video motion.</li>
73
+ </ul>
74
+ Hybrid Video Schedules
75
+ <ul style="list-style-type:circle; margin-left:1em; margin-bottom:1em;">
76
+ <li>The alpha schedule controls overall alpha for video mix, whether using a composite mask or not.</li>
77
+ <li>The <b>hybrid_comp_mask_blend_alpha_schedule</b> only affects the 'Blend' <b>hybrid_comp_mask_type</b>.</li>
78
+ <li>Mask contrast schedule is from 0-255. Normal is 1. Affects all masks.</li>
79
+ <li>Autocontrast low/high cutoff schedules 0-100. Low 0 High 100 is full range. <br>(<i><b>hybrid_comp_mask_auto_contrast</b> must be enabled</i>)</li>
80
+ </ul>
81
+ <a style='color:SteelBlue;' target='_blank' href='https://github.com/deforum-art/deforum-for-automatic1111-webui/wiki/Animation-Settings#hybrid-video-mode-for-2d3d-animations'>Click Here</a> for more info/ a Guide.
82
+ """
83
+
84
+ def get_composable_masks_info_html():
85
+ return """
86
+ <ul style="list-style-type:circle; margin-left:0.75em; margin-bottom:0.2em">
87
+ <li>To enable, check use_mask in the Init tab</li>
88
+ <li>Supports boolean operations: (! - negation, & - and, | - or, ^ - xor, \ - difference, () - nested operations)</li>
89
+ <li>default variables: in \{\}, like \{init_mask\}, \{video_mask\}, \{everywhere\}</li>
90
+ <li>masks from files: in [], like [mask1.png]</li>
91
+ <li>description-based: <i>word masks</i> in &lt;&gt;, like &lt;apple&gt;, &lt;hair&gt</li>
92
+ </ul>
93
+ """
94
+
95
+ def get_parseq_info_html():
96
+ return """
97
+ <p>Use a <a style='color:SteelBlue;' target='_blank' href='https://sd-parseq.web.app/deforum'>Parseq</a> manifest for your animation (leave blank to ignore).</p>
98
+ <p style="margin-top:1em; margin-bottom:1em;">
99
+ Fields managed in your Parseq manifest override the values and schedules set in other parts of this UI. You can select which values to override by using the "Managed Fields" section in Parseq.
100
+ </p>
101
+ """
102
+
103
+ def get_prompts_info_html():
104
+ return """
105
+ <ul style="list-style-type:circle; margin-left:0.75em; margin-bottom:0.2em">
106
+ <li>Please always keep values in math functions above 0.</li>
107
+ <li>There is *no* Batch mode like in vanilla Deforum. Please use the txt2img tab for that.</li>
108
+ <li>For negative prompts, please write your positive prompt, then --neg ugly, text, asymmetric, or any other negative tokens of your choice. OR:</li>
109
+ <li>Use the negative_prompts field to automatically append all words as a negative prompt. *Don't* add --neg in the negative_prompts field!</li>
110
+ <li>Prompts are stored in JSON format. If you've got an error, check it in a <a style="color:SteelBlue" href="https://odu.github.io/slingjsonlint/">JSON Validator</a></li>
111
+ </ul>
112
+ """
113
+
114
+ def get_guided_imgs_info_html():
115
+ return """
116
+ <p>You can use this as a guided image tool or as a looper depending on your settings in the keyframe images field.
117
+ Set the keyframes and the images that you want to show up.
118
+ Note: the number of frames between each keyframe should be greater than the tweening frames.</p>
119
+
120
+ <p>Prerequisites and Important Info:</p>
121
+ <ul style="list-style-type:circle; margin-left:2em; margin-bottom:0em">
122
+ <li>This mode works ONLY with 2D/3D animation modes. Interpolation and Video Input modes aren't supported.</li>
123
+ <li>Init tab's strength slider should be greater than 0. Recommended value (.65 - .80).</li>
124
+ <li>'seed_behavior' will be forcibly set to 'schedule'.</li>
125
+ </ul>
126
+
127
+ <p>Looping recommendations:</p>
128
+ <ul style="list-style-type:circle; margin-left:2em; margin-bottom:0em">
129
+ <li>seed_schedule should start and end on the same seed.<br />
130
+ Example: seed_schedule could use 0:(5), 1:(-1), 219:(-1), 220:(5)</li>
131
+ <li>The 1st and last keyframe images should match.</li>
132
+ <li>Set your total number of keyframes to be 21 more than the last inserted keyframe image.<br />
133
+ Example: Default args should use 221 as the total keyframes.</li>
134
+ <li>Prompts are stored in JSON format. If you've got an error, check it in the validator,
135
+ <a style="color:SteelBlue" href="https://odu.github.io/slingjsonlint/">like here</a></li>
136
+ </ul>
137
+
138
+ <p>The Guided images mode exposes the following variables for the prompts and the schedules:</p>
139
+ <ul style="list-style-type:circle; margin-left:2em; margin-bottom:0em">
140
+ <li><b>s</b> is the <i>initial</i> seed for the whole video generation.</li>
141
+ <li><b>max_f</b> is the length of the video, in frames.<br />
142
+ Example: seed_schedule could use 0:(s), 1:(-1), "max_f-2":(-1), "max_f-1":(s)</li>
143
+ <li><b>t</b> is the current frame number.<br />
144
+ Example: strength_schedule could use 0:(0.25 * cos((72 / 60 * 3.141 * (t + 0) / 30))**13 + 0.7) to make alternating changes each 30 frames</li>
145
+ </ul>
146
+ """
147
+
148
+ def get_main_info_html():
149
+ return """
150
+ <p>StableDiffusion WebUI-based re-implementation of <strong><a href="https://deforum.github.io">deforum.github.io</a></strong>, maintained by <strong><a href="https://github.com/kabachuha">kabachuha</a> & <a href="https://github.com/hithereai">hithereai</a></strong></p>
151
+ <p><a style="color:SteelBlue" href="https://github.com/deforum-art/deforum-for-automatic1111-webui/wiki/FAQ-&-Troubleshooting">FOR HELP CLICK HERE</a></p>
152
+ <ul style="list-style-type:circle; margin-left:1em">
153
+ <li>The code for this extension: <a style="color:SteelBlue" href="https://github.com/deforum-art/deforum-for-automatic1111-webui">here</a>.</li>
154
+ <li>Join the <a style="color:SteelBlue" href="https://discord.gg/deforum">official Deforum Discord</a> to share your creations and suggestions.</li>
155
+ <li>Official Deforum Wiki: <a style="color:SteelBlue" href="https://github.com/deforum-art/deforum-for-automatic1111-webui/wiki">here</a>.</li>
156
+ <li>Anime-inclined great guide (by FizzleDorf) with lots of examples: <a style="color:SteelBlue" href="https://rentry.org/AnimAnon-Deforum">here</a>.</li>
157
+ <li>For advanced keyframing with Math functions, see <a style="color:SteelBlue" href="https://github.com/deforum-art/deforum-for-automatic1111-webui/wiki/Maths-in-Deforum">here</a>.</li>
158
+ <li>Alternatively, use <a style="color:SteelBlue" href="https://sd-parseq.web.app/deforum">sd-parseq</a> as a UI to define your animation schedules (see the Parseq section in the Init tab).</li>
159
+ <li><a style="color:SteelBlue" href="https://www.framesync.xyz/">framesync.xyz</a> is also a good option, it makes compact math formulae for Deforum keyframes by selecting various waveforms.</li>
160
+ <li>The other site allows for making keyframes using <a style="color:SteelBlue" href="https://www.chigozie.co.uk/keyframe-string-generator/">interactive splines and Bezier curves</a> (select Disco output format).</li>
161
+ <li>If you want to use Width/Height which are not multiples of 64, please change noise_type to 'Uniform', in Keyframes --> Noise.</li>
162
+ </ul>
163
+ <italic>If you liked this extension, please <a style="color:SteelBlue" href="https://github.com/deforum-art/deforum-for-automatic1111-webui">give it a star on GitHub</a>!</italic> 😊
164
+ <ul style="list-style-type:circle; margin-left:1em">
165
+ <li>This program comes with <strong>ABSOLUTELY NO WARRANTY</strong>. This is free software, and you are welcome to redistribute it under certain conditions. See more information in the <a style="color:SteelBlue" href="https://github.com/deforum-art/sd-webui-deforum/blob/automatic1111-webui/LICENSE">LICENSE</a></li>
166
+ </ul>
167
+ """
168
+ def get_frame_interpolation_info_html():
169
+ return """
170
+ Use <a href="https://github.com/megvii-research/ECCV2022-RIFE">RIFE</a> / <a href="https://film-net.github.io/">FILM</a> Frame Interpolation to smooth out, slow-mo (or both) any video.</p>
171
+ <p style="margin-top:1em">
172
+ Supported engines:
173
+ <ul style="list-style-type:circle; margin-left:1em; margin-bottom:1em">
174
+ <li>RIFE v4.6 and FILM.</li>
175
+ </ul>
176
+ </p>
177
+ <p style="margin-top:1em">
178
+ Important notes:
179
+ <ul style="list-style-type:circle; margin-left:1em; margin-bottom:1em">
180
+ <li>Frame Interpolation will *not* run if any of the following are enabled: 'Store frames in ram' / 'Skip video for run all'.</li>
181
+ <li>Audio (if provided) will *not* be transferred to the interpolated video if Slow-Mo is enabled.</li>
182
+ <li>'add_soundtrack' and 'soundtrack_path' aren't being honoured in "Interpolate an existing video" mode. Original vid audio will be used instead with the same slow-mo rules above.</li>
183
+ <li>In "Interpolate existing pics" mode, FPS is determined *only* by output FPS slider. Audio will be added if requested even with slow-mo "enabled", as it does *nothing* in this mode.</li>
184
+ </ul>
185
+ </p>
186
+ """
187
+ def get_frames_to_video_info_html():
188
+ return """
189
+ <p style="margin-top:0em">
190
+ Important Notes:
191
+ <ul style="list-style-type:circle; margin-left:1em; margin-bottom:0.25em">
192
+ <li>Enter relative to webui folder or Full-Absolute path, and make sure it ends with something like this: '20230124234916_%09d.png', just replace 20230124234916 with your batch ID. The %09d is important, don't forget it!</li>
193
+ <li>In the filename, '%09d' represents the 9 counting numbers, For '20230124234916_000000001.png', use '20230124234916_%09d.png'</li>
194
+ <li>If non-deforum frames, use the correct number of counting digits. For files like 'bunnies-0000.jpg', you'd use 'bunnies-%04d.jpg'</li>
195
+ </ul>
196
+ """
197
+ def get_gradio_html(section_name):
198
+ if section_name.lower() == 'hybrid_video':
199
+ return get_hybrid_info_html()
200
+ elif section_name.lower() == 'composable_masks':
201
+ return get_composable_masks_info_html()
202
+ elif section_name.lower() == 'parseq':
203
+ return get_parseq_info_html()
204
+ elif section_name.lower() == 'prompts':
205
+ return get_prompts_info_html()
206
+ elif section_name.lower() == 'guided_imgs':
207
+ return get_guided_imgs_info_html()
208
+ elif section_name.lower() == 'main':
209
+ return get_main_info_html()
210
+ elif section_name.lower() == 'frame_interpolation':
211
+ return get_frame_interpolation_info_html()
212
+ elif section_name.lower() == 'frames_to_video':
213
+ return get_frames_to_video_info_html()
214
+ else:
215
+ return None
216
+
217
+ mask_fill_choices=['fill', 'original', 'latent noise', 'latent nothing']
218
+
scripts/deforum_helpers/deforum_controlnet.py ADDED
@@ -0,0 +1,336 @@
 
 
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ # This helper script is responsible for ControlNet/Deforum integration
19
+ # https://github.com/Mikubill/sd-webui-controlnet — controlnet repo
20
+
21
+ import os
22
+ import gradio as gr
23
+ import scripts
24
+ from PIL import Image
25
+ import numpy as np
26
+ import importlib
27
+ from modules import scripts
28
+ from .deforum_controlnet_gradio import hide_ui_by_cn_status, hide_file_textboxes, ToolButton
29
+ from .general_utils import count_files_in_folder, clean_gradio_path_strings # TODO: do it another way
30
+ from .video_audio_utilities import vid2frames, convert_image
31
+ from .animation_key_frames import ControlNetKeys
32
+ from .load_images import load_image
33
+
34
+ cnet = None
35
+ # number of CN model tabs to show in the deforum gui
36
+ num_of_models = 5
37
+
38
+ def find_controlnet():
39
+ global cnet
40
+ if cnet: return cnet
41
+ try:
42
+ cnet = importlib.import_module('extensions.sd-webui-controlnet.scripts.external_code', 'external_code')
43
+ except:
44
+ try:
45
+ cnet = importlib.import_module('extensions-builtin.sd-webui-controlnet.scripts.external_code', 'external_code')
46
+ except:
47
+ pass
48
+ if cnet:
49
+ print(f"\033[0;32m*Deforum ControlNet support: enabled*\033[0m")
50
+ return cnet
51
+ return None
52
+
53
+ def controlnet_infotext():
54
+ return """Requires the <a style='color:SteelBlue;' target='_blank' href='https://github.com/Mikubill/sd-webui-controlnet'>ControlNet</a> extension to be installed.</p>
55
+ <p>If Deforum crashes due to CN updates, go <a style='color:Orange;' target='_blank' href='https://github.com/Mikubill/sd-webui-controlnet/issues'>here</a> and report your problem.</p>
56
+ """
57
+
58
+ def is_controlnet_enabled(controlnet_args):
59
+ for i in range(1, num_of_models + 1):
60
+ if getattr(controlnet_args, f'cn_{i}_enabled', False):
61
+ return True
62
+ return False
63
+
64
+ def setup_controlnet_ui_raw():
65
+ cnet = find_controlnet()
66
+ cn_models = cnet.get_models()
67
+ cn_preprocessors = cnet.get_modules()
68
+
69
+ cn_modules = cnet.get_modules_detail()
70
+ preprocessor_sliders_config = {}
71
+
72
+ for config_name, config_values in cn_modules.items():
73
+ sliders = config_values.get('sliders', [])
74
+ preprocessor_sliders_config[config_name] = sliders
75
+
76
+ model_free_preprocessors = ["reference_only", "reference_adain", "reference_adain+attn"]
77
+ flag_preprocessor_resolution = "Preprocessor Resolution"
78
+
79
+ def build_sliders(module, pp):
80
+ grs = []
81
+ if module not in preprocessor_sliders_config:
82
+ grs += [
83
+ gr.update(label=flag_preprocessor_resolution, value=512, minimum=64, maximum=2048, step=1, visible=not pp, interactive=not pp),
84
+ gr.update(visible=False, interactive=False),
85
+ gr.update(visible=False, interactive=False),
86
+ gr.update(visible=True)
87
+ ]
88
+ else:
89
+ for slider_config in preprocessor_sliders_config[module]:
90
+ if isinstance(slider_config, dict):
91
+ visible = True
92
+ if slider_config['name'] == flag_preprocessor_resolution:
93
+ visible = not pp
94
+ grs.append(gr.update(
95
+ label=slider_config['name'],
96
+ value=slider_config['value'],
97
+ minimum=slider_config['min'],
98
+ maximum=slider_config['max'],
99
+ step=slider_config['step'] if 'step' in slider_config else 1,
100
+ visible=visible,
101
+ interactive=visible))
102
+ else:
103
+ grs.append(gr.update(visible=False, interactive=False))
104
+ while len(grs) < 3:
105
+ grs.append(gr.update(visible=False, interactive=False))
106
+ grs.append(gr.update(visible=True))
107
+ if module in model_free_preprocessors:
108
+ grs += [gr.update(visible=False, value='None'), gr.update(visible=False)]
109
+ else:
110
+ grs += [gr.update(visible=True), gr.update(visible=True)]
111
+ return grs
112
+
113
+ refresh_symbol = '\U0001f504' # 🔄
114
+ switch_values_symbol = '\U000021C5' # ⇅
115
+ model_dropdowns = []
116
+ infotext_fields = []
117
+
118
+ def create_model_in_tab_ui(cn_id):
119
+ with gr.Row():
120
+ enabled = gr.Checkbox(label="Enable", value=False, interactive=True)
121
+ pixel_perfect = gr.Checkbox(label="Pixel Perfect", value=False, visible=False, interactive=True)
122
+ low_vram = gr.Checkbox(label="Low VRAM", value=False, visible=False, interactive=True)
123
+ overwrite_frames = gr.Checkbox(label='Overwrite input frames', value=True, visible=False, interactive=True)
124
+ with gr.Row(visible=False) as mod_row:
125
+ module = gr.Dropdown(cn_preprocessors, label=f"Preprocessor", value="none", interactive=True)
126
+ model = gr.Dropdown(cn_models, label=f"Model", value="None", interactive=True)
127
+ refresh_models = ToolButton(value=refresh_symbol)
128
+ refresh_models.click(refresh_all_models, model, model)
129
+ with gr.Row(visible=False) as weight_row:
130
+ weight = gr.Textbox(label="Weight schedule", lines=1, value='0:(1)', interactive=True)
131
+ with gr.Row(visible=False) as start_cs_row:
132
+ guidance_start = gr.Textbox(label="Starting Control Step schedule", lines=1, value='0:(0.0)', interactive=True)
133
+ with gr.Row(visible=False) as end_cs_row:
134
+ guidance_end = gr.Textbox(label="Ending Control Step schedule", lines=1, value='0:(1.0)', interactive=True)
135
+ model_dropdowns.append(model)
136
+ with gr.Column(visible=False) as advanced_column:
137
+ processor_res = gr.Slider(label="Annotator resolution", value=64, minimum=64, maximum=2048, interactive=False)
138
+ threshold_a = gr.Slider(label="Threshold A", value=64, minimum=64, maximum=1024, interactive=False)
139
+ threshold_b = gr.Slider(label="Threshold B", value=64, minimum=64, maximum=1024, interactive=False)
140
+ with gr.Row(visible=False) as vid_path_row:
141
+ vid_path = gr.Textbox(value='', label="ControlNet Input Video/ Image Path", interactive=True)
142
+ with gr.Row(visible=False) as mask_vid_path_row: # invisible temporarily since 26-04-23 until masks are fixed
143
+ mask_vid_path = gr.Textbox(value='', label="ControlNet Mask Video/ Image Path (*NOT WORKING, kept in UI for CN's devs testing!*)", interactive=True)
144
+ with gr.Row(visible=False) as control_mode_row:
145
+ control_mode = gr.Radio(choices=["Balanced", "My prompt is more important", "ControlNet is more important"], value="Balanced", label="Control Mode", interactive=True)
146
+ with gr.Row(visible=False) as env_row:
147
+ resize_mode = gr.Radio(choices=["Outer Fit (Shrink to Fit)", "Inner Fit (Scale to Fit)", "Just Resize"], value="Inner Fit (Scale to Fit)", label="Resize Mode", interactive=True)
148
+ with gr.Row(visible=False) as control_loopback_row:
149
+ loopback_mode = gr.Checkbox(label="LoopBack mode", value=False, interactive=True)
150
+ hide_output_list = [pixel_perfect, low_vram, mod_row, module, weight_row, start_cs_row, end_cs_row, env_row, overwrite_frames, vid_path_row, control_mode_row, mask_vid_path_row,
151
+ control_loopback_row] # add mask_vid_path_row when masks are working again
152
+ for cn_output in hide_output_list:
153
+ enabled.change(fn=hide_ui_by_cn_status, inputs=enabled, outputs=cn_output)
154
+ module.change(build_sliders, inputs=[module, pixel_perfect], outputs=[processor_res, threshold_a, threshold_b, advanced_column, model, refresh_models])
155
+ # hide vid/image input fields
156
+ loopback_outs = [vid_path_row, mask_vid_path_row]
157
+ for loopback_output in loopback_outs:
158
+ loopback_mode.change(fn=hide_file_textboxes, inputs=loopback_mode, outputs=loopback_output)
159
+ # handle pixel perfect ui changes
160
+ pixel_perfect.change(build_sliders, inputs=[module, pixel_perfect], outputs=[processor_res, threshold_a, threshold_b, advanced_column, model, refresh_models])
161
+ infotext_fields.extend([
162
+ (module, f"ControlNet Preprocessor"),
163
+ (model, f"ControlNet Model"),
164
+ (weight, f"ControlNet Weight"),
165
+ ])
166
+
167
+ return {key: value for key, value in locals().items() if key in [
168
+ "enabled", "pixel_perfect", "low_vram", "module", "model", "weight",
169
+ "guidance_start", "guidance_end", "processor_res", "threshold_a", "threshold_b", "resize_mode", "control_mode",
170
+ "overwrite_frames", "vid_path", "mask_vid_path", "loopback_mode"
171
+ ]}
172
+
173
+ def refresh_all_models(*inputs):
174
+ cn_models = cnet.get_models(update=True)
175
+ dd = inputs[0]
176
+ selected = dd if dd in cn_models else "None"
177
+ return gr.Dropdown.update(value=selected, choices=cn_models)
178
+
179
+ with gr.Tabs():
180
+ model_params = {}
181
+ for i in range(1, num_of_models + 1):
182
+ with gr.Tab(f"CN Model {i}"):
183
+ model_params[i] = create_model_in_tab_ui(i)
184
+
185
+ for key, value in model_params[i].items():
186
+ locals()[f"cn_{i}_{key}"] = value
187
+
188
+ return locals()
189
+
190
+ def setup_controlnet_ui():
191
+ if not find_controlnet():
192
+ gr.HTML("""<a style='target='_blank' href='https://github.com/Mikubill/sd-webui-controlnet'>ControlNet not found. Please install it :)</a>""", elem_id='controlnet_not_found_html_msg')
193
+ return {}
194
+
195
+ try:
196
+ return setup_controlnet_ui_raw()
197
+ except Exception as e:
198
+ print(f"'ControlNet UI setup failed with error: '{e}'!")
199
+ gr.HTML(f"""
200
+ Failed to setup ControlNet UI, check the reason in your commandline log. Please, downgrade your CN extension to <a style='color:Orange;' target='_blank' href='https://github.com/Mikubill/sd-webui-controlnet/archive/c9340671d6d59e5a79fc404f78f747f969f87374.zip'>c9340671d6d59e5a79fc404f78f747f969f87374</a> or report the problem <a style='color:Orange;' target='_blank' href='https://github.com/Mikubill/sd-webui-controlnet/issues'>here</a>.
201
+ """, elem_id='controlnet_not_found_html_msg')
202
+ return {}
203
+
204
+ def controlnet_component_names():
205
+ if not find_controlnet():
206
+ return []
207
+
208
+ return [f'cn_{i}_{component}' for i in range(1, num_of_models + 1) for component in [
209
+ 'overwrite_frames', 'vid_path', 'mask_vid_path', 'enabled',
210
+ 'low_vram', 'pixel_perfect',
211
+ 'module', 'model', 'weight', 'guidance_start', 'guidance_end',
212
+ 'processor_res', 'threshold_a', 'threshold_b', 'resize_mode', 'control_mode', 'loopback_mode'
213
+ ]]
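# Editor's note (illustrative): with num_of_models = 5 this yields 5 * 17 = 85
# component names ('cn_1_overwrite_frames', 'cn_1_vid_path', ..., 'cn_5_loopback_mode'),
# matching the per-model locals exposed by setup_controlnet_ui_raw().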
214
+
215
+ def process_with_controlnet(p, args, anim_args, loop_args, controlnet_args, root, is_img2img=True, frame_idx=0):
216
+ CnSchKeys = ControlNetKeys(anim_args, controlnet_args)
217
+
218
+ def read_cn_data(cn_idx):
219
+ cn_mask_np, cn_image_np = None, None
220
+ # Loopback mode ENABLED:
221
+ if getattr(controlnet_args, f'cn_{cn_idx}_loopback_mode'):
222
+ # On very first frame, check if use init enabled, and if init image is provided
223
+ if frame_idx == 0 and args.use_init and args.init_image is not None:
224
+ cn_image_np = load_image(args.init_image)
225
+ # convert to uint8 for compatibility with CN
226
+ cn_image_np = np.array(cn_image_np).astype('uint8')
227
+ # Not first frame, use previous img (init_sample)
228
+ elif frame_idx > 0 and args.init_sample:
229
+ cn_image_np = np.array(args.init_sample).astype('uint8')
230
+ else: # loopback mode is DISABLED
231
+ cn_inputframes = os.path.join(args.outdir, f'controlnet_{cn_idx}_inputframes') # set input frames folder path
232
+ if os.path.exists(cn_inputframes):
233
+ if count_files_in_folder(cn_inputframes) == 1:
234
+ cn_frame_path = os.path.join(cn_inputframes, "000000000.jpg")
235
+ print(f'Reading ControlNet *static* base frame at {cn_frame_path}')
236
+ else:
237
+ cn_frame_path = os.path.join(cn_inputframes, f"{frame_idx:09}.jpg")
238
+ print(f'Reading ControlNet {cn_idx} base frame #{frame_idx} at {cn_frame_path}')
239
+ if os.path.exists(cn_frame_path):
240
+ cn_image_np = np.array(Image.open(cn_frame_path).convert("RGB")).astype('uint8')
241
+ cn_maskframes = os.path.join(args.outdir, f'controlnet_{cn_idx}_maskframes') # set mask frames folder path
242
+ if os.path.exists(cn_maskframes):
243
+ if count_files_in_folder(cn_maskframes) == 1:
244
+ cn_mask_frame_path = os.path.join(cn_maskframes, "000000000.jpg")
245
+ print(f'Reading ControlNet *static* mask frame at {cn_mask_frame_path}')
246
+ else:
247
+ cn_mask_frame_path = os.path.join(args.outdir, f'controlnet_{cn_idx}_maskframes', f"{frame_idx:09}.jpg")
248
+ print(f'Reading ControlNet {cn_idx} mask frame #{frame_idx} at {cn_mask_frame_path}')
249
+ if os.path.exists(cn_mask_frame_path):
250
+ cn_mask_np = np.array(Image.open(cn_mask_frame_path).convert("RGB")).astype('uint8')
251
+
252
+ return cn_mask_np, cn_image_np
253
+
254
+ cnet = find_controlnet()
255
+ cn_data = [read_cn_data(i) for i in range(1, num_of_models + 1)]
256
+
257
+ # Check if any loopback_mode is set to True
258
+ any_loopback_mode = any(getattr(controlnet_args, f'cn_{i}_loopback_mode') for i in range(1, num_of_models + 1))
259
+
260
+ cn_inputframes_list = [os.path.join(args.outdir, f'controlnet_{i}_inputframes') for i in range(1, num_of_models + 1)]
261
+
262
+ if not any(os.path.exists(cn_inputframes) for cn_inputframes in cn_inputframes_list) and not any_loopback_mode:
263
+ print(f'\033[33mNeither the base nor the masking frames for ControlNet were found. Using the regular pipeline\033[0m')
264
+
265
+ p.scripts = scripts.scripts_img2img if is_img2img else scripts.scripts_txt2img
266
+
267
+ def create_cnu_dict(cn_args, prefix, img_np, mask_np, frame_idx, CnSchKeys):
268
+
269
+ keys = [
270
+ "enabled", "module", "model", "weight", "resize_mode", "control_mode", "low_vram", "pixel_perfect",
271
+ "processor_res", "threshold_a", "threshold_b", "guidance_start", "guidance_end"
272
+ ]
273
+ cnu = {k: getattr(cn_args, f"{prefix}_{k}") for k in keys}
274
+ model_num = int(prefix.split('_')[-1]) # Extract model number from prefix (e.g., "cn_1" -> 1)
275
+ if 1 <= model_num <= 5:
276
+ # if in loopmode and no init image (img_np, after processing in this case) provided, disable CN unit for the very first frame. Will be enabled in the next frame automatically
277
+ if getattr(cn_args, f"cn_{model_num}_loopback_mode") and frame_idx == 0 and img_np is None:
278
+ cnu['enabled'] = False
279
+ cnu['weight'] = getattr(CnSchKeys, f"cn_{model_num}_weight_schedule_series")[frame_idx]
280
+ cnu['guidance_start'] = getattr(CnSchKeys, f"cn_{model_num}_guidance_start_schedule_series")[frame_idx]
281
+ cnu['guidance_end'] = getattr(CnSchKeys, f"cn_{model_num}_guidance_end_schedule_series")[frame_idx]
282
+ cnu['image'] = {'image': img_np, 'mask': mask_np} if mask_np is not None else img_np
283
+
284
+ return cnu
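# --- Editor's illustrative sketch (hypothetical values, not part of the committed file) ---
# For prefix "cn_1" at frame_idx 10 with a weight schedule of "0:(0.5), 20:(1.0)"
# (assuming linear interpolation between keyframes), the dict returned above could
# look roughly like:
#     {'enabled': True, 'module': 'canny', 'model': 'control_v11p_sd15_canny',
#      'weight': 0.75, 'guidance_start': 0.0, 'guidance_end': 1.0,
#      'resize_mode': 'Inner Fit (Scale to Fit)', 'control_mode': 'Balanced',
#      'low_vram': False, 'pixel_perfect': False, 'processor_res': 512,
#      'threshold_a': 100, 'threshold_b': 200, 'image': <uint8 HxWx3 ndarray>}
# which is then splatted into cnet.ControlNetUnit(**...) further below.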
285
+
286
+ masks_np, images_np = zip(*cn_data)
287
+
288
+ cn_units = [cnet.ControlNetUnit(**create_cnu_dict(controlnet_args, f"cn_{i + 1}", img_np, mask_np, frame_idx, CnSchKeys))
289
+ for i, (img_np, mask_np) in enumerate(zip(images_np, masks_np))]
290
+
291
+ p.script_args = {"enabled": True}
292
+ cnet.update_cn_script_in_processing(p, cn_units, is_img2img=is_img2img, is_ui=False)
293
+
294
+ def process_controlnet_input_frames(args, anim_args, controlnet_args, video_path, mask_path, outdir_suffix, id):
295
+ if (video_path or mask_path) and getattr(controlnet_args, f'cn_{id}_enabled'):
296
+ frame_path = os.path.join(args.outdir, f'controlnet_{id}_{outdir_suffix}')
297
+ os.makedirs(frame_path, exist_ok=True)
298
+
299
+ accepted_image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')
300
+ if video_path and video_path.lower().endswith(accepted_image_extensions):
301
+ convert_image(video_path, os.path.join(frame_path, '000000000.jpg'))
302
+ print(f"Copied CN Model {id}'s single input image to inputframes folder!")
303
+ elif mask_path and mask_path.lower().endswith(accepted_image_extensions):
304
+ convert_image(mask_path, os.path.join(frame_path, '000000000.jpg'))
305
+ print(f"Copied CN Model {id}'s single input image to inputframes *mask* folder!")
306
+ else:
307
+ print(f'Unpacking ControlNet {id} {"video mask" if mask_path else "base video"}')
308
+ print(f"Exporting Video Frames to {frame_path}...")
309
+ vid2frames(
310
+ video_path=video_path or mask_path,
311
+ video_in_frame_path=frame_path,
312
+ n=1 if anim_args.animation_mode != 'Video Input' else anim_args.extract_nth_frame,
313
+ overwrite=getattr(controlnet_args, f'cn_{id}_overwrite_frames'),
314
+ extract_from_frame=0 if anim_args.animation_mode != 'Video Input' else anim_args.extract_from_frame,
315
+ extract_to_frame=(anim_args.max_frames - 1) if anim_args.animation_mode != 'Video Input' else anim_args.extract_to_frame,
316
+ numeric_files_output=True
317
+ )
318
+ print(f"Loading {anim_args.max_frames} input frames from {frame_path} and saving video frames to {args.outdir}")
319
+ print(f'ControlNet {id} {"video mask" if mask_path else "base video"} unpacked!')
320
+
321
+ def unpack_controlnet_vids(args, anim_args, controlnet_args):
322
+ # this func gets called from render.py once for an entire animation run -->
323
+ # tries to trigger an extraction of CN input frames (regular + masks) from video or image
324
+ for i in range(1, num_of_models + 1):
325
+ # LoopBack mode is enabled, no need to extract a video or copy an init image
326
+ if getattr(controlnet_args, f'cn_{i}_loopback_mode'):
327
+ print(f"ControlNet #{i} is in LoopBack mode, skipping video/ image extraction stage.")
328
+ continue
329
+ vid_path = clean_gradio_path_strings(getattr(controlnet_args, f'cn_{i}_vid_path', None))
330
+ mask_path = clean_gradio_path_strings(getattr(controlnet_args, f'cn_{i}_mask_vid_path', None))
331
+
332
+ if vid_path: # Process base video, if available
333
+ process_controlnet_input_frames(args, anim_args, controlnet_args, vid_path, None, 'inputframes', i)
334
+
335
+ if mask_path: # Process mask video, if available
336
+ process_controlnet_input_frames(args, anim_args, controlnet_args, None, mask_path, 'maskframes', i)
scripts/deforum_helpers/deforum_controlnet_gradio.py ADDED
@@ -0,0 +1,88 @@
 
 
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import gradio as gr
19
+ # print (cnet_1.get_modules())
20
+
21
+ # *** TODO: re-enable table printing! disabled only temp! 13-04-23 ***
22
+ # table = Table(title="ControlNet params",padding=0, box=box.ROUNDED)
23
+
24
+ # TODO: auto infer the names and the values for the table
25
+ # field_names = []
26
+ # field_names += ["module", "model", "weight", "inv", "guide_start", "guide_end", "guess", "resize", "rgb_bgr", "proc res", "thr a", "thr b"]
27
+ # for field_name in field_names:
28
+ # table.add_column(field_name, justify="center")
29
+
30
+ # cn_model_name = str(controlnet_args.cn_1_model)
31
+
32
+ # rows = []
33
+ # rows += [controlnet_args.cn_1_module, cn_model_name[len('control_'):] if 'control_' in cn_model_name else cn_model_name, controlnet_args.cn_1_weight, controlnet_args.cn_1_invert_image, controlnet_args.cn_1_guidance_start, controlnet_args.cn_1_guidance_end, controlnet_args.cn_1_guess_mode, controlnet_args.cn_1_resize_mode, controlnet_args.cn_1_rgbbgr_mode, controlnet_args.cn_1_processor_res, controlnet_args.cn_1_threshold_a, controlnet_args.cn_1_threshold_b]
34
+ # rows = [str(x) for x in rows]
35
+
36
+ # table.add_row(*rows)
37
+ # console.print(table)
38
+
39
+ def hide_ui_by_cn_status(choice):
40
+ return gr.update(visible=True) if choice else gr.update(visible=False)
41
+
42
+ def hide_file_textboxes(choice):
43
+ return gr.update(visible=False) if choice else gr.update(visible=True)
44
+
45
+ class ToolButton(gr.Button, gr.components.FormComponent):
46
+ """Small button with single emoji as text, fits inside gradio forms"""
47
+ def __init__(self, **kwargs):
48
+ super().__init__(variant="tool", **kwargs)
49
+
50
+ def get_block_name(self):
51
+ return "button"
52
+
53
+ model_free_preprocessors = ["reference_only", "reference_adain", "reference_adain+attn"]
54
+ flag_preprocessor_resolution = "Preprocessor Resolution"
55
+
56
+ def build_sliders(module, pp, preprocessor_sliders_config):
57
+ grs = []
58
+ if module not in preprocessor_sliders_config:
59
+ grs += [
60
+ gr.update(label=flag_preprocessor_resolution, value=512, minimum=64, maximum=2048, step=1, visible=not pp, interactive=not pp),
61
+ gr.update(visible=False, interactive=False),
62
+ gr.update(visible=False, interactive=False),
63
+ gr.update(visible=True)
64
+ ]
65
+ else:
66
+ for slider_config in preprocessor_sliders_config[module]:
67
+ if isinstance(slider_config, dict):
68
+ visible = True
69
+ if slider_config['name'] == flag_preprocessor_resolution:
70
+ visible = not pp
71
+ grs.append(gr.update(
72
+ label=slider_config['name'],
73
+ value=slider_config['value'],
74
+ minimum=slider_config['min'],
75
+ maximum=slider_config['max'],
76
+ step=slider_config['step'] if 'step' in slider_config else 1,
77
+ visible=visible,
78
+ interactive=visible))
79
+ else:
80
+ grs.append(gr.update(visible=False, interactive=False))
81
+ while len(grs) < 3:
82
+ grs.append(gr.update(visible=False, interactive=False))
83
+ grs.append(gr.update(visible=True))
84
+ if module in model_free_preprocessors:
85
+ grs += [gr.update(visible=False, value='None'), gr.update(visible=False)]
86
+ else:
87
+ grs += [gr.update(visible=True), gr.update(visible=True)]
88
+ return grs
scripts/deforum_helpers/deforum_tqdm.py ADDED
@@ -0,0 +1,99 @@
 
 
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ from math import ceil
20
+ import tqdm
21
+ from modules.shared import progress_print_out, opts, cmd_opts
22
+
23
+ class DeforumTQDM:
24
+ def __init__(self, args, anim_args, parseq_args, video_args):
25
+ self._tqdm = None
26
+ self._args = args
27
+ self._anim_args = anim_args
28
+ self._parseq_args = parseq_args
29
+ self._video_args = video_args
30
+
31
+ def reset(self):
32
+ from .animation_key_frames import DeformAnimKeys
33
+ from .parseq_adapter import ParseqAnimKeys
34
+ deforum_total = 0
35
+ # FIXME: get only amount of steps
36
+ use_parseq = self._parseq_args.parseq_manifest is not None and self._parseq_args.parseq_manifest.strip()
37
+ keys = DeformAnimKeys(self._anim_args) if not use_parseq else ParseqAnimKeys(self._parseq_args, self._anim_args, self._video_args, mute=True)
38
+
39
+ start_frame = 0
40
+ if self._anim_args.resume_from_timestring:
41
+ for tmp in os.listdir(self._args.outdir):
42
+ filename = tmp.split("_")
43
+ # don't use saved depth maps to count number of frames
44
+ if self._anim_args.resume_timestring in filename and "depth" not in filename:
45
+ start_frame += 1
46
+ start_frame = start_frame - 1
47
+ using_vid_init = self._anim_args.animation_mode == 'Video Input'
48
+ turbo_steps = 1 if using_vid_init else int(self._anim_args.diffusion_cadence)
49
+ if self._anim_args.resume_from_timestring:
50
+ last_frame = start_frame - 1
51
+ if turbo_steps > 1:
52
+ last_frame -= last_frame % turbo_steps
53
+ if turbo_steps > 1:
54
+ turbo_next_frame_idx = last_frame
55
+ turbo_prev_frame_idx = turbo_next_frame_idx
56
+ start_frame = last_frame + turbo_steps
57
+ frame_idx = start_frame
58
+ had_first = False
59
+ while frame_idx < self._anim_args.max_frames:
60
+ strength = keys.strength_schedule_series[frame_idx]
61
+ if not had_first and self._args.use_init and self._args.init_image is not None and self._args.init_image != '':
62
+ deforum_total += int(ceil(self._args.steps * (1 - strength)))
63
+ had_first = True
64
+ elif not had_first:
65
+ deforum_total += self._args.steps
66
+ had_first = True
67
+ else:
68
+ deforum_total += int(ceil(self._args.steps * (1 - strength)))
69
+
70
+ if turbo_steps > 1:
71
+ frame_idx += turbo_steps
72
+ else:
73
+ frame_idx += 1
74
+
75
+ self._tqdm = tqdm.tqdm(
76
+ desc="Deforum progress",
77
+ total=deforum_total,
78
+ position=1,
79
+ file=progress_print_out
80
+ )
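# Editor's note (illustrative arithmetic): with steps=20, a constant strength of
# 0.6, no init image, cadence=2 and max_frames=100, the loop above visits frames
# 0, 2, ..., 98; the first frame costs the full 20 steps and the remaining 49
# cost ceil(20 * (1 - 0.6)) = 8 each, so deforum_total = 20 + 49 * 8 = 412.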
81
+
82
+ def update(self):
83
+ if not opts.multiple_tqdm or cmd_opts.disable_console_progressbars:
84
+ return
85
+ if self._tqdm is None:
86
+ self.reset()
87
+ self._tqdm.update()
88
+
89
+ def updateTotal(self, new_total):
90
+ if not opts.multiple_tqdm or cmd_opts.disable_console_progressbars:
91
+ return
92
+ if self._tqdm is None:
93
+ self.reset()
94
+ self._tqdm.total = new_total
95
+
96
+ def clear(self):
97
+ if self._tqdm is not None:
98
+ self._tqdm.close()
99
+ self._tqdm = None
scripts/deforum_helpers/deprecation_utils.py ADDED
@@ -0,0 +1,99 @@
 
 
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ # This file is used to map deprecated setting names in a dictionary
19
+ # and print a message containing the old and the new names
20
+
21
+ deprecation_map = {
22
+ "histogram_matching": None,
23
+ "flip_2d_perspective": "enable_perspective_flip",
24
+ "skip_video_for_run_all": "skip_video_creation",
25
+ "color_coherence": [
26
+ ("Match Frame 0 HSV", "HSV", False),
27
+ ("Match Frame 0 LAB", "LAB", False),
28
+ ("Match Frame 0 RGB", "RGB", False),
29
+ # ,("removed_value", None, True) # for removed values, if we'll need in the future
30
+ ],
31
+ "hybrid_composite": [
32
+ (False, "None", False),
33
+ (True, "Normal", False),
34
+ ],
35
+ "optical_flow_redo_generation": [
36
+ (False, "None", False),
37
+ (True, "DIS Fine", False),
38
+ ],
39
+ "optical_flow_cadence": [
40
+ (False, "None", False),
41
+ (True, "DIS Fine", False),
42
+ ],
43
+ "cn_1_resize_mode": [
44
+ ("Envelope (Outer Fit)", "Outer Fit (Shrink to Fit)", False),
45
+ ("Scale to Fit (Inner Fit)", "Inner Fit (Scale to Fit)", False),
46
+ ],
47
+ "cn_2_resize_mode": [
48
+ ("Envelope (Outer Fit)", "Outer Fit (Shrink to Fit)", False),
49
+ ("Scale to Fit (Inner Fit)", "Inner Fit (Scale to Fit)", False),
50
+ ],
51
+ "cn_3_resize_mode": [
52
+ ("Envelope (Outer Fit)", "Outer Fit (Shrink to Fit)", False),
53
+ ("Scale to Fit (Inner Fit)", "Inner Fit (Scale to Fit)", False),
54
+ ],
55
+ "use_zoe_depth": ("depth_algorithm", [("True", "Zoe+AdaBins (old)"), ("False", "Midas+AdaBins (old)")]),
56
+ }
57
+
58
+ def dynamic_num_to_schedule_formatter(old_value):
59
+ return f"0:({old_value})"
60
+
61
+ for i in range(1, 6): # 5 CN models in total
62
+ deprecation_map[f"cn_{i}_weight"] = dynamic_num_to_schedule_formatter
63
+ deprecation_map[f"cn_{i}_guidance_start"] = dynamic_num_to_schedule_formatter
64
+ deprecation_map[f"cn_{i}_guidance_end"] = dynamic_num_to_schedule_formatter
65
+
66
+ def handle_deprecated_settings(settings_json):
67
+ # Set legacy_colormatch mode to True when importing old files, so results are backwards-compatible. Print a message about it too
68
+ if 'legacy_colormatch' not in settings_json:
69
+ settings_json['legacy_colormatch'] = True
70
+ print('\033[33mlegacy_colormatch is missing from settings file, so we are setting it to *True* for backwards compatibility. You are welcome to test your file with that setting being disabled for better color coherence.\033[0m')
71
+ print("")
72
+ for setting_name, deprecation_info in deprecation_map.items():
73
+ if setting_name in settings_json:
74
+ if deprecation_info is None:
75
+ print(f"WARNING: Setting '{setting_name}' has been removed. It will be discarded and the default value used instead!")
76
+ elif isinstance(deprecation_info, tuple):
77
+ new_setting_name, value_map = deprecation_info
78
+ old_value = str(settings_json.pop(setting_name)) # Convert the boolean value to a string for comparison
79
+ new_value = next((v for k, v in value_map if k == old_value), None)
80
+ if new_value is not None:
81
+ print(f"WARNING: Setting '{setting_name}' has been renamed to '{new_setting_name}' with value '{new_value}'. The saved settings file will reflect the change")
82
+ settings_json[new_setting_name] = new_value
83
+ elif callable(deprecation_info):
84
+ old_value = settings_json[setting_name]
85
+ if isinstance(old_value, (int, float)):
86
+ new_value = deprecation_info(old_value)
87
+ print(f"WARNING: Value '{old_value}' for setting '{setting_name}' has been replaced with '{new_value}'. The saved settings file will reflect the change")
88
+ settings_json[setting_name] = new_value
89
+ elif isinstance(deprecation_info, str):
90
+ print(f"WARNING: Setting '{setting_name}' has been renamed to '{deprecation_info}'. The saved settings file will reflect the change")
91
+ settings_json[deprecation_info] = settings_json.pop(setting_name)
92
+ elif isinstance(deprecation_info, list):
93
+ for old_value, new_value, is_removed in deprecation_info:
94
+ if settings_json[setting_name] == old_value:
95
+ if is_removed:
96
+ print(f"WARNING: Value '{old_value}' for setting '{setting_name}' has been removed. It will be discarded and the default value used instead!")
97
+ else:
98
+ print(f"WARNING: Value '{old_value}' for setting '{setting_name}' has been replaced with '{new_value}'. The saved settings file will reflect the change")
99
+ settings_json[setting_name] = new_value
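Note (illustration): a quick sketch of how the map above is applied by handle_deprecated_settings: string entries rename a key, list entries remap listed values, callable entries turn plain numbers into schedules, and tuple entries move the value to a new key. The settings values below are invented for the example:

settings = {
    "skip_video_for_run_all": False,         # str entry: key renamed to skip_video_creation
    "color_coherence": "Match Frame 0 LAB",  # list entry: value remapped to "LAB"
    "cn_1_weight": 0.8,                      # callable entry: number expanded to the schedule "0:(0.8)"
    "use_zoe_depth": True,                   # tuple entry: moved to depth_algorithm="Zoe+AdaBins (old)"
}
handle_deprecated_settings(settings)
# settings now also contains legacy_colormatch=True, added for backwards compatibility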
scripts/deforum_helpers/depth.py ADDED
@@ -0,0 +1,160 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import gc
19
+ import cv2
20
+ import numpy as np
21
+ import torch
22
+ from PIL import Image
23
+ from einops import rearrange, repeat
24
+ from modules import devices
25
+ from modules.shared import cmd_opts
26
+ from .depth_adabins import AdaBinsModel
27
+ from .depth_leres import LeReSDepth
28
+ from .depth_midas import MidasDepth
29
+ from .depth_zoe import ZoeDepth
30
+ from .general_utils import debug_print
31
+
32
+ class DepthModel:
33
+ _instance = None
34
+
35
+ def __new__(cls, *args, **kwargs):
36
+ keep_in_vram = kwargs.get('keep_in_vram', False)
37
+ depth_algorithm = kwargs.get('depth_algorithm', 'Midas-3-Hybrid')
38
+ Width, Height = kwargs.get('Width', 512), kwargs.get('Height', 512)
39
+ midas_weight = kwargs.get('midas_weight', 0.2)
40
+ model_switched = cls._instance and cls._instance.depth_algorithm != depth_algorithm
41
+ resolution_changed = cls._instance and (cls._instance.Width != Width or cls._instance.Height != Height)
42
+ zoe_algorithm = 'zoe' in depth_algorithm.lower()
43
+ model_deleted = cls._instance and cls._instance.should_delete
44
+
45
+ should_reload = (cls._instance is None or model_deleted or model_switched or (zoe_algorithm and resolution_changed))
46
+
47
+ if should_reload:
48
+ cls._instance = super().__new__(cls)
49
+ cls._instance._initialize(models_path=args[0], device=args[1], half_precision=not cmd_opts.no_half, keep_in_vram=keep_in_vram, depth_algorithm=depth_algorithm, Width=Width, Height=Height, midas_weight=midas_weight)
50
+ elif cls._instance.should_delete and keep_in_vram:
51
+ cls._instance._initialize(models_path=args[0], device=args[1], half_precision=not cmd_opts.no_half, keep_in_vram=keep_in_vram, depth_algorithm=depth_algorithm, Width=Width, Height=Height, midas_weight=midas_weight)
52
+ cls._instance.should_delete = not keep_in_vram
53
+ return cls._instance
54
+
55
+ def _initialize(self, models_path, device, half_precision=not cmd_opts.no_half, keep_in_vram=False, depth_algorithm='Midas-3-Hybrid', Width=512, Height=512, midas_weight=1.0):
56
+ self.models_path = models_path
57
+ self.device = device
58
+ self.half_precision = half_precision
59
+ self.keep_in_vram = keep_in_vram
60
+ self.depth_algorithm = depth_algorithm
61
+ self.Width, self.Height = Width, Height
62
+ self.midas_weight = midas_weight
63
+ self.depth_min, self.depth_max = 1000, -1000
64
+ self.adabins_helper = None
65
+ self._initialize_model()
66
+
67
+ def _initialize_model(self):
68
+ depth_algo = self.depth_algorithm.lower()
69
+ if depth_algo.startswith('zoe'):
70
+ self.zoe_depth = ZoeDepth(self.Width, self.Height)
71
+ if depth_algo == 'zoe+adabins (old)':
72
+ self.adabins_model = AdaBinsModel(self.models_path, keep_in_vram=self.keep_in_vram)
73
+ self.adabins_helper = self.adabins_model.adabins_helper
74
+ elif depth_algo == 'leres':
75
+ self.leres_depth = LeReSDepth(width=448, height=448, models_path=self.models_path, checkpoint_name='res101.pth', backbone='resnext101')
76
+ elif depth_algo == 'adabins':
77
+ self.adabins_model = AdaBinsModel(self.models_path, keep_in_vram=self.keep_in_vram)
78
+ self.adabins_helper = self.adabins_model.adabins_helper
79
+ elif depth_algo.startswith('midas'):
80
+ self.midas_depth = MidasDepth(self.models_path, self.device, half_precision=self.half_precision, midas_model_type=self.depth_algorithm)
81
+ if depth_algo == 'midas+adabins (old)':
82
+ self.adabins_model = AdaBinsModel(self.models_path, keep_in_vram=self.keep_in_vram)
83
+ self.adabins_helper = self.adabins_model.adabins_helper
84
+ else:
85
+ raise Exception(f"Unknown depth_algorithm: {self.depth_algorithm}")
86
+
87
+ def predict(self, prev_img_cv2, midas_weight, half_precision) -> torch.Tensor:
88
+
89
+ img_pil = Image.fromarray(cv2.cvtColor(prev_img_cv2.astype(np.uint8), cv2.COLOR_RGB2BGR))
90
+
91
+ if self.depth_algorithm.lower().startswith('zoe'):
92
+ depth_tensor = self.zoe_depth.predict(img_pil).to(self.device)
93
+ if self.depth_algorithm.lower() == 'zoe+adabins (old)' and midas_weight < 1.0:
94
+ use_adabins, adabins_depth = AdaBinsModel._instance.predict(img_pil, prev_img_cv2)
95
+ if use_adabins: # if there was no error in getting the adabins depth, align midas with adabins
96
+ depth_tensor = self.blend_and_align_with_adabins(depth_tensor, adabins_depth, midas_weight)
97
+ elif self.depth_algorithm.lower() == 'leres':
98
+ depth_tensor = self.leres_depth.predict(prev_img_cv2.astype(np.float32) / 255.0)
99
+ elif self.depth_algorithm.lower() == 'adabins':
100
+ use_adabins, adabins_depth = AdaBinsModel._instance.predict(img_pil, prev_img_cv2)
101
+ depth_tensor = torch.tensor(adabins_depth)
102
+ if use_adabins is False:
103
+ raise Exception("Error getting depth from AdaBins") # TODO: fallback to something else maybe?
104
+ elif self.depth_algorithm.lower().startswith('midas'):
105
+ depth_tensor = self.midas_depth.predict(prev_img_cv2, half_precision)
106
+ if self.depth_algorithm.lower() == 'midas+adabins (old)' and midas_weight < 1.0:
107
+ use_adabins, adabins_depth = AdaBinsModel._instance.predict(img_pil, prev_img_cv2)
108
+ if use_adabins: # if there was no error in getting the adabins depth, align midas with adabins
109
+ depth_tensor = self.blend_and_align_with_adabins(depth_tensor, adabins_depth, midas_weight)
110
+ else: # Unknown!
111
+ raise Exception(f"Unknown depth_algorithm passed to depth.predict function: {self.depth_algorithm}")
112
+
113
+ return depth_tensor
114
+
115
+ def blend_and_align_with_adabins(self, depth_tensor, adabins_depth, midas_weight):
116
+ depth_tensor = torch.subtract(50.0, depth_tensor) / 19.0 # align midas depth with adabins depth. Original alignment code from Disco Diffusion
117
+ blended_depth_map = (depth_tensor.cpu().numpy() * midas_weight + adabins_depth * (1.0 - midas_weight))
118
+ depth_tensor = torch.from_numpy(np.expand_dims(blended_depth_map, axis=0)).squeeze().to(self.device)
119
+ debug_print(f"Blended Midas Depth with AdaBins Depth")
120
+ return depth_tensor
121
+
122
+ def to(self, device):
123
+ self.device = device
124
+ if self.depth_algorithm.lower().startswith('zoe'):
125
+ self.zoe_depth.zoe.to(device)
126
+ elif self.depth_algorithm.lower() == 'leres':
127
+ self.leres_depth.to(device)
128
+ elif self.depth_algorithm.lower().startswith('midas'):
129
+ self.midas_depth.to(device)
130
+ if hasattr(self, 'adabins_model'):
131
+ self.adabins_model.to(device)
132
+ gc.collect()
133
+ torch.cuda.empty_cache()
134
+
135
+ def to_image(self, depth: torch.Tensor):
136
+ depth = depth.cpu().numpy()
137
+ depth = np.expand_dims(depth, axis=0) if len(depth.shape) == 2 else depth
138
+ self.depth_min, self.depth_max = min(self.depth_min, depth.min()), max(self.depth_max, depth.max())
139
+ denom = max(1e-8, self.depth_max - self.depth_min)
140
+ temp = rearrange((depth - self.depth_min) / denom * 255, 'c h w -> h w c')
141
+ return Image.fromarray(repeat(temp, 'h w 1 -> h w c', c=3).astype(np.uint8))
142
+
143
+ def save(self, filename: str, depth: torch.Tensor):
144
+ self.to_image(depth).save(filename)
145
+
146
+ def delete_model(self):
147
+ for attr in ['zoe_depth', 'leres_depth']:
148
+ if hasattr(self, attr):
149
+ getattr(self, attr).delete()
150
+ delattr(self, attr)
151
+
152
+ if hasattr(self, 'midas_depth'):
153
+ del self.midas_depth
154
+
155
+ if hasattr(self, 'adabins_model'):
156
+ self.adabins_model.delete_model()
157
+
158
+ gc.collect()
159
+ torch.cuda.empty_cache()
160
+ devices.torch_gc()
scripts/deforum_helpers/depth_adabins.py ADDED
@@ -0,0 +1,79 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import torch
19
+ import numpy as np
20
+ from PIL import Image
21
+ import torchvision.transforms.functional as TF
22
+ from .general_utils import download_file_with_checksum
23
+ from infer import InferenceHelper
24
+
25
+ class AdaBinsModel:
26
+ _instance = None
27
+
28
+ def __new__(cls, *args, **kwargs):
29
+ keep_in_vram = kwargs.get('keep_in_vram', False)
30
+ if cls._instance is None:
31
+ cls._instance = super().__new__(cls)
32
+ cls._instance._initialize(*args, keep_in_vram=keep_in_vram)
33
+ return cls._instance
34
+
35
+ def _initialize(self, models_path, keep_in_vram=False):
36
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
37
+ self.keep_in_vram = keep_in_vram
38
+ self.adabins_helper = None
39
+
40
+ download_file_with_checksum(url='https://github.com/hithereai/deforum-for-automatic1111-webui/releases/download/AdaBins/AdaBins_nyu.pt', expected_checksum='643db9785c663aca72f66739427642726b03acc6c4c1d3755a4587aa2239962746410d63722d87b49fc73581dbc98ed8e3f7e996ff7b9c0d56d0fbc98e23e41a', dest_folder=models_path, dest_filename='AdaBins_nyu.pt')
41
+
42
+ self.adabins_helper = InferenceHelper(models_path=models_path, dataset='nyu', device=self.device)
43
+
44
+ def predict(self, img_pil, prev_img_cv2):
45
+ w, h = prev_img_cv2.shape[1], prev_img_cv2.shape[0]
46
+ adabins_depth = np.array([])
47
+ use_adabins = True
48
+ MAX_ADABINS_AREA, MIN_ADABINS_AREA = 500000, 448 * 448
49
+
50
+ image_pil_area, resized = w * h, False
51
+
52
+ if image_pil_area not in range(MIN_ADABINS_AREA, MAX_ADABINS_AREA + 1):
53
+ scale = ((MAX_ADABINS_AREA if image_pil_area > MAX_ADABINS_AREA else MIN_ADABINS_AREA) / image_pil_area) ** 0.5
54
+ depth_input = img_pil.resize((int(w * scale), int(h * scale)), Image.LANCZOS if image_pil_area > MAX_ADABINS_AREA else Image.BICUBIC)
55
+ print(f"AdaBins depth resized to {depth_input.width}x{depth_input.height}")
56
+ resized = True
57
+ else:
58
+ depth_input = img_pil
59
+
60
+ try:
61
+ with torch.no_grad():
62
+ _, adabins_depth = self.adabins_helper.predict_pil(depth_input)
63
+ if resized:
64
+ adabins_depth = TF.resize(torch.from_numpy(adabins_depth), torch.Size([h, w]), interpolation=TF.InterpolationMode.BICUBIC).cpu().numpy()
65
+ adabins_depth = adabins_depth.squeeze()
66
+ except Exception as e:
67
+ print("AdaBins exception encountered. Falling back to pure MiDaS/Zoe (only if running in Legacy Midas/Zoe+AdaBins mode)")
68
+ use_adabins = False
69
+ torch.cuda.empty_cache()
70
+
71
+ return use_adabins, adabins_depth
72
+
73
+ def to(self, device):
74
+ self.device = device
75
+ if self.adabins_helper is not None:
76
+ self.adabins_helper.to(device)
77
+
78
+ def delete_model(self):
79
+ del self.adabins_helper
scripts/deforum_helpers/depth_leres.py ADDED
@@ -0,0 +1,72 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import torch
19
+ import cv2
20
+ import os
21
+ import numpy as np
22
+ import torchvision.transforms as transforms
23
+ from .general_utils import download_file_with_checksum
24
+ from leres.lib.multi_depth_model_woauxi import RelDepthModel
25
+ from leres.lib.net_tools import load_ckpt
26
+
27
+ class LeReSDepth:
28
+ def __init__(self, width=448, height=448, models_path=None, checkpoint_name='res101.pth', backbone='resnext101'):
29
+ self.width = width
30
+ self.height = height
31
+ self.models_path = models_path
32
+ self.checkpoint_name = checkpoint_name
33
+ self.backbone = backbone
34
+
35
+ download_file_with_checksum(url='https://cloudstor.aarnet.edu.au/plus/s/lTIJF4vrvHCAI31/download', expected_checksum='7fdc870ae6568cb28d56700d0be8fc45541e09cea7c4f84f01ab47de434cfb7463cacae699ad19fe40ee921849f9760dedf5e0dec04a62db94e169cf203f55b1', dest_folder=models_path, dest_filename=self.checkpoint_name)
36
+
37
+ self.depth_model = RelDepthModel(backbone=self.backbone)
38
+ self.depth_model.eval()
39
+ self.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
40
+ self.depth_model.to(self.DEVICE)
41
+ load_ckpt(os.path.join(self.models_path, self.checkpoint_name), self.depth_model, None, None)
42
+
43
+ @staticmethod
44
+ def scale_torch(img):
45
+ if len(img.shape) == 2:
46
+ img = img[np.newaxis, :, :]
47
+ if img.shape[2] == 3:
48
+ transform = transforms.Compose([transforms.ToTensor(),
49
+ transforms.Normalize((0.485, 0.456, 0.406) , (0.229, 0.224, 0.225))])
50
+ img = transform(img)
51
+ else:
52
+ img = img.astype(np.float32)
53
+ img = torch.from_numpy(img)
54
+ return img
55
+
56
+ def predict(self, image):
57
+ resized_image = cv2.resize(image, (self.width, self.height))
58
+ img_torch = self.scale_torch(resized_image)[None, :, :, :]
59
+ pred_depth = self.depth_model.inference(img_torch).cpu().numpy().squeeze()
60
+ pred_depth_ori = cv2.resize(pred_depth, (image.shape[1], image.shape[0]))
61
+ return torch.from_numpy(pred_depth_ori).unsqueeze(0).to(self.DEVICE)
62
+
63
+ def save_raw_depth(self, depth, filepath):
64
+ depth_normalized = (depth / depth.max() * 60000).astype(np.uint16)
65
+ cv2.imwrite(filepath, depth_normalized)
66
+
67
+ def to(self, device):
68
+ self.DEVICE = device
69
+ self.depth_model = self.depth_model.to(device)
70
+
71
+ def delete(self):
72
+ del self.depth_model
scripts/deforum_helpers/depth_midas.py ADDED
@@ -0,0 +1,92 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ import cv2
20
+ import torch
21
+ import numpy as np
22
+ from .general_utils import download_file_with_checksum
23
+ from midas.dpt_depth import DPTDepthModel
24
+ from midas.transforms import Resize, NormalizeImage, PrepareForNet
25
+ import torchvision.transforms as T
26
+
27
+ class MidasDepth:
28
+ def __init__(self, models_path, device, half_precision=True, midas_model_type='Midas-3-Hybrid'):
29
+ if midas_model_type.lower() == 'midas-3.1-beitlarge':
30
+ self.midas_model_filename = 'dpt_beit_large_512.pt'
31
+ self.midas_model_checksum='66cbb00ea7bccd6e43d3fd277bd21002d8d8c2c5c487e5fcd1e1d70c691688a19122418b3ddfa94e62ab9f086957aa67bbec39afe2b41c742aaaf0699ee50b33'
32
+ self.midas_model_url = 'https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_512.pt'
33
+ self.resize_px = 512
34
+ self.backbone = 'beitl16_512'
35
+ else:
36
+ self.midas_model_filename = 'dpt_large-midas-2f21e586.pt'
37
+ self.midas_model_checksum = 'fcc4829e65d00eeed0a38e9001770676535d2e95c8a16965223aba094936e1316d569563552a852d471f310f83f597e8a238987a26a950d667815e08adaebc06'
38
+ self.midas_model_url = 'https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt'
39
+ self.resize_px = 384
40
+ self.backbone = 'vitl16_384'
41
+ self.device = device
42
+ self.normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
43
+ self.midas_transform = T.Compose([
44
+ Resize(self.resize_px, self.resize_px, resize_target=None, keep_aspect_ratio=True, ensure_multiple_of=32,
45
+ resize_method="minimal", image_interpolation_method=cv2.INTER_CUBIC),
46
+ self.normalization,
47
+ PrepareForNet()
48
+ ])
49
+
50
+ download_file_with_checksum(url=self.midas_model_url, expected_checksum=self.midas_model_checksum, dest_folder=models_path, dest_filename=self.midas_model_filename)
51
+
52
+ self.load_midas_model(models_path, self.midas_model_filename)
53
+ if half_precision:
54
+ self.midas_model = self.midas_model.half()
55
+
56
+ def load_midas_model(self, models_path, midas_model_filename):
57
+ model_file = os.path.join(models_path, midas_model_filename)
58
+ print(f"Loading MiDaS model from {midas_model_filename}...")
59
+ self.midas_model = DPTDepthModel(
60
+ path=model_file,
61
+ backbone=self.backbone,
62
+ non_negative=True,
63
+ )
64
+ self.midas_model.eval().to(self.device, memory_format=torch.channels_last if self.device == torch.device("cuda") else None)
65
+
66
+ def predict(self, prev_img_cv2, half_precision):
67
+ img_midas = prev_img_cv2.astype(np.float32) / 255.0
68
+ img_midas_input = self.midas_transform({"image": img_midas})["image"]
69
+ sample = torch.from_numpy(img_midas_input).float().to(self.device).unsqueeze(0)
70
+
71
+ if self.device.type == "cuda" or self.device.type == "mps":
72
+ sample = sample.to(memory_format=torch.channels_last)
73
+ if half_precision:
74
+ sample = sample.half()
75
+
76
+ with torch.no_grad():
77
+ midas_depth = self.midas_model.forward(sample)
78
+ midas_depth = torch.nn.functional.interpolate(
79
+ midas_depth.unsqueeze(1),
80
+ size=img_midas.shape[:2],
81
+ mode="bicubic",
82
+ align_corners=False,
83
+ ).squeeze().cpu().numpy()
84
+
85
+ torch.cuda.empty_cache()
86
+ depth_tensor = torch.from_numpy(np.expand_dims(midas_depth, axis=0)).squeeze().to(self.device)
87
+
88
+ return depth_tensor
89
+
90
+ def to(self, device):
91
+ self.device = device
92
+ self.midas_model = self.midas_model.to(device, memory_format=torch.channels_last if device == torch.device("cuda") else None)
scripts/deforum_helpers/depth_zoe.py ADDED
@@ -0,0 +1,47 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import torch
19
+ from zoedepth.models.builder import build_model
20
+ from zoedepth.utils.config import get_config
21
+
22
+ class ZoeDepth:
23
+ def __init__(self, width=512, height=512):
24
+ conf = get_config("zoedepth_nk", "infer")
25
+ conf.img_size = [width, height]
26
+ self.model_zoe = build_model(conf)
27
+ self.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
28
+ self.zoe = self.model_zoe.to(self.DEVICE)
29
+ self.width = width
30
+ self.height = height
31
+
32
+ def predict(self, image):
33
+ self.zoe.core.prep.resizer._Resize__width = self.width
34
+ self.zoe.core.prep.resizer._Resize__height = self.height
35
+ depth_tensor = self.zoe.infer_pil(image, output_type="tensor")
36
+ return depth_tensor
37
+
38
+ def to(self, device):
39
+ self.DEVICE = device
40
+ self.zoe = self.model_zoe.to(device)
41
+
42
+ def save_raw_depth(self, depth, filepath):
43
+ depth.save(filepath, format='PNG', mode='I;16')
44
+
45
+ def delete(self):
46
+ del self.model_zoe
47
+ del self.zoe
scripts/deforum_helpers/frame_interpolation.py ADDED
@@ -0,0 +1,239 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ from pathlib import Path
20
+ from rife.inference_video import run_rife_new_video_infer
21
+ from .video_audio_utilities import get_quick_vid_info, vid2frames, media_file_has_audio, extract_number, ffmpeg_stitch_video
22
+ from film_interpolation.film_inference import run_film_interp_infer
23
+ from .general_utils import duplicate_pngs_from_folder, checksum, convert_images_from_list
24
+ from modules.shared import opts
25
+
26
+ DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False)
27
+
28
+ # gets 'RIFE v4.3', returns: 'RIFE43'
29
+ def extract_rife_name(string):
30
+ parts = string.split()
31
+ if len(parts) != 2 or parts[0] != "RIFE" or (parts[1][0] != "v" or not parts[1][1:].replace('.','').isdigit()):
32
+ raise ValueError("Input string should contain exactly 2 words: the first should be 'RIFE' and the second should start with 'v' followed by a version number, e.g. 'RIFE v4.3'")
33
+ return "RIFE"+parts[1][1:].replace('.','')
34
+
35
+ # This function usually gets a filename, and converts it to a legal linux/windows *folder* name
36
+ def clean_folder_name(string):
37
+ illegal_chars = "/\\<>:\"|?*.,\" "
38
+ translation_table = str.maketrans(illegal_chars, "_"*len(illegal_chars))
39
+ return string.translate(translation_table)
40
+
41
+ def set_interp_out_fps(interp_x, slow_x_enabled, slom_x, in_vid_fps):
42
+ if interp_x == 'Disabled' or in_vid_fps in ('---', None, '', 'None'):
43
+ return '---'
44
+
45
+ fps = float(in_vid_fps) * int(interp_x)
46
+ # if slom_x != -1:
47
+ if slow_x_enabled:
48
+ fps /= int(slom_x)
49
+ return int(fps) if fps.is_integer() else fps
50
+
51
+ # get uploaded video frame count, fps, and return 3 values for the gradio UI: in fcount, in fps, out fps (using the set_interp_out_fps function above)
52
+ def gradio_f_interp_get_fps_and_fcount(vid_path, interp_x, slow_x_enabled, slom_x):
53
+ if vid_path is None:
54
+ return '---', '---', '---'
55
+ fps, fcount, resolution = get_quick_vid_info(vid_path.name)
56
+ expected_out_fps = set_interp_out_fps(interp_x, slow_x_enabled, slom_x, fps)
57
+ return (str(round(fps,2)) if fps is not None else '---', (round(fcount,2)) if fcount is not None else '---', round(expected_out_fps,2))
58
+
59
+ # handle call to interpolate an uploaded video from gradio button in args.py (the function that calls this func is named 'upload_vid_to_rife')
60
+ def process_interp_vid_upload_logic(file, engine, x_am, sl_enabled, sl_am, keep_imgs, f_location, f_crf, f_preset, in_vid_fps, f_models_path, vid_file_name):
61
+
62
+ print("got a request to *frame interpolate* an existing video.")
63
+
64
+ _, _, resolution = get_quick_vid_info(file.name)
65
+ folder_name = clean_folder_name(Path(vid_file_name).stem)
66
+ outdir = opts.outdir_samples or os.path.join(os.getcwd(), 'outputs')
67
+ outdir_no_tmp = outdir + f'/frame-interpolation/{folder_name}'
68
+ i = 1
69
+ while os.path.exists(outdir_no_tmp):
70
+ outdir_no_tmp = f"{outdir}/frame-interpolation/{folder_name}_{i}"
71
+ i += 1
72
+
73
+ outdir = os.path.join(outdir_no_tmp, 'tmp_input_frames')
74
+ os.makedirs(outdir, exist_ok=True)
75
+
76
+ vid2frames(video_path=file.name, video_in_frame_path=outdir, overwrite=True, extract_from_frame=0, extract_to_frame=-1, numeric_files_output=True, out_img_format='png')
77
+
78
+ # check if the uploaded vid has an audio stream. If it doesn't, set audio param to None so that ffmpeg won't try to add non-existing audio to final video.
79
+ audio_file_to_pass = None
80
+ if media_file_has_audio(file.name, f_location):
81
+ audio_file_to_pass = file.name
82
+
83
+ process_video_interpolation(frame_interpolation_engine=engine, frame_interpolation_x_amount=x_am, frame_interpolation_slow_mo_enabled = sl_enabled,frame_interpolation_slow_mo_amount=sl_am, orig_vid_fps=in_vid_fps, deforum_models_path=f_models_path, real_audio_track=audio_file_to_pass, raw_output_imgs_path=outdir, img_batch_id=None, ffmpeg_location=f_location, ffmpeg_crf=f_crf, ffmpeg_preset=f_preset, keep_interp_imgs=keep_imgs, orig_vid_name=folder_name, resolution=resolution)
84
+
85
+ # handle params before talking with the actual interpolation module (rifee/film, more to be added)
86
+ def process_video_interpolation(frame_interpolation_engine, frame_interpolation_x_amount, frame_interpolation_slow_mo_enabled, frame_interpolation_slow_mo_amount, orig_vid_fps, deforum_models_path, real_audio_track, raw_output_imgs_path, img_batch_id, ffmpeg_location, ffmpeg_crf, ffmpeg_preset, keep_interp_imgs, orig_vid_name, resolution, dont_change_fps=False, srt_path=None):
87
+
88
+ is_random_pics_run = dont_change_fps
89
+ fps = float(orig_vid_fps) * (1 if is_random_pics_run else frame_interpolation_x_amount)
90
+ fps /= int(frame_interpolation_slow_mo_amount) if frame_interpolation_slow_mo_enabled and not is_random_pics_run else 1
91
+
92
+ # disable audio-adding by setting real_audio_track to None if slow-mo is enabled
93
+ if real_audio_track is not None and frame_interpolation_slow_mo_enabled:
94
+ real_audio_track = None
95
+
96
+ # disable subtitles by setting srt_path to None if slow-mo is enabled
97
+ if srt_path is not None and frame_interpolation_slow_mo_enabled:
98
+ srt_path = None
99
+
100
+ if frame_interpolation_engine == 'None':
101
+ return
102
+ elif frame_interpolation_engine.startswith("RIFE"):
103
+ # make sure interp_x is valid and in range
104
+ if frame_interpolation_x_amount not in range(2, 11):
105
+ raise Error("frame_interpolation_x_amount must be between 2x and 10x")
106
+
107
+ # set UHD to True if res' is 2K or higher
108
+ if resolution:
109
+ UHD = resolution[0] >= 2048 and resolution[1] >= 2048
110
+ else:
111
+ UHD = False
112
+ # e.g. from "RIFE v2.3" to "RIFE23"
113
+ actual_model_folder_name = extract_rife_name(frame_interpolation_engine)
114
+
115
+ # run actual rife interpolation and video stitching etc - the whole suite
116
+ run_rife_new_video_infer(interp_x_amount=frame_interpolation_x_amount, slow_mo_enabled = frame_interpolation_slow_mo_enabled, slow_mo_x_amount=frame_interpolation_slow_mo_amount, model=actual_model_folder_name, fps=fps, deforum_models_path=deforum_models_path, audio_track=real_audio_track, raw_output_imgs_path=raw_output_imgs_path, img_batch_id=img_batch_id, ffmpeg_location=ffmpeg_location, ffmpeg_crf=ffmpeg_crf, ffmpeg_preset=ffmpeg_preset, keep_imgs=keep_interp_imgs, orig_vid_name=orig_vid_name, UHD=UHD, srt_path=srt_path)
117
+ elif frame_interpolation_engine == 'FILM':
118
+ prepare_film_inference(deforum_models_path=deforum_models_path, x_am=frame_interpolation_x_amount, sl_enabled=frame_interpolation_slow_mo_enabled, sl_am=frame_interpolation_slow_mo_amount, keep_imgs=keep_interp_imgs, raw_output_imgs_path=raw_output_imgs_path, img_batch_id=img_batch_id, f_location=ffmpeg_location, f_crf=ffmpeg_crf, f_preset=ffmpeg_preset, fps=fps, audio_track=real_audio_track, orig_vid_name=orig_vid_name, is_random_pics_run=is_random_pics_run, srt_path=srt_path)
119
+ else:
120
+ print("Unknown Frame Interpolation engine chosen. Doing nothing.")
121
+ return
122
+
123
+ def prepare_film_inference(deforum_models_path, x_am, sl_enabled, sl_am, keep_imgs, raw_output_imgs_path, img_batch_id, f_location, f_crf, f_preset, fps, audio_track, orig_vid_name, is_random_pics_run, srt_path=None):
124
+ import shutil
125
+
126
+ parent_folder = os.path.dirname(raw_output_imgs_path)
127
+ grandparent_folder = os.path.dirname(parent_folder)
128
+ if orig_vid_name is not None:
129
+ interp_vid_path = os.path.join(parent_folder, str(orig_vid_name) +'_FILM_x' + str(x_am))
130
+ else:
131
+ interp_vid_path = os.path.join(raw_output_imgs_path, str(img_batch_id) +'_FILM_x' + str(x_am))
132
+
133
+ film_model_name = 'film_net_fp16.pt'
134
+ film_model_folder = os.path.join(deforum_models_path,'film_interpolation')
135
+ film_model_path = os.path.join(film_model_folder, film_model_name) # actual full path to the film .pt model file
136
+ output_interp_imgs_folder = os.path.join(raw_output_imgs_path, 'interpolated_frames_film')
137
+ # set custom name depending on if we interpolate after a run, or interpolate a video (related/unrelated to deforum, we don't know) directly from within the interpolation tab
138
+ # interpolated_path = os.path.join(args.raw_output_imgs_path, 'interpolated_frames_rife')
139
+ if orig_vid_name is not None: # interpolating a video/ set of pictures (deforum or unrelated)
140
+ custom_interp_path = "{}_{}".format(output_interp_imgs_folder, orig_vid_name)
141
+ else: # interpolating after a deforum run:
142
+ custom_interp_path = "{}_{}".format(output_interp_imgs_folder, img_batch_id)
143
+
144
+ # interp_vid_path = os.path.join(raw_output_imgs_path, str(img_batch_id) + '_FILM_x' + str(x_am))
145
+ img_path_for_ffmpeg = os.path.join(custom_interp_path, "frame_%09d.png")
146
+
147
+ if sl_enabled:
148
+ interp_vid_path = interp_vid_path + '_slomo_x' + str(sl_am)
149
+ interp_vid_path = interp_vid_path + '.mp4'
150
+
151
+ # In this folder we temporarily keep the original frames (converted/ copy-pasted and img format depends on scenario)
152
+ temp_convert_raw_png_path = os.path.join(raw_output_imgs_path, "tmp_film_folder")
153
+ if is_random_pics_run: # pass dummy so it just copy-paste the imgs instead of re-writing them
154
+ total_frames = duplicate_pngs_from_folder(raw_output_imgs_path, temp_convert_raw_png_path, img_batch_id, 'DUMMY')
155
+ else: # re-write pics as png to avert a problem with mixed 24-bit and 32-bit outputs from the same animation run
156
+ total_frames = duplicate_pngs_from_folder(raw_output_imgs_path, temp_convert_raw_png_path, img_batch_id, None)
157
+ check_and_download_film_model('film_net_fp16.pt', film_model_folder) # TODO: split this part
158
+
159
+ # get number of in-between-frames to provide to FILM - mimics how RIFE works, we should get the same amount of total frames in the end
160
+ film_in_between_frames_count = calculate_frames_to_add(total_frames, x_am)
161
+ # Run actual FILM inference
162
+ run_film_interp_infer(
163
+ model_path = film_model_path,
164
+ input_folder = temp_convert_raw_png_path,
165
+ save_folder = custom_interp_path, # output folder is created in the infer part
166
+ inter_frames = film_in_between_frames_count)
167
+
168
+ add_soundtrack = 'None'
169
+ if audio_track is not None:
170
+ add_soundtrack = 'File'
171
+
172
+ print("*Passing interpolated frames to ffmpeg...*")
173
+ exception_raised = False
174
+ try:
175
+ ffmpeg_stitch_video(ffmpeg_location=f_location, fps=fps, outmp4_path=interp_vid_path, stitch_from_frame=0, stitch_to_frame=999999999, imgs_path=img_path_for_ffmpeg, add_soundtrack=add_soundtrack, audio_path=audio_track, crf=f_crf, preset=f_preset, srt_path=srt_path)
176
+ except Exception as e:
177
+ exception_raised = True
178
+ print(f"An error occurred while stitching the video: {e}")
179
+
180
+ if orig_vid_name and (keep_imgs or exception_raised):
181
+ shutil.move(custom_interp_path, parent_folder)
182
+ if not keep_imgs and not exception_raised:
183
+ if fps <= 450: # keep interp frames automatically if out_vid fps is above 450
184
+ shutil.rmtree(custom_interp_path, ignore_errors=True)
185
+ # delete duplicated raw non-interpolated frames
186
+ shutil.rmtree(temp_convert_raw_png_path, ignore_errors=True)
187
+ # remove folder with raw (non-interpolated) vid input frames in case of input VID and not PNGs
188
+ if orig_vid_name:
189
+ shutil.rmtree(raw_output_imgs_path, ignore_errors=True)
190
+
191
+ def check_and_download_film_model(model_name, model_dest_folder):
192
+ from basicsr.utils.download_util import load_file_from_url
193
+ if model_name == 'film_net_fp16.pt':
194
+ model_dest_path = os.path.join(model_dest_folder, model_name)
195
+ download_url = 'https://github.com/hithereai/frame-interpolation-pytorch/releases/download/film_net_fp16.pt/film_net_fp16.pt'
196
+ film_model_hash = '0a823815b111488ac2b7dd7fe6acdd25d35a22b703e8253587764cf1ee3f8f93676d24154d9536d2ce5bc3b2f102fb36dfe0ca230dfbe289d5cd7bde5a34ec12'
197
+ else: # Unknown FILM model
198
+ raise Exception("Got a request to download an unknown FILM model. Can't proceed.")
199
+ if os.path.exists(model_dest_path):
200
+ return
201
+ try:
202
+ os.makedirs(model_dest_folder, exist_ok=True)
203
+ # download film model from url
204
+ load_file_from_url(download_url, model_dest_folder)
205
+ # verify checksum
206
+ if checksum(model_dest_path) != film_model_hash:
207
+ raise Exception(f"Error while downloading {model_name}. Please download from: {download_url}, and put in: {model_dest_folder}")
208
+ except Exception as e:
209
+ raise Exception(f"Error while downloading {model_name}. Please download from: {download_url}, and put in: {model_dest_folder}")
210
+
211
+ # get film no. of frames to add after each pic from tot frames in interp_x values
212
+ def calculate_frames_to_add(total_frames, interp_x):
213
+ frames_to_add = (total_frames * interp_x - total_frames) / (total_frames - 1)
214
+ return int(round(frames_to_add))
215
+
216
+ def process_interp_pics_upload_logic(pic_list, engine, x_am, sl_enabled, sl_am, keep_imgs, f_location, f_crf, f_preset, fps, f_models_path, resolution, add_soundtrack, audio_track):
217
+ pic_path_list = [pic.name for pic in pic_list]
218
+ print(f"got a request to *frame interpolate* a set of {len(pic_list)} images.")
219
+ folder_name = clean_folder_name(Path(pic_list[0].orig_name).stem)
220
+ outdir_no_tmp = os.path.join(os.getcwd(), 'outputs', 'frame-interpolation', folder_name)
221
+ i = 1
222
+ while os.path.exists(outdir_no_tmp):
223
+ outdir_no_tmp = os.path.join(os.getcwd(), 'outputs', 'frame-interpolation', folder_name + '_' + str(i))
224
+ i += 1
225
+
226
+ outdir = os.path.join(outdir_no_tmp, 'tmp_input_frames')
227
+ os.makedirs(outdir, exist_ok=True)
228
+
229
+ convert_images_from_list(paths=pic_path_list, output_dir=outdir,format='png')
230
+
231
+ audio_file_to_pass = None
232
+ # todo? add handling of vid input sound? if needed at all...
233
+ if add_soundtrack == 'File':
234
+ audio_file_to_pass = audio_track
235
+ # todo: upgrade function so it takes a url and checks if the audio really exists before passing it on? not crucial as ffmpeg softly falls back if needed
236
+ # if media_file_has_audio(audio_track, f_location):
237
+
238
+ # pass param so it won't duplicate the images at all as we already do it in here?!
239
+ process_video_interpolation(frame_interpolation_engine=engine, frame_interpolation_x_amount=x_am, frame_interpolation_slow_mo_enabled = sl_enabled,frame_interpolation_slow_mo_amount=sl_am, orig_vid_fps=fps, deforum_models_path=f_models_path, real_audio_track=audio_file_to_pass, raw_output_imgs_path=outdir, img_batch_id=None, ffmpeg_location=f_location, ffmpeg_crf=f_crf, ffmpeg_preset=f_preset, keep_interp_imgs=keep_imgs, orig_vid_name=folder_name, resolution=resolution, dont_change_fps=True)
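Note (illustration): unlike RIFE, FILM is driven by a count of in-between frames per original pair rather than a multiplier, so calculate_frames_to_add back-solves that count from the requested total. A worked example:

total_frames, interp_x = 100, 3
frames_to_add = (total_frames * interp_x - total_frames) / (total_frames - 1)  # 200 / 99, about 2.02
print(int(round(frames_to_add)))  # 2 in-between frames per pair -> 100 + 99 * 2 = 298, close to the requested 300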
scripts/deforum_helpers/general_utils.py ADDED
@@ -0,0 +1,145 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ import shutil
20
+ import hashlib
21
+ from modules.shared import opts
22
+ from basicsr.utils.download_util import load_file_from_url
23
+
24
+ def debug_print(message):
25
+ DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False)
26
+ if DEBUG_MODE:
27
+ print(message)
28
+
29
+ def checksum(filename, hash_factory=hashlib.blake2b, chunk_num_blocks=128):
30
+ h = hash_factory()
31
+ with open(filename,'rb') as f:
32
+ while chunk := f.read(chunk_num_blocks*h.block_size):
33
+ h.update(chunk)
34
+ return h.hexdigest()
35
+
36
+ def get_os():
37
+ import platform
38
+ return {"Windows": "Windows", "Linux": "Linux", "Darwin": "Mac"}.get(platform.system(), "Unknown")
39
+
40
+ # used in src/rife/inference_video.py and more, soon
41
+ def duplicate_pngs_from_folder(from_folder, to_folder, img_batch_id, orig_vid_name):
42
+ import cv2
43
+ # TODO: don't copy-paste at all if the input is a video (currently it copy-pastes, and if the input is a deforum run it also converts, to make sure no errors arise because of 24/32-bit depth differences)
44
+ temp_convert_raw_png_path = os.path.join(from_folder, to_folder)
45
+ os.makedirs(temp_convert_raw_png_path, exist_ok=True)
46
+
47
+ frames_handled = 0
48
+ for f in os.listdir(from_folder):
49
+ if ('png' in f or 'jpg' in f) and '-' not in f and '_depth_' not in f and ((img_batch_id is not None and f.startswith(img_batch_id) or img_batch_id is None)):
50
+ frames_handled +=1
51
+ original_img_path = os.path.join(from_folder, f)
52
+ if orig_vid_name is not None:
53
+ shutil.copy(original_img_path, temp_convert_raw_png_path)
54
+ else:
55
+ image = cv2.imread(original_img_path)
56
+ new_path = os.path.join(temp_convert_raw_png_path, f)
57
+ cv2.imwrite(new_path, image, [cv2.IMWRITE_PNG_COMPRESSION, 0])
58
+ return frames_handled
59
+
60
+ def convert_images_from_list(paths, output_dir, format):
61
+ import os
62
+ from PIL import Image
63
+ # Ensure that the output directory exists
64
+ os.makedirs(output_dir, exist_ok=True)
65
+
66
+ # Loop over all input images
67
+ for i, path in enumerate(paths):
68
+ # Open the image
69
+ with Image.open(path) as img:
70
+ # Generate the output filename
71
+ filename = f"{i+1:09d}.{format}"
72
+ # Save the image to the output directory
73
+ img.save(os.path.join(output_dir, filename))
74
+
75
+ def get_deforum_version():
76
+ from modules import extensions as mext
77
+ try:
78
+ for ext in mext.extensions:
79
+ if ext.name in ["deforum", "deforum-for-automatic1111-webui", "sd-webui-deforum"] and ext.enabled:
80
+ ext.read_info_from_repo() # need this call to get exten info on ui-launch, not to be removed
81
+ return ext.version
82
+ return "Unknown"
83
+ except:
84
+ return "Unknown"
85
+
86
+ def custom_placeholder_format(value_dict, placeholder_match):
87
+ key = placeholder_match.group(1).lower()
88
+ value = value_dict.get(key, key) or "_"
89
+ if isinstance(value, dict) and value:
90
+ first_key = list(value.keys())[0]
91
+ value = str(value[first_key][0]) if isinstance(value[first_key], list) and value[first_key] else str(value[first_key])
92
+ return str(value)[:50]
93
+
94
+ def test_long_path_support(base_folder_path):
95
+ long_folder_name = 'A' * 300
96
+ long_path = os.path.join(base_folder_path, long_folder_name)
97
+ try:
98
+ os.makedirs(long_path)
99
+ shutil.rmtree(long_path)
100
+ return True
101
+ except OSError:
102
+ return False
103
+
104
+ def get_max_path_length(base_folder_path):
105
+ if get_os() == 'Windows':
106
+ return (32767 if test_long_path_support(base_folder_path) else 260) - len(base_folder_path) - 1
107
+ return 4096 - len(base_folder_path) - 1
108
+
109
+ def substitute_placeholders(template, arg_list, base_folder_path):
110
+ import re
111
+ # Find and update timestring values if resume_from_timestring is True
112
+ resume_from_timestring = next((arg_obj.resume_from_timestring for arg_obj in arg_list if hasattr(arg_obj, 'resume_from_timestring')), False)
113
+ resume_timestring = next((arg_obj.resume_timestring for arg_obj in arg_list if hasattr(arg_obj, 'resume_timestring')), None)
114
+
115
+ if resume_from_timestring and resume_timestring:
116
+ for arg_obj in arg_list:
117
+ if hasattr(arg_obj, 'timestring'):
118
+ arg_obj.timestring = resume_timestring
119
+
120
+ max_length = get_max_path_length(base_folder_path)
121
+ values = {attr.lower(): getattr(arg_obj, attr)
122
+ for arg_obj in arg_list
123
+ for attr in dir(arg_obj) if not callable(getattr(arg_obj, attr)) and not attr.startswith('__')}
124
+ formatted_string = re.sub(r"{(\w+)}", lambda m: custom_placeholder_format(values, m), template)
125
+ formatted_string = re.sub(r'[<>:"/\\|?*\s,]', '_', formatted_string)
126
+ return formatted_string[:max_length]
127
+
128
+ def count_files_in_folder(folder_path):
129
+ import glob
130
+ file_pattern = folder_path + "/*"
131
+ file_count = len(glob.glob(file_pattern))
132
+ return file_count
133
+
134
+ def clean_gradio_path_strings(input_str):
135
+ if isinstance(input_str, str) and input_str.startswith('"') and input_str.endswith('"'):
136
+ return input_str[1:-1]
137
+ else:
138
+ return input_str
139
+
140
+ def download_file_with_checksum(url, expected_checksum, dest_folder, dest_filename):
141
+ expected_full_path = os.path.join(dest_folder, dest_filename)
142
+ if not os.path.exists(expected_full_path) and not os.path.isdir(expected_full_path):
143
+ load_file_from_url(url=url, model_dir=dest_folder, file_name=dest_filename, progress=True)
144
+ if checksum(expected_full_path) != expected_checksum:
145
+ raise Exception(f"Error while downloading {dest_filename}.\nPlease manually download from: {url}\nAnd place it in: {dest_folder}")
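Note (illustration): substitute_placeholders fills {placeholder} tokens in an output-folder template from the attributes of the passed arg objects, falls back to the key name when an attribute is missing, and replaces path-illegal characters with underscores. A sketch with invented values, using SimpleNamespace as a stand-in for the real args objects:

from types import SimpleNamespace

args = SimpleNamespace(seed=42, sampler="Euler a", resume_from_timestring=False)
anim_args = SimpleNamespace(animation_mode="3D", max_frames=120)
template = "{timestring}_{animation_mode}_{seed}_{sampler}"
print(substitute_placeholders(template, [args, anim_args], "/tmp/outputs"))
# -> "timestring_3D_42_Euler_a"  (unknown key falls back to the key name, the space becomes '_')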
scripts/deforum_helpers/generate.py ADDED
@@ -0,0 +1,324 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ from PIL import Image
19
+ import math
20
+ import json
21
+ import itertools
22
+ import requests
23
+ import numexpr
24
+ from modules import processing, sd_models
25
+ from modules.shared import sd_model, state, cmd_opts
26
+ from .deforum_controlnet import is_controlnet_enabled, process_with_controlnet
27
+ from .prompt import split_weighted_subprompts
28
+ from .load_images import load_img, prepare_mask, check_mask_for_errors
29
+ from .webui_sd_pipeline import get_webui_sd_pipeline
30
+ from .rich import console
31
+ from .defaults import get_samplers_list
32
+ from .prompt import check_is_number
33
+
34
+ def load_mask_latent(mask_input, shape):
35
+ # mask_input (str or PIL Image.Image): Path to the mask image or a PIL Image object
36
+ # shape (list-like len(4)): shape of the image to match, usually latent_image.shape
37
+
38
+ if isinstance(mask_input, str): # mask input is probably a file name
39
+ if mask_input.startswith('http://') or mask_input.startswith('https://'):
40
+ mask_image = Image.open(requests.get(mask_input, stream=True).raw).convert('RGBA')
41
+ else:
42
+ mask_image = Image.open(mask_input).convert('RGBA')
43
+ elif isinstance(mask_input, Image.Image):
44
+ mask_image = mask_input
45
+ else:
46
+ raise Exception("mask_input must be a PIL image or a file name")
47
+
48
+ mask_w_h = (shape[-1], shape[-2])
49
+ mask = mask_image.resize(mask_w_h, resample=Image.LANCZOS)
50
+ mask = mask.convert("L")
51
+ return mask
52
+
53
+ def isJson(myjson):
54
+ try:
55
+ json.loads(myjson)
56
+ except ValueError as e:
57
+ return False
58
+ return True
59
+
60
+ # Add pairwise implementation here not to upgrade
61
+ # the whole python to 3.10 just for one function
62
+ def pairwise_repl(iterable):
63
+ a, b = itertools.tee(iterable)
64
+ next(b, None)
65
+ return zip(a, b)
66
+
67
+ def generate(args, keys, anim_args, loop_args, controlnet_args, root, frame=0, sampler_name=None):
68
+ if state.interrupted:
69
+ return None
70
+
71
+ if args.reroll_blank_frames == 'ignore':
72
+ return generate_inner(args, keys, anim_args, loop_args, controlnet_args, root, frame, sampler_name)
73
+
74
+ image, caught_vae_exception = generate_with_nans_check(args, keys, anim_args, loop_args, controlnet_args, root, frame, sampler_name)
75
+
76
+ if caught_vae_exception or not image.getbbox():
77
+ patience = args.reroll_patience
78
+ print("Blank frame detected! If you don't have the NSFW filter enabled, this may be due to a glitch!")
79
+ if args.reroll_blank_frames == 'reroll':
80
+ while caught_vae_exception or not image.getbbox():
81
+ print("Rerolling with +1 seed...")
82
+ args.seed += 1
83
+ image, caught_vae_exception = generate_with_nans_check(args, keys, anim_args, loop_args, controlnet_args, root, frame, sampler_name)
84
+ patience -= 1
85
+ if patience == 0:
86
+ print(f"Rerolling with +1 seed failed for {args.reroll_patience} iterations! Try setting webui's precision to 'full' and if it fails, please report this to the devs! Interrupting...")
87
+ state.interrupted = True
88
+ state.current_image = image
89
+ return None
90
+ elif args.reroll_blank_frames == 'interrupt':
91
+ print("Interrupting to save your eyes...")
92
+ state.interrupted = True
93
+ state.current_image = image
94
+ return None
95
+ return image
96
+
97
+ def generate_with_nans_check(args, keys, anim_args, loop_args, controlnet_args, root, frame=0, sampler_name=None):
98
+ if cmd_opts.disable_nan_check:
99
+ image = generate_inner(args, keys, anim_args, loop_args, controlnet_args, root, frame, sampler_name)
100
+ else:
101
+ try:
102
+ image = generate_inner(args, keys, anim_args, loop_args, controlnet_args, root, frame, sampler_name)
103
+ except Exception as e:
104
+ if "A tensor with all NaNs was produced in VAE." in repr(e):
105
+ print(e)
106
+ return None, True
107
+ else:
108
+ raise e
109
+ return image, False
110
+
111
+ def generate_inner(args, keys, anim_args, loop_args, controlnet_args, root, frame=0, sampler_name=None):
112
+ # Setup the pipeline
113
+ p = get_webui_sd_pipeline(args, root)
114
+ p.prompt, p.negative_prompt = split_weighted_subprompts(args.prompt, frame, anim_args.max_frames)
115
+
116
+ if not args.use_init and args.strength > 0 and args.strength_0_no_init:
117
+ args.strength = 0
118
+ processed = None
119
+ mask_image = None
120
+ init_image = None
121
+ image_init0 = None
122
+
123
+ if loop_args.use_looper and anim_args.animation_mode in ['2D', '3D']:
124
+ args.strength = loop_args.imageStrength
125
+ tweeningFrames = loop_args.tweeningFrameSchedule
126
+ blendFactor = .07
127
+ colorCorrectionFactor = loop_args.colorCorrectionFactor
128
+ jsonImages = json.loads(loop_args.imagesToKeyframe)
129
+ # find which image to show
130
+ parsedImages = {}
131
+ frameToChoose = 0
132
+ max_f = anim_args.max_frames - 1
133
+
134
+ for key, value in jsonImages.items():
135
+ if check_is_number(key): # default case 0:(1 + t %5), 30:(5-t%2)
136
+ parsedImages[key] = value
137
+ else: # math on the left hand side case 0:(1 + t %5), maxKeyframes/2:(5-t%2)
138
+ parsedImages[int(numexpr.evaluate(key))] = value
139
+
140
+ framesToImageSwapOn = list(map(int, list(parsedImages.keys())))
141
+
142
+ for swappingFrame in framesToImageSwapOn[1:]:
143
+ frameToChoose += (frame >= int(swappingFrame))
144
+
145
+ # find which frame to do our swapping on for tweening
146
+ skipFrame = 25
147
+ for fs, fe in pairwise_repl(framesToImageSwapOn):
148
+ if fs <= frame <= fe:
149
+ skipFrame = fe - fs
150
+
151
+ if frame % skipFrame <= tweeningFrames: # number of tweening frames
152
+ blendFactor = loop_args.blendFactorMax - loop_args.blendFactorSlope * math.cos((frame % tweeningFrames) / (tweeningFrames / 2))
153
+ init_image2, _ = load_img(list(jsonImages.values())[frameToChoose],
154
+ shape=(args.W, args.H),
155
+ use_alpha_as_mask=args.use_alpha_as_mask)
156
+ image_init0 = list(jsonImages.values())[0]
157
+
158
+ else: # they passed in a single init image
159
+ image_init0 = args.init_image
160
+
161
+ available_samplers = get_samplers_list()
162
+ if sampler_name is not None:
163
+ if sampler_name in available_samplers.keys():
164
+ p.sampler_name = available_samplers[sampler_name]
165
+ else:
166
+ raise RuntimeError(f"Sampler name '{sampler_name}' is invalid. Please check the available sampler list in the 'Run' tab")
167
+
168
+ if args.checkpoint is not None:
169
+ info = sd_models.get_closet_checkpoint_match(args.checkpoint)
170
+ if info is None:
171
+ raise RuntimeError(f"Unknown checkpoint: {args.checkpoint}")
172
+ sd_models.reload_model_weights(info=info)
173
+
174
+ if args.init_sample is not None:
175
+ # TODO: cleanup init_sample remains later
176
+ img = args.init_sample
177
+ init_image = img
178
+ image_init0 = img
179
+ if loop_args.use_looper and isJson(loop_args.imagesToKeyframe) and anim_args.animation_mode in ['2D', '3D']:
180
+ init_image = Image.blend(init_image, init_image2, blendFactor)
181
+ correction_colors = Image.blend(init_image, init_image2, colorCorrectionFactor)
182
+ p.color_corrections = [processing.setup_color_correction(correction_colors)]
183
+
184
+ # this is the first pass
185
+ elif (loop_args.use_looper and anim_args.animation_mode in ['2D', '3D']) or (args.use_init and args.init_image is not None and args.init_image != ''):
186
+ init_image, mask_image = load_img(image_init0, # initial init image
187
+ shape=(args.W, args.H),
188
+ use_alpha_as_mask=args.use_alpha_as_mask)
189
+
190
+ else:
191
+
192
+ if anim_args.animation_mode != 'Interpolation':
193
+ print(f"Not using an init image (doing pure txt2img)")
194
+ p_txt = processing.StableDiffusionProcessingTxt2Img(
195
+ sd_model=sd_model,
196
+ outpath_samples=root.tmp_deforum_run_duplicated_folder,
197
+ outpath_grids=root.tmp_deforum_run_duplicated_folder,
198
+ prompt=p.prompt,
199
+ styles=p.styles,
200
+ negative_prompt=p.negative_prompt,
201
+ seed=p.seed,
202
+ subseed=p.subseed,
203
+ subseed_strength=p.subseed_strength,
204
+ seed_resize_from_h=p.seed_resize_from_h,
205
+ seed_resize_from_w=p.seed_resize_from_w,
206
+ sampler_name=p.sampler_name,
207
+ batch_size=p.batch_size,
208
+ n_iter=p.n_iter,
209
+ steps=p.steps,
210
+ cfg_scale=p.cfg_scale,
211
+ width=p.width,
212
+ height=p.height,
213
+ restore_faces=p.restore_faces,
214
+ tiling=p.tiling,
215
+ enable_hr=False,
216
+ denoising_strength=0,
217
+ )
218
+
219
+ print_combined_table(args, anim_args, p_txt, keys, frame) # print dynamic table to cli
220
+
221
+ if is_controlnet_enabled(controlnet_args):
222
+ process_with_controlnet(p_txt, args, anim_args, loop_args, controlnet_args, root, is_img2img=False, frame_idx=frame)
223
+
224
+ processed = processing.process_images(p_txt)
225
+
226
+ if processed is None:
227
+ # Mask functions
228
+ if args.use_mask:
229
+ mask_image = args.mask_image
230
+ mask = prepare_mask(args.mask_file if mask_image is None else mask_image,
231
+ (args.W, args.H),
232
+ args.mask_brightness_adjust,
233
+ args.mask_contrast_adjust)
234
+ p.inpainting_mask_invert = args.invert_mask
235
+ p.inpainting_fill = args.fill
236
+ p.inpaint_full_res = args.full_res_mask
237
+ p.inpaint_full_res_padding = args.full_res_mask_padding
238
+ # prevent the loaded mask from throwing errors in Image operations (and in the webui crop-and-resize pipeline) if it is completely black
239
+ # doing this after contrast and brightness adjustments to ensure that mask is not passed as black or blank
240
+ mask = check_mask_for_errors(mask, args.invert_mask)
241
+ args.noise_mask = mask
242
+ else:
243
+ mask = None
244
+
245
+ assert not ((mask is not None and args.use_mask and args.overlay_mask) and (
246
+ args.init_sample is None and init_image is None)), "Need an init image when use_mask == True and overlay_mask == True"
247
+
248
+ p.init_images = [init_image]
249
+ p.image_mask = mask
250
+ p.image_cfg_scale = args.pix2pix_img_cfg_scale
251
+
252
+ print_combined_table(args, anim_args, p, keys, frame) # print dynamic table to cli
253
+
254
+ if is_controlnet_enabled(controlnet_args):
255
+ process_with_controlnet(p, args, anim_args, loop_args, controlnet_args, root, is_img2img=True, frame_idx=frame)
256
+
257
+ processed = processing.process_images(p)
258
+
259
+ if root.initial_info is None:
260
+ root.initial_info = processed.info
261
+
262
+ if root.first_frame is None:
263
+ root.first_frame = processed.images[0]
264
+
265
+ results = processed.images[0]
266
+
267
+ return results
268
+
269
+ def print_combined_table(args, anim_args, p, keys, frame_idx):
270
+ from rich.table import Table
271
+ from rich import box
272
+
273
+ table = Table(padding=0, box=box.ROUNDED)
274
+
275
+ field_names1 = ["Steps", "CFG"]
276
+ if anim_args.animation_mode != 'Interpolation':
277
+ field_names1.append("Denoise")
278
+ field_names1 += ["Subseed", "Subs. str"] * (anim_args.enable_subseed_scheduling)
279
+ field_names1 += ["Sampler"] * anim_args.enable_sampler_scheduling
280
+ field_names1 += ["Checkpoint"] * anim_args.enable_checkpoint_scheduling
281
+
282
+ for field_name in field_names1:
283
+ table.add_column(field_name, justify="center")
284
+
285
+ rows1 = [str(p.steps), str(p.cfg_scale)]
286
+ if anim_args.animation_mode != 'Interpolation':
287
+ rows1.append(f"{p.denoising_strength:.5g}" if p.denoising_strength is not None else "None")
288
+
289
+ rows1 += [str(p.subseed), f"{p.subseed_strength:.5g}"] * anim_args.enable_subseed_scheduling
290
+ rows1 += [p.sampler_name] * anim_args.enable_sampler_scheduling
291
+ rows1 += [str(args.checkpoint)] * anim_args.enable_checkpoint_scheduling
292
+
293
+ rows2 = []
294
+ if anim_args.animation_mode not in ['Video Input', 'Interpolation']:
295
+ if anim_args.animation_mode == '2D':
296
+ field_names2 = ["Angle", "Zoom"]
297
+ else:
298
+ field_names2 = []
299
+ field_names2 += ["Tr X", "Tr Y"]
300
+ if anim_args.animation_mode == '3D':
301
+ field_names2 += ["Tr Z", "Ro X", "Ro Y", "Ro Z"]
302
+ if anim_args.aspect_ratio_schedule.replace(" ", "") != '0:(1)':
303
+ field_names2 += ["Asp. Ratio"]
304
+ if anim_args.enable_perspective_flip:
305
+ field_names2 += ["Pf T", "Pf P", "Pf G", "Pf F"]
306
+
307
+ for field_name in field_names2:
308
+ table.add_column(field_name, justify="center")
309
+
310
+ if anim_args.animation_mode == '2D':
311
+ rows2 += [f"{keys.angle_series[frame_idx]:.5g}", f"{keys.zoom_series[frame_idx]:.5g}"]
312
+ rows2 += [f"{keys.translation_x_series[frame_idx]:.5g}", f"{keys.translation_y_series[frame_idx]:.5g}"]
313
+
314
+ if anim_args.animation_mode == '3D':
315
+ rows2 += [f"{keys.translation_z_series[frame_idx]:.5g}", f"{keys.rotation_3d_x_series[frame_idx]:.5g}",
316
+ f"{keys.rotation_3d_y_series[frame_idx]:.5g}", f"{keys.rotation_3d_z_series[frame_idx]:.5g}"]
317
+ if anim_args.aspect_ratio_schedule.replace(" ", "") != '0:(1)':
318
+ rows2 += [f"{keys.aspect_ratio_series[frame_idx]:.5g}"]
319
+ if anim_args.enable_perspective_flip:
320
+ rows2 += [f"{keys.perspective_flip_theta_series[frame_idx]:.5g}", f"{keys.perspective_flip_phi_series[frame_idx]:.5g}",
321
+ f"{keys.perspective_flip_gamma_series[frame_idx]:.5g}", f"{keys.perspective_flip_fv_series[frame_idx]:.5g}"]
322
+
323
+ table.add_row(*rows1, *rows2)
324
+ console.print(table)
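For context on how the pieces above fit together: generate_with_nans_check only reports whether the VAE produced NaNs, while the reroll/interrupt branch near the top of this file decides what to do about a blank frame. A minimal, self-contained sketch of that retry pattern follows; fake_generate, the seed bump, and max_rerolls are illustrative stand-ins, not the extension's actual API.

import random

def fake_generate(seed):
    # stand-in for generate_with_nans_check(); returns (image, nans_found)
    nans_found = random.random() < 0.3  # pretend ~30% of samples NaN out
    return (None, True) if nans_found else (f"image@seed{seed}", False)

def generate_with_reroll(seed, reroll_policy="reroll", max_rerolls=3):
    image, nans = fake_generate(seed)
    tries_left = max_rerolls
    while nans and reroll_policy == "reroll" and tries_left > 0:
        seed += 1                       # pick a fresh seed and retry
        image, nans = fake_generate(seed)
        tries_left -= 1
    return None if nans else image      # None signals "interrupt or skip" to the caller

print(generate_with_reroll(42))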
scripts/deforum_helpers/gradio_funcs.py ADDED
@@ -0,0 +1,214 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import gradio as gr
19
+ import modules.paths as ph
20
+ from .general_utils import get_os
21
+ from .upscaling import process_ncnn_upscale_vid_upload_logic
22
+ from .video_audio_utilities import extract_number, get_quick_vid_info, get_ffmpeg_params
23
+ from .frame_interpolation import process_interp_vid_upload_logic, process_interp_pics_upload_logic
24
+ from .vid2depth import process_depth_vid_upload_logic
25
+
26
+ f_models_path = ph.models_path + '/Deforum'
27
+
28
+ # START gradio-to-frame-interpolation / upscaling functions
29
+ def upload_vid_to_interpolate(file, engine, x_am, sl_enabled, sl_am, keep_imgs, in_vid_fps):
30
+ # print msg and do nothing if vid not uploaded or interp_x not provided
31
+ if not file or engine == 'None':
32
+ return print("Please upload a video and set a proper value for 'Interp X'. Can't interpolate x0 times :)")
33
+ f_location, f_crf, f_preset = get_ffmpeg_params()
34
+
35
+ process_interp_vid_upload_logic(file, engine, x_am, sl_enabled, sl_am, keep_imgs, f_location, f_crf, f_preset, in_vid_fps, f_models_path, file.orig_name)
36
+
37
+ def upload_pics_to_interpolate(pic_list, engine, x_am, sl_enabled, sl_am, keep_imgs, fps, add_audio, audio_track):
38
+ from PIL import Image
39
+
40
+ if pic_list is None or len(pic_list) < 2:
41
+ return print("Please upload at least 2 pics for interpolation.")
42
+ f_location, f_crf, f_preset = get_ffmpeg_params()
43
+ # make sure all uploaded pics have the same resolution
44
+ pic_sizes = [Image.open(picture_path.name).size for picture_path in pic_list]
45
+ if len(set(pic_sizes)) != 1:
46
+ return print("All uploaded pics need to be of the same Width and Height / resolution.")
47
+
48
+ resolution = pic_sizes[0]
49
+
50
+ process_interp_pics_upload_logic(pic_list, engine, x_am, sl_enabled, sl_am, keep_imgs, f_location, f_crf, f_preset, fps, f_models_path, resolution, add_audio, audio_track)
51
+
52
+ def ncnn_upload_vid_to_upscale(vid_path, in_vid_fps, in_vid_res, out_vid_res, upscale_model, upscale_factor, keep_imgs):
53
+ if vid_path is None:
54
+ print("Please upload a video :)")
55
+ return
56
+ f_location, f_crf, f_preset = get_ffmpeg_params()
57
+ current_user = get_os()
58
+ process_ncnn_upscale_vid_upload_logic(vid_path, in_vid_fps, in_vid_res, out_vid_res, f_models_path, upscale_model, upscale_factor, keep_imgs, f_location, f_crf, f_preset, current_user)
59
+
60
+ def upload_vid_to_depth(vid_to_depth_chosen_file, mode, thresholding, threshold_value, threshold_value_max, adapt_block_size, adapt_c, invert, end_blur, midas_weight_vid2depth, depth_keep_imgs):
61
+ # print msg and do nothing if vid not uploaded
62
+ if not vid_to_depth_chosen_file:
63
+ return print("Please upload a video :()")
64
+ f_location, f_crf, f_preset = get_ffmpeg_params()
65
+
66
+ process_depth_vid_upload_logic(vid_to_depth_chosen_file, mode, thresholding, threshold_value, threshold_value_max, adapt_block_size, adapt_c, invert, end_blur, midas_weight_vid2depth,
67
+ vid_to_depth_chosen_file.orig_name, depth_keep_imgs, f_location, f_crf, f_preset, f_models_path)
68
+
69
+ # END gradio-to-frame-interpolation / upscaling functions
70
+
71
+ def change_visibility_from_skip_video(choice):
72
+ return gr.update(visible=False) if choice else gr.update(visible=True)
73
+
74
+ def update_r_upscale_factor(choice):
75
+ return gr.update(value='x4', choices=['x4']) if choice != 'realesr-animevideov3' else gr.update(value='x2', choices=['x2', 'x3', 'x4'])
76
+
77
+ def change_perlin_visibility(choice):
78
+ return gr.update(visible=choice == "perlin")
79
+
80
+ def legacy_3d_mode(choice):
81
+ return gr.update(visible=choice.lower() in ["midas+adabins (old)", 'zoe+adabins (old)'])
82
+
83
+ def change_color_coherence_image_path_visibility(choice):
84
+ return gr.update(visible=choice == "Image")
85
+
86
+ def change_color_coherence_video_every_N_frames_visibility(choice):
87
+ return gr.update(visible=choice == "Video Input")
88
+
89
+ def change_seed_iter_visibility(choice):
90
+ return gr.update(visible=choice == "iter")
91
+
92
+ def change_seed_schedule_visibility(choice):
93
+ return gr.update(visible=choice == "schedule")
94
+
95
+ def disable_pers_flip_accord(choice):
96
+ return gr.update(visible=True) if choice in ['2D', '3D'] else gr.update(visible=False)
97
+
98
+ def per_flip_handle(anim_mode, per_f_enabled):
99
+ if anim_mode in ['2D', '3D'] and per_f_enabled:
100
+ return gr.update(visible=True)
101
+ return gr.update(visible=False)
102
+
103
+ def change_max_frames_visibility(choice):
104
+ return gr.update(visible=choice != "Video Input")
105
+
106
+ def change_diffusion_cadence_visibility(choice):
107
+ return gr.update(visible=choice not in ['Video Input', 'Interpolation'])
108
+
109
+ def disble_3d_related_stuff(choice):
110
+ return gr.update(visible=False) if choice != '3D' else gr.update(visible=True)
111
+
112
+ def only_show_in_non_3d_mode(choice):
113
+ return gr.update(visible=False) if choice == '3D' else gr.update(visible=True)
114
+
115
+ def enable_2d_related_stuff(choice):
116
+ return gr.update(visible=True) if choice == '2D' else gr.update(visible=False)
117
+
118
+ def disable_by_interpolation(choice):
119
+ return gr.update(visible=False) if choice in ['Interpolation'] else gr.update(visible=True)
120
+
121
+ def disable_by_video_input(choice):
122
+ return gr.update(visible=False) if choice in ['Video Input'] else gr.update(visible=True)
123
+
124
+ def hide_if_none(choice):
125
+ return gr.update(visible=choice != "None")
126
+
127
+ def change_gif_button_visibility(choice):
128
+ if choice is None or choice == "":
129
+ return gr.update(visible=True)
130
+ return gr.update(visible=False, value=False) if int(choice) > 30 else gr.update(visible=True)
131
+
132
+ def hide_if_false(choice):
133
+ return gr.update(visible=True) if choice else gr.update(visible=False)
134
+
135
+ def hide_if_true(choice):
136
+ return gr.update(visible=False) if choice else gr.update(visible=True)
137
+
138
+ def disable_by_hybrid_composite_dynamic(choice, comp_mask_type):
139
+ if choice in ['Normal', 'Before Motion', 'After Generation']:
140
+ if comp_mask_type != 'None':
141
+ return gr.update(visible=True)
142
+ return gr.update(visible=False)
143
+
144
+ def disable_by_non_optical_flow(choice):
145
+ return gr.update(visible=False) if choice != 'Optical Flow' else gr.update(visible=True)
146
+
147
+ # Upscaling Gradio UI related funcs
148
+ def vid_upscale_gradio_update_stats(vid_path, upscale_factor):
149
+ if not vid_path:
150
+ return '---', '---', '---', '---'
151
+ factor = extract_number(upscale_factor)
152
+ fps, fcount, resolution = get_quick_vid_info(vid_path.name)
153
+ in_res_str = f"{resolution[0]}*{resolution[1]}"
154
+ out_res_str = f"{resolution[0] * factor}*{resolution[1] * factor}"
155
+ return fps, fcount, in_res_str, out_res_str
156
+
157
+ def update_upscale_out_res(in_res, upscale_factor):
158
+ if not in_res:
159
+ return '---'
160
+ factor = extract_number(upscale_factor)
161
+ w, h = [int(x) * factor for x in in_res.split('*')]
162
+ return f"{w}*{h}"
163
+
164
+ def update_upscale_out_res_by_model_name(in_res, upscale_model_name):
165
+ if not upscale_model_name or in_res == '---':
166
+ return '---'
167
+ factor = 2 if upscale_model_name == 'realesr-animevideov3' else 4
168
+ return f"{int(in_res.split('*')[0]) * factor}*{int(in_res.split('*')[1]) * factor}"
169
+
170
+ def hide_optical_flow_cadence(cadence_value):
171
+ return gr.update(visible=True) if cadence_value > 1 else gr.update(visible=False)
172
+
173
+ def hide_interp_by_interp_status(choice):
174
+ return gr.update(visible=False) if choice == 'None' else gr.update(visible=True)
175
+
176
+ def change_interp_x_max_limit(engine_name, current_value):
177
+ if engine_name == 'FILM':
178
+ return gr.update(maximum=300)
179
+ elif current_value > 10:
180
+ return gr.update(maximum=10, value=2)
181
+ return gr.update(maximum=10)
182
+
183
+ def hide_interp_stats(choice):
184
+ return gr.update(visible=True) if choice is not None else gr.update(visible=False)
185
+
186
+ def show_hybrid_html_msg(choice):
187
+ return gr.update(visible=True) if choice not in ['2D', '3D'] else gr.update(visible=False)
188
+
189
+ def change_hybrid_tab_status(choice):
190
+ return gr.update(visible=True) if choice in ['2D', '3D'] else gr.update(visible=False)
191
+
192
+ def show_leres_html_msg(choice):
193
+ return gr.update(visible=True) if choice.lower() == 'leres' else gr.update(visible=False)
194
+
195
+ def show_when_ddim(sampler_name):
196
+ return gr.update(visible=True) if sampler_name.lower() == 'ddim' else gr.update(visible=False)
197
+
198
+ def show_when_ancestral_samplers(sampler_name):
199
+ return gr.update(visible=True) if sampler_name.lower() in ['euler a', 'dpm++ 2s a', 'dpm2 a', 'dpm2 a karras', 'dpm++ 2s a karras'] else gr.update(visible=False)
200
+
201
+ def change_css(checkbox_status):
202
+ if checkbox_status:
203
+ display = "block"
204
+ else:
205
+ display = "none"
206
+
207
+ html_template = f'''
208
+ <style>
209
+ #tab_deforum_interface .svelte-e8n7p6, #f_interp_accord {{
210
+ display: {display} !important;
211
+ }}
212
+ </style>
213
+ '''
214
+ return html_template
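These helpers all return gr.update(...) objects and are meant to be bound to Gradio change events elsewhere in the extension's UI code. A rough wiring sketch using two of the functions above is shown below; the component names, choices, and defaults are made up for illustration.

import gradio as gr

with gr.Blocks() as demo:
    seed_behavior = gr.Dropdown(["iter", "fixed", "schedule"], value="iter", label="Seed behavior")
    seed_iter_n = gr.Number(value=1, label="Seed iter N", visible=True)
    seed_schedule = gr.Textbox(value="0:(s)", label="Seed schedule", visible=False)
    # each change of the dropdown toggles the matching control's visibility
    seed_behavior.change(fn=change_seed_iter_visibility, inputs=seed_behavior, outputs=seed_iter_n)
    seed_behavior.change(fn=change_seed_schedule_visibility, inputs=seed_behavior, outputs=seed_schedule)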
scripts/deforum_helpers/human_masking.py ADDED
@@ -0,0 +1,87 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os, cv2
19
+ import torch
20
+ from pathlib import Path
21
+ from multiprocessing import freeze_support
22
+
23
+ def extract_frames(input_video_path, output_imgs_path):
24
+ # Open the video file
25
+ vidcap = cv2.VideoCapture(input_video_path)
26
+
27
+ # Get the total number of frames in the video
28
+ frame_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
29
+
30
+ # Create the output directory if it does not exist
31
+ os.makedirs(output_imgs_path, exist_ok=True)
32
+
33
+ # Extract the frames
34
+ for i in range(frame_count):
35
+ success, image = vidcap.read()
36
+ if success:
37
+ cv2.imwrite(os.path.join(output_imgs_path, f"frame{i}.png"), image)
38
+ print(f"{frame_count} frames extracted and saved to {output_imgs_path}")
39
+
40
+ def video2humanmasks(input_frames_path, output_folder_path, output_type, fps):
41
+ # freeze support is needed for video outputting
42
+ freeze_support()
43
+
44
+ # check if input path exists and is a directory
45
+ if not os.path.exists(input_frames_path) or not os.path.isdir(input_frames_path):
46
+ raise ValueError("Invalid input path: {}".format(input_frames_path))
47
+
48
+ # check if output path exists and is a directory
49
+ if not os.path.exists(output_folder_path) or not os.path.isdir(output_folder_path):
50
+ raise ValueError("Invalid output path: {}".format(output_folder_path))
51
+
52
+ # check if output_type is valid
53
+ valid_output_types = ["video", "pngs", "both"]
54
+ if output_type.lower() not in valid_output_types:
55
+ raise ValueError("Invalid output type: {}. Must be one of {}".format(output_type, valid_output_types))
56
+
57
+ # try to predict where torch cache lives, so we can try and fetch models from cache in the next step
58
+ predicted_torch_model_cache_path = os.path.join(Path.home(), ".cache", "torch", "hub", "hithereai_RobustVideoMatting_master")
59
+ predicted_rvm_cache_testfile = os.path.join(predicted_torch_model_cache_path, "hubconf.py")
60
+
61
+ # try to fetch the models from cache, and only if they can't be found, download from the internet (to enable offline usage)
62
+ try:
63
+ # Try to fetch the models from cache
64
+ convert_video = torch.hub.load(predicted_torch_model_cache_path, "converter", source='local')
65
+ model = torch.hub.load(predicted_torch_model_cache_path, "resnet50", source='local').cuda()
66
+ except:
67
+ # Download from the internet if not found in cache
68
+ convert_video = torch.hub.load("hithereai/RobustVideoMatting", "converter")
69
+ model = torch.hub.load("hithereai/RobustVideoMatting", "resnet50").cuda()
70
+
71
+ output_alpha_vid_path = os.path.join(output_folder_path, "human_masked_video.mp4")
72
+ # extract human masks from the input folder's imgs.
73
+ # in this step PNGs will be extracted only if output_type is set to PNGs. Otherwise a video will be made, and in the case of Both, the video will be extracted in the next step to PNGs
74
+ convert_video(
75
+ model,
76
+ input_source=input_frames_path, # full path of the folder that contains all of the extracted input imgs
77
+ output_type='video' if output_type.upper() in ("VIDEO", "BOTH") else 'png_sequence',
78
+ output_alpha=output_alpha_vid_path if output_type.upper() in ("VIDEO", "BOTH") else output_folder_path,
79
+ output_video_mbps=4,
80
+ output_video_fps=fps,
81
+ downsample_ratio=None, # None for auto
82
+ seq_chunk=12, # Process n frames at once for better parallelism
83
+ progress=True # show extraction progress
84
+ )
85
+
86
+ if output_type.lower() == "both":
87
+ extract_frames(output_alpha_vid_path, output_folder_path)
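A rough usage sketch for video2humanmasks, assuming input frames have already been extracted to a folder; the paths and fps below are placeholders, not values taken from the extension.

# hypothetical paths; both directories must already exist
input_frames = "outputs/run01/inputframes"
masks_out = "outputs/run01/human_masks"

# with output_type="both": writes human_masked_video.mp4 into masks_out,
# then dumps per-frame PNG masks from that video
video2humanmasks(input_frames, masks_out, output_type="both", fps=12.5)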
scripts/deforum_helpers/hybrid_video.py ADDED
@@ -0,0 +1,611 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ import pathlib
20
+ import random
21
+ import cv2
22
+ import numpy as np
23
+ import PIL
24
+ from PIL import Image, ImageChops, ImageOps, ImageEnhance
25
+ from scipy.ndimage.filters import gaussian_filter
26
+ from .consistency_check import make_consistency
27
+ from .human_masking import video2humanmasks
28
+ from .load_images import load_image
29
+ from .video_audio_utilities import vid2frames, get_quick_vid_info, get_frame_name
30
+
31
+ def delete_all_imgs_in_folder(folder_path):
32
+ files = list(pathlib.Path(folder_path).glob('*.jpg'))
33
+ files.extend(list(pathlib.Path(folder_path).glob('*.png')))
34
+ for f in files: os.remove(f)
35
+
36
+ def hybrid_generation(args, anim_args, root):
37
+ video_in_frame_path = os.path.join(args.outdir, 'inputframes')
38
+ hybrid_frame_path = os.path.join(args.outdir, 'hybridframes')
39
+ human_masks_path = os.path.join(args.outdir, 'human_masks')
40
+
41
+ # create hybridframes folder whether using init_image or inputframes
42
+ os.makedirs(hybrid_frame_path, exist_ok=True)
43
+
44
+ if anim_args.hybrid_generate_inputframes:
45
+ # create folders for the video input frames and optional hybrid frames to live in
46
+ os.makedirs(video_in_frame_path, exist_ok=True)
47
+
48
+ # delete frames if overwrite = true
49
+ if anim_args.overwrite_extracted_frames:
50
+ delete_all_imgs_in_folder(hybrid_frame_path)
51
+
52
+ # save the video frames from input video
53
+ print(f"Video to extract: {anim_args.video_init_path}")
54
+ print(f"Extracting video (1 every {anim_args.extract_nth_frame}) frames to {video_in_frame_path}...")
55
+ video_fps = vid2frames(video_path=anim_args.video_init_path, video_in_frame_path=video_in_frame_path, n=anim_args.extract_nth_frame, overwrite=anim_args.overwrite_extracted_frames, extract_from_frame=anim_args.extract_from_frame, extract_to_frame=anim_args.extract_to_frame)
56
+
57
+ # extract alpha masks of humans from the extracted input video imgs
58
+ if anim_args.hybrid_generate_human_masks != "None":
59
+ # create a folder for the human masks imgs to live in
60
+ print("Checking/creating a folder for the human masks")
61
+ os.makedirs(human_masks_path, exist_ok=True)
62
+
63
+ # delete frames if overwrite = true
64
+ if anim_args.overwrite_extracted_frames:
65
+ delete_all_imgs_in_folder(human_masks_path)
66
+
67
+ # in case generate_input_frames isn't selected, we won't have the video fps since vid2frames wasn't called, so check the video fps here instead
68
+ if not anim_args.hybrid_generate_inputframes:
69
+ _, video_fps, _ = get_quick_vid_info(anim_args.video_init_path)
70
+
71
+ # calculate the correct fps of the masked video according to the original video fps and 'extract_nth_frame'
72
+ output_fps = video_fps/anim_args.extract_nth_frame
73
+
74
+ # generate the actual alpha masks from the input imgs
75
+ print("Extracting alpha human masks from the input frames")
76
+ video2humanmasks(video_in_frame_path, human_masks_path, anim_args.hybrid_generate_human_masks, output_fps)
77
+
78
+ # get sorted list of inputfiles
79
+ inputfiles = sorted(pathlib.Path(video_in_frame_path).glob('*.jpg'))
80
+
81
+ if not anim_args.hybrid_use_init_image:
82
+ # determine max frames from length of input frames
83
+ anim_args.max_frames = len(inputfiles)
84
+ print(f"Using {anim_args.max_frames} input frames from {video_in_frame_path}...")
85
+
86
+ # use first frame as init
87
+ if anim_args.hybrid_use_first_frame_as_init_image:
88
+ for f in inputfiles:
89
+ args.init_image = str(f)
90
+ args.use_init = True
91
+ print(f"Using init_image from video: {args.init_image}")
92
+ break
93
+
94
+ return args, anim_args, inputfiles
95
+
96
+ def hybrid_composite(args, anim_args, frame_idx, prev_img, depth_model, hybrid_comp_schedules, root):
97
+ video_frame = os.path.join(args.outdir, 'inputframes', get_frame_name(anim_args.video_init_path) + f"{frame_idx:09}.jpg")
98
+ video_depth_frame = os.path.join(args.outdir, 'hybridframes', get_frame_name(anim_args.video_init_path) + f"_vid_depth{frame_idx:09}.jpg")
99
+ depth_frame = os.path.join(args.outdir, f"{args.timestring}_depth_{frame_idx-1:09}.png")
100
+ mask_frame = os.path.join(args.outdir, 'hybridframes', get_frame_name(anim_args.video_init_path) + f"_mask{frame_idx:09}.jpg")
101
+ comp_frame = os.path.join(args.outdir, 'hybridframes', get_frame_name(anim_args.video_init_path) + f"_comp{frame_idx:09}.jpg")
102
+ prev_frame = os.path.join(args.outdir, 'hybridframes', get_frame_name(anim_args.video_init_path) + f"_prev{frame_idx:09}.jpg")
103
+ prev_img = cv2.cvtColor(prev_img, cv2.COLOR_BGR2RGB)
104
+ prev_img_hybrid = Image.fromarray(prev_img)
105
+ if anim_args.hybrid_use_init_image:
106
+ video_image = load_image(args.init_image)
107
+ else:
108
+ video_image = Image.open(video_frame)
109
+ video_image = video_image.resize((args.W, args.H), PIL.Image.LANCZOS)
110
+ hybrid_mask = None
111
+
112
+ # composite mask types
113
+ if anim_args.hybrid_comp_mask_type == 'Depth': # get depth from last generation
114
+ hybrid_mask = Image.open(depth_frame)
115
+ elif anim_args.hybrid_comp_mask_type == 'Video Depth': # get video depth
116
+ video_depth = depth_model.predict(np.array(video_image), anim_args.midas_weight, root.half_precision)
117
+ depth_model.save(video_depth_frame, video_depth)
118
+ hybrid_mask = Image.open(video_depth_frame)
119
+ elif anim_args.hybrid_comp_mask_type == 'Blend': # create blend mask image
120
+ hybrid_mask = Image.blend(ImageOps.grayscale(prev_img_hybrid), ImageOps.grayscale(video_image), hybrid_comp_schedules['mask_blend_alpha'])
121
+ elif anim_args.hybrid_comp_mask_type == 'Difference': # create difference mask image
122
+ hybrid_mask = ImageChops.difference(ImageOps.grayscale(prev_img_hybrid), ImageOps.grayscale(video_image))
123
+
124
+ # optionally invert mask, if mask type is defined
125
+ if anim_args.hybrid_comp_mask_inverse and anim_args.hybrid_comp_mask_type != "None":
126
+ hybrid_mask = ImageOps.invert(hybrid_mask)
127
+
128
+ # if a mask type is selected, make composition
129
+ if hybrid_mask is None:
130
+ hybrid_comp = video_image
131
+ else:
132
+ # ensure grayscale
133
+ hybrid_mask = ImageOps.grayscale(hybrid_mask)
134
+ # equalization before
135
+ if anim_args.hybrid_comp_mask_equalize in ['Before', 'Both']:
136
+ hybrid_mask = ImageOps.equalize(hybrid_mask)
137
+ # contrast
138
+ hybrid_mask = ImageEnhance.Contrast(hybrid_mask).enhance(hybrid_comp_schedules['mask_contrast'])
139
+ # auto contrast with cutoffs lo/hi
140
+ if anim_args.hybrid_comp_mask_auto_contrast:
141
+ hybrid_mask = autocontrast_grayscale(np.array(hybrid_mask), hybrid_comp_schedules['mask_auto_contrast_cutoff_low'], hybrid_comp_schedules['mask_auto_contrast_cutoff_high'])
142
+ hybrid_mask = Image.fromarray(hybrid_mask)
143
+ hybrid_mask = ImageOps.grayscale(hybrid_mask)
144
+ if anim_args.hybrid_comp_save_extra_frames:
145
+ hybrid_mask.save(mask_frame)
146
+ # equalization after
147
+ if anim_args.hybrid_comp_mask_equalize in ['After', 'Both']:
148
+ hybrid_mask = ImageOps.equalize(hybrid_mask)
149
+ # do compositing and save
150
+ hybrid_comp = Image.composite(prev_img_hybrid, video_image, hybrid_mask)
151
+ if anim_args.hybrid_comp_save_extra_frames:
152
+ hybrid_comp.save(comp_frame)
153
+
154
+ # final blend of composite with prev_img, or just a blend if no composite is selected
155
+ hybrid_blend = Image.blend(prev_img_hybrid, hybrid_comp, hybrid_comp_schedules['alpha'])
156
+ if anim_args.hybrid_comp_save_extra_frames:
157
+ hybrid_blend.save(prev_frame)
158
+
159
+ prev_img = cv2.cvtColor(np.array(hybrid_blend), cv2.COLOR_RGB2BGR)
160
+
161
+ # restore to np array and return
162
+ return args, prev_img
163
+
164
+ def get_matrix_for_hybrid_motion(frame_idx, dimensions, inputfiles, hybrid_motion):
165
+ print(f"Calculating {hybrid_motion} RANSAC matrix for frames {frame_idx} to {frame_idx+1}")
166
+ img1 = cv2.cvtColor(get_resized_image_from_filename(str(inputfiles[frame_idx]), dimensions), cv2.COLOR_BGR2GRAY)
167
+ img2 = cv2.cvtColor(get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions), cv2.COLOR_BGR2GRAY)
168
+ M = get_transformation_matrix_from_images(img1, img2, hybrid_motion)
169
+ return M
170
+
171
+ def get_matrix_for_hybrid_motion_prev(frame_idx, dimensions, inputfiles, prev_img, hybrid_motion):
172
+ print(f"Calculating {hybrid_motion} RANSAC matrix for frames {frame_idx} to {frame_idx+1}")
173
+ # first handle invalid images by returning default matrix
174
+ height, width = prev_img.shape[:2]
175
+ if height == 0 or width == 0 or prev_img.dtype != np.uint8:
176
+ return get_hybrid_motion_default_matrix(hybrid_motion)
177
+ else:
178
+ prev_img_gray = cv2.cvtColor(prev_img, cv2.COLOR_BGR2GRAY)
179
+ img = cv2.cvtColor(get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions), cv2.COLOR_BGR2GRAY)
180
+ M = get_transformation_matrix_from_images(prev_img_gray, img, hybrid_motion)
181
+ return M
182
+
183
+ def get_flow_for_hybrid_motion(frame_idx, dimensions, inputfiles, hybrid_frame_path, prev_flow, method, raft_model, consistency_check=True, consistency_blur=0, do_flow_visualization=False):
184
+ print(f"Calculating {method} optical flow {'w/consistency mask' if consistency_check else ''} for frames {frame_idx} to {frame_idx+1}")
185
+ i1 = get_resized_image_from_filename(str(inputfiles[frame_idx]), dimensions)
186
+ i2 = get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions)
187
+ if consistency_check:
188
+ flow, reliable_flow = get_reliable_flow_from_images(i1, i2, method, raft_model, prev_flow, consistency_blur) # forward flow w/backward consistency check
189
+ if do_flow_visualization: save_flow_mask_visualization(frame_idx, reliable_flow, hybrid_frame_path)
190
+ else:
191
+ flow = get_flow_from_images(i1, i2, method, raft_model, prev_flow) # old single flow forward
192
+ if do_flow_visualization: save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_frame_path)
193
+ return flow
194
+
195
+ def get_flow_for_hybrid_motion_prev(frame_idx, dimensions, inputfiles, hybrid_frame_path, prev_flow, prev_img, method, raft_model, consistency_check=True, consistency_blur=0, do_flow_visualization=False):
196
+ print(f"Calculating {method} optical flow {'w/consistency mask' if consistency_check else ''} for frames {frame_idx} to {frame_idx+1}")
197
+ reliable_flow = None
198
+ # first handle invalid images by returning default flow
199
+ height, width = prev_img.shape[:2]
200
+ if height == 0 or width == 0:
201
+ flow = get_hybrid_motion_default_flow(dimensions)
202
+ else:
203
+ i1 = prev_img.astype(np.uint8)
204
+ i2 = get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions)
205
+ if consistency_check:
206
+ flow, reliable_flow = get_reliable_flow_from_images(i1, i2, method, raft_model, prev_flow, consistency_blur) # forward flow w/backward consistency check
207
+ if do_flow_visualization: save_flow_mask_visualization(frame_idx, reliable_flow, hybrid_frame_path)
208
+ else:
209
+ flow = get_flow_from_images(i1, i2, method, raft_model, prev_flow)
210
+ if do_flow_visualization: save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_frame_path)
211
+ return flow
212
+
213
+ def get_reliable_flow_from_images(i1, i2, method, raft_model, prev_flow, consistency_blur, reliability=0):
214
+ flow_forward = get_flow_from_images(i1, i2, method, raft_model, prev_flow)
215
+ flow_backward = get_flow_from_images(i2, i1, method, raft_model, None)
216
+ reliable_flow = make_consistency(flow_forward, flow_backward, edges_unreliable=False)
217
+ if consistency_blur > 0:
218
+ reliable_flow = custom_gaussian_blur(reliable_flow.astype(np.float32), 1, consistency_blur)
219
+ return filter_flow(flow_forward, reliable_flow, consistency_blur, reliability), reliable_flow
220
+
221
+ def custom_gaussian_blur(input_array, blur_size, sigma):
222
+ return gaussian_filter(input_array, sigma=(sigma, sigma, 0), order=0, mode='constant', cval=0.0, truncate=blur_size)
223
+
224
+ def filter_flow(flow, reliable_flow, reliability=0.5, consistency_blur=0):
225
+ # reliability from reliable flow: -0.75 is bad, 0 is neutral/outside the frame, 1 is great
226
+ # Create a mask from the first channel of the reliable_flow array
227
+ mask = reliable_flow[..., 0]
228
+
229
+ # to set everything to 1 or 0 based on reliability
230
+ # mask = np.where(mask >= reliability, 1, 0)
231
+
232
+ # Expand the mask to match the shape of the forward_flow array
233
+ mask = np.repeat(mask[..., np.newaxis], flow.shape[2], axis=2)
234
+
235
+ # Apply the mask to the flow
236
+ return flow * mask
237
+
238
+ def image_transform_ransac(image_cv2, M, hybrid_motion, depth=None):
239
+ if hybrid_motion == "Perspective":
240
+ return image_transform_perspective(image_cv2, M, depth)
241
+ else: # Affine
242
+ return image_transform_affine(image_cv2, M, depth)
243
+
244
+ def image_transform_optical_flow(img, flow, flow_factor):
245
+ # if flow factor not normal, calculate flow factor
246
+ if flow_factor != 1:
247
+ flow = flow * flow_factor
248
+ # flow is reversed, so you need to reverse it:
249
+ flow = -flow
250
+ h, w = img.shape[:2]
251
+ flow[:, :, 0] += np.arange(w)
252
+ flow[:, :, 1] += np.arange(h)[:,np.newaxis]
253
+ return remap(img, flow)
254
+
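image_transform_optical_flow above warps a frame by adding the pixel grid to the (negated, scaled) flow and remapping. The coordinate-grid trick on its own looks like the sketch below; the toy image, the constant flow, and the direct cv2.remap call are illustrative and do not go through the module's remap() helper, which also pads the border first.

import numpy as np
import cv2

# toy 64x64 image with a white square, and a constant flow of +3 px in x
img = np.zeros((64, 64, 3), np.uint8)
img[20:40, 20:40] = 255
flow = np.zeros((64, 64, 2), np.float32)
flow[..., 0] = 3.0

# cv2.remap expects absolute sample coordinates, so add the pixel grid to the flow,
# the same trick image_transform_optical_flow uses before calling remap()
map_xy = flow.copy()
map_xy[..., 0] += np.arange(64, dtype=np.float32)
map_xy[..., 1] += np.arange(64, dtype=np.float32)[:, None]

# each output pixel (x, y) is sampled from (x + dx, y + dy) in the source image
warped = cv2.remap(img, map_xy, None, cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)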
255
+ def image_transform_affine(image_cv2, M, depth=None):
256
+ if depth is None:
257
+ return cv2.warpAffine(
258
+ image_cv2,
259
+ M,
260
+ (image_cv2.shape[1],image_cv2.shape[0]),
261
+ borderMode=cv2.BORDER_REFLECT_101
262
+ )
263
+ else: # NEED TO IMPLEMENT THE FOLLOWING FUNCTION
264
+ return depth_based_affine_warp(
265
+ image_cv2,
266
+ depth,
267
+ M
268
+ )
269
+
270
+ def image_transform_perspective(image_cv2, M, depth=None):
271
+ if depth is None:
272
+ return cv2.warpPerspective(
273
+ image_cv2,
274
+ M,
275
+ (image_cv2.shape[1], image_cv2.shape[0]),
276
+ borderMode=cv2.BORDER_REFLECT_101
277
+ )
278
+ else: # NEED TO IMPLEMENT THE FOLLOWING FUNCTION
279
+ return render_3d_perspective(
280
+ image_cv2,
281
+ depth,
282
+ M
283
+ )
284
+
285
+ def get_hybrid_motion_default_matrix(hybrid_motion):
286
+ if hybrid_motion == "Perspective":
287
+ arr = np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
288
+ else:
289
+ arr = np.array([[1., 0., 0.], [0., 1., 0.]])
290
+ return arr
291
+
292
+ def get_hybrid_motion_default_flow(dimensions):
293
+ cols, rows = dimensions
294
+ flow = np.zeros((rows, cols, 2), np.float32)
295
+ return flow
296
+
297
+ def get_transformation_matrix_from_images(img1, img2, hybrid_motion, confidence=0.75):
298
+ # Create SIFT detector and feature extractor
299
+ sift = cv2.SIFT_create()
300
+
301
+ # Detect keypoints and compute descriptors
302
+ kp1, des1 = sift.detectAndCompute(img1, None)
303
+ kp2, des2 = sift.detectAndCompute(img2, None)
304
+
305
+ # Create BFMatcher object and match descriptors
306
+ bf = cv2.BFMatcher()
307
+ matches = bf.knnMatch(des1, des2, k=2)
308
+
309
+ # Apply ratio test to filter good matches
310
+ good_matches = []
311
+ for m, n in matches:
312
+ if m.distance < confidence * n.distance:
313
+ good_matches.append(m)
314
+
315
+ if len(good_matches) <= 8:
316
+ return get_hybrid_motion_default_matrix(hybrid_motion)
317
+
318
+ # Convert keypoints to numpy arrays
319
+ src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
320
+ dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
321
+
322
+ if len(src_pts) <= 8 or len(dst_pts) <= 8:
323
+ return get_hybrid_motion_default_matrix(hybrid_motion)
324
+ elif hybrid_motion == "Perspective": # Perspective transformation (3x3)
325
+ transformation_matrix, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
326
+ return transformation_matrix
327
+ else: # Affine - rigid transformation (no skew 3x2)
328
+ transformation_rigid_matrix, rigid_mask = cv2.estimateAffinePartial2D(src_pts, dst_pts)
329
+ return transformation_rigid_matrix
330
+
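A usage sketch for the RANSAC helper above: estimate the frame-to-frame motion from two grayscale frames and apply it with warpAffine. The file names are placeholders for two real consecutive frames, and the snippet assumes the Affine branch (a 2x3 matrix).

import cv2

# placeholder frame paths; replace with two consecutive extracted frames
prev_bgr = cv2.imread("frame000000001.jpg")
next_bgr = cv2.imread("frame000000002.jpg")
prev_gray = cv2.cvtColor(prev_bgr, cv2.COLOR_BGR2GRAY)
next_gray = cv2.cvtColor(next_bgr, cv2.COLOR_BGR2GRAY)

M = get_transformation_matrix_from_images(prev_gray, next_gray, "Affine")
# warp the previous color frame by the estimated rigid motion
warped = cv2.warpAffine(prev_bgr, M, (prev_bgr.shape[1], prev_bgr.shape[0]),
                        borderMode=cv2.BORDER_REFLECT_101)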
331
+ def get_flow_from_images(i1, i2, method, raft_model, prev_flow=None):
332
+ if method == "RAFT":
333
+ if raft_model is None:
334
+ raise Exception("RAFT Model not provided to get_flow_from_images function, cannot continue.")
335
+ return get_flow_from_images_RAFT(i1, i2, raft_model)
336
+ elif method == "DIS Medium":
337
+ return get_flow_from_images_DIS(i1, i2, 'medium', prev_flow)
338
+ elif method == "DIS Fine":
339
+ return get_flow_from_images_DIS(i1, i2, 'fine', prev_flow)
340
+ elif method == "DenseRLOF": # Unused - requires running opencv-contrib-python (full opencv) INSTEAD of opencv-python
341
+ return get_flow_from_images_Dense_RLOF(i1, i2, prev_flow)
342
+ elif method == "SF": # Unused - requires running opencv-contrib-python (full opencv) INSTEAD of opencv-python
343
+ return get_flow_from_images_SF(i1, i2, prev_flow)
344
+ elif method == "DualTVL1": # Unused - requires running opencv-contrib-python (full opencv) INSTEAD of opencv-python
345
+ return get_flow_from_images_DualTVL1(i1, i2, prev_flow)
346
+ elif method == "DeepFlow": # Unused - requires running opencv-contrib-python (full opencv) INSTEAD of opencv-python
347
+ return get_flow_from_images_DeepFlow(i1, i2, prev_flow)
348
+ elif method == "PCAFlow": # Unused - requires running opencv-contrib-python (full opencv) INSTEAD of opencv-python
349
+ return get_flow_from_images_PCAFlow(i1, i2, prev_flow)
350
+ elif method == "Farneback": # Farneback Normal:
351
+ return get_flow_from_images_Farneback(i1, i2, prev_flow)
352
+ # if we reached this point, something went wrong. raise an error:
353
+ raise RuntimeError(f"Invalid flow method name: '{method}'")
354
+
355
+ def get_flow_from_images_RAFT(i1, i2, raft_model):
356
+ flow = raft_model.predict(i1, i2)
357
+ return flow
358
+
359
+ def get_flow_from_images_DIS(i1, i2, preset, prev_flow):
360
+ # DIS PRESETS CHART KEY: finest scale, grad desc its, patch size
361
+ # DIS_MEDIUM: 1, 25, 8 | DIS_FAST: 2, 16, 8 | DIS_ULTRAFAST: 2, 12, 8
362
+ if preset == 'medium': preset_code = cv2.DISOPTICAL_FLOW_PRESET_MEDIUM
363
+ elif preset == 'fast': preset_code = cv2.DISOPTICAL_FLOW_PRESET_FAST
364
+ elif preset == 'ultrafast': preset_code = cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST
365
+ elif preset in ['slow','fine']: preset_code = None
366
+ i1 = cv2.cvtColor(i1, cv2.COLOR_BGR2GRAY)
367
+ i2 = cv2.cvtColor(i2, cv2.COLOR_BGR2GRAY)
368
+ dis = cv2.DISOpticalFlow_create(preset_code)
369
+ # custom presets
370
+ if preset == 'slow':
371
+ dis.setGradientDescentIterations(192)
372
+ dis.setFinestScale(1)
373
+ dis.setPatchSize(8)
374
+ dis.setPatchStride(4)
375
+ if preset == 'fine':
376
+ dis.setGradientDescentIterations(192)
377
+ dis.setFinestScale(0)
378
+ dis.setPatchSize(8)
379
+ dis.setPatchStride(4)
380
+ return dis.calc(i1, i2, prev_flow)
381
+
382
+ def get_flow_from_images_Dense_RLOF(i1, i2, last_flow=None):
383
+ return cv2.optflow.calcOpticalFlowDenseRLOF(i1, i2, flow = last_flow)
384
+
385
+ def get_flow_from_images_SF(i1, i2, last_flow=None, layers = 3, averaging_block_size = 2, max_flow = 4):
386
+ return cv2.optflow.calcOpticalFlowSF(i1, i2, layers, averaging_block_size, max_flow)
387
+
388
+ def get_flow_from_images_DualTVL1(i1, i2, prev_flow):
389
+ i1 = cv2.cvtColor(i1, cv2.COLOR_BGR2GRAY)
390
+ i2 = cv2.cvtColor(i2, cv2.COLOR_BGR2GRAY)
391
+ f = cv2.optflow.DualTVL1OpticalFlow_create()
392
+ return f.calc(i1, i2, prev_flow)
393
+
394
+ def get_flow_from_images_DeepFlow(i1, i2, prev_flow):
395
+ i1 = cv2.cvtColor(i1, cv2.COLOR_BGR2GRAY)
396
+ i2 = cv2.cvtColor(i2, cv2.COLOR_BGR2GRAY)
397
+ f = cv2.optflow.createOptFlow_DeepFlow()
398
+ return f.calc(i1, i2, prev_flow)
399
+
400
+ def get_flow_from_images_PCAFlow(i1, i2, prev_flow):
401
+ i1 = cv2.cvtColor(i1, cv2.COLOR_BGR2GRAY)
402
+ i2 = cv2.cvtColor(i2, cv2.COLOR_BGR2GRAY)
403
+ f = cv2.optflow.createOptFlow_PCAFlow()
404
+ return f.calc(i1, i2, prev_flow)
405
+
406
+ def get_flow_from_images_Farneback(i1, i2, preset="normal", last_flow=None, pyr_scale = 0.5, levels = 3, winsize = 15, iterations = 3, poly_n = 5, poly_sigma = 1.2, flags = 0):
407
+ flags = cv2.OPTFLOW_FARNEBACK_GAUSSIAN # Specify the operation flags
408
+ pyr_scale = 0.5 # The image scale (<1) to build pyramids for each image
409
+ if preset == "fine":
410
+ levels = 13 # The number of pyramid layers, including the initial image
411
+ winsize = 77 # The averaging window size
412
+ iterations = 13 # The number of iterations at each pyramid level
413
+ poly_n = 15 # The size of the pixel neighborhood used to find polynomial expansion in each pixel
414
+ poly_sigma = 0.8 # The standard deviation of the Gaussian used to smooth derivatives used as a basis for the polynomial expansion
415
+ else: # "normal"
416
+ levels = 5 # The number of pyramid layers, including the initial image
417
+ winsize = 21 # The averaging window size
418
+ iterations = 5 # The number of iterations at each pyramid level
419
+ poly_n = 7 # The size of the pixel neighborhood used to find polynomial expansion in each pixel
420
+ poly_sigma = 1.2 # The standard deviation of the Gaussian used to smooth derivatives used as a basis for the polynomial expansion
421
+ i1 = cv2.cvtColor(i1, cv2.COLOR_BGR2GRAY)
422
+ i2 = cv2.cvtColor(i2, cv2.COLOR_BGR2GRAY)
423
+ flags = 0 # flags = cv2.OPTFLOW_USE_INITIAL_FLOW
424
+ flow = cv2.calcOpticalFlowFarneback(i1, i2, last_flow, pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags)
425
+ return flow
426
+
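A quick sanity check of the Farneback helper above on synthetic frames, where a white square moves 3 px to the right. The frame sizes and the expected values are approximate illustrations, not guaranteed outputs.

import numpy as np

a = np.zeros((64, 64, 3), np.uint8); a[20:40, 20:40] = 255
b = np.zeros((64, 64, 3), np.uint8); b[20:40, 23:43] = 255

flow = get_flow_from_images_Farneback(a, b)  # defaults to the "normal" preset
print(flow.shape)    # (64, 64, 2)
print(flow[30, 30])  # roughly [3, 0] where the motion is well constrained near the square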
427
+ def save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_frame_path):
428
+ flow_img_file = os.path.join(hybrid_frame_path, f"flow{frame_idx:09}.jpg")
429
+ flow_img = cv2.imread(str(inputfiles[frame_idx]))
430
+ flow_img = cv2.resize(flow_img, (dimensions[0], dimensions[1]), cv2.INTER_AREA)
431
+ flow_img = cv2.cvtColor(flow_img, cv2.COLOR_RGB2GRAY)
432
+ flow_img = cv2.cvtColor(flow_img, cv2.COLOR_GRAY2BGR)
433
+ flow_img = draw_flow_lines_in_grid_in_color(flow_img, flow)
434
+ flow_img = cv2.cvtColor(flow_img, cv2.COLOR_BGR2RGB)
435
+ cv2.imwrite(flow_img_file, flow_img)
436
+ print(f"Saved optical flow visualization: {flow_img_file}")
437
+
438
+ def save_flow_mask_visualization(frame_idx, reliable_flow, hybrid_frame_path, color=True):
439
+ flow_mask_img_file = os.path.join(hybrid_frame_path, f"flow_mask{frame_idx:09}.jpg")
440
+ if color:
441
+ # Normalize the reliable_flow array to the range [0, 255]
442
+ normalized_reliable_flow = (reliable_flow - reliable_flow.min()) / (reliable_flow.max() - reliable_flow.min()) * 255
443
+ # Change the data type to np.uint8
444
+ mask_image = normalized_reliable_flow.astype(np.uint8)
445
+ else:
446
+ # Extract the first channel of the reliable_flow array
447
+ first_channel = reliable_flow[..., 0]
448
+ # Normalize the first channel to the range [0, 255]
449
+ normalized_first_channel = (first_channel - first_channel.min()) / (first_channel.max() - first_channel.min()) * 255
450
+ # Change the data type to np.uint8
451
+ grayscale_image = normalized_first_channel.astype(np.uint8)
452
+ # Replicate the grayscale channel three times to form a BGR image
453
+ mask_image = np.stack((grayscale_image, grayscale_image, grayscale_image), axis=2)
454
+ cv2.imwrite(flow_mask_img_file, mask_image)
455
+ print(f"Saved mask flow visualization: {flow_mask_img_file}")
456
+
457
+ def reliable_flow_to_image(reliable_flow):
458
+ # Extract the first channel of the reliable_flow array
459
+ first_channel = reliable_flow[..., 0]
460
+ # Normalize the first channel to the range [0, 255]
461
+ normalized_first_channel = (first_channel - first_channel.min()) / (first_channel.max() - first_channel.min()) * 255
462
+ # Change the data type to np.uint8
463
+ grayscale_image = normalized_first_channel.astype(np.uint8)
464
+ # Replicate the grayscale channel three times to form a BGR image
465
+ bgr_image = np.stack((grayscale_image, grayscale_image, grayscale_image), axis=2)
466
+ return bgr_image
467
+
468
+ def draw_flow_lines_in_grid_in_color(img, flow, step=8, magnitude_multiplier=1, min_magnitude = 0, max_magnitude = 10000):
469
+ flow = flow * magnitude_multiplier
470
+ h, w = img.shape[:2]
471
+ y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2,-1).astype(int)
472
+ fx, fy = flow[y,x].T
473
+ lines = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2)
474
+ lines = np.int32(lines + 0.5)
475
+ vis = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
476
+ vis = cv2.cvtColor(vis, cv2.COLOR_GRAY2BGR)
477
+
478
+ mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1])
479
+ hsv = np.zeros((flow.shape[0], flow.shape[1], 3), dtype=np.uint8)
480
+ hsv[...,0] = ang*180/np.pi/2
481
+ hsv[...,1] = 255
482
+ hsv[...,2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
483
+ bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
484
+ vis = cv2.add(vis, bgr)
485
+
486
+ # Iterate through the lines
487
+ for (x1, y1), (x2, y2) in lines:
488
+ # Calculate the magnitude of the line
489
+ magnitude = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
490
+
491
+ # Only draw the line if it falls within the magnitude range
492
+ if min_magnitude <= magnitude <= max_magnitude:
493
+ b = int(bgr[y1, x1, 0])
494
+ g = int(bgr[y1, x1, 1])
495
+ r = int(bgr[y1, x1, 2])
496
+ color = (b, g, r)
497
+ cv2.arrowedLine(vis, (x1, y1), (x2, y2), color, thickness=1, tipLength=0.1)
498
+ return vis
499
+
500
+ def draw_flow_lines_in_color(img, flow, threshold=3, magnitude_multiplier=1, min_magnitude = 0, max_magnitude = 10000):
501
+ # h, w = img.shape[:2]
502
+ vis = img.copy() # Create a copy of the input image
503
+
504
+ # Find the locations in the flow field where the magnitude of the flow is greater than the threshold
505
+ mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1])
506
+ idx = np.where(mag > threshold)
507
+
508
+ # Create HSV image
509
+ hsv = np.zeros((flow.shape[0], flow.shape[1], 3), dtype=np.uint8)
510
+ hsv[...,0] = ang*180/np.pi/2
511
+ hsv[...,1] = 255
512
+ hsv[...,2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
513
+
514
+ # Convert HSV image to BGR
515
+ bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
516
+
517
+ # Add color from bgr
518
+ vis = cv2.add(vis, bgr)
519
+
520
+ # Draw an arrow at each of these locations to indicate the direction of the flow
521
+ for i, (y, x) in enumerate(zip(idx[0], idx[1])):
522
+ # Calculate the magnitude of the line
523
+ x2 = x + magnitude_multiplier * int(flow[y, x, 0])
524
+ y2 = y + magnitude_multiplier * int(flow[y, x, 1])
525
+ magnitude = np.sqrt((x2 - x)**2 + (y2 - y)**2)
526
+
527
+ # Only draw the line if it falls within the magnitude range
528
+ if min_magnitude <= magnitude <= max_magnitude:
529
+ if i % random.randint(100, 200) == 0:
530
+ b = int(bgr[y, x, 0])
531
+ g = int(bgr[y, x, 1])
532
+ r = int(bgr[y, x, 2])
533
+ color = (b, g, r)
534
+ cv2.arrowedLine(vis, (x, y), (x2, y2), color, thickness=1, tipLength=0.25)
535
+
536
+ return vis
537
+
538
+ def autocontrast_grayscale(image, low_cutoff=0, high_cutoff=100):
539
+ # Perform autocontrast on a grayscale np array image.
540
+ # Find the minimum and maximum values in the image
541
+ min_val = np.percentile(image, low_cutoff)
542
+ max_val = np.percentile(image, high_cutoff)
543
+
544
+ # Scale the image so that the minimum value is 0 and the maximum value is 255
545
+ image = 255 * (image - min_val) / (max_val - min_val)
546
+
547
+ # Clip values that fall outside the range [0, 255]
548
+ image = np.clip(image, 0, 255)
549
+
550
+ return image
551
+
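A small worked example of the percentile stretch performed by autocontrast_grayscale; the ramp image and the cutoff values are arbitrary.

import numpy as np

# low-contrast ramp between 100 and 150
img = np.linspace(100, 150, 64 * 64).reshape(64, 64)

stretched = autocontrast_grayscale(img, low_cutoff=2, high_cutoff=98)
print(img.min(), img.max())              # 100.0 150.0
print(stretched.min(), stretched.max())  # 0.0 255.0 after the stretch and clip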
552
+ def get_resized_image_from_filename(im, dimensions):
553
+ img = cv2.imread(im)
554
+ return cv2.resize(img, (dimensions[0], dimensions[1]), cv2.INTER_AREA)
555
+
556
+ def remap(img, flow):
557
+ border_mode = cv2.BORDER_REFLECT_101
558
+ h, w = img.shape[:2]
559
+ displacement = int(h * 0.25), int(w * 0.25)
560
+ larger_img = cv2.copyMakeBorder(img, displacement[0], displacement[0], displacement[1], displacement[1], border_mode)
561
+ lh, lw = larger_img.shape[:2]
562
+ larger_flow = extend_flow(flow, lw, lh)
563
+ remapped_img = cv2.remap(larger_img, larger_flow, None, cv2.INTER_LINEAR, border_mode)
564
+ output_img = center_crop_image(remapped_img, w, h)
565
+ return output_img
566
+
567
+ def center_crop_image(img, w, h):
568
+ y, x, _ = img.shape
569
+ width_indent = int((x - w) / 2)
570
+ height_indent = int((y - h) / 2)
571
+ cropped_img = img[height_indent:y-height_indent, width_indent:x-width_indent]
572
+ return cropped_img
573
+
574
+ def extend_flow(flow, w, h):
575
+ # Get the shape of the original flow image
576
+ flow_h, flow_w = flow.shape[:2]
577
+ # Calculate the position of the image in the new image
578
+ x_offset = int((w - flow_w) / 2)
579
+ y_offset = int((h - flow_h) / 2)
580
+ # Generate the X and Y grids
581
+ x_grid, y_grid = np.meshgrid(np.arange(w), np.arange(h))
582
+ # Create the new flow image and set it to the X and Y grids
583
+ new_flow = np.dstack((x_grid, y_grid)).astype(np.float32)
584
+ # Shift the values of the original flow by the size of the border
585
+ flow[:,:,0] += x_offset
586
+ flow[:,:,1] += y_offset
587
+ # Overwrite the middle of the grid with the original flow
588
+ new_flow[y_offset:y_offset+flow_h, x_offset:x_offset+flow_w, :] = flow
589
+ # Return the extended image
590
+ return new_flow
591
+
592
+ def abs_flow_to_rel_flow(flow, width, height):
593
+ fx, fy = flow[:,:,0], flow[:,:,1]
594
+ max_flow_x = np.max(np.abs(fx))
595
+ max_flow_y = np.max(np.abs(fy))
596
+ max_flow = max(max_flow_x, max_flow_y)
597
+
598
+ rel_fx = fx / (max_flow * width)
599
+ rel_fy = fy / (max_flow * height)
600
+ return np.dstack((rel_fx, rel_fy))
601
+
602
+ def rel_flow_to_abs_flow(rel_flow, width, height):
603
+ rel_fx, rel_fy = rel_flow[:,:,0], rel_flow[:,:,1]
604
+
605
+ max_flow_x = np.max(np.abs(rel_fx * width))
606
+ max_flow_y = np.max(np.abs(rel_fy * height))
607
+ max_flow = max(max_flow_x, max_flow_y)
608
+
609
+ fx = rel_fx * (max_flow * width)
610
+ fy = rel_fy * (max_flow * height)
611
+ return np.dstack((fx, fy))
scripts/deforum_helpers/image_sharpening.py ADDED
@@ -0,0 +1,39 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import cv2
19
+ import numpy as np
20
+
21
+ def unsharp_mask(img, kernel_size=(5, 5), sigma=1.0, amount=1.0, threshold=0, mask=None):
22
+ if amount == 0:
23
+ return img
24
+ # Return a sharpened version of the image, using an unsharp mask.
25
+ # If mask is not None, only areas under mask are handled
26
+ blurred = cv2.GaussianBlur(img, kernel_size, sigma)
27
+ sharpened = float(amount + 1) * img - float(amount) * blurred
28
+ sharpened = np.maximum(sharpened, np.zeros(sharpened.shape))
29
+ sharpened = np.minimum(sharpened, 255 * np.ones(sharpened.shape))
30
+ sharpened = sharpened.round().astype(np.uint8)
31
+ if threshold > 0:
32
+ low_contrast_mask = np.absolute(img - blurred) < threshold
33
+ np.copyto(sharpened, img, where=low_contrast_mask)
34
+ if mask is not None:
35
+ mask = np.array(mask)
36
+ masked_sharpened = cv2.bitwise_and(sharpened, sharpened, mask=mask)
37
+ masked_img = cv2.bitwise_and(img, img, mask=255-mask)
38
+ sharpened = cv2.add(masked_img, masked_sharpened)
39
+ return sharpened
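A short usage sketch of the unsharp mask above on a synthetic soft image; the kernel size, sigma, and amount are arbitrary values chosen for illustration.

import numpy as np
import cv2

# synthetic soft-edged test image: a blurred white square on black
img = np.zeros((128, 128, 3), np.uint8)
img[40:90, 40:90] = 255
soft = cv2.GaussianBlur(img, (15, 15), 4.0)

# re-sharpen the edges; amount=0 would return the input untouched,
# and passing mask= limits sharpening to the white areas of the mask
sharp = unsharp_mask(soft, kernel_size=(5, 5), sigma=1.0, amount=1.5, threshold=0)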
scripts/deforum_helpers/load_images.py ADDED
@@ -0,0 +1,113 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import requests
19
+ import os
20
+ from PIL import Image
21
+ import socket
22
+ import torchvision.transforms.functional as TF
23
+ from .general_utils import clean_gradio_path_strings
24
+
25
+ def load_img(path : str, shape=None, use_alpha_as_mask=False):
26
+ # use_alpha_as_mask: Read the alpha channel of the image as the mask image
27
+ image = load_image(path)
28
+ image = image.convert('RGBA') if use_alpha_as_mask else image.convert('RGB')
29
+ image = image.resize(shape, resample=Image.LANCZOS) if shape is not None else image
30
+
31
+ mask_image = None
32
+ if use_alpha_as_mask:
33
+ # Split alpha channel into a mask_image
34
+ red, green, blue, alpha = Image.Image.split(image) # not interested in R G or B, just in the alpha channel
35
+ mask_image = alpha.convert('L')
36
+ image = image.convert('RGB')
37
+
38
+ # check using init image alpha as mask if mask is not blank
39
+ extrema = mask_image.getextrema()
40
+ if (extrema == (0,0)) or extrema == (255,255):
41
+ print("use_alpha_as_mask==True: Using the alpha channel from the init image as a mask, but the alpha channel is blank.")
42
+ print("ignoring alpha as mask.")
43
+ mask_image = None
44
+
45
+ return image, mask_image
46
+
47
+ def load_image(image_path :str):
48
+ image_path = clean_gradio_path_strings(image_path)
49
+ image = None
50
+ if image_path.startswith('http://') or image_path.startswith('https://'):
51
+ try:
52
+ host = socket.gethostbyname("www.google.com")
53
+ s = socket.create_connection((host, 80), 2)
54
+ s.close()
55
+ except:
56
+ raise ConnectionError("There is no active internet connection available (couldn't connect to google.com as a network test) - please use *local* masks and init files only.")
57
+ try:
58
+ response = requests.get(image_path, stream=True)
59
+ except requests.exceptions.RequestException as e:
60
+ raise ConnectionError("Failed to download image due to no internet connection. Error: {}".format(e))
61
+ if response.status_code == 404 or response.status_code != 200:
62
+ raise ConnectionError("Init image url or mask image url is not valid")
63
+ image = Image.open(response.raw).convert('RGB')
64
+ else:
65
+ if not os.path.exists(image_path):
66
+ raise RuntimeError("Init image path or mask image path is not valid")
67
+ image = Image.open(image_path).convert('RGB')
68
+
69
+ return image
70
+
71
+ def prepare_mask(mask_input, mask_shape, mask_brightness_adjust=1.0, mask_contrast_adjust=1.0):
72
+ """
73
+ prepares mask for use in webui
74
+ """
75
+ if isinstance(mask_input, Image.Image):
76
+ mask = mask_input
77
+ else :
78
+ mask = load_image(mask_input)
79
+ mask = mask.resize(mask_shape, resample=Image.LANCZOS)
80
+ if mask_brightness_adjust != 1:
81
+ mask = TF.adjust_brightness(mask, mask_brightness_adjust)
82
+ if mask_contrast_adjust != 1:
83
+ mask = TF.adjust_contrast(mask, mask_contrast_adjust)
84
+ mask = mask.convert('L')
85
+ return mask
86
+
87
+ # "check_mask_for_errors" may have prevented errors in composable masks,
88
+ # but it CAUSES errors on any frame where it's all black.
89
+ # Bypassing the check below until we have a better fix.
90
+ # This may break composable masks, but it makes ACTUAL masks usable.
91
+ def check_mask_for_errors(mask_input, invert_mask=False):
92
+ extrema = mask_input.getextrema()
93
+ if (invert_mask):
94
+ if extrema == (255,255):
95
+ print("after inverting mask will be blank. ignoring mask")
96
+ return None
97
+ elif extrema == (0,0):
98
+ print("mask is blank. ignoring mask")
99
+ return None
100
+ else:
101
+ return mask_input
102
+
103
+ def get_mask(args):
104
+ return prepare_mask(args.mask_file, (args.W, args.H), mask_contrast_adjust=args.mask_contrast_adjust, mask_brightness_adjust=args.mask_brightness_adjust)
105
+
106
+ def get_mask_from_file(mask_file, args):
107
+ return prepare_mask(mask_file, (args.W, args.H), mask_contrast_adjust=args.mask_contrast_adjust, mask_brightness_adjust=args.mask_brightness_adjust)
108
+
109
+ def blank_if_none(mask, w, h, mode):
110
+ return Image.new(mode, (w, h), (0)) if mask is None else mask
111
+
112
+ def none_if_blank(mask):
113
+ return None if mask.getextrema() == (0,0) else mask
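
A brief sketch of the loaders above, with placeholder file names; load_images.py pulls in torchvision and another Deforum helper, so this is meant to run from the extension's scripts directory inside a working install. load_img resizes the init image and optionally splits a non-blank alpha channel into a mask, while prepare_mask resizes and brightness/contrast-adjusts an explicit mask and returns it as a grayscale ('L') PIL image:

from deforum_helpers.load_images import load_img, prepare_mask

# init image at the render resolution; the alpha channel becomes the mask, or None if it is blank/absent
init_image, alpha_mask = load_img("init.png", shape=(512, 512), use_alpha_as_mask=True)

# explicit mask file, resized and adjusted; keyword names match the prepare_mask signature above
mask = prepare_mask("mask.png", (512, 512), mask_brightness_adjust=1.0, mask_contrast_adjust=1.0)
print(init_image.size, mask.mode)  # (512, 512) L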
scripts/deforum_helpers/masks.py ADDED
@@ -0,0 +1,57 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ import cv2
20
+ import gc
21
+ import numpy as np
22
+ from PIL import Image, ImageOps
23
+ from .video_audio_utilities import get_frame_name
24
+ from .load_images import load_image
25
+
26
+ def do_overlay_mask(args, anim_args, img, frame_idx, is_bgr_array=False):
27
+ if is_bgr_array:
28
+ img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
29
+ img = Image.fromarray(img)
30
+
31
+ if anim_args.use_mask_video:
32
+ current_mask = Image.open(os.path.join(args.outdir, 'maskframes', get_frame_name(anim_args.video_mask_path) + f"{frame_idx:09}.jpg"))
33
+ current_frame = Image.open(os.path.join(args.outdir, 'inputframes', get_frame_name(anim_args.video_init_path) + f"{frame_idx:09}.jpg"))
34
+ elif args.use_mask:
35
+ current_mask = args.mask_image if args.mask_image is not None else load_image(args.mask_file)
36
+ if args.init_image is None:
37
+ current_frame = img
38
+ else:
39
+ current_frame = load_image(args.init_image)
40
+
41
+ current_mask = current_mask.resize((args.W, args.H), Image.LANCZOS)
42
+ current_frame = current_frame.resize((args.W, args.H), Image.LANCZOS)
43
+ current_mask = ImageOps.grayscale(current_mask)
44
+
45
+ if args.invert_mask:
46
+ current_mask = ImageOps.invert(current_mask)
47
+
48
+ img = Image.composite(img, current_frame, current_mask)
49
+
50
+ if is_bgr_array:
51
+ img = np.array(img)
52
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
53
+
54
+ del(current_mask, current_frame)
55
+ gc.collect()
56
+
57
+ return img
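
A hypothetical sketch of calling do_overlay_mask above directly, using the static-mask branch (use_mask_video off) inside a Deforum-enabled environment. With these placeholder paths, white areas of the mask keep the generated frame and black areas are filled from the init image; SimpleNamespace stands in for the usual args/anim_args objects, mirroring how the unit tests later in this diff build their arguments:

from types import SimpleNamespace
from PIL import Image
from deforum_helpers.masks import do_overlay_mask

args = SimpleNamespace(W=512, H=512, use_mask=True, invert_mask=False,
                       mask_image=None, mask_file="mask.png", init_image="init.png")
anim_args = SimpleNamespace(use_mask_video=False)

frame = Image.open("frame.png")  # a generated frame at the render resolution; PIL path, so is_bgr_array stays False
composited = do_overlay_mask(args, anim_args, frame, frame_idx=0, is_bgr_array=False)
composited.save("frame_masked.png")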
scripts/deforum_helpers/noise.py ADDED
@@ -0,0 +1,89 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import torch
19
+ from torch.nn.functional import interpolate
20
+ import numpy as np
21
+ from PIL import ImageOps
22
+ import math
23
+ from .animation import sample_to_cv2
24
+ import cv2
25
+ from modules.shared import opts
26
+
27
+ DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False)
28
+
29
+ deforum_noise_gen = torch.Generator(device='cpu')
30
+
31
+ # 2D Perlin noise in PyTorch https://gist.github.com/vadimkantorov/ac1b097753f217c5c11bc2ff396e0a57
32
+ def rand_perlin_2d(shape, res, fade = lambda t: 6*t**5 - 15*t**4 + 10*t**3):
33
+ delta = (res[0] / shape[0], res[1] / shape[1])
34
+ d = (shape[0] // res[0], shape[1] // res[1])
35
+
36
+ grid = torch.stack(torch.meshgrid(torch.arange(0, res[0], delta[0]), torch.arange(0, res[1], delta[1]), indexing='ij'), dim = -1) % 1
37
+ angles = 2*math.pi*torch.rand(res[0]+1, res[1]+1, generator=deforum_noise_gen)
38
+ gradients = torch.stack((torch.cos(angles), torch.sin(angles)), dim = -1)
39
+
40
+ tile_grads = lambda slice1, slice2: gradients[slice1[0]:slice1[1], slice2[0]:slice2[1]].repeat_interleave(d[0], 0).repeat_interleave(d[1], 1)
41
+ dot = lambda grad, shift: (torch.stack((grid[:shape[0],:shape[1],0] + shift[0], grid[:shape[0],:shape[1], 1] + shift[1] ), dim = -1) * grad[:shape[0], :shape[1]]).sum(dim = -1)
42
+
43
+ n00 = dot(tile_grads([0, -1], [0, -1]), [0, 0])
44
+ n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
45
+ n01 = dot(tile_grads([0, -1],[1, None]), [0, -1])
46
+ n11 = dot(tile_grads([1, None], [1, None]), [-1,-1])
47
+ t = fade(grid[:shape[0], :shape[1]])
48
+ return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]), torch.lerp(n01, n11, t[..., 0]), t[..., 1])
49
+
50
+ def rand_perlin_2d_octaves(shape, res, octaves=1, persistence=0.5):
51
+ noise = torch.zeros(shape)
52
+ frequency = 1
53
+ amplitude = 1
54
+ for _ in range(int(octaves)):
55
+ noise += amplitude * rand_perlin_2d(shape, (frequency*res[0], frequency*res[1]))
56
+ frequency *= 2
57
+ amplitude *= persistence
58
+ return noise
59
+
60
+ def condition_noise_mask(noise_mask, invert_mask = False):
61
+ if invert_mask:
62
+ noise_mask = ImageOps.invert(noise_mask)
63
+ noise_mask = np.array(noise_mask.convert("L"))
64
+ noise_mask = noise_mask.astype(np.float32) / 255.0
65
+ noise_mask = np.around(noise_mask, decimals=0)
66
+ noise_mask = torch.from_numpy(noise_mask)
67
+ #noise_mask = torch.round(noise_mask)
68
+ return noise_mask
69
+
70
+ def add_noise(sample, noise_amt: float, seed: int, noise_type: str, noise_args, noise_mask = None, invert_mask = False):
71
+ deforum_noise_gen.manual_seed(seed) # Reproducibility
72
+ perlin_w = sample.shape[0]
73
+ perlin_h = sample.shape[1]
74
+ perlin_w, perlin_h = map(lambda x: x - x % 64, (perlin_w, perlin_h)) # rescale perlin to multiples of 64
75
+ sample2dshape = (perlin_w, perlin_h)
76
+ noise = torch.randn((sample.shape[2], perlin_w, perlin_h), generator=deforum_noise_gen) # White noise
77
+ if noise_type == 'perlin':
78
+ # rand_perlin_2d_octaves is between -1 and 1, so we need to shift it to be between 0 and 1
79
+ # print(sample.shape)
80
+ noise = noise * ((rand_perlin_2d_octaves(sample2dshape, (int(noise_args[0]), int(noise_args[1])), octaves=noise_args[2], persistence=noise_args[3]) + torch.ones(sample2dshape)) / 2)
81
+ noise = interpolate(noise.unsqueeze(1), size=(sample.shape[0], sample.shape[1])).squeeze(1) # rescale perlin back to the target resolution
82
+ if noise_mask is not None:
83
+ noise_mask = condition_noise_mask(noise_mask, invert_mask)
84
+ noise_to_add = sample_to_cv2(noise * noise_mask)
85
+ else:
86
+ noise_to_add = sample_to_cv2(noise)
87
+ sample = cv2.addWeighted(sample, 1-noise_amt, noise_to_add, noise_amt, 0)
88
+
89
+ return sample
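
A hedged sketch of add_noise above. Because noise.py imports modules.shared, it can only be exercised inside a running WebUI process (for example from another extension or a console attached to it); the frame is a synthetic uint8 BGR array, and the perlin noise_args follow the (res_x, res_y, octaves, persistence) order consumed by the function:

import numpy as np
from deforum_helpers.noise import add_noise  # resolvable once the extension's scripts path is active

frame = np.full((512, 512, 3), 127, dtype=np.uint8)  # both dimensions are multiples of 64, so no perlin cropping

noisy = add_noise(frame, noise_amt=0.08, seed=42, noise_type='perlin', noise_args=(4, 4, 2, 0.5))
print(noisy.shape, noisy.dtype)  # (512, 512, 3) uint8: 8% noise blended in, reproducible via the seed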
scripts/deforum_helpers/parseq_adapter.py ADDED
@@ -0,0 +1,210 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import copy
19
+ import json
20
+ import logging
21
+ import operator
22
+ from operator import itemgetter
23
+ import numpy as np
24
+ import pandas as pd
25
+ import requests
26
+ from .animation_key_frames import DeformAnimKeys
27
+ from .rich import console
28
+
29
+ logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
30
+
31
+ class ParseqAnimKeys():
32
+ def __init__(self, parseq_args, anim_args, video_args, mute=False):
33
+
34
+ # Resolve manifest either directly from supplied value
35
+ # or via supplied URL
36
+ manifestOrUrl = parseq_args.parseq_manifest.strip()
37
+ if (manifestOrUrl.startswith('http')):
38
+ logging.info(f"Loading Parseq manifest from URL: {manifestOrUrl}")
39
+ try:
40
+ body = requests.get(manifestOrUrl).text
41
+ logging.debug(f"Loaded remote manifest: {body}")
42
+ self.parseq_json = json.loads(body)
43
+
44
+ # Add the parseq manifest without the detailed frame data to parseq_args.
45
+ # This ensures it will be saved in the settings file, so that you can always
46
+ # see exactly what parseq prompts and keyframes were used, even if what the URL
47
+ # points to changes.
48
+ parseq_args.fetched_parseq_manifest_summary = copy.deepcopy(self.parseq_json)
49
+ if parseq_args.fetched_parseq_manifest_summary['rendered_frames']:
50
+ del parseq_args.fetched_parseq_manifest_summary['rendered_frames']
51
+ if parseq_args.fetched_parseq_manifest_summary['rendered_frames_meta']:
52
+ del parseq_args.fetched_parseq_manifest_summary['rendered_frames_meta']
53
+
54
+ except Exception as e:
55
+ logging.error(f"Unable to load Parseq manifest from URL: {manifestOrUrl}")
56
+ raise e
57
+ else:
58
+ self.parseq_json = json.loads(manifestOrUrl)
59
+
60
+ self.default_anim_keys = DeformAnimKeys(anim_args)
61
+ self.rendered_frames = self.parseq_json['rendered_frames']
62
+ self.max_frame = self.get_max('frame')
63
+ self.required_frames = anim_args.max_frames
64
+ # TODO these values are currently only used to emit a subtle warning. User must ensure the output FPS set in parseq
65
+ # matches the one set in Deforum to avoid unexpected results.
66
+ # In the future we may wish to override video_args.fps value with the one from parseq.
67
+ self.required_fps = video_args.fps
68
+ self.config_output_fps = self.parseq_json['options']['output_fps']
69
+
70
+ if not mute:
71
+ self.print_parseq_table()
72
+
73
+ count_defined_frames = len(self.rendered_frames)
74
+ expected_defined_frames = self.max_frame+1 # frames are 0-indexed
75
+ if (expected_defined_frames != count_defined_frames):
76
+ logging.warning(f"There may be duplicated or missing frame data in the Parseq input: expected {expected_defined_frames} frames including frame 0 because the highest frame number is {self.max_frame}, but there are {count_defined_frames} frames defined.")
77
+
78
+ # Parseq treats input values as absolute values. So if you want to
79
+ # progressively rotate 180 degrees over 4 frames, you specify: 45, 90, 135, 180.
80
+ # However, many animation parameters are relative to the previous frame if there is enough
81
+ # loopback strength. So if you want to rotate 180 degrees over 4 frames, the animation engine expects:
82
+ # 45, 45, 45, 45. Therefore, for such parameters, we use the fact that Parseq supplies delta values.
83
+ optional_delta = '_delta' if parseq_args.parseq_use_deltas else ''
84
+ self.angle_series = self.parseq_to_anim_series('angle' + optional_delta)
85
+ self.zoom_series = self.parseq_to_anim_series('zoom' + optional_delta)
86
+ self.translation_x_series = self.parseq_to_anim_series('translation_x' + optional_delta)
87
+ self.translation_y_series = self.parseq_to_anim_series('translation_y' + optional_delta)
88
+ self.translation_z_series = self.parseq_to_anim_series('translation_z' + optional_delta)
89
+ self.rotation_3d_x_series = self.parseq_to_anim_series('rotation_3d_x' + optional_delta)
90
+ self.rotation_3d_y_series = self.parseq_to_anim_series('rotation_3d_y' + optional_delta)
91
+ self.rotation_3d_z_series = self.parseq_to_anim_series('rotation_3d_z' + optional_delta)
92
+ self.perspective_flip_theta_series = self.parseq_to_anim_series('perspective_flip_theta' + optional_delta)
93
+ self.perspective_flip_phi_series = self.parseq_to_anim_series('perspective_flip_phi' + optional_delta)
94
+ self.perspective_flip_gamma_series = self.parseq_to_anim_series('perspective_flip_gamma' + optional_delta)
95
+
96
+ # Non-motion animation args
97
+ self.perspective_flip_fv_series = self.parseq_to_anim_series('perspective_flip_fv')
98
+ self.noise_schedule_series = self.parseq_to_anim_series('noise')
99
+ self.strength_schedule_series = self.parseq_to_anim_series('strength')
100
+ self.sampler_schedule_series = self.parseq_to_anim_series('sampler_schedule')
101
+ self.contrast_schedule_series = self.parseq_to_anim_series('contrast')
102
+ self.cfg_scale_schedule_series = self.parseq_to_anim_series('scale')
103
+ self.steps_schedule_series = self.parseq_to_anim_series("steps_schedule")
104
+ self.seed_schedule_series = self.parseq_to_anim_series('seed')
105
+ self.fov_series = self.parseq_to_anim_series('fov')
106
+ self.near_series = self.parseq_to_anim_series('near')
107
+ self.far_series = self.parseq_to_anim_series('far')
108
+ self.prompts = self.parseq_to_anim_series('deforum_prompt') # formatted as "{positive} --neg {negative}"
109
+ self.subseed_schedule_series = self.parseq_to_anim_series('subseed')
110
+ self.subseed_strength_schedule_series = self.parseq_to_anim_series('subseed_strength')
111
+ self.kernel_schedule_series = self.parseq_to_anim_series('antiblur_kernel')
112
+ self.sigma_schedule_series = self.parseq_to_anim_series('antiblur_sigma')
113
+ self.amount_schedule_series = self.parseq_to_anim_series('antiblur_amount')
114
+ self.threshold_schedule_series = self.parseq_to_anim_series('antiblur_threshold')
115
+
116
+ def print_parseq_table(self):
117
+ from rich.table import Table
118
+ from rich import box
119
+ table = Table(padding=0, box=box.ROUNDED, show_lines=True)
120
+ table.add_column("", style="white bold")
121
+ table.add_column("Parseq", style="cyan")
122
+ table.add_column("Deforum", style="green")
123
+
124
+ table.add_row("Fields", '\n'.join(self.managed_fields()), '\n'.join(self.unmanaged_fields()))
125
+ table.add_row("Prompts", "✅" if self.manages_prompts() else "❌", "✅" if not self.manages_prompts() else "❌")
126
+ table.add_row("Frames", str(len(self.rendered_frames)), str(self.required_frames) + (" ⚠️" if self.required_frames != len(self.rendered_frames) else ""))
127
+ table.add_row("FPS", str(self.config_output_fps), str(self.required_fps) + (" ⚠️" if self.required_fps != self.config_output_fps else ""))
128
+
129
+ console.print("\nUse this table to validate your Parseq & Deforum setup:")
130
+ console.print(table)
131
+
132
+ def manages_prompts(self):
133
+ return 'deforum_prompt' in self.rendered_frames[0].keys()
134
+
135
+ def managed_fields(self):
136
+ return [field for field in self.rendered_frames[0].keys()
137
+ if (field not in ['frame', 'deforum_prompt']
138
+ and not field.endswith('_delta')
139
+ and not field.endswith('_pc'))]
140
+
141
+ def unmanaged_fields(self):
142
+ managed_fields = self.managed_fields()
143
+ all_fields = [self.strip_suffixes(property) for property, _ in vars(self.default_anim_keys).items() if property not in ['fi'] and not property.startswith('_')]
144
+ return [field for field in all_fields if field not in managed_fields]
145
+
146
+
147
+ def get_max(self, seriesName):
148
+ return max(self.rendered_frames, key=itemgetter(seriesName))[seriesName]
149
+
150
+ def parseq_to_anim_series(self, seriesName):
151
+
152
+ # Check if the value is present in the first frame of the JSON data. If not, assume it's undefined.
153
+ # The Parseq contract is that the first frame (at least) must define values for all fields.
154
+ try:
155
+ if self.rendered_frames[0][seriesName] is not None:
156
+ logging.debug(f"Found {seriesName} in first frame of Parseq data. Assuming it's defined.")
157
+ except KeyError:
158
+ return None
159
+
160
+ key_frame_series = pd.Series([np.nan for a in range(self.required_frames)])
161
+
162
+ for frame in self.rendered_frames:
163
+ frame_idx = frame['frame']
164
+ if frame_idx < self.required_frames:
165
+ if not np.isnan(key_frame_series[frame_idx]):
166
+ logging.warning(f"Duplicate frame definition {frame_idx} detected for data {seriesName}. Latest wins.")
167
+ key_frame_series[frame_idx] = frame[seriesName]
168
+
169
+ # If the animation will have more frames than Parseq defines,
170
+ # duplicate final value to match the required frame count.
171
+ while (frame_idx < self.required_frames):
172
+ key_frame_series[frame_idx] = operator.itemgetter(-1)(self.rendered_frames)[seriesName]
173
+ frame_idx += 1
174
+
175
+ return key_frame_series
176
+
177
+ # fallback to anim_args if the series is not defined in the Parseq data
178
+ def __getattribute__(inst, name):
179
+ try:
180
+ definedField = super(ParseqAnimKeys, inst).__getattribute__(name)
181
+ except AttributeError:
182
+ # No field with this name has been explicitly extracted from the JSON data.
183
+ # It must be a new parameter. Let's see if it's in the raw JSON.
184
+
185
+ parseqName = inst.strip_suffixes(name)
186
+
187
+ # returns None if not defined in Parseq JSON data
188
+ definedField = inst.parseq_to_anim_series(parseqName)
189
+ if (definedField is not None):
190
+ # add the field to the instance so we don't compute it again.
191
+ setattr(inst, name, definedField)
192
+
193
+ if (definedField is not None):
194
+ return definedField
195
+ else:
196
+ logging.debug(f"Data for {name} not defined in Parseq data. Falling back to standard Deforum values.")
197
+ return getattr(inst.default_anim_keys, name)
198
+
199
+
200
+ # parseq doesn't use _series, _schedule or _schedule_series suffixes in the
201
+ # JSON data - remove them.
202
+ def strip_suffixes(self, name):
203
+ strippableSuffixes = ['_series', '_schedule']
204
+ parseqName = name
205
+ while any(parseqName.endswith(suffix) for suffix in strippableSuffixes):
206
+ for suffix in strippableSuffixes:
207
+ if parseqName.endswith(suffix):
208
+ parseqName = parseqName[:-len(suffix)]
209
+ return parseqName
210
+
scripts/deforum_helpers/parseq_adapter_test.py ADDED
@@ -0,0 +1,157 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ ##
19
+ # From /scripts directory, run like: python -m unittest deforum_helpers.parseq_adapter_test
20
+ ##
21
+
22
+ import unittest
23
+ from .parseq_adapter import ParseqAnimKeys
24
+ from .animation_key_frames import DeformAnimKeys
25
+ from unittest.mock import patch
26
+ from unittest.mock import MagicMock, PropertyMock
27
+
28
+ from types import SimpleNamespace
29
+
30
+ class TestParseqAnimKeys(unittest.TestCase):
31
+
32
+ @patch('deforum_helpers.parseq_adapter.DeformAnimKeys')
33
+ def test_withprompt(self, mock_deformanimkeys):
34
+ parseq_args = SimpleNamespace(parseq_use_deltas=True, parseq_manifest="""
35
+ {
36
+ "options": {
37
+ "output_fps": 30
38
+ },
39
+ "rendered_frames": [
40
+ {
41
+ "frame": 0,
42
+ "deforum_prompt": "blah"
43
+ },
44
+ {
45
+ "frame": 1,
46
+ "deforum_prompt": "blah"
47
+ }
48
+ ]
49
+ }
50
+ """)
51
+ anim_args = SimpleNamespace(max_frames=2)
52
+ video_args = SimpleNamespace(fps=30)
53
+ parseq_anim_keys = ParseqAnimKeys(parseq_args, anim_args, video_args)
54
+ self.assertTrue(parseq_anim_keys.manages_prompts())
55
+
56
+
57
+ @patch('deforum_helpers.parseq_adapter.DeformAnimKeys')
58
+ def test_withoutprompt(self, mock_deformanimkeys):
59
+ parseq_args = SimpleNamespace(parseq_use_deltas=True, parseq_manifest="""
60
+ {
61
+ "options": {
62
+ "output_fps": 30
63
+ },
64
+ "rendered_frames": [
65
+ {
66
+ "frame": 0
67
+ },
68
+ {
69
+ "frame": 1
70
+ }
71
+ ]
72
+ }
73
+ """)
74
+ anim_args = SimpleNamespace(max_frames=2)
75
+ video_args = SimpleNamespace(fps=30)
76
+ parseq_anim_keys = ParseqAnimKeys(parseq_args, anim_args, video_args)
77
+ self.assertFalse(parseq_anim_keys.manages_prompts())
78
+
79
+ @patch('deforum_helpers.parseq_adapter.DeformAnimKeys')
80
+ def test_usedelta(self, mock_deformanimkeys):
81
+ parseq_args = SimpleNamespace(parseq_use_deltas=True, parseq_manifest="""
82
+ {
83
+ "options": {
84
+ "output_fps": 30
85
+ },
86
+ "rendered_frames": [
87
+ {
88
+ "frame": 0,
89
+ "angle": 90,
90
+ "angle_delta": 90
91
+ },
92
+ {
93
+ "frame": 1,
94
+ "angle": 180,
95
+ "angle_delta": 90
96
+ }
97
+ ]
98
+ }
99
+ """)
100
+ anim_args = SimpleNamespace(max_frames=2)
101
+ video_args = SimpleNamespace(fps=30)
102
+ parseq_anim_keys = ParseqAnimKeys(parseq_args, anim_args, video_args)
103
+ self.assertEqual(parseq_anim_keys.angle_series[1], 90)
104
+
105
+ @patch('deforum_helpers.parseq_adapter.DeformAnimKeys')
106
+ def test_usenondelta(self, mock_deformanimkeys):
107
+ parseq_args = SimpleNamespace(parseq_use_deltas=False, parseq_manifest="""
108
+ {
109
+ "options": {
110
+ "output_fps": 30
111
+ },
112
+ "rendered_frames": [
113
+ {
114
+ "frame": 0,
115
+ "angle": 90,
116
+ "angle_delta": 90
117
+ },
118
+ {
119
+ "frame": 1,
120
+ "angle": 180,
121
+ "angle_delta": 90
122
+ }
123
+ ]
124
+ }
125
+ """)
126
+ anim_args = SimpleNamespace(max_frames=2)
127
+ video_args = SimpleNamespace(fps=30)
128
+ parseq_anim_keys = ParseqAnimKeys(parseq_args, anim_args, video_args)
129
+ self.assertEqual(parseq_anim_keys.angle_series[1], 180)
130
+
131
+ @patch('deforum_helpers.parseq_adapter.DeformAnimKeys')
132
+ def test_fallbackonundefined(self, mock_deformanimkeys):
133
+ parseq_args = SimpleNamespace(parseq_use_deltas=False, parseq_manifest="""
134
+ {
135
+ "options": {
136
+ "output_fps": 30
137
+ },
138
+ "rendered_frames": [
139
+ {
140
+ "frame": 0
141
+ },
142
+ {
143
+ "frame": 1
144
+ }
145
+ ]
146
+ }
147
+ """)
148
+
149
+ anim_args = SimpleNamespace(max_frames=1)
150
+ video_args = SimpleNamespace(fps=20)
151
+ parseq_anim_keys = ParseqAnimKeys(parseq_args, anim_args, video_args)
152
+ #TODO - this is a hacky check to make sure we're falling back to the mock.
153
+ #There must be a better way to inject an expected value via patch and check for that...
154
+ self.assertRegex(str(parseq_anim_keys.angle_series[0]), r'MagicMock')
155
+
156
+ if __name__ == '__main__':
157
+ unittest.main()
scripts/deforum_helpers/prompt.py ADDED
@@ -0,0 +1,161 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import re
19
+ import numexpr
20
+
21
+ def check_is_number(value):
22
+ float_pattern = r'^(?=.)([+-]?([0-9]*)(\.([0-9]+))?)$'
23
+ return re.match(float_pattern, value)
24
+
25
+ def parse_weight(match, frame = 0, max_frames = 0)->float:
26
+ w_raw = match.group("weight")
27
+ max_f = max_frames # this line must stay: numexpr references max_f even though it looks unused
28
+ if w_raw is None:
29
+ return 1
30
+ if check_is_number(w_raw):
31
+ return float(w_raw)
32
+ else:
33
+ t = frame
34
+ if len(w_raw) < 3:
35
+ print('the value inside the backticks is too short to be a math expression')
36
+ return 1
37
+ return float(numexpr.evaluate(w_raw[1:-1]))
38
+
39
+ def split_weighted_subprompts(text, frame = 0, max_frames = 0):
40
+ """
41
+ splits the prompt based on deforum webui implementation, moved from generate.py
42
+ """
43
+ math_parser = re.compile("""
44
+ (?P<weight>(
45
+ `[\S\s]*?`# a math function wrapped in `-characters
46
+ ))
47
+ """, re.VERBOSE)
48
+
49
+ parsed_prompt = re.sub(math_parser, lambda m: str(parse_weight(m, frame)), text)
50
+
51
+ negative_prompts = []
52
+ positive_prompts = []
53
+
54
+ prompt_split = parsed_prompt.split("--neg")
55
+ if len(prompt_split) > 1:
56
+ positive_prompts, negative_prompts = parsed_prompt.split("--neg") #TODO: add --neg to vanilla Deforum for compat
57
+ else:
58
+ positive_prompts = prompt_split[0]
59
+ negative_prompts = ""
60
+
61
+ return positive_prompts, negative_prompts
62
+
63
+ def interpolate_prompts(animation_prompts, max_frames):
64
+ import numpy as np
65
+ import pandas as pd
66
+ # Get prompts sorted by keyframe
67
+ max_f = max_frames
68
+ parsed_animation_prompts = {}
69
+ for key, value in animation_prompts.items():
70
+ if check_is_number(key):# default case 0:(1 + t %5), 30:(5-t%2)
71
+ parsed_animation_prompts[key] = value
72
+ else:# math on the left hand side case 0:(1 + t %5), maxKeyframes/2:(5-t%2)
73
+ parsed_animation_prompts[int(numexpr.evaluate(key))] = value
74
+
75
+ sorted_prompts = sorted(parsed_animation_prompts.items(), key=lambda item: int(item[0]))
76
+
77
+ # Setup container for interpolated prompts
78
+ prompt_series = pd.Series([np.nan for a in range(max_frames)])
79
+
80
+ # For every keyframe prompt except the last
81
+ for i in range(0,len(sorted_prompts)-1):
82
+ # Get current and next keyframe
83
+ current_frame = int(sorted_prompts[i][0])
84
+ next_frame = int(sorted_prompts[i+1][0])
85
+
86
+ # Ensure there's no weird ordering issues or duplication in the animation prompts
87
+ # (unlikely because we sort above, and the json parser will strip dupes)
88
+ if current_frame>=next_frame:
89
+ print(f"WARNING: Sequential prompt keyframes {i}:{current_frame} and {i+1}:{next_frame} are not monotonously increasing; skipping interpolation.")
90
+ continue
91
+
92
+ # Get current and next keyframes' positive and negative prompts (if any)
93
+ current_prompt = sorted_prompts[i][1]
94
+ next_prompt = sorted_prompts[i+1][1]
95
+ current_positive, current_negative, *_ = current_prompt.split("--neg") + [""]  # default the negative to an empty string so the len() checks below never see None
96
+ next_positive, next_negative, *_ = next_prompt.split("--neg") + [""]
97
+ # Calculate how much to shift the weight from current to next prompt at each frame
98
+ weight_step = 1/(next_frame-current_frame)
99
+
100
+ # Apply weighted prompt interpolation for each frame between current and next keyframe
101
+ # using the syntax: prompt1 :weight1 AND prompt1 :weight2 --neg nprompt1 :weight1 AND nprompt1 :weight2
102
+ # (See: https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#composable-diffusion )
103
+ for f in range(current_frame,next_frame):
104
+ next_weight = weight_step * (f-current_frame)
105
+ current_weight = 1 - next_weight
106
+
107
+ # We will build the prompt incrementally depending on which prompts are present
108
+ prompt_series[f] = ''
109
+
110
+ # Cater for the case where neither, either or both current & next have positive prompts:
111
+ if current_positive:
112
+ prompt_series[f] += f" ({current_positive}):{current_weight}"
113
+ if current_positive and next_positive:
114
+ prompt_series[f] += f" AND "
115
+ if next_positive:
116
+ prompt_series[f] += f" ({next_positive}):{next_weight}"
117
+
118
+ # Cater for the case where neither, either or both current & next have negative prompts:
119
+ if len(current_negative) > 1 or len(next_negative) > 1:
120
+ prompt_series[f] += " --neg "
121
+ if len(current_negative) > 1:
122
+ prompt_series[f] += f" ({current_negative}):{current_weight}"
123
+ if len(current_negative) > 1 and len(next_negative) > 1:
124
+ prompt_series[f] += f" AND "
125
+ if len(next_negative) > 1:
126
+ prompt_series[f] += f" ({next_negative}):{next_weight}"
127
+
128
+ # Set explicitly declared keyframe prompts (overwriting interpolated values at the keyframe idx). This ensures:
129
+ # - That final prompt is set, and
130
+ # - Gives us a chance to emit warnings if any keyframe prompts are already using composable diffusion
131
+ for i, prompt in parsed_animation_prompts.items():
132
+ prompt_series[int(i)] = prompt
133
+ if ' AND ' in prompt:
134
+ print(f"WARNING: keyframe {i}'s prompt is using composable diffusion (aka the 'AND' keyword). This will cause unexpected behaviour with interpolation.")
135
+
136
+ # Return the filled series, in case max_frames is greater than the last keyframe or any ranges were skipped.
137
+ return prompt_series.ffill().bfill()
138
+
139
+ def prepare_prompt(prompt_series, max_frames, seed, frame_idx):
140
+ max_f = max_frames - 1
141
+ pattern = r'`.*?`'
142
+ regex = re.compile(pattern)
143
+ prompt_parsed = prompt_series
144
+ for match in regex.finditer(prompt_parsed):
145
+ matched_string = match.group(0)
146
+ parsed_string = matched_string.replace('t', f'{frame_idx}').replace("max_f" , f"{max_f}").replace('`','')
147
+ parsed_value = numexpr.evaluate(parsed_string)
148
+ prompt_parsed = prompt_parsed.replace(matched_string, str(parsed_value))
149
+
150
+ prompt_to_print, *after_neg = prompt_parsed.strip().split("--neg")
151
+ prompt_to_print = prompt_to_print.strip()
152
+ after_neg = "".join(after_neg).strip()
153
+
154
+ print(f"\033[32mSeed: \033[0m{seed}")
155
+ print(f"\033[35mPrompt: \033[0m{prompt_to_print}")
156
+ if after_neg and after_neg.strip():
157
+ print(f"\033[91mNeg Prompt: \033[0m{after_neg}")
158
+ prompt_to_print += f"--neg {after_neg}"
159
+
160
+ # set value back into the prompt
161
+ return prompt_to_print
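
prompt.py only depends on re, numexpr, numpy and pandas, so interpolate_prompts above can be tried standalone. A short sketch, run from the extension's scripts directory (the same convention as the unit test earlier in this diff): two keyframes are blended over five frames, and the in-between frames use the webui composable-diffusion 'AND' syntax described in the comments:

from deforum_helpers.prompt import interpolate_prompts

animation_prompts = {
    "0": "a forest at dawn --neg blurry",
    "4": "a forest at night --neg blurry",
}
series = interpolate_prompts(animation_prompts, max_frames=5)
for frame_idx, prompt in series.items():
    print(frame_idx, prompt)
# frames 0 and 4 keep their keyframe prompts verbatim; frames 1-3 mix the two positives
# (and the two negatives) with current/next weights of 0.75/0.25, 0.5/0.5 and 0.25/0.75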
scripts/deforum_helpers/render.py ADDED
@@ -0,0 +1,627 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ import pandas as pd
20
+ import cv2
21
+ import numpy as np
22
+ import numexpr
23
+ import gc
24
+ import random
25
+ import PIL
26
+ import time
27
+ from PIL import Image, ImageOps
28
+ from .generate import generate, isJson
29
+ from .noise import add_noise
30
+ from .animation import anim_frame_warp
31
+ from .animation_key_frames import DeformAnimKeys, LooperAnimKeys
32
+ from .video_audio_utilities import get_frame_name, get_next_frame
33
+ from .depth import DepthModel
34
+ from .colors import maintain_colors
35
+ from .parseq_adapter import ParseqAnimKeys
36
+ from .seed import next_seed
37
+ from .image_sharpening import unsharp_mask
38
+ from .load_images import get_mask, load_img, load_image, get_mask_from_file
39
+ from .hybrid_video import (
40
+ hybrid_generation, hybrid_composite, get_matrix_for_hybrid_motion, get_matrix_for_hybrid_motion_prev, get_flow_for_hybrid_motion,get_flow_for_hybrid_motion_prev, image_transform_ransac, image_transform_optical_flow, get_flow_from_images, abs_flow_to_rel_flow, rel_flow_to_abs_flow)
41
+ from .save_images import save_image
42
+ from .composable_masks import compose_mask_with_check
43
+ from .settings import save_settings_from_animation_run
44
+ from .deforum_controlnet import unpack_controlnet_vids, is_controlnet_enabled
45
+ from .subtitle_handler import init_srt_file, write_frame_subtitle, format_animation_params
46
+ from .resume import get_resume_vars
47
+ from .masks import do_overlay_mask
48
+ from .prompt import prepare_prompt
49
+ from modules.shared import opts, cmd_opts, state, sd_model
50
+ from modules import lowvram, devices, sd_hijack
51
+ from .RAFT import RAFT
52
+
53
+ def render_animation(args, anim_args, video_args, parseq_args, loop_args, controlnet_args, root):
54
+
55
+ if opts.data.get("deforum_save_gen_info_as_srt", False): # create .srt file and set timeframe mechanism using FPS
56
+ srt_filename = os.path.join(args.outdir, f"{args.timestring}.srt")
57
+ srt_frame_duration = init_srt_file(srt_filename, video_args.fps)
58
+
59
+ if anim_args.animation_mode in ['2D','3D']:
60
+ # handle hybrid video generation
61
+ if anim_args.hybrid_composite != 'None' or anim_args.hybrid_motion in ['Affine', 'Perspective', 'Optical Flow']:
62
+ args, anim_args, inputfiles = hybrid_generation(args, anim_args, root)
63
+ # path required by hybrid functions, even if hybrid_comp_save_extra_frames is False
64
+ hybrid_frame_path = os.path.join(args.outdir, 'hybridframes')
65
+ # initialize prev_flow
66
+ if anim_args.hybrid_motion == 'Optical Flow':
67
+ prev_flow = None
68
+
69
+ if loop_args.use_looper:
70
+ print("Using Guided Images mode: seed_behavior will be set to 'schedule' and 'strength_0_no_init' to False")
71
+ if args.strength == 0:
72
+ raise RuntimeError("Strength needs to be greater than 0 in Init tab")
73
+ args.strength_0_no_init = False
74
+ args.seed_behavior = "schedule"
75
+ if not isJson(loop_args.init_images):
76
+ raise RuntimeError("The images set for use with keyframe-guidance are not in a proper JSON format")
77
+
78
+ # handle controlnet video input frames generation
79
+ if is_controlnet_enabled(controlnet_args):
80
+ unpack_controlnet_vids(args, anim_args, controlnet_args)
81
+
82
+ # use parseq if manifest is provided
83
+ use_parseq = parseq_args.parseq_manifest != None and parseq_args.parseq_manifest.strip()
84
+ # expand key frame strings to values
85
+ keys = DeformAnimKeys(anim_args, args.seed) if not use_parseq else ParseqAnimKeys(parseq_args, anim_args, video_args)
86
+ loopSchedulesAndData = LooperAnimKeys(loop_args, anim_args, args.seed)
87
+
88
+ # create output folder for the batch
89
+ os.makedirs(args.outdir, exist_ok=True)
90
+ print(f"Saving animation frames to:\n{args.outdir}")
91
+
92
+ # save settings.txt file for the current run
93
+ save_settings_from_animation_run(args, anim_args, parseq_args, loop_args, controlnet_args, video_args, root)
94
+
95
+ # resume from timestring
96
+ if anim_args.resume_from_timestring:
97
+ args.timestring = anim_args.resume_timestring
98
+
99
+ # Always enable pseudo-3d with parseq. No need for an extra toggle:
100
+ # Whether it's used or not in practice is defined by the schedules
101
+ if use_parseq:
102
+ anim_args.flip_2d_perspective = True
103
+
104
+ # expand prompts out to per-frame
105
+ if use_parseq and keys.manages_prompts():
106
+ prompt_series = keys.prompts
107
+ else:
108
+ prompt_series = pd.Series([np.nan for a in range(anim_args.max_frames)])
109
+ for i, prompt in root.animation_prompts.items():
110
+ if str(i).isdigit():
111
+ prompt_series[int(i)] = prompt
112
+ else:
113
+ prompt_series[int(numexpr.evaluate(i))] = prompt
114
+ prompt_series = prompt_series.ffill().bfill()
115
+
116
+ # check for video inits
117
+ using_vid_init = anim_args.animation_mode == 'Video Input'
118
+
119
+ # load depth model for 3D
120
+ predict_depths = (anim_args.animation_mode == '3D' and anim_args.use_depth_warping) or anim_args.save_depth_maps
121
+ predict_depths = predict_depths or (anim_args.hybrid_composite and anim_args.hybrid_comp_mask_type in ['Depth','Video Depth'])
122
+ if predict_depths:
123
+ keep_in_vram = opts.data.get("deforum_keep_3d_models_in_vram")
124
+
125
+ device = ('cpu' if cmd_opts.lowvram or cmd_opts.medvram else root.device)
126
+ depth_model = DepthModel(root.models_path, device, root.half_precision, keep_in_vram=keep_in_vram, depth_algorithm=anim_args.depth_algorithm, Width=args.W, Height=args.H, midas_weight=anim_args.midas_weight)
127
+
128
+ # depth-based hybrid composite mask requires saved depth maps
129
+ if anim_args.hybrid_composite != 'None' and anim_args.hybrid_comp_mask_type =='Depth':
130
+ anim_args.save_depth_maps = True
131
+ else:
132
+ depth_model = None
133
+ anim_args.save_depth_maps = False
134
+
135
+ raft_model = None
136
+ load_raft = (anim_args.optical_flow_cadence == "RAFT" and int(anim_args.diffusion_cadence) > 1) or \
137
+ (anim_args.hybrid_motion == "Optical Flow" and anim_args.hybrid_flow_method == "RAFT") or \
138
+ (anim_args.optical_flow_redo_generation == "RAFT")
139
+ if load_raft:
140
+ print("Loading RAFT model...")
141
+ raft_model = RAFT()
142
+
143
+ # state for interpolating between diffusion steps
144
+ turbo_steps = 1 if using_vid_init else int(anim_args.diffusion_cadence)
145
+ turbo_prev_image, turbo_prev_frame_idx = None, 0
146
+ turbo_next_image, turbo_next_frame_idx = None, 0
147
+
148
+ # initialize vars
149
+ prev_img = None
150
+ color_match_sample = None
151
+ start_frame = 0
152
+
153
+ # resume animation (requires at least two frames - see function)
154
+ if anim_args.resume_from_timestring:
155
+ # determine last frame and frame to start on
156
+ prev_frame, next_frame, prev_img, next_img = get_resume_vars(
157
+ folder=args.outdir,
158
+ timestring=anim_args.resume_timestring,
159
+ cadence=turbo_steps
160
+ )
161
+
162
+ # set up turbo step vars
163
+ if turbo_steps > 1:
164
+ turbo_prev_image, turbo_prev_frame_idx = prev_img, prev_frame
165
+ turbo_next_image, turbo_next_frame_idx = next_img, next_frame
166
+
167
+ # advance start_frame to next frame
168
+ start_frame = next_frame + 1
169
+
170
+ frame_idx = start_frame
171
+
172
+ # reset the mask vals as they are overwritten in the compose_mask algorithm
173
+ mask_vals = {}
174
+ noise_mask_vals = {}
175
+
176
+ mask_vals['everywhere'] = Image.new('1', (args.W, args.H), 1)
177
+ noise_mask_vals['everywhere'] = Image.new('1', (args.W, args.H), 1)
178
+
179
+ mask_image = None
180
+
181
+ if args.use_init and args.init_image != None and args.init_image != '':
182
+ _, mask_image = load_img(args.init_image,
183
+ shape=(args.W, args.H),
184
+ use_alpha_as_mask=args.use_alpha_as_mask)
185
+ mask_vals['video_mask'] = mask_image
186
+ noise_mask_vals['video_mask'] = mask_image
187
+
188
+ # Grab the first frame masks since they won't be provided until the next frame
189
+ # Video mask overrides the init image mask, also, won't be searching for init_mask if use_mask_video is set
190
+ # Made to solve https://github.com/deforum-art/deforum-for-automatic1111-webui/issues/386
191
+ if anim_args.use_mask_video:
192
+
193
+ args.mask_file = get_mask_from_file(get_next_frame(args.outdir, anim_args.video_mask_path, frame_idx, True), args)
194
+ args.noise_mask = get_mask_from_file(get_next_frame(args.outdir, anim_args.video_mask_path, frame_idx, True), args)
195
+
196
+ mask_vals['video_mask'] = get_mask_from_file(get_next_frame(args.outdir, anim_args.video_mask_path, frame_idx, True), args)
197
+ noise_mask_vals['video_mask'] = get_mask_from_file(get_next_frame(args.outdir, anim_args.video_mask_path, frame_idx, True), args)
198
+ elif mask_image is None and args.use_mask:
199
+ mask_vals['video_mask'] = get_mask(args)
200
+ noise_mask_vals['video_mask'] = get_mask(args) # TODO?: add a different default noise mask
201
+
202
+ # get color match for 'Image' color coherence only once, before loop
203
+ if anim_args.color_coherence == 'Image':
204
+ color_match_sample = load_image(anim_args.color_coherence_image_path)
205
+ color_match_sample = color_match_sample.resize((args.W, args.H), PIL.Image.LANCZOS)
206
+ color_match_sample = cv2.cvtColor(np.array(color_match_sample), cv2.COLOR_RGB2BGR)
207
+
208
+ #Webui
209
+ state.job_count = anim_args.max_frames
210
+
211
+ while frame_idx < anim_args.max_frames:
212
+ #Webui
213
+
214
+ state.job = f"frame {frame_idx + 1}/{anim_args.max_frames}"
215
+ state.job_no = frame_idx + 1
216
+
217
+ if state.skipped:
218
+ print("\n** PAUSED **")
219
+ state.skipped = False
220
+ while not state.skipped:
221
+ time.sleep(0.1)
222
+ print("** RESUMING **")
223
+
224
+ print(f"\033[36mAnimation frame: \033[0m{frame_idx}/{anim_args.max_frames} ")
225
+
226
+ noise = keys.noise_schedule_series[frame_idx]
227
+ strength = keys.strength_schedule_series[frame_idx]
228
+ scale = keys.cfg_scale_schedule_series[frame_idx]
229
+ contrast = keys.contrast_schedule_series[frame_idx]
230
+ kernel = int(keys.kernel_schedule_series[frame_idx])
231
+ sigma = keys.sigma_schedule_series[frame_idx]
232
+ amount = keys.amount_schedule_series[frame_idx]
233
+ threshold = keys.threshold_schedule_series[frame_idx]
234
+ cadence_flow_factor = keys.cadence_flow_factor_schedule_series[frame_idx]
235
+ redo_flow_factor = keys.redo_flow_factor_schedule_series[frame_idx]
236
+ hybrid_comp_schedules = {
237
+ "alpha": keys.hybrid_comp_alpha_schedule_series[frame_idx],
238
+ "mask_blend_alpha": keys.hybrid_comp_mask_blend_alpha_schedule_series[frame_idx],
239
+ "mask_contrast": keys.hybrid_comp_mask_contrast_schedule_series[frame_idx],
240
+ "mask_auto_contrast_cutoff_low": int(keys.hybrid_comp_mask_auto_contrast_cutoff_low_schedule_series[frame_idx]),
241
+ "mask_auto_contrast_cutoff_high": int(keys.hybrid_comp_mask_auto_contrast_cutoff_high_schedule_series[frame_idx]),
242
+ "flow_factor": keys.hybrid_flow_factor_schedule_series[frame_idx]
243
+ }
244
+ scheduled_sampler_name = None
245
+ scheduled_clipskip = None
246
+ scheduled_noise_multiplier = None
247
+ scheduled_ddim_eta = None
248
+ scheduled_ancestral_eta = None
249
+
250
+ mask_seq = None
251
+ noise_mask_seq = None
252
+ if anim_args.enable_steps_scheduling and keys.steps_schedule_series[frame_idx] is not None:
253
+ args.steps = int(keys.steps_schedule_series[frame_idx])
254
+ if anim_args.enable_sampler_scheduling and keys.sampler_schedule_series[frame_idx] is not None:
255
+ scheduled_sampler_name = keys.sampler_schedule_series[frame_idx].casefold()
256
+ if anim_args.enable_clipskip_scheduling and keys.clipskip_schedule_series[frame_idx] is not None:
257
+ scheduled_clipskip = int(keys.clipskip_schedule_series[frame_idx])
258
+ if anim_args.enable_noise_multiplier_scheduling and keys.noise_multiplier_schedule_series[frame_idx] is not None:
259
+ scheduled_noise_multiplier = float(keys.noise_multiplier_schedule_series[frame_idx])
260
+ if anim_args.enable_ddim_eta_scheduling and keys.ddim_eta_schedule_series[frame_idx] is not None:
261
+ scheduled_ddim_eta = float(keys.ddim_eta_schedule_series[frame_idx])
262
+ if anim_args.enable_ancestral_eta_scheduling and keys.ancestral_eta_schedule_series[frame_idx] is not None:
263
+ scheduled_ancestral_eta = float(keys.ancestral_eta_schedule_series[frame_idx])
264
+ if args.use_mask and keys.mask_schedule_series[frame_idx] is not None:
265
+ mask_seq = keys.mask_schedule_series[frame_idx]
266
+ if anim_args.use_noise_mask and keys.noise_mask_schedule_series[frame_idx] is not None:
267
+ noise_mask_seq = keys.noise_mask_schedule_series[frame_idx]
268
+
269
+ if args.use_mask and not anim_args.use_noise_mask:
270
+ noise_mask_seq = mask_seq
271
+
272
+ depth = None
273
+
274
+ if anim_args.animation_mode == '3D' and (cmd_opts.lowvram or cmd_opts.medvram):
275
+ # Unload the main checkpoint and load the depth model
276
+ lowvram.send_everything_to_cpu()
277
+ sd_hijack.model_hijack.undo_hijack(sd_model)
278
+ devices.torch_gc()
279
+ if predict_depths: depth_model.to(root.device)
280
+
281
+ if turbo_steps == 1 and opts.data.get("deforum_save_gen_info_as_srt"):
282
+ params_string = format_animation_params(keys, prompt_series, frame_idx)
283
+ write_frame_subtitle(srt_filename, frame_idx, srt_frame_duration, f"F#: {frame_idx}; Cadence: false; Seed: {args.seed}; {params_string}")
284
+ params_string = None
285
+
286
+ # emit in-between frames
287
+ if turbo_steps > 1:
288
+ tween_frame_start_idx = max(start_frame, frame_idx-turbo_steps)
289
+ cadence_flow = None
290
+ for tween_frame_idx in range(tween_frame_start_idx, frame_idx):
291
+ # update progress during cadence
292
+ state.job = f"frame {tween_frame_idx + 1}/{anim_args.max_frames}"
293
+ state.job_no = tween_frame_idx + 1
294
+ # cadence vars
295
+ tween = float(tween_frame_idx - tween_frame_start_idx + 1) / float(frame_idx - tween_frame_start_idx)
296
+ advance_prev = turbo_prev_image is not None and tween_frame_idx > turbo_prev_frame_idx
297
+ advance_next = tween_frame_idx > turbo_next_frame_idx
298
+
299
+ # optical flow cadence setup before animation warping
300
+ if anim_args.animation_mode in ['2D', '3D'] and anim_args.optical_flow_cadence != 'None':
301
+ if keys.strength_schedule_series[tween_frame_start_idx] > 0:
302
+ if cadence_flow is None and turbo_prev_image is not None and turbo_next_image is not None:
303
+ cadence_flow = get_flow_from_images(turbo_prev_image, turbo_next_image, anim_args.optical_flow_cadence, raft_model) / 2
304
+ turbo_next_image = image_transform_optical_flow(turbo_next_image, -cadence_flow, 1)
305
+
306
+ if opts.data.get("deforum_save_gen_info_as_srt"):
307
+ params_string = format_animation_params(keys, prompt_series, tween_frame_idx)
308
+ write_frame_subtitle(srt_filename, tween_frame_idx, srt_frame_duration, f"F#: {tween_frame_idx}; Cadence: {tween < 1.0}; Seed: {args.seed}; {params_string}")
309
+ params_string = None
310
+
311
+ print(f"Creating in-between {'' if cadence_flow is None else anim_args.optical_flow_cadence + ' optical flow '}cadence frame: {tween_frame_idx}; tween:{tween:0.2f};")
312
+
313
+ if depth_model is not None:
314
+ assert(turbo_next_image is not None)
315
+ depth = depth_model.predict(turbo_next_image, anim_args.midas_weight, root.half_precision)
316
+
317
+ if advance_prev:
318
+ turbo_prev_image, _ = anim_frame_warp(turbo_prev_image, args, anim_args, keys, tween_frame_idx, depth_model, depth=depth, device=root.device, half_precision=root.half_precision)
319
+ if advance_next:
320
+ turbo_next_image, _ = anim_frame_warp(turbo_next_image, args, anim_args, keys, tween_frame_idx, depth_model, depth=depth, device=root.device, half_precision=root.half_precision)
321
+
322
+ # hybrid video motion - warps turbo_prev_image or turbo_next_image to match motion
323
+ if tween_frame_idx > 0:
324
+ if anim_args.hybrid_motion in ['Affine', 'Perspective']:
325
+ if anim_args.hybrid_motion_use_prev_img:
326
+ matrix = get_matrix_for_hybrid_motion_prev(tween_frame_idx-1, (args.W, args.H), inputfiles, prev_img, anim_args.hybrid_motion)
327
+ if advance_prev:
328
+ turbo_prev_image = image_transform_ransac(turbo_prev_image, matrix, anim_args.hybrid_motion)
329
+ if advance_next:
330
+ turbo_next_image = image_transform_ransac(turbo_next_image, matrix, anim_args.hybrid_motion)
331
+ else:
332
+ matrix = get_matrix_for_hybrid_motion(tween_frame_idx-1, (args.W, args.H), inputfiles, anim_args.hybrid_motion)
333
+ if advance_prev:
334
+ turbo_prev_image = image_transform_ransac(turbo_prev_image, matrix, anim_args.hybrid_motion)
335
+ if advance_next:
336
+ turbo_next_image = image_transform_ransac(turbo_next_image, matrix, anim_args.hybrid_motion)
337
+ if anim_args.hybrid_motion in ['Optical Flow']:
338
+ if anim_args.hybrid_motion_use_prev_img:
339
+ flow = get_flow_for_hybrid_motion_prev(tween_frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, prev_img, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames)
340
+ if advance_prev:
341
+ turbo_prev_image = image_transform_optical_flow(turbo_prev_image, flow, hybrid_comp_schedules['flow_factor'])
342
+ if advance_next:
343
+ turbo_next_image = image_transform_optical_flow(turbo_next_image, flow, hybrid_comp_schedules['flow_factor'])
344
+ prev_flow = flow
345
+ else:
346
+ flow = get_flow_for_hybrid_motion(tween_frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames)
347
+ if advance_prev:
348
+ turbo_prev_image = image_transform_optical_flow(turbo_prev_image, flow, hybrid_comp_schedules['flow_factor'])
349
+ if advance_next:
350
+ turbo_next_image = image_transform_optical_flow(turbo_next_image, flow, hybrid_comp_schedules['flow_factor'])
351
+ prev_flow = flow
352
+
353
+ # do optical flow cadence after animation warping
354
+ if cadence_flow is not None:
355
+ cadence_flow = abs_flow_to_rel_flow(cadence_flow, args.W, args.H)
356
+ cadence_flow, _ = anim_frame_warp(cadence_flow, args, anim_args, keys, tween_frame_idx, depth_model, depth=depth, device=root.device, half_precision=root.half_precision)
357
+ cadence_flow_inc = rel_flow_to_abs_flow(cadence_flow, args.W, args.H) * tween
358
+ if advance_prev:
359
+ turbo_prev_image = image_transform_optical_flow(turbo_prev_image, cadence_flow_inc, cadence_flow_factor)
360
+ if advance_next:
361
+ turbo_next_image = image_transform_optical_flow(turbo_next_image, cadence_flow_inc, cadence_flow_factor)
362
+
363
+ turbo_prev_frame_idx = turbo_next_frame_idx = tween_frame_idx
364
+
365
+ if turbo_prev_image is not None and tween < 1.0:
366
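+ # linear cross-fade between the two warped keyframes; tween ramps toward 1.0 across the cadence gap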
+ img = turbo_prev_image*(1.0-tween) + turbo_next_image*tween
367
+ else:
368
+ img = turbo_next_image
369
+
370
+ # intercept and override to grayscale
371
+ if anim_args.color_force_grayscale:
372
+ img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2GRAY)
373
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
374
+
375
+ # overlay mask
376
+ if args.overlay_mask and (anim_args.use_mask_video or args.use_mask):
377
+ img = do_overlay_mask(args, anim_args, img, tween_frame_idx, True)
378
+
379
+ # get prev_img during cadence
380
+ prev_img = img
381
+
382
+ # current image update for cadence frames (left commented because it doesn't currently update the preview)
383
+ # state.current_image = Image.fromarray(cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB))
384
+
385
+ # saving cadence frames
386
+ filename = f"{args.timestring}_{tween_frame_idx:09}.png"
387
+ cv2.imwrite(os.path.join(args.outdir, filename), img)
388
+ if anim_args.save_depth_maps:
389
+ depth_model.save(os.path.join(args.outdir, f"{args.timestring}_depth_{tween_frame_idx:09}.png"), depth)
390
+
391
+ # get color match for video outside of prev_img conditional
392
+ hybrid_available = anim_args.hybrid_composite != 'None' or anim_args.hybrid_motion in ['Optical Flow', 'Affine', 'Perspective']
393
+ if anim_args.color_coherence == 'Video Input' and hybrid_available:
394
+ if int(frame_idx) % int(anim_args.color_coherence_video_every_N_frames) == 0:
395
+ prev_vid_img = Image.open(os.path.join(args.outdir, 'inputframes', get_frame_name(anim_args.video_init_path) + f"{frame_idx:09}.jpg"))
396
+ prev_vid_img = prev_vid_img.resize((args.W, args.H), PIL.Image.LANCZOS)
397
+ color_match_sample = np.asarray(prev_vid_img)
398
+ color_match_sample = cv2.cvtColor(color_match_sample, cv2.COLOR_RGB2BGR)
399
+
400
+ # after 1st frame, prev_img exists
401
+ if prev_img is not None:
402
+ # apply transforms to previous frame
403
+ prev_img, depth = anim_frame_warp(prev_img, args, anim_args, keys, frame_idx, depth_model, depth=None, device=root.device, half_precision=root.half_precision)
404
+
405
+ # do hybrid compositing before motion
406
+ if anim_args.hybrid_composite == 'Before Motion':
407
+ args, prev_img = hybrid_composite(args, anim_args, frame_idx, prev_img, depth_model, hybrid_comp_schedules, root)
408
+
409
+ # hybrid video motion - warps prev_img to match motion, usually to prepare for compositing
410
+ if anim_args.hybrid_motion in ['Affine', 'Perspective']:
411
+ if anim_args.hybrid_motion_use_prev_img:
412
+ matrix = get_matrix_for_hybrid_motion_prev(frame_idx-1, (args.W, args.H), inputfiles, prev_img, anim_args.hybrid_motion)
413
+ else:
414
+ matrix = get_matrix_for_hybrid_motion(frame_idx-1, (args.W, args.H), inputfiles, anim_args.hybrid_motion)
415
+ prev_img = image_transform_ransac(prev_img, matrix, anim_args.hybrid_motion)
416
+ if anim_args.hybrid_motion in ['Optical Flow']:
417
+ if anim_args.hybrid_motion_use_prev_img:
418
+ flow = get_flow_for_hybrid_motion_prev(frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, prev_img, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames)
419
+ else:
420
+ flow = get_flow_for_hybrid_motion(frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames)
421
+ prev_img = image_transform_optical_flow(prev_img, flow, hybrid_comp_schedules['flow_factor'])
422
+ prev_flow = flow
423
+
424
+ # do hybrid compositing after motion (normal)
425
+ if anim_args.hybrid_composite == 'Normal':
426
+ args, prev_img = hybrid_composite(args, anim_args, frame_idx, prev_img, depth_model, hybrid_comp_schedules, root)
427
+
428
+ # apply color matching
429
+ if anim_args.color_coherence != 'None':
430
+ if color_match_sample is None:
431
+ color_match_sample = prev_img.copy()
432
+ else:
433
+ prev_img = maintain_colors(prev_img, color_match_sample, anim_args.color_coherence)
434
+
435
+ # intercept and override to grayscale
436
+ if anim_args.color_force_grayscale:
437
+ prev_img = cv2.cvtColor(prev_img, cv2.COLOR_BGR2GRAY)
438
+ prev_img = cv2.cvtColor(prev_img, cv2.COLOR_GRAY2BGR)
439
+
440
+ # apply scaling
441
+ contrast_image = (prev_img * contrast).round().astype(np.uint8)
442
+ # anti-blur
443
+ if amount > 0:
444
+ contrast_image = unsharp_mask(contrast_image, (kernel, kernel), sigma, amount, threshold, mask_image if args.use_mask else None)
445
+ # apply frame noising
446
+ if args.use_mask or anim_args.use_noise_mask:
447
+ args.noise_mask = compose_mask_with_check(root, args, noise_mask_seq, noise_mask_vals, Image.fromarray(cv2.cvtColor(contrast_image, cv2.COLOR_BGR2RGB)))
448
+ noised_image = add_noise(contrast_image, noise, args.seed, anim_args.noise_type,
449
+ (anim_args.perlin_w, anim_args.perlin_h, anim_args.perlin_octaves, anim_args.perlin_persistence),
450
+ args.noise_mask, args.invert_mask)
451
+
452
+ # use transformed previous frame as init for current
453
+ args.use_init = True
454
+ args.init_sample = Image.fromarray(cv2.cvtColor(noised_image, cv2.COLOR_BGR2RGB))
455
+ args.strength = max(0.0, min(1.0, strength))
456
+
457
+ args.scale = scale
458
+
459
+ # Pix2Pix Image CFG Scale - does *nothing* with non-pix2pix checkpoints
460
+ args.pix2pix_img_cfg_scale = float(keys.pix2pix_img_cfg_scale_series[frame_idx])
461
+
462
+ # grab prompt for current frame
463
+ args.prompt = prompt_series[frame_idx]
464
+
465
+ if args.seed_behavior == 'schedule' or use_parseq:
466
+ args.seed = int(keys.seed_schedule_series[frame_idx])
467
+
468
+ if anim_args.enable_checkpoint_scheduling:
469
+ args.checkpoint = keys.checkpoint_schedule_series[frame_idx]
470
+ else:
471
+ args.checkpoint = None
472
+
473
+ #SubSeed scheduling
474
+ if anim_args.enable_subseed_scheduling:
475
+ args.subseed = int(keys.subseed_schedule_series[frame_idx])
476
+ args.subseed_strength = float(keys.subseed_strength_schedule_series[frame_idx])
477
+
478
+ if use_parseq:
479
+ args.seed_enable_extras = True
480
+ anim_args.enable_subseed_scheduling = True
481
+ args.subseed = int(keys.subseed_schedule_series[frame_idx])
482
+ args.subseed_strength = keys.subseed_strength_schedule_series[frame_idx]
483
+
484
+ # set value back into the prompt - prepare and report prompt and seed
485
+ args.prompt = prepare_prompt(args.prompt, anim_args.max_frames, args.seed, frame_idx)
486
+
487
+ # grab init image for current frame
488
+ if using_vid_init:
489
+ init_frame = get_next_frame(args.outdir, anim_args.video_init_path, frame_idx, False)
490
+ print(f"Using video init frame {init_frame}")
491
+ args.init_image = init_frame
492
+ args.strength = max(0.0, min(1.0, strength))
493
+ if anim_args.use_mask_video:
494
+ args.mask_file = get_mask_from_file(get_next_frame(args.outdir, anim_args.video_mask_path, frame_idx, True), args)
495
+ args.noise_mask = get_mask_from_file(get_next_frame(args.outdir, anim_args.video_mask_path, frame_idx, True), args)
496
+
497
+ mask_vals['video_mask'] = get_mask_from_file(get_next_frame(args.outdir, anim_args.video_mask_path, frame_idx, True), args)
498
+
499
+ if args.use_mask:
500
+ args.mask_image = compose_mask_with_check(root, args, mask_seq, mask_vals, args.init_sample) if args.init_sample is not None else None # we need it only after the first frame anyway
501
+
502
+ # setting up some arguments for the looper
503
+ loop_args.imageStrength = loopSchedulesAndData.image_strength_schedule_series[frame_idx]
504
+ loop_args.blendFactorMax = loopSchedulesAndData.blendFactorMax_series[frame_idx]
505
+ loop_args.blendFactorSlope = loopSchedulesAndData.blendFactorSlope_series[frame_idx]
506
+ loop_args.tweeningFrameSchedule = loopSchedulesAndData.tweening_frames_schedule_series[frame_idx]
507
+ loop_args.colorCorrectionFactor = loopSchedulesAndData.color_correction_factor_series[frame_idx]
508
+ loop_args.use_looper = loopSchedulesAndData.use_looper
509
+ loop_args.imagesToKeyframe = loopSchedulesAndData.imagesToKeyframe
510
+
511
+ if 'img2img_fix_steps' in opts.data and opts.data["img2img_fix_steps"]: # disable "with img2img do exactly x steps" from general setting, as it *ruins* deforum animations
512
+ opts.data["img2img_fix_steps"] = False
513
+ if scheduled_clipskip is not None:
514
+ opts.data["CLIP_stop_at_last_layers"] = scheduled_clipskip
515
+ if scheduled_noise_multiplier is not None:
516
+ opts.data["initial_noise_multiplier"] = scheduled_noise_multiplier
517
+ if scheduled_ddim_eta is not None:
518
+ opts.data["eta_ddim"] = scheduled_ddim_eta
519
+ if scheduled_ancestral_eta is not None:
520
+ opts.data["eta_ancestral"] = scheduled_ancestral_eta
521
+
522
+ if anim_args.animation_mode == '3D' and (cmd_opts.lowvram or cmd_opts.medvram):
523
+ if predict_depths: depth_model.to('cpu')
524
+ devices.torch_gc()
525
+ lowvram.setup_for_low_vram(sd_model, cmd_opts.medvram)
526
+ sd_hijack.model_hijack.hijack(sd_model)
527
+
528
+ # optical flow redo before generation
529
+ if anim_args.optical_flow_redo_generation != 'None' and prev_img is not None and strength > 0:
530
+ print(f"Optical flow redo is diffusing and warping using {anim_args.optical_flow_redo_generation} optical flow before generation.")
531
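+ # stash the scheduled seed: the throwaway generation below runs on a random seed, and the real seed is restored before the final generation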
+ stored_seed = args.seed
532
+ args.seed = random.randint(0, 2**32 - 1)
533
+ disposable_image = generate(args, keys, anim_args, loop_args, controlnet_args, root, frame_idx, sampler_name=scheduled_sampler_name)
534
+ disposable_image = cv2.cvtColor(np.array(disposable_image), cv2.COLOR_RGB2BGR)
535
+ disposable_flow = get_flow_from_images(prev_img, disposable_image, anim_args.optical_flow_redo_generation, raft_model)
536
+ disposable_image = cv2.cvtColor(disposable_image, cv2.COLOR_BGR2RGB)
537
+ disposable_image = image_transform_optical_flow(disposable_image, disposable_flow, redo_flow_factor)
538
+ args.seed = stored_seed
539
+ args.init_sample = Image.fromarray(disposable_image)
540
+ del(disposable_image,disposable_flow,stored_seed)
541
+ gc.collect()
542
+
543
+ # diffusion redo
544
+ if int(anim_args.diffusion_redo) > 0 and prev_img is not None and strength > 0:
545
+ stored_seed = args.seed
546
+ for n in range(0,int(anim_args.diffusion_redo)):
547
+ print(f"Redo generation {n+1} of {int(anim_args.diffusion_redo)} before final generation")
548
+ args.seed = random.randint(0, 2**32 - 1)
549
+ disposable_image = generate(args, keys, anim_args, loop_args, controlnet_args, root, frame_idx, sampler_name=scheduled_sampler_name)
550
+ disposable_image = cv2.cvtColor(np.array(disposable_image), cv2.COLOR_RGB2BGR)
551
+ # color match on last one only
552
+ if n == int(anim_args.diffusion_redo) - 1:
553
+ disposable_image = maintain_colors(prev_img, color_match_sample, anim_args.color_coherence)
554
+ args.seed = stored_seed
555
+ args.init_sample = Image.fromarray(cv2.cvtColor(disposable_image, cv2.COLOR_BGR2RGB))
556
+ del(disposable_image, stored_seed)
557
+ gc.collect()
558
+
559
+ # generation
560
+ image = generate(args, keys, anim_args, loop_args, controlnet_args, root, frame_idx, sampler_name=scheduled_sampler_name)
561
+
562
+ if image is None:
563
+ break
564
+
565
+ # do hybrid video after generation
566
+ if frame_idx > 0 and anim_args.hybrid_composite == 'After Generation':
567
+ image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
568
+ args, image = hybrid_composite(args, anim_args, frame_idx, image, depth_model, hybrid_comp_schedules, root)
569
+ image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
570
+
571
+ # color matching on the first frame is done after generation; the color match sample was collected earlier, so we do an extra pass here to avoid the corruption introduced by color matching the first output
572
+ if frame_idx == 0 and (anim_args.color_coherence == 'Image' or (anim_args.color_coherence == 'Video Input' and hybrid_available)):
573
+ image = maintain_colors(cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR), color_match_sample, anim_args.color_coherence)
574
+ image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
575
+ elif color_match_sample is not None and anim_args.color_coherence != 'None' and not anim_args.legacy_colormatch:
576
+ image = maintain_colors(cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR), color_match_sample, anim_args.color_coherence)
577
+ image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
578
+
579
+ # intercept and override to grayscale
580
+ if anim_args.color_force_grayscale:
581
+ image = ImageOps.grayscale(image)
582
+ image = ImageOps.colorize(image, black="black", white="white")
583
+
584
+ # overlay mask
585
+ if args.overlay_mask and (anim_args.use_mask_video or args.use_mask):
586
+ image = do_overlay_mask(args, anim_args, image, frame_idx)
587
+
588
+ # on strength 0, set color match to generation
589
+ if ((not anim_args.legacy_colormatch and not args.use_init) or (anim_args.legacy_colormatch and strength == 0)) and anim_args.color_coherence not in ['Image', 'Video Input']:
590
+ color_match_sample = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
591
+
592
+ opencv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
593
+ if not using_vid_init:
594
+ prev_img = opencv_image
595
+
596
+ if turbo_steps > 1:
597
+ turbo_prev_image, turbo_prev_frame_idx = turbo_next_image, turbo_next_frame_idx
598
+ turbo_next_image, turbo_next_frame_idx = opencv_image, frame_idx
599
+ frame_idx += turbo_steps
600
+ else:
601
+ filename = f"{args.timestring}_{frame_idx:09}.png"
602
+ save_image(image, 'PIL', filename, args, video_args, root)
603
+
604
+ if anim_args.save_depth_maps:
605
+ if cmd_opts.lowvram or cmd_opts.medvram:
606
+ lowvram.send_everything_to_cpu()
607
+ sd_hijack.model_hijack.undo_hijack(sd_model)
608
+ devices.torch_gc()
609
+ depth_model.to(root.device)
610
+ depth = depth_model.predict(opencv_image, anim_args.midas_weight, root.half_precision)
611
+ depth_model.save(os.path.join(args.outdir, f"{args.timestring}_depth_{frame_idx:09}.png"), depth)
612
+ if cmd_opts.lowvram or cmd_opts.medvram:
613
+ depth_model.to('cpu')
614
+ devices.torch_gc()
615
+ lowvram.setup_for_low_vram(sd_model, cmd_opts.medvram)
616
+ sd_hijack.model_hijack.hijack(sd_model)
617
+ frame_idx += 1
618
+
619
+ state.current_image = image
620
+
621
+ args.seed = next_seed(args)
622
+
623
+ if predict_depths and not keep_in_vram:
624
+ depth_model.delete_model() # handles adabins too
625
+
626
+ if load_raft:
627
+ raft_model.delete_model()
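The tween loop above repeatedly warps frames along dense optical-flow fields (image_transform_optical_flow, cadence_flow_inc, hybrid flow). As a rough, self-contained sketch of that general technique (warp_by_flow is a hypothetical helper, not the extension's implementation, and sign conventions vary between flow estimators):

import cv2
import numpy as np

def warp_by_flow(img, flow, flow_factor=1.0):
    # displace every pixel along a (scaled) dense flow field via cv2.remap
    h, w = flow.shape[:2]
    scaled = flow.astype(np.float32) * flow_factor
    grid_x, grid_y = np.meshgrid(np.arange(w, dtype=np.float32), np.arange(h, dtype=np.float32))
    map_x = grid_x + scaled[..., 0]
    map_y = grid_y + scaled[..., 1]
    return cv2.remap(img, map_x, map_y, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT)

# made-up data: warp a blank frame by a uniform 3-pixel horizontal flow
frame = np.zeros((64, 64, 3), dtype=np.uint8)
flow = np.zeros((64, 64, 2), dtype=np.float32)
flow[..., 0] = 3.0
warped = warp_by_flow(frame, flow)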
scripts/deforum_helpers/render_modes.py ADDED
@@ -0,0 +1,175 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ import time
20
+ import pathlib
21
+ import re
22
+ import numexpr
23
+ from modules.shared import opts, state
24
+ from .render import render_animation
25
+ from .seed import next_seed
26
+ from .video_audio_utilities import vid2frames
27
+ from .prompt import interpolate_prompts
28
+ from .generate import generate
29
+ from .animation_key_frames import DeformAnimKeys
30
+ from .parseq_adapter import ParseqAnimKeys
31
+ from .save_images import save_image
32
+ from .settings import save_settings_from_animation_run
33
+
34
+ def render_input_video(args, anim_args, video_args, parseq_args, loop_args, controlnet_args, root):
35
+ # create a folder for the video input frames to live in
36
+ video_in_frame_path = os.path.join(args.outdir, 'inputframes')
37
+ os.makedirs(video_in_frame_path, exist_ok=True)
38
+
39
+ # save the video frames from input video
40
+ print(f"Exporting Video Frames (1 every {anim_args.extract_nth_frame}) frames to {video_in_frame_path}...")
41
+ vid2frames(video_path = anim_args.video_init_path, video_in_frame_path=video_in_frame_path, n=anim_args.extract_nth_frame, overwrite=anim_args.overwrite_extracted_frames, extract_from_frame=anim_args.extract_from_frame, extract_to_frame=anim_args.extract_to_frame)
42
+
43
+ # determine max frames from length of input frames
44
+ anim_args.max_frames = len([f for f in pathlib.Path(video_in_frame_path).glob('*.jpg')])
45
+ args.use_init = True
46
+ print(f"Loading {anim_args.max_frames} input frames from {video_in_frame_path} and saving video frames to {args.outdir}")
47
+
48
+ if anim_args.use_mask_video:
49
+ # create a folder for the mask video input frames to live in
50
+ mask_in_frame_path = os.path.join(args.outdir, 'maskframes')
51
+ os.makedirs(mask_in_frame_path, exist_ok=True)
52
+
53
+ # save the video frames from mask video
54
+ print(f"Exporting Video Frames (1 every {anim_args.extract_nth_frame}) frames to {mask_in_frame_path}...")
55
+ vid2frames(video_path=anim_args.video_mask_path,video_in_frame_path=mask_in_frame_path, n=anim_args.extract_nth_frame, overwrite=anim_args.overwrite_extracted_frames, extract_from_frame=anim_args.extract_from_frame, extract_to_frame=anim_args.extract_to_frame)
56
+ max_mask_frames = len([f for f in pathlib.Path(mask_in_frame_path).glob('*.jpg')])
57
+
58
+ # limit max frames if there are fewer frames in the video mask than in the input video
59
+ if max_mask_frames < anim_args.max_frames:
60
+ anim_args.max_frames = max_mask_frames
61
+ print("Video mask contains fewer frames than the init video, so max frames is limited to the number of mask frames.")
62
+ args.use_mask = True
63
+ args.overlay_mask = True
64
+
65
+ render_animation(args, anim_args, video_args, parseq_args, loop_args, controlnet_args, root)
66
+
67
+ # Modified copy of the above that allows using a masking video without an init video.
68
+ def render_animation_with_video_mask(args, anim_args, video_args, parseq_args, loop_args, controlnet_args, root):
69
+ # create a folder for the video input frames to live in
70
+ mask_in_frame_path = os.path.join(args.outdir, 'maskframes')
71
+ os.makedirs(mask_in_frame_path, exist_ok=True)
72
+
73
+ # save the video frames from mask video
74
+ print(f"Exporting Video Frames (1 every {anim_args.extract_nth_frame}) frames to {mask_in_frame_path}...")
75
+ vid2frames(video_path=anim_args.video_mask_path, video_in_frame_path=mask_in_frame_path, n=anim_args.extract_nth_frame, overwrite=anim_args.overwrite_extracted_frames, extract_from_frame=anim_args.extract_from_frame, extract_to_frame=anim_args.extract_to_frame)
76
+ args.use_mask = True
77
+ #args.overlay_mask = True
78
+
79
+ # determine max frames from length of input frames
80
+ anim_args.max_frames = len([f for f in pathlib.Path(mask_in_frame_path).glob('*.jpg')])
81
+ #args.use_init = True
82
+ print(f"Loading {anim_args.max_frames} input frames from {mask_in_frame_path} and saving video frames to {args.outdir}")
83
+
84
+ render_animation(args, anim_args, video_args, parseq_args, loop_args, controlnet_args, root)
85
+
86
+ def get_parsed_value(value, frame_idx, max_f):
87
+ pattern = r'`.*?`'
88
+ regex = re.compile(pattern)
89
+ parsed_value = value
90
+ for match in regex.finditer(parsed_value):
91
+ matched_string = match.group(0)
92
+ parsed_string = matched_string.replace('t', f'{frame_idx}').replace("max_f" , f"{max_f}").replace('`','')
93
+ value = numexpr.evaluate(parsed_string)
94
+ parsed_value = parsed_value.replace(matched_string, str(value))
95
+ return parsed_value
96
+
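For illustration only, with made-up schedule text and frame numbers: get_parsed_value substitutes the current frame index (t) and the total frame count (max_f) into each backtick block and evaluates it with numexpr, so a prompt or schedule string can vary over time. Note that the plain string replace means any literal letter t inside a backtick block gets substituted too.

# with frame_idx=30 and max_f=120 the backtick block becomes
# numexpr.evaluate("0.5 + 0.4*30/120"), i.e. 0.6
get_parsed_value("strength `0.5 + 0.4*t/max_f`", 30, 120)   # -> "strength 0.6"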
97
+ def render_interpolation(args, anim_args, video_args, parseq_args, loop_args, controlnet_args, root):
98
+
99
+ # use parseq if manifest is provided
100
+ use_parseq = parseq_args.parseq_manifest is not None and parseq_args.parseq_manifest.strip()
101
+
102
+ # expand key frame strings to values
103
+ keys = DeformAnimKeys(anim_args) if not use_parseq else ParseqAnimKeys(parseq_args, anim_args, video_args)
104
+
105
+ # create output folder for the batch
106
+ os.makedirs(args.outdir, exist_ok=True)
107
+ print(f"Saving interpolation animation frames to {args.outdir}")
108
+
109
+ # save settings.txt file for the current run
110
+ save_settings_from_animation_run(args, anim_args, parseq_args, loop_args, controlnet_args, video_args, root)
111
+
112
+ # Compute interpolated prompts
113
+ if use_parseq and keys.manages_prompts():
114
+ print("Parseq prompts are assumed to already be interpolated - not doing any additional prompt interpolation")
115
+ prompt_series = keys.prompts
116
+ else:
117
+ print("Generating interpolated prompts for all frames")
118
+ prompt_series = interpolate_prompts(root.animation_prompts, anim_args.max_frames)
119
+
120
+ state.job_count = anim_args.max_frames
121
+ frame_idx = 0
122
+ # INTERPOLATION MODE
123
+ while frame_idx < anim_args.max_frames:
124
+ # print data to cli
125
+ prompt_to_print = get_parsed_value(prompt_series[frame_idx].strip(), frame_idx, anim_args.max_frames)
126
+
127
+ if prompt_to_print.endswith("--neg"):
128
+ prompt_to_print = prompt_to_print[:-5]
129
+ print(f"\033[36mInterpolation frame: \033[0m{frame_idx}/{anim_args.max_frames} ")
130
+ print(f"\033[32mSeed: \033[0m{args.seed}")
131
+ print(f"\033[35mPrompt: \033[0m{prompt_to_print}")
132
+
133
+ state.job = f"frame {frame_idx + 1}/{anim_args.max_frames}"
134
+ state.job_no = frame_idx + 1
135
+
136
+ if state.interrupted:
137
+ break
138
+ if state.skipped:
139
+ print("\n** PAUSED **")
140
+ state.skipped = False
141
+ while not state.skipped:
142
+ time.sleep(0.1)
143
+ print("** RESUMING **")
144
+
145
+ # grab inputs for current frame generation
146
+ args.prompt = prompt_to_print
147
+ args.scale = keys.cfg_scale_schedule_series[frame_idx]
148
+ args.pix2pix_img_cfg_scale = keys.pix2pix_img_cfg_scale_series[frame_idx]
149
+
150
+ scheduled_sampler_name = keys.sampler_schedule_series[frame_idx].casefold() if anim_args.enable_sampler_scheduling and keys.sampler_schedule_series[frame_idx] is not None else None
151
+ args.steps = int(keys.steps_schedule_series[frame_idx]) if anim_args.enable_steps_scheduling and keys.steps_schedule_series[frame_idx] is not None else args.steps
152
+ scheduled_clipskip = int(keys.clipskip_schedule_series[frame_idx]) if anim_args.enable_clipskip_scheduling and keys.clipskip_schedule_series[frame_idx] is not None else None
153
+ args.checkpoint = keys.checkpoint_schedule_series[frame_idx] if anim_args.enable_checkpoint_scheduling else None
154
+ if anim_args.enable_subseed_scheduling:
155
+ args.subseed = int(keys.subseed_schedule_series[frame_idx])
156
+ args.subseed_strength = keys.subseed_strength_schedule_series[frame_idx]
157
+ else:
158
+ args.subseed, args.subseed_strength = keys.subseed_schedule_series[frame_idx], keys.subseed_strength_schedule_series[frame_idx]
159
+ if use_parseq:
160
+ anim_args.enable_subseed_scheduling = True
161
+ args.subseed, args.subseed_strength = int(keys.subseed_schedule_series[frame_idx]), keys.subseed_strength_schedule_series[frame_idx]
162
+ args.seed = int(keys.seed_schedule_series[frame_idx]) if args.seed_behavior == 'schedule' or use_parseq else args.seed
163
+ opts.data["CLIP_stop_at_last_layers"] = scheduled_clipskip if scheduled_clipskip is not None else opts.data["CLIP_stop_at_last_layers"]
164
+
165
+ image = generate(args, keys, anim_args, loop_args, controlnet_args, root, frame_idx, sampler_name=scheduled_sampler_name)
166
+ filename = f"{args.timestring}_{frame_idx:09}.png"
167
+
168
+ save_image(image, 'PIL', filename, args, video_args, root)
169
+
170
+ state.current_image = image
171
+
172
+ if args.seed_behavior != 'schedule':
173
+ args.seed = next_seed(args)
174
+
175
+ frame_idx += 1