ddoc committed on
Commit 4c53d64 · 1 Parent(s): a37ad19

Upload 171 files

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50)
  1. .github/ISSUE_TEMPLATE/bug_report.yml +111 -0
  2. .github/ISSUE_TEMPLATE/config.yml +8 -0
  3. .github/ISSUE_TEMPLATE/feature_request.yml +46 -0
  4. .github/pull_request_template.md +5 -0
  5. .github/scripts/issue_checker.py +110 -0
  6. .github/workflows/issue_checker.yaml +23 -0
  7. .gitignore +12 -0
  8. AUTHORS.md +23 -0
  9. CONTRIBUTING.md +9 -0
  10. LICENSE +0 -0
  11. README.md +73 -3
  12. install.py +27 -0
  13. javascript/deforum-hints.js +233 -0
  14. javascript/deforum.js +34 -0
  15. requirements.txt +8 -0
  16. scripts/deforum.py +29 -0
  17. scripts/deforum_extend_paths.py +34 -0
  18. scripts/deforum_helpers/RAFT.py +45 -0
  19. scripts/deforum_helpers/animation.py +430 -0
  20. scripts/deforum_helpers/animation_key_frames.py +150 -0
  21. scripts/deforum_helpers/args.py +341 -0
  22. scripts/deforum_helpers/auto_navigation.py +89 -0
  23. scripts/deforum_helpers/colors.py +39 -0
  24. scripts/deforum_helpers/composable_masks.py +213 -0
  25. scripts/deforum_helpers/consistency_check.py +151 -0
  26. scripts/deforum_helpers/defaults.py +218 -0
  27. scripts/deforum_helpers/deforum_controlnet.py +336 -0
  28. scripts/deforum_helpers/deforum_controlnet_gradio.py +88 -0
  29. scripts/deforum_helpers/deforum_tqdm.py +99 -0
  30. scripts/deforum_helpers/deprecation_utils.py +99 -0
  31. scripts/deforum_helpers/depth.py +160 -0
  32. scripts/deforum_helpers/depth_adabins.py +79 -0
  33. scripts/deforum_helpers/depth_leres.py +72 -0
  34. scripts/deforum_helpers/depth_midas.py +92 -0
  35. scripts/deforum_helpers/depth_zoe.py +47 -0
  36. scripts/deforum_helpers/frame_interpolation.py +239 -0
  37. scripts/deforum_helpers/general_utils.py +145 -0
  38. scripts/deforum_helpers/generate.py +324 -0
  39. scripts/deforum_helpers/gradio_funcs.py +214 -0
  40. scripts/deforum_helpers/human_masking.py +87 -0
  41. scripts/deforum_helpers/hybrid_video.py +611 -0
  42. scripts/deforum_helpers/image_sharpening.py +39 -0
  43. scripts/deforum_helpers/load_images.py +113 -0
  44. scripts/deforum_helpers/masks.py +57 -0
  45. scripts/deforum_helpers/noise.py +89 -0
  46. scripts/deforum_helpers/parseq_adapter.py +210 -0
  47. scripts/deforum_helpers/parseq_adapter_test.py +157 -0
  48. scripts/deforum_helpers/prompt.py +161 -0
  49. scripts/deforum_helpers/render.py +627 -0
  50. scripts/deforum_helpers/render_modes.py +175 -0
.github/ISSUE_TEMPLATE/bug_report.yml ADDED
@@ -0,0 +1,111 @@
1
+ name: Bug Report
2
+ description: Create a bug report for the Deforum extension
3
+ title: "[Bug]: "
4
+ labels: ["bug"]
5
+
6
+ body:
7
+ - type: checkboxes
8
+ attributes:
9
+ label: Have you read the latest version of the FAQ?
10
+ description: Please visit the page called FAQ & Troubleshooting on the Deforum wiki in this repository and see if your problem has already been described there.
11
+ options:
12
+ - label: I have visited the FAQ page right now and my issue is not present there
13
+ required: true
14
+ - type: checkboxes
15
+ attributes:
16
+ label: Is there an existing issue for this?
17
+ description: Please search to see if an issue already exists for the bug you encountered (including the closed issues).
18
+ options:
19
+ - label: I have searched the existing issues and checked the recent builds/commits of both this extension and the webui
20
+ required: true
21
+ - type: checkboxes
22
+ attributes:
23
+ label: Are you using the latest version of the Deforum extension?
24
+ description: Please check whether your Deforum is based on the latest repo commit (git log), or update it through the 'Extensions' tab and check whether the issue still persists. If it does, check this box.
25
+ options:
26
+ - label: I have Deforum updated to the latest version and I still have the issue.
27
+ required: true
28
+ - type: markdown
29
+ attributes:
30
+ value: |
31
+ **Please fill this form with as much information as possible; don't forget to fill in "What OS..." and "What browsers", and provide screenshots if possible.**
32
+ - type: textarea
33
+ id: what-did
34
+ attributes:
35
+ label: What happened?
36
+ description: Tell us what happened in a very clear and simple way
37
+ validations:
38
+ required: true
39
+ - type: textarea
40
+ id: steps
41
+ attributes:
42
+ label: Steps to reproduce the problem
43
+ description: Please provide us with precise step-by-step information on how to reproduce the bug
44
+ value: |
45
+ 1. Go to ....
46
+ 2. Press ....
47
+ 3. ...
48
+ validations:
49
+ required: true
50
+ - type: textarea
51
+ id: what-should
52
+ attributes:
53
+ label: What should have happened?
54
+ description: Tell us what you think the normal behavior should be
55
+ - type: textarea
56
+ id: commits
57
+ attributes:
58
+ label: WebUI and Deforum extension Commit IDs
59
+ description: Which commit of the webui/deforum extension are you running on? (Do not write *Latest version/repo/commit*, as this means nothing and will have changed by the time we read your issue. Rather, copy the **Commit** link at the bottom of the UI, or if you can't launch the webui at all, enter your cmd/terminal, CD into the main webui folder to get the webui commit id, and cd into the extensions/deforum folder to get the deforum commit id, both using the command 'git rev-parse HEAD'.)
60
+ value: |
61
+ webui commit id -
62
+ deforum exten commit id -
63
+ validations:
64
+ required: true
65
+ - type: textarea
66
+ id: what-torch
67
+ attributes:
68
+ label: Torch version
69
+ description: Which Torch version your WebUI is working with
70
+ validations:
71
+ required: true
72
+ - type: dropdown
73
+ id: where
74
+ attributes:
75
+ label: On which platform are you launching the webui with the extension?
76
+ multiple: true
77
+ options:
78
+ - Local PC setup (Windows)
79
+ - Local PC setup (Linux)
80
+ - Local PC setup (Mac)
81
+ - Google Colab (The Last Ben's)
82
+ - Google Colab (Other)
83
+ - Cloud server (Linux)
84
+ - Other (please specify in "additional information")
85
+ - type: textarea
86
+ id: deforumsettings
87
+ attributes:
88
+ label: Deforum settings
89
+ description: Send a link here to the settings file you used, or to the latest generated one in the 'outputs/img2img-images/Deforum/' folder (ideally, upload it to GitHub gists).
90
+ validations:
91
+ required: true
92
+ - type: textarea
93
+ id: customsettings
94
+ attributes:
95
+ label: Webui core settings
96
+ description: Send a link here to your ui-config.json file in the core 'stable-diffusion-webui' folder (ideally, upload it to GitHub gists). Friendly reminder - if you have 'With img2img, do exactly the amount of steps the slider specified' checked, your issue will be discarded immediately. 😉
97
+ validations:
98
+ required: true
99
+ - type: textarea
100
+ id: logs
101
+ attributes:
102
+ label: Console logs
103
+ description: Please provide **FULL cmd/terminal logs FROM THE MOMENT YOU STARTED UI to the end of it**, after your bug happened. If it's very long, provide a link to GitHub gists or similar service.
104
+ render: Shell
105
+ validations:
106
+ required: true
107
+ - type: textarea
108
+ id: misc
109
+ attributes:
110
+ label: Additional information
111
+ description: Please provide us with any relevant additional info or context.
.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,8 @@
1
+ blank_issues_enabled: false
2
+ contact_links:
3
+ - name: Deforum Github discussions
4
+ url: https://github.com/deforum-art/deforum-for-automatic1111-webui/discussions
5
+ about: Please ask and answer questions here. If you want to complain about something, don't try to circumvent issue filling by starting a discussion here 🙃
6
+ - name: Deforum Discord
7
+ url: https://discord.gg/deforum
8
+ about: Here is our main community where we chat, discuss development and share experiments and results
.github/ISSUE_TEMPLATE/feature_request.yml ADDED
@@ -0,0 +1,46 @@
1
+ name: Feature request
2
+ description: Suggest an idea for the Deforum extension
3
+ title: "[Feature Request]: "
4
+ labels: ["enhancement"]
5
+
6
+ body:
7
+ - type: checkboxes
8
+ attributes:
9
+ label: Is there an existing issue for this?
10
+ description: Please search to see if an issue already exists for the feature you want, and check that it's not already implemented in a recent build/commit.
11
+ options:
12
+ - label: I have searched the existing issues and checked the recent builds/commits
13
+ required: true
14
+ - type: markdown
15
+ attributes:
16
+ value: |
17
+ *Please fill this form with as much information as possible, provide screenshots and/or illustrations of the feature if possible*
18
+ - type: textarea
19
+ id: feature
20
+ attributes:
21
+ label: What would your feature do?
22
+ description: Tell us about your feature in a very clear and simple way, and what problem it would solve
23
+ validations:
24
+ required: true
25
+ - type: textarea
26
+ id: workflow
27
+ attributes:
28
+ label: Proposed workflow
29
+ description: Please provide us with step by step information on how you'd like the feature to be accessed and used
30
+ value: |
31
+ 1. Go to ....
32
+ 2. Press ....
33
+ 3. ...
34
+ validations:
35
+ required: true
36
+ - type: textarea
37
+ id: misc
38
+ attributes:
39
+ label: Additional information
40
+ description: Add any other context or screenshots about the feature request here.
41
+ - type: textarea
42
+ attributes:
43
+ label: Are you going to help adding it?
44
+ description: Do you want to participate in Deforum development and bring the desired feature sooner? Let us know if you are willing to add it yourself; ideally, leave your Discord handle here so we can contact you for a less formal conversation. Our community is welcoming and ready to provide you with any information on the project structure or how the code works. If not, keep in mind that you will have to wait until the team picks up your issue.
45
+ validations:
46
+ required: true
.github/pull_request_template.md ADDED
@@ -0,0 +1,5 @@
1
+ ## ⚠ TEMPORARY POLICY ⚠
2
+
3
+ 🚧 As of 2023-05-17, due to planned changes, new pull requests are temporarily disallowed; all pending PRs have been closed 🚧
4
+
5
+ In case of questions, contact us at https://discord.gg/deforum
.github/scripts/issue_checker.py ADDED
@@ -0,0 +1,110 @@
1
+ import os
2
+ import re
3
+ from github import Github
4
+
5
+ # Get GitHub token from environment variables
6
+ token = os.environ['GITHUB_TOKEN']
7
+ g = Github(token)
8
+
9
+ # Get the current repository
10
+ print(f"Repo is {os.environ['GITHUB_REPOSITORY']}")
11
+ repo = g.get_repo(os.environ['GITHUB_REPOSITORY'])
12
+
13
+ # Get the issue number from the event payload
14
+ #issue_number = int(os.environ['ISSUE_NUMBER'])
15
+
16
+ for issue in repo.get_issues():
17
+ print(f"Processing issue №{issue.number}")
18
+ if issue.pull_request:
19
+ continue
20
+
21
+ # Get the issue object
22
+ #issue = repo.get_issue(issue_number)
23
+
24
+ # Define the keywords to search for in the issue
25
+ keywords = ['Python', 'Commit hash', 'Launching Web UI with arguments', 'Model loaded', 'deforum']
26
+
27
+ # Check if ALL of the keywords are present in the issue
28
+ def check_keywords(issue_body, keywords):
29
+ for keyword in keywords:
30
+ if not re.search(r'\b' + re.escape(keyword) + r'\b', issue_body, re.IGNORECASE):
31
+ return False
32
+ return True
33
+
34
+ # Check if the issue title has at least a specified number of words
35
+ def check_title_word_count(issue_title, min_word_count):
36
+ words = issue_title.replace("/", " ").replace("\\\\", " ").split()
37
+ return len(words) >= min_word_count
38
+
39
+ # Check if the issue title is concise
40
+ def check_title_concise(issue_title, max_word_count):
41
+ words = issue_title.replace("/", " ").replace("\\\\", " ").split()
42
+ return len(words) <= max_word_count
43
+
44
+ # Check if the commit ID is in the correct hash form
45
+ def check_commit_id_format(issue_body):
46
+ match = re.search(r'webui commit id - ([a-fA-F0-9]+|\[[a-fA-F0-9]+\])', issue_body)
47
+ if not match:
48
+ print('webui_commit_id not found')
49
+ return False
50
+ webui_commit_id = match.group(1)
51
+ print(f'webui_commit_id {webui_commit_id}')
52
+ webui_commit_id = webui_commit_id.replace("[", "").replace("]", "")
53
+ if not (7 <= len(webui_commit_id) <= 40):
54
+ print(f'invalid length!')
55
+ return False
56
+ match = re.search(r'deforum exten commit id - ([a-fA-F0-9]+|\[[a-fA-F0-9]+\])', issue_body)
57
+ if not match:
58
+ print('deforum commit id not found')
59
+ return False
60
+ t2v_commit_id = match.group(1)
61
+ print(f'deforum_commit_id {t2v_commit_id}')
62
+ t2v_commit_id = t2v_commit_id.replace("[", "").replace("]", "")
63
+ if not (7 <= len(t2v_commit_id) <= 40):
64
+ print(f'invalid length!')
65
+ return False
66
+ return True
67
+
68
+ # Only if a bug report
69
+ if '[Bug]' in issue.title and not '[Feature Request]' in issue.title:
70
+ print('The issue is eligible')
71
+ # Initialize an empty list to store error messages
72
+ error_messages = []
73
+
74
+ # Check for each condition and add the corresponding error message if the condition is not met
75
+ if not check_keywords(issue.body, keywords):
76
+ error_messages.append("Include **THE FULL LOG FROM THE START OF THE WEBUI** in the issue description.")
77
+
78
+ if not check_title_word_count(issue.title, 3):
79
+ error_messages.append("Make sure the issue title has at least 3 words.")
80
+
81
+ if not check_title_concise(issue.title, 13):
82
+ error_messages.append("The issue title should be concise and contain no more than 13 words.")
83
+
84
+ # if not check_commit_id_format(issue.body):
85
+ # error_messages.append("Provide a valid commit ID in the format 'commit id - [commit_hash]' **both** for the WebUI and the Extension.")
86
+
87
+ # If there are any error messages, close the issue and send a comment with the error messages
88
+ if error_messages:
89
+ print('Invalid issue, closing')
90
+ # Add the "not planned" label to the issue
91
+ not_planned_label = repo.get_label("wrong format")
92
+ issue.add_to_labels(not_planned_label)
93
+
94
+ # Close the issue
95
+ issue.edit(state='closed')
96
+
97
+ # Generate the comment by concatenating the error messages
98
+ comment = "This issue has been closed due to incorrect formatting. Please address the following mistakes and reopen the issue (click on the 'Reopen' button below):\n\n"
99
+ comment += "\n".join(f"- {error_message}" for error_message in error_messages)
100
+
101
+ # Add the comment to the issue
102
+ issue.create_comment(comment)
103
+ elif repo.get_label("wrong format") in issue.labels:
104
+ print('Issue is fine')
105
+ issue.edit(state='open')
106
+ issue.delete_labels()
107
+ bug_label = repo.get_label("bug")
108
+ issue.add_to_labels(bug_label)
109
+ comment = "Thanks for addressing your formatting mistakes. The issue has been reopened now."
110
+ issue.create_comment(comment)
.github/workflows/issue_checker.yaml ADDED
@@ -0,0 +1,23 @@
1
+ name: Issue Checker
2
+
3
+ on:
4
+ issues:
5
+ types: [opened, reopened, edited]
6
+
7
+ jobs:
8
+ check_issue:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - name: Checkout repository
12
+ uses: actions/checkout@v3
13
+ - name: Set up Python
14
+ uses: actions/setup-python@v3
15
+ with:
16
+ python-version: '3.x'
17
+ - name: Install dependencies
18
+ run: pip install PyGithub
19
+ - name: Check issue
20
+ env:
21
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
22
+ ISSUE_NUMBER: ${{ github.event.number }}
23
+ run: python .github/scripts/issue_checker.py
.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ # Unnecessary compiled python files.
2
+ __pycache__
3
+ *.pyc
4
+ *.pyo
5
+
6
+ # Output Images
7
+ outputs
8
+
9
+ # Log files for colab-convert
10
+ cc-outputs.log
11
+ *.safetensors
12
+ scripts/deforum_helpers/navigation.py
AUTHORS.md ADDED
@@ -0,0 +1,23 @@
1
+ # Authors
2
+
3
+ Note: this extension is a re-implementation of Deforum functionality on top of Automatic1111's Stable Diffusion WebUI. The authors who took part in its development are
4
+
5
+ kabachuha (Artem Khrapov)
6
+ hithereai
7
+ reallybigname (Forest Star Walz)
8
+ MatissesProjects (Matisse Tec)
9
+ rewbs (Robin Fernandes)
10
+ Funofabot
11
+ Kitchenn3
12
+ Zarxrax
13
+ Phobos97
14
+ yyahav
15
+ rwscarb (Ryan Scarbery)
16
+ phi-line
17
+ blackneoo
18
+ joet203 (Joe Toch)
19
+
20
+
21
+ ### Notes
22
+
23
+ For license inheritance from the Deforum notebook and the previous versions of the extension, see the LICENSE file.
CONTRIBUTING.md ADDED
@@ -0,0 +1,9 @@
1
+ # Contributing
2
+
3
+ ## Licensing notice
4
+
5
+ By contributing to this project you agree that your work will be licensed under the terms of the GNU Affero General Public License version 3.
6
+
7
+ ## Contact us
8
+
9
+ You may also want to inform the dev team about your work via Discord (https://discord.gg/deforum) to ensure that no one else is working on the same thing.
LICENSE ADDED
The diff for this file is too large to render. See raw diff
 
README.md CHANGED
@@ -1,3 +1,73 @@
1
- ---
2
- license: other
3
- ---
1
+
2
+ # Deforum Stable Diffusion — official extension for AUTOMATIC1111's webui
3
+
4
+ <p align="left">
5
+ <a href="https://github.com/deforum-art/sd-webui-deforum/commits"><img alt="Last Commit" src="https://img.shields.io/github/last-commit/deforum-art/deforum-for-automatic1111-webui"></a>
6
+ <a href="https://github.com/deforum-art/sd-webui-deforum/issues"><img alt="GitHub issues" src="https://img.shields.io/github/issues/deforum-art/deforum-for-automatic1111-webui"></a>
7
+ <a href="https://github.com/deforum-art/sd-webui-deforum/stargazers"><img alt="GitHub stars" src="https://img.shields.io/github/stars/deforum-art/deforum-for-automatic1111-webui"></a>
8
+ <a href="https://github.com/deforum-art/sd-webui-deforum/network"><img alt="GitHub forks" src="https://img.shields.io/github/forks/deforum-art/deforum-for-automatic1111-webui"></a>
9
+ </a>
10
+ </p>
11
+
12
+ ## Need help? See our [FAQ](https://github.com/deforum-art/sd-webui-deforum/wiki/FAQ-&-Troubleshooting)
13
+
14
+ ## Getting Started
15
+
16
+ 1. Install [AUTOMATIC1111's webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui/).
17
+
18
+ 2. There are two ways: either clone the repo into the `extensions` directory via the git command line, launched from within the `stable-diffusion-webui` folder
19
+
20
+ ```sh
21
+ git clone https://github.com/deforum-art/sd-webui-deforum extensions/deforum
22
+ ```
23
+
24
+ Or download this repository, locate the `extensions` folder within your WebUI installation, create a folder named `deforum` and put the contents of the downloaded directory inside of it. Then restart WebUI.
25
+
26
+ 3. Open the webui, find the Deforum tab at the top of the page.
27
+
28
+ 4. Enter the animation settings. Refer to [this general guide](https://docs.google.com/document/d/1pEobUknMFMkn8F5TMsv8qRzamXX_75BShMMXV8IFslI/edit) and [this guide to math keyframing functions in Deforum](https://docs.google.com/document/d/1pfW1PwbDIuW0cv-dnuyYj1UzPqe23BlSLTJsqazffXM/edit?usp=sharing). However, **in this version, prompt weights less than zero don't work like in the original Deforum!** Split the positive and the negative prompt in the JSON section using the --neg argument, like this: "apple:\`where(cos(t)>=0, cos(t), 0)\`, snow --neg strawberry:\`where(cos(t)<0, -cos(t), 0)\`"
29
+
30
+ 5. To view animation frames as they're being made, without waiting for the animation to complete, go to the 'Settings' tab and set the value of the setting shown below **above zero**. Warning: it may slow down the generation process.
31
+
32
+ ![adsdasunknown](https://user-images.githubusercontent.com/14872007/196064311-1b79866a-e55b-438a-84a7-004ff30829ad.png)
33
+
34
+
35
+ 6. Run the script and see if you got it working. **In 3D mode a large delay is expected at first** as the script loads the depth models. With the default settings, the whole thing should consume about 6.4 GB of VRAM at 3D mode peaks, and no more than 3.8 GB of VRAM in 3D mode if you launch the webui with the '--lowvram' command line argument.
36
+
37
+ 7. After the generation process is completed, click the button with the self-describing name to show the video or gif result right in the GUI!
38
+
39
+ 8. Join our Discord where you can post generated stuff, ask questions and more: https://discord.gg/deforum. <br>
40
+ * There's also the 'Issues' tab in the repo, for well... reporting issues ;)
41
+
42
+ 9. Profit!
43
+
44
+ ## Known issues
45
+
46
+ * This port is not fully backward-compatible with either the notebook or the local version, due both to changes in how AUTOMATIC1111's webui handles Stable Diffusion models and to changes made in this script to get it working in the new environment. *Expect* that you may not get exactly the same result, or that things may break with older settings.
47
+
48
+ ## Screenshots
49
+
50
+ Amazing raw Deforum animation by [Pxl.Pshr](https://www.instagram.com/pxl.pshr):
51
+ * Turn Audio ON!
52
+
53
+ (Audio credits: SKRILLEX, FRED AGAIN & FLOWDAN - RUMBLE (PHACE'S DNB FLIP))
54
+
55
+ https://user-images.githubusercontent.com/121192995/224450647-39529b28-be04-4871-bb7a-faf7afda2ef2.mp4
56
+
57
+ Setting file of that video: [here](https://github.com/deforum-art/sd-webui-deforum/files/11353167/PxlPshrWinningAnimationSettings.txt).
58
+
59
+ <br>
60
+
61
+ Main extension tab:
62
+
63
+ ![image](https://user-images.githubusercontent.com/121192995/226101131-43bf594a-3152-45dd-a5d1-2538d0bc221d.png)
64
+
65
+ Keyframes tab:
66
+
67
+ ![image](https://user-images.githubusercontent.com/121192995/226101140-bfe6cce7-9b78-4a1d-be9a-43e1fc78239e.png)
68
+
69
+ ## Licensing
70
+
71
+ The 'extension' part of this project is licensed under the GNU Affero General Public License version 3.
72
+
73
+ For license inheritance from the Deforum notebook and the previous versions of the extension and for the third party code used under permissive licenses, see the LICENSE file.
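A minimal sketch of the prompt-splitting syntax described in step 4 of the README above, assuming the usual Deforum mapping of keyframe number to prompt string; the keyframe numbers and the second prompt are illustrative only, not part of this commit:

```python
# Hedged sketch of the Prompts JSON/dict format from step 4 of the README above.
# Everything after --neg in a prompt becomes the negative prompt for that keyframe.
animation_prompts = {
    # weight expressions in backticks are evaluated per frame (t); the negative
    # weight is redirected into the --neg part instead of being applied directly
    "0": "apple:`where(cos(t)>=0, cos(t), 0)`, snow --neg strawberry:`where(cos(t)<0, -cos(t), 0)`",
    # a plain prompt with a simple negative part (hypothetical example)
    "60": "a snowy forest at dawn --neg blurry, low quality",
}
```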
install.py ADDED
@@ -0,0 +1,27 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import launch
19
+ import os
20
+
21
+ req_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "requirements.txt")
22
+
23
+ with open(req_file) as file:
24
+ for lib in file:
25
+ lib = lib.strip()
26
+ if not launch.is_installed(lib):
27
+ launch.run_pip(f"install {lib}", f"Deforum requirement: {lib}")
javascript/deforum-hints.js ADDED
@@ -0,0 +1,233 @@
1
+ /*
2
+ * 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
3
+ * Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
4
+ *
5
+ * This program is free software: you can redistribute it and/or modify
6
+ * it under the terms of the GNU Affero General Public License as published by
7
+ * the Free Software Foundation, version 3 of the License.
8
+ *
9
+ * This program is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ * GNU Affero General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Affero General Public License
15
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
+ *
17
+ * Contact the author (Artem Khrapov): https://github.com/kabachuha/
18
+ */
19
+
20
+ // mouseover tooltips for various UI elements
21
+
22
+ deforum_titles = {
23
+ //Run
24
+ "Override settings": "specify a custom settings file and ignore settings displayed in the interface",
25
+ "Custom settings file": "the path to a custom settings file",
26
+ "Width": "The width of the output images, in pixels (must be a multiple of 64)",
27
+ "Height": "The height of the output images, in pixels (must be a multiple of 64)",
28
+ "Restore faces": "Restore low quality faces using GFPGAN neural network",
29
+ "Tiling": "Produce an image that can be tiled.",
30
+ "Highres. fix": "Use a two step process to partially create an image at smaller resolution, upscale, and then improve details in it without changing composition",
31
+ "Seed": "A value that determines the output of random number generator - if you create an image with same parameters and seed as another image, you'll get the same result",
32
+ "Sampler": "Which algorithm to use to produce the image",
33
+ "Enable extras": "enable additional seed settings",
34
+ "Subseed": "Seed of a different picture to be mixed into the generation.",
35
+ "Subseed strength": "How strong of a variation to produce. At 0, there will be no effect. At 1, you will get the complete picture with variation seed (except for ancestral samplers, where you will just get something).",
36
+ "Resize seed from width": "Normally, changing the resolution will completely change an image, even when using the same seed. If you generated an image with a particular seed and then changed the resolution, put the original resolution here to get an image that more closely resemles the original",
37
+ "Resize seed from height": "Normally, changing the resolution will completely change an image, even when using the same seed. If you generated an image with a particular seed and then changed the resolution, put the original resolution here to get an image that more closely resemles the original",
38
+ "Steps": "How many times to improve the generated image iteratively; higher values take longer; very low values can produce bad results",
39
+ "Batch name": "output images will be placed in a folder with this name ({timestring} token will be replaced) inside the img2img output folder. Supports placeholders like {seed}, {w}, {h}, {prompts} and more",
40
+ "Pix2Pix img CFG schedule": "*Only in use with pix2pix checkpoints!*",
41
+ "Filename format": "specify the format of the filename for output images",
42
+ "Seed behavior": "defines the seed behavior that is used for animations",
43
+ "iter": "the seed value will increment by 1 for each subsequent frame of the animation",
44
+ "fixed": "the seed will remain fixed across all frames of animation. **NOT RECOMMENDED.** Unless you know what you are doing, it will *deep fry* the pictures over time",
45
+ "random": "a random seed will be used on each frame of the animation",
46
+ "schedule": "specify your own seed schedule",
47
+ "Seed iter N":"controls for how many frames the same seed should stick before iterating to the next one",
48
+ //Keyframes
49
+ "Animation mode": "selects the type of animation",
50
+ "2D": "only 2D motion parameters will be used, but this mode uses the least amount of VRAM. You can optionally enable flip_2d_perspective to enable some psuedo-3d animation parameters while in 2D mode.",
51
+ "3D": "enables all 3D motion parameters.",
52
+ "Video Input": "will ignore all motion parameters and attempt to reference a video loaded into the runtime, specified by the video_init_path. Max_frames is ignored during video_input mode, and instead, follows the number of frames pulled from the video’s length. Resume_from_timestring is NOT available with Video_Input mode.",
53
+ "Max frames": "the maximum number of output images to be created",
54
+ "Border": "controls handling method of pixels to be generated when the image is smaller than the frame.",
55
+ "wrap": "pulls pixels from the opposite edge of the image",
56
+ "replicate": "repeats the edge of the pixels, and extends them. Animations with quick motion may yield lines where this border function was attempting to populate pixels into the empty space created.",
57
+ "Zoom": "2D operator that scales the canvas size, multiplicatively. [static = 1.0]",
58
+ "Angle": "2D operator to rotate canvas clockwise/anticlockwise in degrees per frame",
59
+ "Transform Center X": "x center axis for 2D angle/zoom *only*",
60
+ "Transform Center Y": "y center axis for 2D angle/zoom *only*",
61
+ "Translation X": "2D & 3D operator to move canvas left/right in pixels per frame",
62
+ "Translation Y": "2D & 3D operator to move canvas up/down in pixels per frame",
63
+ "Translation Z": "3D operator to move canvas towards/away from view [speed set by FOV]",
64
+ "Rotation 3D X": "3D operator to tilt canvas up/down in degrees per frame",
65
+ "Rotation 3D Y": "3D operator to pan canvas left/right in degrees per frame",
66
+ "Rotation 3D Z": "3D operator to roll canvas clockwise/anticlockwise",
67
+ "Enable perspective flip": "enables 2D mode functions to simulate faux 3D movement",
68
+ "Perspective flip theta": "the roll effect angle",
69
+ "Perspective flip phi": "the tilt effect angle",
70
+ "Perspective flip gamma": "the pan effect angle",
71
+ "Perspective flip fv": "the 2D vanishing point of perspective (recommended range 30-160)",
72
+ "Noise schedule": "amount of graininess to add per frame for diffusion diversity",
73
+ "Strength schedule": "amount of presence of previous frame to influence next frame, also controls steps in the following formula [steps - (strength_schedule * steps)]",
74
+ "Sampler schedule": "controls which sampler to use at a specific scheduled frame",
75
+ "Contrast schedule": "adjusts the overall contrast per frame [default neutral at 1.0]",
76
+ "CFG scale schedule": "how closely the image should conform to the prompt. Lower values produce more creative results. (recommended range 5-15)",
77
+ "FOV schedule": "adjusts the scale at which the canvas is moved in 3D by the translation_z value. [maximum range -180 to +180, with 0 being undefined. Values closer to 180 will make the image have less depth, while values closer to 0 will allow more depth]",
78
+ "Aspect Ratio schedule": "adjusts the aspect ratio for the depth calculation (normally 1)",
79
+ //"near_schedule": "",
80
+ //"far_schedule": "",
81
+ "Seed schedule": "allows you to specify seeds at a specific schedule, if seed_behavior is set to schedule.",
82
+ "Color coherence": "The color coherence will attempt to sample the overall pixel color information, and trend those values analyzed in the first frame to be applied to future frames.",
83
+ // "None": "Disable color coherence",
84
+ "HSV": "HSV is a good method for balancing presence of vibrant colors, but may produce unrealistic results - (ie.blue apples)",
85
+ "LAB": "LAB is a more linear approach to mimic human perception of color space - a good default setting for most users.",
86
+ "RGB": "RGB is good for enforcing unbiased amounts of color in each red, green and blue channel - some images may yield colorized artifacts if sampling is too low.",
87
+ "Legacy colormatch": "applies the colormatch only before the video noising, resulting in graying the video over time, use it for backwards compatibility",
88
+ "Cadence": "A setting of 1 will cause every frame to receive diffusion in the sequence of image outputs. A setting of 2 will only diffuse on every other frame, yet motion will still be in effect. The output of images during the cadence sequence will be automatically blended, additively and saved to the specified drive. This may improve the illusion of coherence in some workflows as the content and context of an image will not change or diffuse during frames that were skipped. Higher values of 4-8 cadence will skip over a larger amount of frames and only diffuse the “Nth” frame as set by the diffusion_cadence value. This may produce more continuity in an animation, at the cost of little opportunity to add more diffused content. In extreme examples, motion within a frame will fail to produce diverse prompt context, and the space will be filled with lines or approximations of content - resulting in unexpected animation patterns and artifacts. Video Input & Interpolation modes are not affected by diffusion_cadence.",
89
+ "Optical flow cadence": "Optional method for optical flow used to blend frames during cadence in 3D animation mode (if cadence more than 1).",
90
+ "Optical flow redo generation": "This option takes twice as long because it generates twice in order to capture the optical flow from the previous image to the first generation, then warps the previous image and redoes the generation. Works in 2D/3D animation modes.",
91
+ "Redo": "Diffusion Redo. This option renders N times before the final render. It is suggested to lower your steps if you up your redo. Seed is randomized during redo generations and restored afterwards.",
92
+ "Noise type": "Selects the type of noise being added to each frame",
93
+ "uniform": "Uniform noise covers the entire frame. It somewhat flattens and sharpens the video over time, but may be good for cartoonish look. This is the old default setting.",
94
+ "perlin": "Perlin noise is a more natural looking noise. It is heterogeneous and less sharp than uniform noise, this way it is more likely that new details will appear in a more coherent way. This is the new default setting.",
95
+ "Perlin W": "The width of the Perlin sample. Lower values will make larger noise regions. Think of it as inverse brush stroke width. The greater this setting, the smaller details it will affect.",
96
+ "Perlin H": "The height of the Perlin sample. Lower values will make larger noise regions. Think of it as inverse brush stroke width. The greater this setting, the smaller details it will affect.",
97
+ "Perlin octaves": "The number of Perlin noise octaves, that is the count of P-noise iterations. Higher values will make the noise more soft and smoke-like, whereas lower values will make it look more organic and spotty. It is limited by 8 octaves as the resulting gain will run out of bounds.",
98
+ "Perlin persistence": "How much of noise from each octave is added on each iteration. Higher values will make it more straighter and sharper, while lower values will make it rounder and smoother. It is limited by 1.0 as the resulting gain fill the frame completely with noise.",
99
+ "Use depth warping": "enables instructions to warp an image dynamically in 3D mode only.",
100
+ "MiDaS weight": "sets a midpoint at which a depthmap is to be drawn: range [-1 to +1]",
101
+ "Padding mode": "instructs the handling of pixels outside the field of view as they come into the scene.",
102
+ //"border": "Border will attempt to use the edges of the canvas as the pixels to be drawn", //duplicate name as another property
103
+ "reflection": "reflection will attempt to approximate the image and tile/repeat pixels",
104
+ "zeros": "zeros will not add any new pixel information",
105
+ "Sampling Mode": "choose from Bicubic, Bilinear or Nearest modes. (Recommended: Bicubic)",
106
+ "Save depth maps": "will output a greyscale depth map image alongside the output images.",
107
+
108
+ // Prompts
109
+ "Prompts": "prompts for your animation in a JSON format. Use --neg words to add 'words' as negative prompt",
110
+ "Prompts positive": "positive prompt to be appended to *all* prompts",
111
+ "Prompts negative": "negative prompt to be appended to *all* prompts. DON'T use --neg here!",
112
+
113
+ //Init
114
+ "Use init": "Diffuse the first frame based on an image, similar to img2img.",
115
+ "Strength": "Controls the strength of the diffusion on the init image. 0 = disabled",
116
+ "Strength 0 no init": "Set the strength to 0 automatically when no init image is used",
117
+ "Init image": "the path to your init image",
118
+ "Use mask": "Use a grayscale image as a mask on your init image. Whiter areas of the mask are areas that change more.",
119
+ "Use alpha as mask": "use the alpha channel of the init image as the mask",
120
+ "Mask file": "the path to your mask image",
121
+ "Invert mask": "Inverts the colors of the mask",
122
+ "Mask brightness adjust": "adjust the brightness of the mask. Should be a positive number, with 1.0 meaning no adjustment.",
123
+ "Mask contrast adjust": "adjust the brightness of the mask. Should be a positive number, with 1.0 meaning no adjustment.",
124
+ "overlay mask": "Overlay the masked image at the end of the generation so it does not get degraded by encoding and decoding",
125
+ "Mask overlay blur": "Blur edges of final overlay mask, if used. Minimum = 0 (no blur)",
126
+ "Video init path": "the directory \/ URL at which your video file is located for Video Input mode only",
127
+ "Extract nth frame": "during the run sequence, only frames specified by this value will be extracted, saved, and diffused upon. A value of 1 indicates that every frame is to be accounted for. Values of 2 will use every other frame for the sequence. Higher values will skip that number of frames respectively.",
128
+ "Extract from frame":"start extracting the input video only from this frame number",
129
+ "Extract to frame": "stop the extraction of the video at this frame number. -1 for no limits",
130
+ "Overwrite extracted frames": "when enabled, will re-extract video frames each run. When using video_input mode, the run will be instructed to write video frames to the drive. If you’ve already populated the frames needed, uncheck this box to skip past redundant extraction, and immediately start the render. If you have not extracted frames, you must run at least once with this box checked to write the necessary frames.",
131
+ "Use mask video": "video_input mode only, enables the extraction and use of a separate video file intended for use as a mask. White areas of the extracted video frames will not be affected by diffusion, while black areas will be fully effected. Lighter/darker areas are affected dynamically.",
132
+ "Video mask path": "the directory in which your mask video is located.",
133
+ "Interpolate key frames": "selects whether to ignore prompt schedule or _x_frames.",
134
+ "Interpolate x frames": "the number of frames to transition thru between prompts (when interpolate_key_frames = true, then the numbers in front of the animation prompts will dynamically guide the images based on their value. If set to false, will ignore the prompt numbers and force interpole_x_frames value regardless of prompt number)",
135
+ "Resume from timestring": "instructs the run to start from a specified point",
136
+ "Resume timestring": "the required timestamp to reference when resuming. Currently only available in 2D & 3D mode, the timestamp is saved as the settings .txt file name as well as images produced during your previous run. The format follows: yyyymmddhhmmss - a timestamp of when the run was started to diffuse.",
137
+
138
+ //Video Output
139
+ "Skip video creation": "when checked, do not output a video",
140
+ "Make GIF": "create a gif in addition to .mp4 file. supports up to 30 fps, will self-disable at higher fps values",
141
+ "Upscale":"upscale the images of the next run once it's finished + make a video out of them",
142
+ "Upscale model":"model of the upscaler to use. 'realesr-animevideov3' is much faster but yields smoother, less detailed results. the other models only do x4",
143
+ "Upscale factor":"how many times to upscale, actual options depend on the chosen upscale model",
144
+ "FPS": "The frames per second that the video will run at",
145
+ "Output format": "select the type of video file to output",
146
+ "PIL gif": "create an animated GIF",
147
+ "FFMPEG mp4": "create an MP4 video file",
148
+ "FFmpeg location": "the path to where ffmpeg is located. Leave at default 'ffmpeg' if ffmpeg is in your PATH!",
149
+ "FFmpeg crf": "controls quality where lower is better, less compressed. values: 0 to 51, default 17",
150
+ "FFmpeg preset": "controls how good the compression is, and the operation speed. If you're not in a rush keep it at 'veryslow'",
151
+ "Add soundtrack": "when this box is checked, and FFMPEG mp4 is selected as the output format, an audio file will be multiplexed with the video.",
152
+ "Soundtrack path": "the path\/ URL to an audio file to accompany the video",
153
+ "Use manual settings": "when this is unchecked, the video will automatically be created in the same output folder as the images. Check this box to specify different settings for the creation of the video, specified by the following options",
154
+ "Render steps": "render each step of diffusion as a separate frame",
155
+ "Max video frames": "the maximum number of frames to include in the video, when use_manual_settings is checked",
156
+ "Image path": "the location of images to create the video from, when use_manual_settings is checked",
157
+ "MP4 path": "the output location of the mp4 file, when use_manual_settings is checked",
158
+ "Delete Imgs": "if enabled, raw imgs will be deleted after a successful video/ videos (upsacling, interpolation, gif) creation",
159
+ "Engine": "choose the frame interpolation engine and version",
160
+ "Interp X":"how many times to interpolate the source video. e.g source video fps of 12 and a value of x2 will yield a 24fps interpolated video",
161
+ "Slow-Mo X":"how many times to slow-down the video. *Naturally affects output fps as well",
162
+ "Keep Imgs": "delete or keep raw affected (interpolated/ upscaled depending on the UI section) png imgs",
163
+ "Interpolate an existing video":"This feature allows you to interpolate any video with a dedicated button. Video could be completly unrelated to deforum",
164
+ "In Frame Count": "uploaded video total frame count",
165
+ "In FPS":"uploaded video FPS",
166
+ "Interpolated Vid FPS":"calculated output-interpolated video FPS",
167
+ "In Res":"uploaded video resolution",
168
+ "Out Res":"output video resolution",
169
+
170
+ // Looper Args
171
+ // "use_looper": "",
172
+ "Enable guided images mode": "check this box to enable guided images mode",
173
+ "Images to use for keyframe guidance": "images you iterate over, you can do local or web paths (no single backslashes!)",
174
+ "Image strength schedule": "how much the image should look like the previou one and new image frame init. strength schedule might be better if this is higher, around .75 during the keyfames you want to switch on",
175
+ "Blend factor max": "blendFactor = blendFactorMax - blendFactorSlope * cos((frame % tweening_frames_schedule) / (tweening_frames_schedule / 2))",
176
+ "Blend factor slope": "blendFactor = blendFactorMax - blendFactorSlope * cos((frame % tweening_frames_schedule) / (tweening_frames_schedule / 2))",
177
+ "Tweening frames schedule": "number of the frames that we will blend between current imagined image and input frame image",
178
+ "Color correction factor": "how close to get to the colors of the input frame image/ the amount each frame during a tweening step to use the new images colors",
179
+ // deforum.py / right side of the ui:
180
+ "Settings File": "Path to settings file you want to load. Path can be relative to webui folder OR full - absolute",
181
+
182
+ // Hybrid Video
183
+ "Generate inputframes": "Initiates extraction of video frames from your video_init_path to the inputframes folder. You only need to do this once and then you can change it to False and re-render",
184
+ "Hybrid composite": "Engages hybrid compositing of video into animation in various ways with comp alpha as a master mix control.",
185
+ "Use init image as video": "Use init image instead of video. Doesn't require generation of inputframes.",
186
+ "First Frame as init image": "If True, uses the first frame of the video as the init_image. False can create interesting transition effects into the video, depending on settings.",
187
+ "Motion use prev img": "If enabled, changes the behavior or hybrid_motion to captures motion by comparing the current video frame to the previous rendered image, instead of the previous video frame.",
188
+ "Hybrid motion": "Analyzes video frames for camera motion and applies movement to render.",
189
+ "Flow method": "Selects the type of Optical Flow to use if Optical Flow is selected in Hybrid motion.",
190
+ "Comp mask type": "You don't need a mask to composite video. But, Mask types can control the way that video is composited with the previous image each frame.",
191
+ "Comp mask equalize": "Equalizes the mask for the composite before or after autocontrast operation (or both)",
192
+ "Comp mask auto contrast": "Auto-contrasts the mask for the composite. If enabled, uses the low/high autocontrast cutoff schedules.",
193
+ "Comp mask inverse": "Inverts the composite mask.",
194
+ "Comp save extra frames": "If this option is selected, many extra frames will be output for the various processes into the hybridframes folder.",
195
+ "Comp alpha schedule": "Schedule controls how much the composite video is mixed in, whether set to mask is None or using a mask. This is the master mix.",
196
+ "Flow factor schedule": "Affects optical flow hybrid motion. 1 is normal flow. -1 is negative flow. 0.5 is half flow, etc...",
197
+ "Comp mask blend alpha schedule": "If using a blend mask, this controls the blend amount of the video and render for the composite mask.",
198
+ "Comp mask contrast schedule": "Controls the contrast of the composite mask. 0.5 if half, 1 is normal contrast, 2 is double, etc.",
199
+ "Comp mask auto contrast cutoff high schedule": "If using autocontrast option, this is the high cutoff for the operation.",
200
+ "Comp mask auto contrast cutoff low schedule": "If using autocontrast option, this is the low cutoff for the operation.",
201
+ "Generate human masks": "This will generate masks of all the humans in a video. Created at generation of hybrid video. Not yet integrated for auto-masking, but it will create the masks, and you can then use the mask video manually.",
202
+ }
203
+
204
+ onUiUpdate(function(){
205
+ gradioApp().querySelectorAll('span, button, select, p').forEach(function(span){
206
+ tooltip = deforum_titles[span.textContent];
207
+
208
+ if(!tooltip){
209
+ tooltip = deforum_titles[span.value];
210
+ }
211
+
212
+ if(!tooltip){
213
+ for (const c of span.classList) {
214
+ if (c in deforum_titles) {
215
+ tooltip = deforum_titles[c];
216
+ break;
217
+ }
218
+ }
219
+ }
220
+
221
+ if(tooltip){
222
+ span.title = tooltip;
223
+ }
224
+ })
225
+
226
+ gradioApp().querySelectorAll('select').forEach(function(select){
227
+ if (select.onchange != null) return;
228
+
229
+ select.onchange = function(){
230
+ select.title = deforum_titles[select.value] || "";
231
+ }
232
+ })
233
+ })
javascript/deforum.js ADDED
@@ -0,0 +1,34 @@
1
+ /*
2
+ * 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
3
+ * Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
4
+ *
5
+ * This program is free software: you can redistribute it and/or modify
6
+ * it under the terms of the GNU Affero General Public License as published by
7
+ * the Free Software Foundation, version 3 of the License.
8
+ *
9
+ * This program is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ * GNU Affero General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Affero General Public License
15
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
+ *
17
+ * Contact the dev team: https://discord.gg/deforum
18
+ */
19
+
20
+ function submit_deforum(){
21
+ rememberGallerySelection('deforum_gallery')
22
+ showSubmitButtons('deforum', false)
23
+
24
+ var id = randomId()
25
+ requestProgress(id, gradioApp().getElementById('deforum_gallery_container'), gradioApp().getElementById('deforum_gallery'), function(){
26
+ showSubmitButtons('deforum', true)
27
+ })
28
+
29
+ var res = create_submit_args(arguments)
30
+
31
+ res[0] = id
32
+
33
+ return res
34
+ }
requirements.txt ADDED
@@ -0,0 +1,8 @@
1
+ numexpr
2
+ matplotlib
3
+ pandas
4
+ av
5
+ pims
6
+ imageio_ffmpeg
7
+ rich
8
+ gdown
scripts/deforum.py ADDED
@@ -0,0 +1,29 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ from modules import script_callbacks
19
+ from scripts.deforum_extend_paths import deforum_sys_extend
20
+
21
+ def init_deforum():
22
+ deforum_sys_extend()
23
+
24
+ from deforum_helpers.ui_right import on_ui_tabs
25
+ script_callbacks.on_ui_tabs(on_ui_tabs)
26
+ from deforum_helpers.ui_settings import on_ui_settings
27
+ script_callbacks.on_ui_settings(on_ui_settings)
28
+
29
+ init_deforum()
scripts/deforum_extend_paths.py ADDED
@@ -0,0 +1,34 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ import sys
20
+
21
+ def deforum_sys_extend():
22
+ deforum_folder_name = os.path.sep.join(os.path.abspath(__file__).split(os.path.sep)[:-2])
23
+
24
+ basedirs = [os.getcwd()]
25
+ if 'google.colab' in sys.modules:
26
+ basedirs.append('/content/gdrive/MyDrive/sd/stable-diffusion-webui') # for TheLastBen's colab
27
+ for basedir in basedirs:
28
+ deforum_paths_to_ensure = [
29
+ os.path.join(deforum_folder_name, 'scripts'),
30
+ os.path.join(deforum_folder_name, 'scripts', 'deforum_helpers', 'src')
31
+ ]
32
+ for deforum_scripts_path_fix in deforum_paths_to_ensure:
33
+ if not deforum_scripts_path_fix in sys.path:
34
+ sys.path.extend([deforum_scripts_path_fix])
scripts/deforum_helpers/RAFT.py ADDED
@@ -0,0 +1,45 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import torch
19
+ import numpy as np
20
+ import torchvision.transforms.functional as F
21
+ from torchvision.models.optical_flow import Raft_Large_Weights, raft_large
22
+
23
+ class RAFT:
24
+ def __init__(self):
25
+ weights = Raft_Large_Weights.DEFAULT
26
+ self.transforms = weights.transforms()
27
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
28
+ self.model = raft_large(weights=weights, progress=False).to(self.device).eval()
29
+
30
+ def predict(self, image1, image2, num_flow_updates:int = 50):
31
+ img1 = F.to_tensor(image1)
32
+ img2 = F.to_tensor(image2)
33
+ img1_batch, img2_batch = img1.unsqueeze(0), img2.unsqueeze(0)
34
+ img1_batch, img2_batch = self.transforms(img1_batch, img2_batch)
35
+
36
+ with torch.no_grad():
37
+ flow = self.model(image1=img1_batch.to(self.device), image2=img2_batch.to(self.device), num_flow_updates=num_flow_updates)[-1].cpu().numpy()[0]
38
+
39
+ # align the flow array to have the shape (h, w, 2) so it's compatible with the rest of CV2's flow methods
40
+ flow = np.transpose(flow, (1, 2, 0))
41
+
42
+ return flow
43
+
44
+ def delete_model(self):
45
+ del self.model
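A hedged usage sketch for the class above: the flow returned by `RAFT.predict` has shape (H, W, 2), so it can be consumed like a cv2-computed flow field. The helper below is illustrative only (not part of this commit), and the warp direction (forward vs. backward) depends on how the flow is used downstream; it simply resamples the image at positions displaced by the flow.

```python
# Hedged sketch: applying the (H, W, 2) flow returned by RAFT.predict with OpenCV.
import cv2
import numpy as np

def warp_with_flow(image: np.ndarray, flow: np.ndarray) -> np.ndarray:
    h, w = flow.shape[:2]
    # Build absolute sampling coordinates from the relative flow vectors.
    grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
    map_x = (grid_x + flow[..., 0]).astype(np.float32)
    map_y = (grid_y + flow[..., 1]).astype(np.float32)
    return cv2.remap(image, map_x, map_y, interpolation=cv2.INTER_LINEAR)

# raft = RAFT()
# flow = raft.predict(frame1, frame2)   # frame1/frame2: HxWx3 uint8 arrays (PIL images also work)
# warped = warp_with_flow(frame2, flow)
```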
scripts/deforum_helpers/animation.py ADDED
@@ -0,0 +1,430 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import numpy as np
19
+ import cv2
20
+ import py3d_tools as p3d # this is actually a file in our /src folder!
21
+ from functools import reduce
22
+ import math
23
+ import torch
24
+ from einops import rearrange
25
+ from modules.shared import state, opts
26
+ from .prompt import check_is_number
27
+ from .general_utils import debug_print
28
+
29
+ def sample_from_cv2(sample: np.ndarray) -> torch.Tensor:
30
+ sample = ((sample.astype(float) / 255.0) * 2) - 1
31
+ sample = sample[None].transpose(0, 3, 1, 2).astype(np.float16)
32
+ sample = torch.from_numpy(sample)
33
+ return sample
34
+
35
+ def sample_to_cv2(sample: torch.Tensor, type=np.uint8) -> np.ndarray:
36
+ sample_f32 = rearrange(sample.squeeze().cpu().numpy(), "c h w -> h w c").astype(np.float32)
37
+ sample_f32 = ((sample_f32 * 0.5) + 0.5).clip(0, 1)
38
+ sample_int8 = (sample_f32 * 255)
39
+ return sample_int8.astype(type)
40
+
41
+ def construct_RotationMatrixHomogenous(rotation_angles):
42
+ assert(type(rotation_angles)==list and len(rotation_angles)==3)
43
+ RH = np.eye(4,4)
44
+ cv2.Rodrigues(np.array(rotation_angles), RH[0:3, 0:3])
45
+ return RH
46
+
47
+ # https://en.wikipedia.org/wiki/Rotation_matrix
48
+ def getRotationMatrixManual(rotation_angles):
49
+
50
+ rotation_angles = [np.deg2rad(x) for x in rotation_angles]
51
+
52
+ phi = rotation_angles[0] # around x
53
+ gamma = rotation_angles[1] # around y
54
+ theta = rotation_angles[2] # around z
55
+
56
+ # X rotation
57
+ Rphi = np.eye(4,4)
58
+ sp = np.sin(phi)
59
+ cp = np.cos(phi)
60
+ Rphi[1,1] = cp
61
+ Rphi[2,2] = Rphi[1,1]
62
+ Rphi[1,2] = -sp
63
+ Rphi[2,1] = sp
64
+
65
+ # Y rotation
66
+ Rgamma = np.eye(4,4)
67
+ sg = np.sin(gamma)
68
+ cg = np.cos(gamma)
69
+ Rgamma[0,0] = cg
70
+ Rgamma[2,2] = Rgamma[0,0]
71
+ Rgamma[0,2] = sg
72
+ Rgamma[2,0] = -sg
73
+
74
+ # Z rotation (in-image-plane)
75
+ Rtheta = np.eye(4,4)
76
+ st = np.sin(theta)
77
+ ct = np.cos(theta)
78
+ Rtheta[0,0] = ct
79
+ Rtheta[1,1] = Rtheta[0,0]
80
+ Rtheta[0,1] = -st
81
+ Rtheta[1,0] = st
82
+
83
+ R = reduce(lambda x,y : np.matmul(x,y), [Rphi, Rgamma, Rtheta])
84
+
85
+ return R
86
+
87
+ def getPoints_for_PerspectiveTranformEstimation(ptsIn, ptsOut, W, H, sidelength):
88
+
89
+ ptsIn2D = ptsIn[0,:]
90
+ ptsOut2D = ptsOut[0,:]
91
+ ptsOut2Dlist = []
92
+ ptsIn2Dlist = []
93
+
94
+ for i in range(0,4):
95
+ ptsOut2Dlist.append([ptsOut2D[i,0], ptsOut2D[i,1]])
96
+ ptsIn2Dlist.append([ptsIn2D[i,0], ptsIn2D[i,1]])
97
+
98
+ pin = np.array(ptsIn2Dlist) + [W/2.,H/2.]
99
+ pout = (np.array(ptsOut2Dlist) + [1.,1.]) * (0.5*sidelength)
100
+ pin = pin.astype(np.float32)
101
+ pout = pout.astype(np.float32)
102
+
103
+ return pin, pout
104
+
105
+
106
+ def warpMatrix(W, H, theta, phi, gamma, scale, fV):
107
+
108
+ # M is to be estimated
109
+ M = np.eye(4, 4)
110
+
111
+ fVhalf = np.deg2rad(fV/2.)
112
+ d = np.sqrt(W*W+H*H)
113
+ sideLength = scale*d/np.cos(fVhalf)
114
+ h = d/(2.0*np.sin(fVhalf))
115
+ n = h-(d/2.0)
116
+ f = h+(d/2.0)
117
+
118
+ # Translation along Z-axis by -h
119
+ T = np.eye(4,4)
120
+ T[2,3] = -h
121
+
122
+ # Rotation matrices around x,y,z
123
+ R = getRotationMatrixManual([phi, gamma, theta])
124
+
125
+
126
+ # Projection Matrix
127
+ P = np.eye(4,4)
128
+ P[0,0] = 1.0/np.tan(fVhalf)
129
+ P[1,1] = P[0,0]
130
+ P[2,2] = -(f+n)/(f-n)
131
+ P[2,3] = -(2.0*f*n)/(f-n)
132
+ P[3,2] = -1.0
133
+
134
+ # pythonic matrix multiplication
135
+ F = reduce(lambda x,y : np.matmul(x,y), [P, T, R])
136
+
137
+ # shape should be 1,4,3 for ptsIn and ptsOut since perspectiveTransform() expects data in this way.
138
+ # In C++, this can be achieved by Mat ptsIn(1,4,CV_64FC3);
139
+ ptsIn = np.array([[
140
+ [-W/2., H/2., 0.],[ W/2., H/2., 0.],[ W/2.,-H/2., 0.],[-W/2.,-H/2., 0.]
141
+ ]])
142
+ ptsOut = np.array(np.zeros((ptsIn.shape), dtype=ptsIn.dtype))
143
+ ptsOut = cv2.perspectiveTransform(ptsIn, F)
144
+
145
+ ptsInPt2f, ptsOutPt2f = getPoints_for_PerspectiveTranformEstimation(ptsIn, ptsOut, W, H, sideLength)
146
+
147
+ # check float32 otherwise OpenCV throws an error
148
+ assert(ptsInPt2f.dtype == np.float32)
149
+ assert(ptsOutPt2f.dtype == np.float32)
150
+ M33 = cv2.getPerspectiveTransform(ptsInPt2f,ptsOutPt2f)
151
+
152
+ return M33, sideLength
153
+
154
+ def get_flip_perspective_matrix(W, H, keys, frame_idx):
155
+ perspective_flip_theta = keys.perspective_flip_theta_series[frame_idx]
156
+ perspective_flip_phi = keys.perspective_flip_phi_series[frame_idx]
157
+ perspective_flip_gamma = keys.perspective_flip_gamma_series[frame_idx]
158
+ perspective_flip_fv = keys.perspective_flip_fv_series[frame_idx]
159
+ M, sl = warpMatrix(W, H, perspective_flip_theta, perspective_flip_phi, perspective_flip_gamma, 1., perspective_flip_fv)
160
+ post_trans_mat = np.float32([[1, 0, (W-sl)/2], [0, 1, (H-sl)/2]])
161
+ post_trans_mat = np.vstack([post_trans_mat, [0,0,1]])
162
+ bM = np.matmul(M, post_trans_mat)
163
+ return bM
164
+
165
+ def flip_3d_perspective(anim_args, prev_img_cv2, keys, frame_idx):
166
+ W, H = (prev_img_cv2.shape[1], prev_img_cv2.shape[0])
167
+ return cv2.warpPerspective(
168
+ prev_img_cv2,
169
+ get_flip_perspective_matrix(W, H, keys, frame_idx),
170
+ (W, H),
171
+ borderMode=cv2.BORDER_WRAP if anim_args.border == 'wrap' else cv2.BORDER_REPLICATE
172
+ )
173
+
174
+ def anim_frame_warp(prev_img_cv2, args, anim_args, keys, frame_idx, depth_model=None, depth=None, device='cuda', half_precision = False):
175
+
176
+ if anim_args.use_depth_warping:
177
+ if depth is None and depth_model is not None:
178
+ depth = depth_model.predict(prev_img_cv2, anim_args.midas_weight, half_precision)
179
+
180
+ else:
181
+ depth = None
182
+
183
+ if anim_args.animation_mode == '2D':
184
+ prev_img = anim_frame_warp_2d(prev_img_cv2, args, anim_args, keys, frame_idx)
185
+ else: # '3D'
186
+ prev_img = anim_frame_warp_3d(device, prev_img_cv2, depth, anim_args, keys, frame_idx)
187
+
188
+ return prev_img, depth
189
+
190
+ def anim_frame_warp_2d(prev_img_cv2, args, anim_args, keys, frame_idx):
191
+ angle = keys.angle_series[frame_idx]
192
+ zoom = keys.zoom_series[frame_idx]
193
+ translation_x = keys.translation_x_series[frame_idx]
194
+ translation_y = keys.translation_y_series[frame_idx]
195
+ transform_center_x = keys.transform_center_x_series[frame_idx]
196
+ transform_center_y = keys.transform_center_y_series[frame_idx]
197
+ center_point = (args.W * transform_center_x, args.H * transform_center_y)
198
+ rot_mat = cv2.getRotationMatrix2D(center_point, angle, zoom)
199
+ trans_mat = np.float32([[1, 0, translation_x], [0, 1, translation_y]])
200
+ trans_mat = np.vstack([trans_mat, [0,0,1]])
201
+ rot_mat = np.vstack([rot_mat, [0,0,1]])
202
+ if anim_args.enable_perspective_flip:
203
+ bM = get_flip_perspective_matrix(args.W, args.H, keys, frame_idx)
204
+ rot_mat = np.matmul(bM, rot_mat, trans_mat)
205
+ else:
206
+ rot_mat = np.matmul(rot_mat, trans_mat)
207
+ return cv2.warpPerspective(
208
+ prev_img_cv2,
209
+ rot_mat,
210
+ (prev_img_cv2.shape[1], prev_img_cv2.shape[0]),
211
+ borderMode=cv2.BORDER_WRAP if anim_args.border == 'wrap' else cv2.BORDER_REPLICATE
212
+ )
213
+
214
+ def anim_frame_warp_3d(device, prev_img_cv2, depth, anim_args, keys, frame_idx):
215
+ TRANSLATION_SCALE = 1.0/200.0 # matches Disco
216
+ translate_xyz = [
217
+ -keys.translation_x_series[frame_idx] * TRANSLATION_SCALE,
218
+ keys.translation_y_series[frame_idx] * TRANSLATION_SCALE,
219
+ -keys.translation_z_series[frame_idx] * TRANSLATION_SCALE
220
+ ]
221
+ rotate_xyz = [
222
+ math.radians(keys.rotation_3d_x_series[frame_idx]),
223
+ math.radians(keys.rotation_3d_y_series[frame_idx]),
224
+ math.radians(keys.rotation_3d_z_series[frame_idx])
225
+ ]
226
+ if anim_args.enable_perspective_flip:
227
+ prev_img_cv2 = flip_3d_perspective(anim_args, prev_img_cv2, keys, frame_idx)
228
+ rot_mat = p3d.euler_angles_to_matrix(torch.tensor(rotate_xyz, device=device), "XYZ").unsqueeze(0)
229
+ result = transform_image_3d_switcher(device if not device.type.startswith('mps') else torch.device('cpu'), prev_img_cv2, depth, rot_mat, translate_xyz, anim_args, keys, frame_idx)
230
+ torch.cuda.empty_cache()
231
+ return result
232
+
233
+ def transform_image_3d_switcher(device, prev_img_cv2, depth_tensor, rot_mat, translate, anim_args, keys, frame_idx):
234
+ if anim_args.depth_algorithm.lower() in ['midas+adabins (old)', 'zoe+adabins (old)']:
235
+ return transform_image_3d_legacy(device, prev_img_cv2, depth_tensor, rot_mat, translate, anim_args, keys, frame_idx)
236
+ else:
237
+ return transform_image_3d_new(device, prev_img_cv2, depth_tensor, rot_mat, translate, anim_args, keys, frame_idx)
238
+
239
+ def transform_image_3d_legacy(device, prev_img_cv2, depth_tensor, rot_mat, translate, anim_args, keys, frame_idx):
240
+ # adapted and optimized version of transform_image_3d from Disco Diffusion https://github.com/alembics/disco-diffusion
241
+ w, h = prev_img_cv2.shape[1], prev_img_cv2.shape[0]
242
+
243
+ if anim_args.aspect_ratio_use_old_formula:
244
+ aspect_ratio = float(w)/float(h)
245
+ else:
246
+ aspect_ratio = keys.aspect_ratio_series[frame_idx]
247
+
248
+ near = keys.near_series[frame_idx]
249
+ far = keys.far_series[frame_idx]
250
+ fov_deg = keys.fov_series[frame_idx]
251
+ persp_cam_old = p3d.FoVPerspectiveCameras(near, far, aspect_ratio, fov=fov_deg, degrees=True, device=device)
252
+ persp_cam_new = p3d.FoVPerspectiveCameras(near, far, aspect_ratio, fov=fov_deg, degrees=True, R=rot_mat, T=torch.tensor([translate]), device=device)
253
+
254
+ # range of [-1,1] is important to torch grid_sample's padding handling
255
+ y,x = torch.meshgrid(torch.linspace(-1.,1.,h,dtype=torch.float32,device=device),torch.linspace(-1.,1.,w,dtype=torch.float32,device=device))
256
+ if depth_tensor is None:
257
+ z = torch.ones_like(x)
258
+ else:
259
+ z = torch.as_tensor(depth_tensor, dtype=torch.float32, device=device)
260
+ xyz_old_world = torch.stack((x.flatten(), y.flatten(), z.flatten()), dim=1)
261
+
262
+ xyz_old_cam_xy = persp_cam_old.get_full_projection_transform().transform_points(xyz_old_world)[:,0:2]
263
+ xyz_new_cam_xy = persp_cam_new.get_full_projection_transform().transform_points(xyz_old_world)[:,0:2]
264
+
265
+ offset_xy = xyz_new_cam_xy - xyz_old_cam_xy
266
+ # affine_grid theta param expects a batch of 2D mats. Each is 2x3 to do rotation+translation.
267
+ identity_2d_batch = torch.tensor([[1.,0.,0.],[0.,1.,0.]], device=device).unsqueeze(0)
268
+ # coords_2d will have shape (N,H,W,2).. which is also what grid_sample needs.
269
+ coords_2d = torch.nn.functional.affine_grid(identity_2d_batch, [1,1,h,w], align_corners=False)
270
+ offset_coords_2d = coords_2d - torch.reshape(offset_xy, (h,w,2)).unsqueeze(0)
271
+
272
+ image_tensor = rearrange(torch.from_numpy(prev_img_cv2.astype(np.float32)), 'h w c -> c h w').to(device)
273
+ new_image = torch.nn.functional.grid_sample(
274
+ image_tensor.add(1/512 - 0.0001).unsqueeze(0),
275
+ offset_coords_2d,
276
+ mode=anim_args.sampling_mode,
277
+ padding_mode=anim_args.padding_mode,
278
+ align_corners=False
279
+ )
280
+
281
+ # convert back to cv2 style numpy array
282
+ result = rearrange(
283
+ new_image.squeeze().clamp(0,255),
284
+ 'c h w -> h w c'
285
+ ).cpu().numpy().astype(prev_img_cv2.dtype)
286
+ return result
287
+
288
+ def transform_image_3d_new(device, prev_img_cv2, depth_tensor, rot_mat, translate, anim_args, keys, frame_idx):
289
+ '''
290
+ originally an adapted and optimized version of transform_image_3d from Disco Diffusion https://github.com/alembics/disco-diffusion
291
+ modified by reallybigname to control various incoming tensors
292
+ '''
293
+ if anim_args.depth_algorithm.lower().startswith('midas'): # 'Midas-3-Hybrid' or 'Midas-3.1-BeitLarge'
294
+ depth = 1
295
+ depth_factor = -1
296
+ depth_offset = -2
297
+ elif anim_args.depth_algorithm.lower() == "adabins":
298
+ depth = 1
299
+ depth_factor = 1
300
+ depth_offset = 1
301
+ elif anim_args.depth_algorithm.lower() == "leres":
302
+ depth = 1
303
+ depth_factor = 1
304
+ depth_offset = 1
305
+ elif anim_args.depth_algorithm.lower() == "zoe":
306
+ depth = 1
307
+ depth_factor = 1
308
+ depth_offset = 1
309
+ else:
310
+ raise Exception(f"Unknown depth_algorithm passed to transform_image_3d function: {anim_args.depth_algorithm}")
311
+
312
+ w, h = prev_img_cv2.shape[1], prev_img_cv2.shape[0]
313
+
314
+ # depth stretching aspect ratio (has nothing to do with image dimensions - which is why the old formula was flawed)
315
+ aspect_ratio = float(w)/float(h) if anim_args.aspect_ratio_use_old_formula else keys.aspect_ratio_series[frame_idx]
316
+
317
+ # get projection keys
318
+ near = keys.near_series[frame_idx]
319
+ far = keys.far_series[frame_idx]
320
+ fov_deg = keys.fov_series[frame_idx]
321
+
322
+ # get perspective cams old (still) and new (transformed)
323
+ persp_cam_old = p3d.FoVPerspectiveCameras(near, far, aspect_ratio, fov=fov_deg, degrees=True, device=device)
324
+ persp_cam_new = p3d.FoVPerspectiveCameras(near, far, aspect_ratio, fov=fov_deg, degrees=True, R=rot_mat, T=torch.tensor([translate]), device=device)
325
+
326
+ # make xy meshgrid - range of [-1,1] is important to torch grid_sample's padding handling
327
+ y,x = torch.meshgrid(torch.linspace(-1.,1.,h,dtype=torch.float32,device=device),torch.linspace(-1.,1.,w,dtype=torch.float32,device=device))
328
+
329
+ # test tensor for validity (some are corrupted for some reason)
330
+ depth_tensor_invalid = depth_tensor is None or torch.isnan(depth_tensor).any() or torch.isinf(depth_tensor).any() or depth_tensor.min() == depth_tensor.max()
331
+
332
+ if depth_tensor is not None:
333
+ debug_print(f"Depth_T.min: {depth_tensor.min()}, Depth_T.max: {depth_tensor.max()}")
334
+ # if invalid, create flat z for this frame
335
+ if depth_tensor_invalid:
336
+ # if none, then 3D depth is turned off, so no warning is needed.
337
+ if depth_tensor is not None:
338
+ print("Depth tensor invalid. Generating a Flat depth for this frame.")
339
+ # create flat depth
340
+ z = torch.ones_like(x)
341
+ # create z from depth tensor
342
+ else:
343
+ # prepare tensor between 0 and 1 with optional equalization and autocontrast
344
+ depth_normalized = prepare_depth_tensor(depth_tensor)
345
+
346
+ # Rescale the normalized depth values by 'depth' and shift by 'depth_offset' (e.g. depth 2 and offset -1 maps the 0..1 range to -1..+1)
347
+ depth_final = depth_normalized * depth + depth_offset
348
+
349
+ # depth factor (1 is normal. -1 is inverted)
350
+ if depth_factor != 1:
351
+ depth_final *= depth_factor
352
+
353
+ # console reporting of depth normalization, min, max, diff
354
+ # will *only* print to console if Dev mode is enabled in general settings of Deforum
355
+ txt_depth_min, txt_depth_max = '{:.2f}'.format(float(depth_tensor.min())), '{:.2f}'.format(float(depth_tensor.max()))
356
+ diff = '{:.2f}'.format(float(depth_tensor.max()) - float(depth_tensor.min()))
357
+ console_txt = f"\033[36mDepth normalized to {depth_final.min()}/{depth_final.max()} from"
358
+ debug_print(f"{console_txt} {txt_depth_min}/{txt_depth_max} diff {diff}\033[0m")
359
+
360
+ # add z from depth
361
+ z = torch.as_tensor(depth_final, dtype=torch.float32, device=device)
362
+
363
+ # calculate offset_xy
364
+ xyz_old_world = torch.stack((x.flatten(), y.flatten(), z.flatten()), dim=1)
365
+ xyz_old_cam_xy = persp_cam_old.get_full_projection_transform().transform_points(xyz_old_world)[:,0:2]
366
+ xyz_new_cam_xy = persp_cam_new.get_full_projection_transform().transform_points(xyz_old_world)[:,0:2]
367
+ offset_xy = xyz_new_cam_xy - xyz_old_cam_xy
368
+
369
+ # affine_grid theta param expects a batch of 2D mats. Each is 2x3 to do rotation+translation.
370
+ identity_2d_batch = torch.tensor([[1.,0.,0.],[0.,1.,0.]], device=device).unsqueeze(0)
371
+
372
+ # coords_2d will have shape (N,H,W,2).. which is also what grid_sample needs.
373
+ coords_2d = torch.nn.functional.affine_grid(identity_2d_batch, [1,1,h,w], align_corners=False)
374
+ offset_coords_2d = coords_2d - torch.reshape(offset_xy, (h,w,2)).unsqueeze(0)
375
+
376
+ # do the hyperdimensional remap
377
+ image_tensor = rearrange(torch.from_numpy(prev_img_cv2.astype(np.float32)), 'h w c -> c h w').to(device)
378
+ new_image = torch.nn.functional.grid_sample(
379
+ image_tensor.unsqueeze(0), # image_tensor.add(1/512 - 0.0001).unsqueeze(0),
380
+ offset_coords_2d,
381
+ mode=anim_args.sampling_mode,
382
+ padding_mode=anim_args.padding_mode,
383
+ align_corners=False
384
+ )
385
+
386
+ # convert back to cv2 style numpy array
387
+ result = rearrange(
388
+ new_image.squeeze().clamp(0,255),
389
+ 'c h w -> h w c'
390
+ ).cpu().numpy().astype(prev_img_cv2.dtype)
391
+ return result
392
+
393
+ def prepare_depth_tensor(depth_tensor=None):
394
+ # Prepares a depth tensor with normalization & equalization between 0 and 1
395
+ depth_range = depth_tensor.max() - depth_tensor.min()
396
+ depth_tensor = (depth_tensor - depth_tensor.min()) / depth_range
397
+ depth_tensor = depth_equalization(depth_tensor=depth_tensor)
398
+ return depth_tensor
399
+
400
+ def depth_equalization(depth_tensor):
401
+ """
402
+ Perform histogram equalization on a single-channel depth tensor.
403
+
404
+ Args:
405
+ depth_tensor (torch.Tensor): A 2D depth tensor (H, W).
406
+
407
+ Returns:
408
+ torch.Tensor: Equalized depth tensor (2D).
409
+ """
410
+
411
+ # Convert the depth tensor to a NumPy array for processing
412
+ depth_array = depth_tensor.cpu().numpy()
413
+
414
+ # Calculate the histogram of the depth values using a specified number of bins
415
+ # Increase the number of bins for higher precision depth tensors
416
+ hist, bin_edges = np.histogram(depth_array, bins=1024, range=(0, 1))
417
+
418
+ # Calculate the cumulative distribution function (CDF) of the histogram
419
+ cdf = hist.cumsum()
420
+
421
+ # Normalize the CDF so that the maximum value is 1
422
+ cdf = cdf / float(cdf[-1])
423
+
424
+ # Perform histogram equalization by mapping the original depth values to the CDF values
425
+ equalized_depth_array = np.interp(depth_array, bin_edges[:-1], cdf)
426
+
427
+ # Convert the equalized depth array back to a PyTorch tensor and return it
428
+ equalized_depth_tensor = torch.from_numpy(equalized_depth_array).to(depth_tensor.device)
429
+
430
+ return equalized_depth_tensor
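
For reference, a self-contained sketch (not part of the diff) of what prepare_depth_tensor and depth_equalization above do to a depth map, using synthetic placeholder values:

import numpy as np
import torch

depth = torch.rand(480, 640) ** 3                                    # synthetic, skewed depth distribution
depth_norm = (depth - depth.min()) / (depth.max() - depth.min())     # normalize to 0..1, as in prepare_depth_tensor
hist, bin_edges = np.histogram(depth_norm.numpy(), bins=1024, range=(0, 1))
cdf = hist.cumsum() / float(hist.cumsum()[-1])                       # normalized CDF, maximum value 1
equalized = np.interp(depth_norm.numpy(), bin_edges[:-1], cdf)       # the same mapping depth_equalization applies
print(float(equalized.min()), float(equalized.max()))                # values spread towards a roughly uniform 0..1 range
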
scripts/deforum_helpers/animation_key_frames.py ADDED
@@ -0,0 +1,150 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import re
19
+ import numpy as np
20
+ import numexpr
21
+ import pandas as pd
22
+ from .prompt import check_is_number
23
+
24
+ class DeformAnimKeys():
25
+ def __init__(self, anim_args, seed=-1):
26
+ self.fi = FrameInterpolater(anim_args.max_frames, seed)
27
+ self.angle_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.angle))
28
+ self.transform_center_x_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.transform_center_x))
29
+ self.transform_center_y_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.transform_center_y))
30
+ self.zoom_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.zoom))
31
+ self.translation_x_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.translation_x))
32
+ self.translation_y_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.translation_y))
33
+ self.translation_z_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.translation_z))
34
+ self.rotation_3d_x_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.rotation_3d_x))
35
+ self.rotation_3d_y_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.rotation_3d_y))
36
+ self.rotation_3d_z_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.rotation_3d_z))
37
+ self.perspective_flip_theta_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.perspective_flip_theta))
38
+ self.perspective_flip_phi_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.perspective_flip_phi))
39
+ self.perspective_flip_gamma_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.perspective_flip_gamma))
40
+ self.perspective_flip_fv_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.perspective_flip_fv))
41
+ self.noise_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.noise_schedule))
42
+ self.strength_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.strength_schedule))
43
+ self.contrast_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.contrast_schedule))
44
+ self.cfg_scale_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.cfg_scale_schedule))
45
+ self.ddim_eta_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.ddim_eta_schedule))
46
+ self.ancestral_eta_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.ancestral_eta_schedule))
47
+ self.pix2pix_img_cfg_scale_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.pix2pix_img_cfg_scale_schedule))
48
+ self.subseed_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.subseed_schedule))
49
+ self.subseed_strength_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.subseed_strength_schedule))
50
+ self.checkpoint_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.checkpoint_schedule), is_single_string = True)
51
+ self.steps_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.steps_schedule))
52
+ self.seed_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.seed_schedule))
53
+ self.sampler_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.sampler_schedule), is_single_string = True)
54
+ self.clipskip_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.clipskip_schedule))
55
+ self.noise_multiplier_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.noise_multiplier_schedule))
56
+ self.mask_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.mask_schedule), is_single_string = True)
57
+ self.noise_mask_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.noise_mask_schedule), is_single_string = True)
58
+ self.kernel_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.kernel_schedule))
59
+ self.sigma_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.sigma_schedule))
60
+ self.amount_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.amount_schedule))
61
+ self.threshold_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.threshold_schedule))
62
+ self.aspect_ratio_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.aspect_ratio_schedule))
63
+ self.fov_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.fov_schedule))
64
+ self.near_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.near_schedule))
65
+ self.cadence_flow_factor_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.cadence_flow_factor_schedule))
66
+ self.redo_flow_factor_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.redo_flow_factor_schedule))
67
+ self.far_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.far_schedule))
68
+ self.hybrid_comp_alpha_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.hybrid_comp_alpha_schedule))
69
+ self.hybrid_comp_mask_blend_alpha_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.hybrid_comp_mask_blend_alpha_schedule))
70
+ self.hybrid_comp_mask_contrast_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.hybrid_comp_mask_contrast_schedule))
71
+ self.hybrid_comp_mask_auto_contrast_cutoff_high_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.hybrid_comp_mask_auto_contrast_cutoff_high_schedule))
72
+ self.hybrid_comp_mask_auto_contrast_cutoff_low_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.hybrid_comp_mask_auto_contrast_cutoff_low_schedule))
73
+ self.hybrid_flow_factor_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(anim_args.hybrid_flow_factor_schedule))
74
+
75
+ class ControlNetKeys():
76
+ def __init__(self, anim_args, controlnet_args):
77
+ self.fi = FrameInterpolater(max_frames=anim_args.max_frames)
78
+ self.schedules = {}
79
+ for i in range(1, 6): # 5 CN models in total
80
+ for suffix in ['weight', 'guidance_start', 'guidance_end']:
81
+ prefix = f"cn_{i}"
82
+ key = f"{prefix}_{suffix}_schedule_series"
83
+ self.schedules[key] = self.fi.get_inbetweens(self.fi.parse_key_frames(getattr(controlnet_args, f"{prefix}_{suffix}")))
84
+ setattr(self, key, self.schedules[key])
85
+
86
+ class LooperAnimKeys():
87
+ def __init__(self, loop_args, anim_args, seed):
88
+ self.fi = FrameInterpolater(anim_args.max_frames, seed)
89
+ self.use_looper = loop_args.use_looper
90
+ self.imagesToKeyframe = loop_args.init_images
91
+ self.image_strength_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(loop_args.image_strength_schedule))
92
+ self.blendFactorMax_series = self.fi.get_inbetweens(self.fi.parse_key_frames(loop_args.blendFactorMax))
93
+ self.blendFactorSlope_series = self.fi.get_inbetweens(self.fi.parse_key_frames(loop_args.blendFactorSlope))
94
+ self.tweening_frames_schedule_series = self.fi.get_inbetweens(self.fi.parse_key_frames(loop_args.tweening_frames_schedule))
95
+ self.color_correction_factor_series = self.fi.get_inbetweens(self.fi.parse_key_frames(loop_args.color_correction_factor))
96
+
97
+ class FrameInterpolater():
98
+ def __init__(self, max_frames=0, seed=-1) -> None:
99
+ self.max_frames = max_frames
100
+ self.seed = seed
101
+
102
+ def sanitize_value(self, value):
103
+ return value.replace("'","").replace('"',"").replace('(',"").replace(')',"")
104
+
105
+ def get_inbetweens(self, key_frames, integer=False, interp_method='Linear', is_single_string = False):
106
+ key_frame_series = pd.Series([np.nan for a in range(self.max_frames)])
107
+ # get our ui variables set for numexpr.evaluate
108
+ max_f = self.max_frames -1
109
+ s = self.seed
110
+ for i in range(0, self.max_frames):
111
+ if i in key_frames:
112
+ value = key_frames[i]
113
+ value_is_number = check_is_number(self.sanitize_value(value))
114
+ if value_is_number: # if it's only a number, leave the rest for the default interpolation
115
+ key_frame_series[i] = self.sanitize_value(value)
116
+ if not value_is_number:
117
+ t = i
118
+ # workaround for values formatted like 0:("I am test") //used for sampler schedules
119
+ key_frame_series[i] = numexpr.evaluate(value) if not is_single_string else self.sanitize_value(value)
120
+ elif is_single_string:# take previous string value and replicate it
121
+ key_frame_series[i] = key_frame_series[i-1]
122
+ key_frame_series = key_frame_series.astype(float) if not is_single_string else key_frame_series # as string
123
+
124
+ if interp_method == 'Cubic' and len(key_frames.items()) <= 3:
125
+ interp_method = 'Quadratic'
126
+ if interp_method == 'Quadratic' and len(key_frames.items()) <= 2:
127
+ interp_method = 'Linear'
128
+
129
+ key_frame_series[0] = key_frame_series[key_frame_series.first_valid_index()]
130
+ key_frame_series[self.max_frames-1] = key_frame_series[key_frame_series.last_valid_index()]
131
+ key_frame_series = key_frame_series.interpolate(method=interp_method.lower(), limit_direction='both')
132
+ if integer:
133
+ return key_frame_series.astype(int)
134
+ return key_frame_series
135
+
136
+ def parse_key_frames(self, string):
137
+ # because math expressions (e.g. sin(t)) can contain brackets themselves,
138
+ # each comma-separated entry is split on ':' into a frame number and a value,
139
+ # the value being whatever was enclosed in brackets, with a comma or the
140
+ # end of the line following the closing bracket
141
+ frames = dict()
142
+ for match_object in string.split(","):
143
+ frameParam = match_object.split(":")
144
+ max_f = self.max_frames -1
145
+ s = self.seed
146
+ frame = int(self.sanitize_value(frameParam[0])) if check_is_number(self.sanitize_value(frameParam[0].strip())) else int(numexpr.evaluate(frameParam[0].strip().replace("'","",1).replace('"',"",1)[::-1].replace("'","",1).replace('"',"",1)[::-1]))
147
+ frames[frame] = frameParam[1].strip()
148
+ if frames == {} and len(string) != 0:
149
+ raise RuntimeError('Key Frame string not correctly formatted')
150
+ return frames
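
To illustrate how the keyframe strings handled above turn into per-frame values, here is a stripped-down sketch (not part of the diff) that mirrors parse_key_frames and get_inbetweens; the schedule string and frame count are made up:

import numexpr
import numpy as np
import pandas as pd

schedule = "0:(0), 10:(5), 20:(2*t)"                  # illustrative keyframe string
max_frames = 30
keyframes = {}
for part in schedule.split(","):
    frame, value = part.split(":")
    keyframes[int(frame.strip())] = value.strip().strip("()")

series = pd.Series([np.nan] * max_frames)
for i in range(max_frames):
    if i in keyframes:
        t = i                                         # 't' is visible to numexpr expressions
        series[i] = float(numexpr.evaluate(keyframes[i]))
series[0] = series[series.first_valid_index()]        # pin the edges, as get_inbetweens does
series[max_frames - 1] = series[series.last_valid_index()]
series = series.interpolate(limit_direction="both")   # linear interpolation between keyframes
print(series[10], series[15], series[29])             # 5.0, 22.5, 40.0
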
scripts/deforum_helpers/args.py ADDED
@@ -0,0 +1,341 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import json
19
+ import os
20
+ import tempfile
21
+ import time
22
+ from types import SimpleNamespace
23
+ import modules.paths as ph
24
+ import modules.shared as sh
25
+ from modules.processing import get_fixed_seed
26
+ from modules.shared import cmd_opts
27
+ from .defaults import get_guided_imgs_default_json
28
+ from .deforum_controlnet import controlnet_component_names
29
+ from .general_utils import get_os, substitute_placeholders
30
+
31
+ def RootArgs():
32
+ device = sh.device
33
+ models_path = ph.models_path + '/Deforum'
34
+ half_precision = not cmd_opts.no_half
35
+ mask_preset_names = ['everywhere', 'video_mask']
36
+ frames_cache = []
37
+ raw_batch_name = None
38
+ raw_seed = None
39
+ initial_info = None
40
+ first_frame = None
41
+ animation_prompts = None
42
+ current_user_os = get_os()
43
+ tmp_deforum_run_duplicated_folder = os.path.join(tempfile.gettempdir(), 'tmp_run_deforum')
44
+ return locals()
45
+
46
+ def CoreArgs(): # TODO: change or do something with this ugliness
47
+ subseed = -1
48
+ subseed_strength = 0
49
+ timestring = ""
50
+ init_sample = None
51
+ noise_mask = None
52
+ seed_internal = 0
53
+ return locals()
54
+
55
+ def DeforumAnimArgs():
56
+ animation_mode = '2D' # ['None', '2D', '3D', 'Video Input', 'Interpolation']
57
+ max_frames = 120
58
+ border = 'replicate' # ['wrap', 'replicate']
59
+ angle = "0:(0)"
60
+ zoom = "0:(1.0025+0.002*sin(1.25*3.14*t/30))"
61
+ translation_x = "0:(0)"
62
+ translation_y = "0:(0)"
63
+ translation_z = "0:(1.75)"
64
+ transform_center_x = "0:(0.5)"
65
+ transform_center_y = "0:(0.5)"
66
+ rotation_3d_x = "0:(0)"
67
+ rotation_3d_y = "0:(0)"
68
+ rotation_3d_z = "0:(0)"
69
+ enable_perspective_flip = False
70
+ perspective_flip_theta = "0:(0)"
71
+ perspective_flip_phi = "0:(0)"
72
+ perspective_flip_gamma = "0:(0)"
73
+ perspective_flip_fv = "0:(53)"
74
+ noise_schedule = "0: (0.065)"
75
+ strength_schedule = "0: (0.65)"
76
+ contrast_schedule = "0: (1.0)"
77
+ cfg_scale_schedule = "0: (7)"
78
+ enable_steps_scheduling = False
79
+ steps_schedule = "0: (25)"
80
+ fov_schedule = "0: (70)"
81
+ aspect_ratio_schedule = "0: (1)"
82
+ aspect_ratio_use_old_formula = False
83
+ near_schedule = "0: (200)"
84
+ far_schedule = "0: (10000)"
85
+ seed_schedule = '0:(s), 1:(-1), "max_f-2":(-1), "max_f-1":(s)'
86
+ pix2pix_img_cfg_scale_schedule = "0:(1.5)"
87
+ enable_subseed_scheduling = False
88
+ subseed_schedule = "0:(1)"
89
+ subseed_strength_schedule = "0:(0)"
90
+ enable_sampler_scheduling = False # Sampler Scheduling
91
+ sampler_schedule = '0: ("Euler a")'
92
+ use_noise_mask = False # Composable mask scheduling
93
+ mask_schedule = '0: ("{video_mask}")'
94
+ noise_mask_schedule = '0: ("{video_mask}")'
95
+ enable_checkpoint_scheduling = False # Checkpoint Scheduling
96
+ checkpoint_schedule = '0: ("model1.ckpt"), 100: ("model2.safetensors")'
97
+ enable_clipskip_scheduling = False # CLIP skip Scheduling
98
+ clipskip_schedule = '0: (2)'
99
+ enable_noise_multiplier_scheduling = True # Noise Multiplier Scheduling
100
+ noise_multiplier_schedule = '0: (1.05)'
101
+ # resume params
102
+ resume_from_timestring = False
103
+ resume_timestring = "20230129210106"
104
+ # DDIM AND Ancestral ETA scheds
105
+ enable_ddim_eta_scheduling = False
106
+ ddim_eta_schedule = "0:(0)"
107
+ enable_ancestral_eta_scheduling = False
108
+ ancestral_eta_schedule = "0:(1)"
109
+ # Anti-blur
110
+ amount_schedule = "0: (0.1)"
111
+ kernel_schedule = "0: (5)"
112
+ sigma_schedule = "0: (1.0)"
113
+ threshold_schedule = "0: (0.0)"
114
+ # Coherence
115
+ color_coherence = 'LAB' # ['None', 'HSV', 'LAB', 'RGB', 'Video Input', 'Image']
116
+ color_coherence_image_path = ""
117
+ color_coherence_video_every_N_frames = 1
118
+ color_force_grayscale = False
119
+ legacy_colormatch = False
120
+ diffusion_cadence = '2' # ['1','2','3','4','5','6','7','8']
121
+ optical_flow_cadence = 'None' # ['None', 'RAFT','DIS Medium', 'DIS Fine', 'Farneback']
122
+ cadence_flow_factor_schedule = "0: (1)"
123
+ optical_flow_redo_generation = 'None' # ['None', 'RAFT', 'DIS Medium', 'DIS Fine', 'Farneback']
124
+ redo_flow_factor_schedule = "0: (1)"
125
+ diffusion_redo = '0'
126
+ # **Noise settings:**
127
+ noise_type = 'perlin' # ['uniform', 'perlin']
128
+ # Perlin params
129
+ perlin_w = 8
130
+ perlin_h = 8
131
+ perlin_octaves = 4
132
+ perlin_persistence = 0.5
133
+ # **3D Depth Warping:**
134
+ use_depth_warping = True
135
+ depth_algorithm = 'Midas-3-Hybrid' # ['Midas+AdaBins (old)','Zoe+AdaBins (old)', 'Midas-3-Hybrid','Midas-3.1-BeitLarge', 'AdaBins', 'Zoe', 'Leres'] Midas-3.1-BeitLarge is temporarily removed 04-05-23 until fixed
136
+ midas_weight = 0.2 # midas/ zoe weight - only relevant in old/ legacy depth_algorithm modes. see above ^
137
+ padding_mode = 'border' # ['border', 'reflection', 'zeros']
138
+ sampling_mode = 'bicubic' # ['bicubic', 'bilinear', 'nearest']
139
+ save_depth_maps = False
140
+ # **Video Input:**
141
+ video_init_path = 'https://deforum.github.io/a1/V1.mp4'
142
+ extract_nth_frame = 1
143
+ extract_from_frame = 0
144
+ extract_to_frame = -1 # minus 1 for unlimited frames
145
+ overwrite_extracted_frames = True
146
+ use_mask_video = False
147
+ video_mask_path = 'https://deforum.github.io/a1/VM1.mp4'
148
+ # **Hybrid Video for 2D/3D Animation Mode:**
149
+ hybrid_comp_alpha_schedule = "0:(0.5)"
150
+ hybrid_comp_mask_blend_alpha_schedule = "0:(0.5)"
151
+ hybrid_comp_mask_contrast_schedule = "0:(1)"
152
+ hybrid_comp_mask_auto_contrast_cutoff_high_schedule = "0:(100)"
153
+ hybrid_comp_mask_auto_contrast_cutoff_low_schedule = "0:(0)"
154
+ hybrid_flow_factor_schedule = "0:(1)"
155
+ hybrid_generate_inputframes = False
156
+ hybrid_generate_human_masks = "None" # ['None','PNGs','Video', 'Both']
157
+ hybrid_use_first_frame_as_init_image = True
158
+ hybrid_motion = "None" # ['None','Optical Flow','Perspective','Affine']
159
+ hybrid_motion_use_prev_img = False
160
+ hybrid_flow_consistency = False
161
+ hybrid_consistency_blur = 2
162
+ hybrid_flow_method = "RAFT" # ['RAFT', 'DIS Medium', 'DIS Fine', 'Farneback']
163
+ hybrid_composite = 'None' # ['None', 'Normal', 'Before Motion', 'After Generation']
164
+ hybrid_use_init_image = False
165
+ hybrid_comp_mask_type = "None" # ['None', 'Depth', 'Video Depth', 'Blend', 'Difference']
166
+ hybrid_comp_mask_inverse = False
167
+ hybrid_comp_mask_equalize = "None" # ['None','Before','After','Both']
168
+ hybrid_comp_mask_auto_contrast = False
169
+ hybrid_comp_save_extra_frames = False
170
+ return locals()
171
+
172
+ def DeforumArgs():
173
+ # set default image size and make sure to resize to multiples of 64 if needed
174
+ W, H = map(lambda x: x - x % 64, (512, 512))
175
+ # whether to show gradio's info section for all params in the ui. it's a realtime toggle
176
+ show_info_on_ui = True
177
+ # **Webui stuff**
178
+ tiling = False
179
+ restore_faces = False
180
+ seed_enable_extras = False
181
+ seed_resize_from_w = 0
182
+ seed_resize_from_h = 0
183
+ # **Sampling Settings**
184
+ seed = -1 #
185
+ sampler = 'euler_ancestral' # ["klms","dpm2","dpm2_ancestral","heun","euler","euler_ancestral","plms", "ddim"]
186
+ steps = 25 #
187
+ # **Batch Settings**
188
+ batch_name = "Deforum_{timestring}"
189
+ seed_behavior = "iter" # ["iter","fixed","random","ladder","alternate","schedule"]
190
+ seed_iter_N = 1
191
+ # **Init Settings**
192
+ use_init = False
193
+ strength = 0.8
194
+ strength_0_no_init = True # Set the strength to 0 automatically when no init image is used
195
+ init_image = "https://deforum.github.io/a1/I1.png"
196
+ # Whiter areas of the mask are areas that change more
197
+ use_mask = False
198
+ use_alpha_as_mask = False # use the alpha channel of the init image as the mask
199
+ mask_file = "https://deforum.github.io/a1/M1.jpg"
200
+ invert_mask = False
201
+ # Adjust mask image, 1.0 is no adjustment. Should be positive numbers.
202
+ mask_contrast_adjust = 1.0
203
+ mask_brightness_adjust = 1.0
204
+ # Overlay the masked image at the end of the generation so it does not get degraded by encoding and decoding
205
+ overlay_mask = True
206
+ # Blur edges of final overlay mask, if used. Minimum = 0 (no blur)
207
+ mask_overlay_blur = 4
208
+ fill = 1 # MASKARGSEXPANSION Todo : Rename and convert to same formatting as used in img2img masked content
209
+ full_res_mask = True
210
+ full_res_mask_padding = 4
211
+ reroll_blank_frames = 'reroll' # reroll, interrupt, or ignore
212
+ reroll_patience = 10
213
+ return locals()
214
+
215
+ def LoopArgs():
216
+ use_looper = False
217
+ init_images = get_guided_imgs_default_json()
218
+ image_strength_schedule = "0:(0.75)"
219
+ blendFactorMax = "0:(0.35)"
220
+ blendFactorSlope = "0:(0.25)"
221
+ tweening_frames_schedule = "0:(20)"
222
+ color_correction_factor = "0:(0.075)"
223
+ return locals()
224
+
225
+ def ParseqArgs():
226
+ parseq_manifest = None
227
+ parseq_use_deltas = True
228
+ return locals()
229
+
230
+ def DeforumOutputArgs():
231
+ skip_video_creation = False
232
+ fps = 15
233
+ make_gif = False
234
+ delete_imgs = False # True will delete all imgs after a successful mp4 creation
235
+ image_path = "C:/SD/20230124234916_%09d.png"
236
+ add_soundtrack = 'None' # ["File","Init Video"]
237
+ soundtrack_path = "https://deforum.github.io/a1/A1.mp3"
238
+ # End-Run upscaling
239
+ r_upscale_video = False
240
+ r_upscale_factor = 'x2' # ['2x', 'x3', 'x4']
241
+ r_upscale_model = 'realesr-animevideov3' # 'realesr-animevideov3' (default of realesrgan engine, does 2-4x), the rest do only 4x: 'realesrgan-x4plus', 'realesrgan-x4plus-anime'
242
+ r_upscale_keep_imgs = True
243
+ store_frames_in_ram = False
244
+ # **Interpolate Video Settings**
245
+ frame_interpolation_engine = "None" # ["None", "RIFE v4.6", "FILM"]
246
+ frame_interpolation_x_amount = 2 # [2 to 1000 depends on the engine]
247
+ frame_interpolation_slow_mo_enabled = False
248
+ frame_interpolation_slow_mo_amount = 2 # [2 to 10]
249
+ frame_interpolation_keep_imgs = False
250
+ return locals()
251
+
252
+ def get_component_names():
253
+ return ['override_settings_with_file', 'custom_settings_file', *DeforumAnimArgs().keys(), 'animation_prompts', 'animation_prompts_positive', 'animation_prompts_negative',
254
+ *DeforumArgs().keys(), *DeforumOutputArgs().keys(), *ParseqArgs().keys(), *LoopArgs().keys(), *controlnet_component_names()]
255
+
256
+ def get_settings_component_names():
257
+ return [name for name in get_component_names()]
258
+
259
+ def pack_args(args_dict):
260
+ args_dict = {name: args_dict[name] for name in DeforumArgs()}
261
+ args_dict.update({name: CoreArgs()[name] for name in CoreArgs()})
262
+ return args_dict
263
+
264
+ def pack_anim_args(args_dict):
265
+ return {name: args_dict[name] for name in DeforumAnimArgs()}
266
+
267
+ def pack_video_args(args_dict):
268
+ return {name: args_dict[name] for name in DeforumOutputArgs()}
269
+
270
+ def pack_parseq_args(args_dict):
271
+ return {name: args_dict[name] for name in ParseqArgs()}
272
+
273
+ def pack_loop_args(args_dict):
274
+ return {name: args_dict[name] for name in LoopArgs()}
275
+
276
+ def pack_controlnet_args(args_dict):
277
+ return {name: args_dict[name] for name in controlnet_component_names()}
278
+
279
+ def process_args(args_dict_main, run_id):
280
+ from .settings import load_args
281
+ override_settings_with_file = args_dict_main['override_settings_with_file']
282
+ custom_settings_file = args_dict_main['custom_settings_file']
283
+ args_dict = pack_args(args_dict_main)
284
+ anim_args_dict = pack_anim_args(args_dict_main)
285
+ video_args_dict = pack_video_args(args_dict_main)
286
+ parseq_args_dict = pack_parseq_args(args_dict_main)
287
+ loop_args_dict = pack_loop_args(args_dict_main)
288
+ controlnet_args_dict = pack_controlnet_args(args_dict_main)
289
+
290
+ root = SimpleNamespace(**RootArgs())
291
+ p = args_dict_main['p']
292
+ root.animation_prompts = json.loads(args_dict_main['animation_prompts'])
293
+
294
+ args_loaded_ok = True # can use this later to error cleanly upon wrong gen param in ui
295
+ if override_settings_with_file:
296
+ args_loaded_ok = load_args(args_dict_main, args_dict, anim_args_dict, parseq_args_dict, loop_args_dict, controlnet_args_dict, video_args_dict, custom_settings_file, root, run_id)
297
+
298
+ positive_prompts = args_dict_main['animation_prompts_positive']
299
+ negative_prompts = args_dict_main['animation_prompts_negative']
300
+ negative_prompts = negative_prompts.replace('--neg', '') # remove --neg from negative_prompts if received by mistake
301
+ for key in root.animation_prompts:
302
+ animationPromptCurr = root.animation_prompts[key]
303
+ root.animation_prompts[key] = f"{positive_prompts} {animationPromptCurr} {'' if '--neg' in animationPromptCurr else '--neg'} {negative_prompts}"
304
+
305
+ os.makedirs(root.models_path, exist_ok=True)
306
+
307
+ args = SimpleNamespace(**args_dict)
308
+ anim_args = SimpleNamespace(**anim_args_dict)
309
+ video_args = SimpleNamespace(**video_args_dict)
310
+ parseq_args = SimpleNamespace(**parseq_args_dict)
311
+ loop_args = SimpleNamespace(**loop_args_dict)
312
+ controlnet_args = SimpleNamespace(**controlnet_args_dict)
313
+
314
+ if args.seed == -1:
315
+ root.raw_seed = -1
316
+ args.seed = get_fixed_seed(args.seed)
317
+ if root.raw_seed != -1:
318
+ root.raw_seed = args.seed
319
+ args.timestring = time.strftime('%Y%m%d%H%M%S')
320
+ args.strength = max(0.0, min(1.0, args.strength))
321
+ args.prompts = json.loads(args_dict_main['animation_prompts'])
322
+ args.positive_prompts = args_dict_main['animation_prompts_positive']
323
+ args.negative_prompts = args_dict_main['animation_prompts_negative']
324
+
325
+ if not args.use_init and not anim_args.hybrid_use_init_image:
326
+ args.init_image = None
327
+
328
+ if anim_args.animation_mode == 'None':
329
+ anim_args.max_frames = 1
330
+ elif anim_args.animation_mode == 'Video Input':
331
+ args.use_init = True
332
+
333
+ current_arg_list = [args, anim_args, video_args, parseq_args]
334
+ full_base_folder_path = os.path.join(os.getcwd(), p.outpath_samples)
335
+ root.raw_batch_name = args.batch_name
336
+ args.batch_name = substitute_placeholders(args.batch_name, current_arg_list, full_base_folder_path)
337
+ args.outdir = os.path.join(p.outpath_samples, str(args.batch_name))
338
+ args.outdir = os.path.join(os.getcwd(), args.outdir)
339
+ os.makedirs(args.outdir, exist_ok=True)
340
+
341
+ return args_loaded_ok, root, args, anim_args, video_args, parseq_args, loop_args, controlnet_args
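
As a side note on the pattern used throughout this file: the *Args() functions collect their local defaults with locals(), and process_args later wraps the packed dicts in SimpleNamespace for attribute access. A toy sketch of that pattern (not part of the diff, names invented):

from types import SimpleNamespace

def ExampleArgs():               # same pattern as DeforumArgs() / DeforumAnimArgs() above
    steps = 25
    sampler = "euler_ancestral"
    return locals()              # the defaults are captured as a plain dict

defaults = ExampleArgs()         # {'steps': 25, 'sampler': 'euler_ancestral'}
args = SimpleNamespace(**defaults)
print(args.steps)                # 25
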
scripts/deforum_helpers/auto_navigation.py ADDED
@@ -0,0 +1,89 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import numpy as np
19
+ import torch
20
+
21
+ # reallybigname - auto-navigation functions in progress...
22
+ # usage:
23
+ # if auto_rotation:
24
+ # rot_mat = rotate_camera_towards_depth(depth_tensor, auto_rotation_steps, w, h, fov_deg, auto_rotation_depth_target)
25
+ def rotate_camera_towards_depth(depth_tensor, turn_weight, width, height, h_fov=60, target_depth=1):
26
+ # Compute the depth at the target depth
27
+ target_depth_index = int(target_depth * depth_tensor.shape[0])
28
+ target_depth_values = depth_tensor[target_depth_index]
29
+ max_depth_index = torch.argmax(target_depth_values).item()
30
+ max_depth_index = (max_depth_index, target_depth_index)
31
+ max_depth = target_depth_values[max_depth_index[0]].item()
32
+
33
+ # Compute the normalized x and y coordinates
34
+ x, y = max_depth_index
35
+ x_normalized = (x / (width - 1)) * 2 - 1
36
+ y_normalized = (y / (height - 1)) * 2 - 1
37
+
38
+ # Calculate horizontal and vertical field of view (in radians)
39
+ h_fov_rad = np.radians(h_fov)
40
+ aspect_ratio = width / height
41
+ v_fov_rad = h_fov_rad / aspect_ratio
42
+
43
+ # Calculate the world coordinates (x, y) at the target depth
44
+ x_world = np.tan(h_fov_rad / 2) * max_depth * x_normalized
45
+ y_world = np.tan(v_fov_rad / 2) * max_depth * y_normalized
46
+
47
+ # Compute the target position using the world coordinates and max_depth
48
+ target_position = np.array([x_world, y_world, max_depth])
49
+
50
+ # Assuming the camera is initially at the origin, and looking in the negative Z direction
51
+ cam_position = np.array([0, 0, 0])
52
+ current_direction = np.array([0, 0, -1])
53
+
54
+ # Compute the direction vector and normalize it
55
+ direction = target_position - cam_position
56
+ direction = direction / np.linalg.norm(direction)
57
+
58
+ # Compute the rotation angle based on the turn_weight (number of frames)
59
+ axis = np.cross(current_direction, direction)
60
+ axis = axis / np.linalg.norm(axis)
61
+ angle = np.arcsin(np.linalg.norm(axis))
62
+ max_angle = np.pi * (0.1 / turn_weight) # Limit the maximum rotation angle to half of the visible screen
63
+ rotation_angle = np.clip(np.sign(np.cross(current_direction, direction)) * angle / turn_weight, -max_angle, max_angle)
64
+
65
+ # Compute the rotation matrix
66
+ rotation_matrix = np.eye(3) + np.sin(rotation_angle) * np.array([
67
+ [0, -axis[2], axis[1]],
68
+ [axis[2], 0, -axis[0]],
69
+ [-axis[1], axis[0], 0]
70
+ ]) + (1 - np.cos(rotation_angle)) * np.outer(axis, axis)
71
+
72
+ # Convert the NumPy array to a PyTorch tensor
73
+ rotation_matrix_tensor = torch.from_numpy(rotation_matrix).float()
74
+
75
+ # Add an extra dimension to match the expected shape (1, 3, 3)
76
+ rotation_matrix_tensor = rotation_matrix_tensor.unsqueeze(0)
77
+
78
+ return rotation_matrix_tensor
79
+
80
+ def rotation_matrix(axis, angle):
81
+ axis = np.asarray(axis)
82
+ axis = axis / np.linalg.norm(axis)
83
+ a = np.cos(angle / 2.0)
84
+ b, c, d = -axis * np.sin(angle / 2.0)
85
+ aa, bb, cc, dd = a * a, b * b, c * c, d * d
86
+ bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d
87
+ return np.array([[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)],
88
+ [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)],
89
+ [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]])
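
A quick sanity check (not part of the diff) of the rotation_matrix helper above, which builds an Euler-Rodrigues rotation matrix from an axis and an angle; it assumes the function is in scope:

import numpy as np

R = rotation_matrix([0, 0, 1], np.pi / 2)             # 90 degrees about the z-axis
print(np.round(R @ np.array([1.0, 0.0, 0.0]), 3))     # approximately [0, 1, 0]: the x-axis is rotated onto the y-axis
print(np.round(R @ R.T, 3))                           # identity, since rotation matrices are orthonormal
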
scripts/deforum_helpers/colors.py ADDED
@@ -0,0 +1,39 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import cv2
19
+ import pkg_resources
20
+ from skimage.exposure import match_histograms
21
+
22
+ def maintain_colors(prev_img, color_match_sample, mode):
23
+ skimage_version = pkg_resources.get_distribution('scikit-image').version
24
+ is_skimage_v20_or_higher = pkg_resources.parse_version(skimage_version) >= pkg_resources.parse_version('0.20.0')
25
+
26
+ match_histograms_kwargs = {'channel_axis': -1} if is_skimage_v20_or_higher else {'multichannel': True}
27
+
28
+ if mode == 'RGB':
29
+ return match_histograms(prev_img, color_match_sample, **match_histograms_kwargs)
30
+ elif mode == 'HSV':
31
+ prev_img_hsv = cv2.cvtColor(prev_img, cv2.COLOR_RGB2HSV)
32
+ color_match_hsv = cv2.cvtColor(color_match_sample, cv2.COLOR_RGB2HSV)
33
+ matched_hsv = match_histograms(prev_img_hsv, color_match_hsv, **match_histograms_kwargs)
34
+ return cv2.cvtColor(matched_hsv, cv2.COLOR_HSV2RGB)
35
+ else: # LAB
36
+ prev_img_lab = cv2.cvtColor(prev_img, cv2.COLOR_RGB2LAB)
37
+ color_match_lab = cv2.cvtColor(color_match_sample, cv2.COLOR_RGB2LAB)
38
+ matched_lab = match_histograms(prev_img_lab, color_match_lab, **match_histograms_kwargs)
39
+ return cv2.cvtColor(matched_lab, cv2.COLOR_LAB2RGB)
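
A small usage sketch (not part of the diff) for the maintain_colors function above, using random placeholder frames; it assumes the function is in scope and that scikit-image is installed:

import numpy as np

prev_img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)    # current frame (placeholder)
reference = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)   # color reference sample (placeholder)
matched = maintain_colors(prev_img, reference, mode='RGB')           # 'HSV' and 'LAB' route through cv2 color conversions
print(matched.shape)                                                 # (64, 64, 3), histogram-matched to the reference
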
scripts/deforum_helpers/composable_masks.py ADDED
@@ -0,0 +1,213 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ # At the moment there are three types of masks: mask from variable, file mask and word mask
19
+ # Variable masks include video_mask (which can be set to auto-generated human masks) and everywhere
20
+ # They are put in {}-brackets
21
+ # Word masks are framed with <>-bracets, like: <cat>, <anime girl>
22
+ # File masks are put in []-brackes
23
+ # Empty strings are counted as the whole frame
24
+ # We want to put them all into a sequence of boolean operations
25
+
26
+ # Example:
27
+ # \ <armor>
28
+ # (({human_mask} & [mask1.png]) ^ <apple>)
29
+
30
+ # Writing the parser for the boolean sequence
31
+ # using regex and PIL operations
32
+ import re
33
+ from .load_images import get_mask_from_file, check_mask_for_errors, blank_if_none
34
+ from .word_masking import get_word_mask
35
+ from PIL import ImageChops
36
+ from modules.shared import opts
37
+
38
+ # val_masks: name, PIL Image mask
39
+ # Returns an image in mode '1' (needed for bool ops), convert to 'L' in the sender function
40
+ def compose_mask(root, args, mask_seq, val_masks, frame_image, inner_idx:int = 0):
41
+ # Compose_mask recursively: go to inner brackets, then b-op it and go upstack
42
+
43
+ # Step 1:
44
+ # recursive parenthesis pass
45
+ # regex is not powerful here
46
+
47
+ seq = ""
48
+ inner_seq = ""
49
+ parentheses_counter = 0
50
+
51
+ for c in mask_seq:
52
+ if c == ')':
53
+ parentheses_counter = parentheses_counter - 1
54
+ if parentheses_counter > 0:
55
+ inner_seq += c
56
+ if c == '(':
57
+ parentheses_counter = parentheses_counter + 1
58
+ if parentheses_counter == 0:
59
+ if len(inner_seq) > 0:
60
+ inner_idx += 1
61
+ seq += compose_mask(root, args, inner_seq, val_masks, frame_image, inner_idx)
62
+ inner_seq = ""
63
+ else:
64
+ seq += c
65
+
66
+ if parentheses_counter != 0:
67
+ raise Exception('Mismatched parentheses in {mask_seq}!')
68
+
69
+ mask_seq = seq
70
+
71
+ # Step 2:
72
+ # Load the word masks and file masks as vars
73
+
74
+ # File masks
75
+ pattern = r'\[(?P<inner>[\S\s]*?)\]'
76
+
77
+ def parse(match_object):
78
+ nonlocal inner_idx
79
+ inner_idx += 1
80
+ content = match_object.groupdict()['inner']
81
+ val_masks[str(inner_idx)] = get_mask_from_file(content, args).convert('1') # TODO: add caching
82
+ return f"{{{inner_idx}}}"
83
+
84
+ mask_seq = re.sub(pattern, parse, mask_seq)
85
+
86
+ # Word masks
87
+ pattern = r'<(?P<inner>[\S\s]*?)>'
88
+
89
+ def parse(match_object):
90
+ nonlocal inner_idx
91
+ inner_idx += 1
92
+ content = match_object.groupdict()['inner']
93
+ val_masks[str(inner_idx)] = get_word_mask(root, frame_image, content).convert('1')
94
+ return f"{{{inner_idx}}}"
95
+
96
+ mask_seq = re.sub(pattern, parse, mask_seq)
97
+
98
+ # Now that all inner parenthesis are eliminated we're left with a linear string
99
+
100
+ # Step 3:
101
+ # Boolean operations with masks
102
+ # Operators: invert !, and &, or |, xor ^, difference \
103
+
104
+ # Invert vars with '!'
105
+ pattern = r'![\S\s]*{(?P<inner>[\S\s]*?)}'
106
+ def parse(match_object):
107
+ nonlocal inner_idx
108
+ inner_idx += 1
109
+ content = match_object.groupdict()['inner']
110
+ savename = content
111
+ if content in root.mask_preset_names:
112
+ inner_idx += 1
113
+ savename = str(inner_idx)
114
+ val_masks[savename] = ImageChops.invert(val_masks[content])
115
+ return f"{{{savename}}}"
116
+
117
+ mask_seq = re.sub(pattern, parse, mask_seq)
118
+
119
+ # Multiply neighbouring vars with '&'
120
+ # Repeat until the substitutions reach a fixed point (no more replacements occur)
121
+ while True:
122
+ pattern = r'{(?P<inner1>[\S\s]*?)}[\s]*&[\s]*{(?P<inner2>[\S\s]*?)}'
123
+ def parse(match_object):
124
+ nonlocal inner_idx
125
+ inner_idx += 1
126
+ content = match_object.groupdict()['inner1']
127
+ content_second = match_object.groupdict()['inner2']
128
+ savename = content
129
+ if content in root.mask_preset_names:
130
+ inner_idx += 1
131
+ savename = str(inner_idx)
132
+ val_masks[savename] = ImageChops.logical_and(val_masks[content], val_masks[content_second])
133
+ return f"{{{savename}}}"
134
+
135
+ prev_mask_seq = mask_seq
136
+ mask_seq = re.sub(pattern, parse, mask_seq)
137
+ if mask_seq is prev_mask_seq:
138
+ break
139
+
140
+ # Add neighbouring vars with '|'
141
+ while True:
142
+ pattern = r'{(?P<inner1>[\S\s]*?)}[\s]*?\|[\s]*?{(?P<inner2>[\S\s]*?)}'
143
+ def parse(match_object):
144
+ nonlocal inner_idx
145
+ inner_idx += 1
146
+ content = match_object.groupdict()['inner1']
147
+ content_second = match_object.groupdict()['inner2']
148
+ savename = content
149
+ if content in root.mask_preset_names:
150
+ inner_idx += 1
151
+ savename = str(inner_idx)
152
+ val_masks[savename] = ImageChops.logical_or(val_masks[content], val_masks[content_second])
153
+ return f"{{{savename}}}"
154
+
155
+ prev_mask_seq = mask_seq
156
+ mask_seq = re.sub(pattern, parse, mask_seq)
157
+ if mask_seq is prev_mask_seq:
158
+ break
159
+
160
+ # Mutually exclude neighbouring vars with '^'
161
+ while True:
162
+ pattern = r'{(?P<inner1>[\S\s]*?)}[\s]*\^[\s]*{(?P<inner2>[\S\s]*?)}'
163
+ def parse(match_object):
164
+ nonlocal inner_idx
165
+ inner_idx += 1
166
+ content = match_object.groupdict()['inner1']
167
+ content_second = match_object.groupdict()['inner2']
168
+ savename = content
169
+ if content in root.mask_preset_names:
170
+ inner_idx += 1
171
+ savename = str(inner_idx)
172
+ val_masks[savename] = ImageChops.logical_xor(val_masks[content], val_masks[content_second])
173
+ return f"{{{savename}}}"
174
+
175
+ prev_mask_seq = mask_seq
176
+ mask_seq = re.sub(pattern, parse, mask_seq)
177
+ if mask_seq is prev_mask_seq:
178
+ break
179
+
180
+ # Set-difference the regions with '\'
181
+ while True:
182
+ pattern = r'{(?P<inner1>[\S\s]*?)}[\s]*\\[\s]*{(?P<inner2>[\S\s]*?)}'
183
+ def parse(match_object):
184
+ content = match_object.groupdict()['inner1']
185
+ content_second = match_object.groupdict()['inner2']
186
+ savename = content
187
+ if content in root.mask_preset_names:
188
+ nonlocal inner_idx
189
+ inner_idx += 1
190
+ savename = str(inner_idx)
191
+ val_masks[savename] = ImageChops.logical_and(val_masks[content], ImageChops.invert(val_masks[content_second]))
192
+ return f"{{{savename}}}"
193
+
194
+ prev_mask_seq = mask_seq
195
+ mask_seq = re.sub(pattern, parse, mask_seq)
196
+ if mask_seq is prev_mask_seq:
197
+ break
198
+
199
+ # Step 4:
200
+ # Output
201
+ # Now we should have a single var left to return. If not, raise an error message
202
+ pattern = r'{(?P<inner>[\S\s]*?)}'
203
+ matches = re.findall(pattern, mask_seq)
204
+
205
+ if len(matches) != 1:
206
+ raise Exception(f'Wrong composable mask expression format! Broken mask sequence: {mask_seq}')
207
+
208
+ return f"{{{matches[0]}}}"
209
+
210
+ def compose_mask_with_check(root, args, mask_seq, val_masks, frame_image):
211
+ for k, v in val_masks.items():
212
+ val_masks[k] = blank_if_none(v, args.W, args.H, '1').convert('1')
213
+ return check_mask_for_errors(val_masks[compose_mask(root, args, mask_seq, val_masks, frame_image, 0)[1:-1]].convert('L'))
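# --- Editor's illustrative sketch (not part of the committed file) ---
# A minimal example of how the composer above could be driven. `root`, `args`
# (with W/H set) and a PIL `frame_image` are assumed to already exist; the mask
# names and the file path are hypothetical.
#
#     from PIL import Image
#     val_masks = {
#         'everywhere': Image.new('1', (args.W, args.H), 1),  # preset: whole frame
#         'video_mask': current_video_mask,                    # preset: per-frame mask
#     }
#     mask = compose_mask_with_check(root, args,
#                                    "({video_mask} & [mask1.png]) ^ <apple>",
#                                    val_masks, frame_image)
#     # `mask` comes back as a PIL image in mode 'L', ready for the img2img pipeline.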
scripts/deforum_helpers/consistency_check.py ADDED
@@ -0,0 +1,151 @@
 
 
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+
19
+ # The code below is taken from https://github.com/Sxela/flow_tools/blob/main under GPL-3.0 license
20
+ # and modified to fit Deforum's purpose
21
+
22
+
23
+ # import argparse
24
+ # import PIL.Image
25
+ import numpy as np
26
+ # import scipy.ndimage
27
+ # import glob
28
+ # from tqdm import tqdm
29
+
30
+ def make_consistency(flow1, flow2, edges_unreliable=False):
31
+ # Awesome pythonic consistency check from [maua](https://github.com/maua-maua-maua/maua/blob/44485c745c65cf9d83cb1b1c792a177588e9c9fc/maua/flow/consistency.py) by Hans Brouwer and Henry Rachootin
32
+ # algorithm based on https://github.com/manuelruder/artistic-videos/blob/master/consistencyChecker/consistencyChecker.cpp
33
+ # reimplemented in numpy by Hans Brouwer
34
+ # // consistencyChecker
35
+ # // Check consistency of forward flow via backward flow.
36
+ # // (c) Manuel Ruder, Alexey Dosovitskiy, Thomas Brox 2016
37
+
38
+ flow1 = np.flip(flow1, axis=2)
39
+ flow2 = np.flip(flow2, axis=2)
40
+ h, w, _ = flow1.shape
41
+
42
+ # get grid of coordinates for each pixel
43
+ orig_coord = np.flip(np.mgrid[:w, :h], 0).T
44
+
45
+ # find where the flow1 maps each pixel
46
+ warp_coord = orig_coord + flow1
47
+
48
+ # clip the coordinates in bounds and round down
49
+ warp_coord_inbound = np.zeros_like(warp_coord)
50
+ warp_coord_inbound[..., 0] = np.clip(warp_coord[..., 0], 0, h - 2)
51
+ warp_coord_inbound[..., 1] = np.clip(warp_coord[..., 1], 0, w - 2)
52
+ warp_coord_floor = np.floor(warp_coord_inbound).astype(int)
53
+
54
+ # for each pixel: bilinear interpolation of the corresponding flow2 values around the point mapped to by flow1
55
+ alpha = warp_coord_inbound - warp_coord_floor
56
+ flow2_00 = flow2[warp_coord_floor[..., 0], warp_coord_floor[..., 1]]
57
+ flow2_01 = flow2[warp_coord_floor[..., 0], warp_coord_floor[..., 1] + 1]
58
+ flow2_10 = flow2[warp_coord_floor[..., 0] + 1, warp_coord_floor[..., 1]]
59
+ flow2_11 = flow2[warp_coord_floor[..., 0] + 1, warp_coord_floor[..., 1] + 1]
60
+ flow2_0_blend = (1 - alpha[..., 1, None]) * flow2_00 + alpha[..., 1, None] * flow2_01
61
+ flow2_1_blend = (1 - alpha[..., 1, None]) * flow2_10 + alpha[..., 1, None] * flow2_11
62
+ warp_coord_flow2 = (1 - alpha[..., 0, None]) * flow2_0_blend + alpha[..., 0, None] * flow2_1_blend
63
+
64
+ # coordinates that flow2 remaps each flow1-mapped pixel to
65
+ rewarp_coord = warp_coord + warp_coord_flow2
66
+
67
+ # where the difference in position after flow1 and flow2 are applied is larger than a threshold there is likely an
68
+ # occlusion. set values to -1 so the final gaussian blur will spread the value a couple pixels around this area
69
+ squared_diff = np.sum((rewarp_coord - orig_coord) ** 2, axis=2)
70
+ threshold = 0.01 * np.sum(warp_coord_flow2 ** 2 + flow1 ** 2, axis=2) + 0.5
71
+
72
+ reliable_flow = np.ones((squared_diff.shape[0], squared_diff.shape[1], 3))
73
+ reliable_flow[...,0] = np.where(squared_diff >= threshold, -0.75, 1)
74
+
75
+ # areas mapping outside of the frame are also occluded (don't need extra region around these though, so set 0)
76
+ if edges_unreliable:
77
+ reliable_flow[...,1] = np.where(
78
+ np.logical_or.reduce(
79
+ (
80
+ warp_coord[..., 0] < 0,
81
+ warp_coord[..., 1] < 0,
82
+ warp_coord[..., 0] >= h - 1,
83
+ warp_coord[..., 1] >= w - 1,
84
+ )
85
+ ),
86
+ 0,
87
+ reliable_flow[...,1],
88
+ )
89
+
90
+ # get derivative of flow, large changes in derivative => edge of moving object
91
+ dx = np.diff(flow1, axis=1, append=0)
92
+ dy = np.diff(flow1, axis=0, append=0)
93
+ motion_edge = np.sum(dx ** 2 + dy ** 2, axis=2)
94
+ motion_threshold = 0.01 * np.sum(flow1 ** 2, axis=2) + 0.002
95
+ reliable_flow[...,2] = np.where(np.logical_and(motion_edge > motion_threshold, reliable_flow[...,2] != -0.75), 0, reliable_flow[...,2])
96
+
97
+ return reliable_flow
98
+
99
+
100
+ # parser = argparse.ArgumentParser()
101
+ # parser.add_argument("--flow_fwd", type=str, required=True, help="Forward flow path or glob pattern")
102
+ # parser.add_argument("--flow_bwd", type=str, required=True, help="Backward flow path or glob pattern")
103
+ # parser.add_argument("--output", type=str, required=True, help="Output consistency map path")
104
+ # parser.add_argument("--output_postfix", type=str, default='_cc', help="Output consistency map name postfix")
105
+ # parser.add_argument("--image_output", action='store_true', help="Output consistency map as b\w image path")
106
+ # parser.add_argument("--skip_numpy_output", action='store_true', help="Don`t save numpy array")
107
+ # parser.add_argument("--blur", type=float, default=2., help="Gaussian blur kernel size (0 for no blur)")
108
+ # parser.add_argument("--bottom_clamp", type=float, default=0., help="Clamp lower values")
109
+ # parser.add_argument("--edges_reliable", action='store_true', help="Consider edges reliable")
110
+ # parser.add_argument("--save_separate_channels", action='store_true', help="Save consistency mask layers as separate channels")
111
+ # args = parser.parse_args()
112
+
113
+ # def run(args):
114
+ # flow_fwd_many = sorted(glob.glob(args.flow_fwd))
115
+ # flow_bwd_many = sorted(glob.glob(args.flow_bwd))
116
+ # if len(flow_fwd_many)!= len(flow_bwd_many):
117
+ # raise Exception('Forward and backward flow file numbers don`t match')
118
+ # return
119
+
120
+ # for flow_fwd,flow_bwd in tqdm(zip(flow_fwd_many, flow_bwd_many)):
121
+ # flow_fwd = flow_fwd.replace('\\','/')
122
+ # flow_bwd = flow_bwd.replace('\\','/')
123
+ # flow1 = np.load(flow_fwd)
124
+ # flow2 = np.load(flow_bwd)
125
+ # consistency_map_multilayer = make_consistency(flow1, flow2, edges_unreliable=not args.edges_reliable)
126
+
127
+ # if args.save_separate_channels:
128
+ # consistency_map = consistency_map_multilayer
129
+ # else:
130
+ # consistency_map = np.ones_like(consistency_map_multilayer[...,0])
131
+ # consistency_map*=consistency_map_multilayer[...,0]
132
+ # consistency_map*=consistency_map_multilayer[...,1]
133
+ # consistency_map*=consistency_map_multilayer[...,2]
134
+
135
+ # # blur
136
+ # if args.blur>0.:
137
+ # consistency_map = scipy.ndimage.gaussian_filter(consistency_map, [args.blur, args.blur])
138
+
139
+ # #clip values between bottom_clamp and 1
140
+ # bottom_clamp = min(max(args.bottom_clamp,0.), 0.999)
141
+ # consistency_map = consistency_map.clip(bottom_clamp, 1)
142
+ # out_fname = args.output+'/'+flow_fwd.split('/')[-1][:-4]+args.output_postfix
143
+
144
+ # if not args.skip_numpy_output:
145
+ # np.save(out_fname, consistency_map)
146
+
147
+ # #save as jpeg
148
+ # if args.image_output:
149
+ # PIL.Image.fromarray((consistency_map*255.).astype('uint8')).save(out_fname+'.jpg', quality=90)
150
+
151
+ # run(args)
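# --- Editor's illustrative sketch (not part of the committed file) ---
# Collapsing the three reliability layers returned by make_consistency() into a
# single [0..1] consistency mask, mirroring the commented-out CLI code above.
# `flow_fwd` and `flow_bwd` are assumed to be (H, W, 2) float arrays produced by
# any optical-flow estimator (e.g. RAFT).
#
#     reliable = make_consistency(flow_fwd, flow_bwd, edges_unreliable=True)
#     consistency = reliable[..., 0] * reliable[..., 1] * reliable[..., 2]
#     consistency = consistency.clip(0.0, 1.0)  # -0.75 occlusion markers floor to 0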
scripts/deforum_helpers/defaults.py ADDED
@@ -0,0 +1,218 @@
 
 
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ def get_samplers_list():
19
+ return {
20
+ 'euler a': 'Euler a',
21
+ 'euler': 'Euler',
22
+ 'lms': 'LMS',
23
+ 'heun': 'Heun',
24
+ 'dpm2': 'DPM2',
25
+ 'dpm2 a': 'DPM2 a',
26
+ 'dpm++ 2s a': 'DPM++ 2S a',
27
+ 'dpm++ 2m': 'DPM++ 2M',
28
+ 'dpm++ sde': 'DPM++ SDE',
29
+ 'dpm fast': 'DPM fast',
30
+ 'dpm adaptive': 'DPM adaptive',
31
+ 'lms karras': 'LMS Karras',
32
+ 'dpm2 karras': 'DPM2 Karras',
33
+ 'dpm2 a karras': 'DPM2 a Karras',
34
+ 'dpm++ 2s a karras': 'DPM++ 2S a Karras',
35
+ 'dpm++ 2m karras': 'DPM++ 2M Karras',
36
+ 'dpm++ sde karras': 'DPM++ SDE Karras'
37
+ }
38
+
39
+ def DeforumAnimPrompts():
40
+ return r"""{
41
+ "0": "tiny cute swamp bunny, highly detailed, intricate, ultra hd, sharp photo, crepuscular rays, in focus, by tomasz alen kopera",
42
+ "30": "anthropomorphic clean cat, surrounded by fractals, epic angle and pose, symmetrical, 3d, depth of field, ruan jia and fenghua zhong",
43
+ "60": "a beautiful coconut --neg photo, realistic",
44
+ "90": "a beautiful durian, trending on Artstation"
45
+ }
46
+ """
47
+
48
+ # Guided images defaults
49
+ def get_guided_imgs_default_json():
50
+ return '''{
51
+ "0": "https://deforum.github.io/a1/Gi1.png",
52
+ "max_f/4-5": "https://deforum.github.io/a1/Gi2.png",
53
+ "max_f/2-10": "https://deforum.github.io/a1/Gi3.png",
54
+ "3*max_f/4-15": "https://deforum.github.io/a1/Gi4.jpg",
55
+ "max_f-20": "https://deforum.github.io/a1/Gi1.png"
56
+ }'''
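# Editor's note (illustrative): the keys above are math expressions evaluated at
# runtime, so with max_f = 100 they resolve to frames 0, 20, 40, 60 and 80.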
57
+
58
+ def get_hybrid_info_html():
59
+ return """
60
+ <p style="padding-bottom:0">
61
+ <b style="text-shadow: blue -1px -1px;">Hybrid Video Compositing in 2D/3D Mode</b>
62
+ <span style="color:#DDD;font-size:0.7rem;text-shadow: black -1px -1px;margin-left:10px;">
63
+ by <a href="https://github.com/reallybigname">reallybigname</a>
64
+ </span>
65
+ </p>
66
+ <ul style="list-style-type:circle; margin-left:1em; margin-bottom:1em;">
67
+ <li>Composite video with previous frame init image in <b>2D or 3D animation_mode</b> <i>(not for Video Input mode)</i></li>
68
+ <li>Uses your <b>Init</b> settings for <b>video_init_path, extract_nth_frame, overwrite_extracted_frames</b></li>
69
+ <li>In Keyframes tab, you can also set <b>color_coherence</b> = '<b>Video Input</b>'</li>
70
+ <li><b>color_coherence_video_every_N_frames</b> lets you only match every N frames</li>
71
+ <li>Color coherence may be used with hybrid composite off, to just use video color.</li>
72
+ <li>Hybrid motion may be used with hybrid composite off, to just use video motion.</li>
73
+ </ul>
74
+ Hybrid Video Schedules
75
+ <ul style="list-style-type:circle; margin-left:1em; margin-bottom:1em;">
76
+ <li>The alpha schedule controls overall alpha for video mix, whether using a composite mask or not.</li>
77
+ <li>The <b>hybrid_comp_mask_blend_alpha_schedule</b> only affects the 'Blend' <b>hybrid_comp_mask_type</b>.</li>
78
+ <li>Mask contrast schedule is from 0-255. Normal is 1. Affects all masks.</li>
79
+ <li>Autocontrast low/high cutoff schedules 0-100. Low 0 High 100 is full range. <br>(<i><b>hybrid_comp_mask_auto_contrast</b> must be enabled</i>)</li>
80
+ </ul>
81
+ <a style='color:SteelBlue;' target='_blank' href='https://github.com/deforum-art/deforum-for-automatic1111-webui/wiki/Animation-Settings#hybrid-video-mode-for-2d3d-animations'>Click Here</a> for more info/ a Guide.
82
+ """
83
+
84
+ def get_composable_masks_info_html():
85
+ return """
86
+ <ul style="list-style-type:circle; margin-left:0.75em; margin-bottom:0.2em">
87
+ <li>To enable, check use_mask in the Init tab</li>
88
+ <li>Supports boolean operations: (! - negation, & - and, | - or, ^ - xor, \ - difference, () - nested operations)</li>
89
+ <li>default variables: in \{\}, like \{init_mask\}, \{video_mask\}, \{everywhere\}</li>
90
+ <li>masks from files: in [], like [mask1.png]</li>
91
+ <li>description-based: <i>word masks</i> in &lt;&gt;, like &lt;apple&gt;, &lt;hair&gt</li>
92
+ </ul>
93
+ """
94
+
95
+ def get_parseq_info_html():
96
+ return """
97
+ <p>Use a <a style='color:SteelBlue;' target='_blank' href='https://sd-parseq.web.app/deforum'>Parseq</a> manifest for your animation (leave blank to ignore).</p>
98
+ <p style="margin-top:1em; margin-bottom:1em;">
99
+ Fields managed in your Parseq manifest override the values and schedules set in other parts of this UI. You can select which values to override by using the "Managed Fields" section in Parseq.
100
+ </p>
101
+ """
102
+
103
+ def get_prompts_info_html():
104
+ return """
105
+ <ul style="list-style-type:circle; margin-left:0.75em; margin-bottom:0.2em">
106
+ <li>Please always keep values in math functions above 0.</li>
107
+ <li>There is *no* Batch mode like in vanilla Deforum. Please use the txt2img tab for that.</li>
108
+ <li>For negative prompts, please write your positive prompt, then --neg ugly, text, asymmetric, or any other negative tokens of your choice. OR:</li>
109
+ <li>Use the negative_prompts field to automatically append all words as a negative prompt. *Don't* add --neg in the negative_prompts field!</li>
110
+ <li>Prompts are stored in JSON format. If you've got an error, check it in a <a style="color:SteelBlue" href="https://odu.github.io/slingjsonlint/">JSON Validator</a></li>
111
+ </ul>
112
+ """
113
+
114
+ def get_guided_imgs_info_html():
115
+ return """
116
+ <p>You can use this as a guided image tool or as a looper depending on your settings in the keyframe images field.
117
+ Set the keyframes and the images that you want to show up.
118
+ Note: the number of frames between each keyframe should be greater than the tweening frames.</p>
119
+
120
+ <p>Prerequisites and Important Info:</p>
121
+ <ul style="list-style-type:circle; margin-left:2em; margin-bottom:0em">
122
+ <li>This mode works ONLY with 2D/3D animation modes. Interpolation and Video Input modes aren't supported.</li>
123
+ <li>Init tab's strength slider should be greater than 0. Recommended value (.65 - .80).</li>
124
+ <li>'seed_behavior' will be forcibly set to 'schedule'.</li>
125
+ </ul>
126
+
127
+ <p>Looping recommendations:</p>
128
+ <ul style="list-style-type:circle; margin-left:2em; margin-bottom:0em">
129
+ <li>seed_schedule should start and end on the same seed.<br />
130
+ Example: seed_schedule could use 0:(5), 1:(-1), 219:(-1), 220:(5)</li>
131
+ <li>The 1st and last keyframe images should match.</li>
132
+ <li>Set your total number of keyframes to be 21 more than the last inserted keyframe image.<br />
133
+ Example: Default args should use 221 as the total keyframes.</li>
134
+ <li>Prompts are stored in JSON format. If you've got an error, check it in the validator,
135
+ <a style="color:SteelBlue" href="https://odu.github.io/slingjsonlint/">like here</a></li>
136
+ </ul>
137
+
138
+ <p>The Guided images mode exposes the following variables for the prompts and the schedules:</p>
139
+ <ul style="list-style-type:circle; margin-left:2em; margin-bottom:0em">
140
+ <li><b>s</b> is the <i>initial</i> seed for the whole video generation.</li>
141
+ <li><b>max_f</b> is the length of the video, in frames.<br />
142
+ Example: seed_schedule could use 0:(s), 1:(-1), "max_f-2":(-1), "max_f-1":(s)</li>
143
+ <li><b>t</b> is the current frame number.<br />
144
+ Example: strength_schedule could use 0:(0.25 * cos((72 / 60 * 3.141 * (t + 0) / 30))**13 + 0.7) to make alternating changes each 30 frames</li>
145
+ </ul>
146
+ """
147
+
148
+ def get_main_info_html():
149
+ return """
150
+ <p>StableDiffusion WebUI-based re-implementation of <strong><a href="https://deforum.github.io">deforum.github.io</a></strong>, maintained by <strong><a href="https://github.com/kabachuha">kabachuha</a> & <a href="https://github.com/hithereai">hithereai</a></strong></p>
151
+ <p><a style="color:SteelBlue" href="https://github.com/deforum-art/deforum-for-automatic1111-webui/wiki/FAQ-&-Troubleshooting">FOR HELP CLICK HERE</a></p>
152
+ <ul style="list-style-type:circle; margin-left:1em">
153
+ <li>The code for this extension: <a style="color:SteelBlue" href="https://github.com/deforum-art/deforum-for-automatic1111-webui">here</a>.</li>
154
+ <li>Join the <a style="color:SteelBlue" href="https://discord.gg/deforum">official Deforum Discord</a> to share your creations and suggestions.</li>
155
+ <li>Official Deforum Wiki: <a style="color:SteelBlue" href="https://github.com/deforum-art/deforum-for-automatic1111-webui/wiki">here</a>.</li>
156
+ <li>Anime-inclined great guide (by FizzleDorf) with lots of examples: <a style="color:SteelBlue" href="https://rentry.org/AnimAnon-Deforum">here</a>.</li>
157
+ <li>For advanced keyframing with Math functions, see <a style="color:SteelBlue" href="https://github.com/deforum-art/deforum-for-automatic1111-webui/wiki/Maths-in-Deforum">here</a>.</li>
158
+ <li>Alternatively, use <a style="color:SteelBlue" href="https://sd-parseq.web.app/deforum">sd-parseq</a> as a UI to define your animation schedules (see the Parseq section in the Init tab).</li>
159
+ <li><a style="color:SteelBlue" href="https://www.framesync.xyz/">framesync.xyz</a> is also a good option, it makes compact math formulae for Deforum keyframes by selecting various waveforms.</li>
160
+ <li>The other site allows for making keyframes using <a style="color:SteelBlue" href="https://www.chigozie.co.uk/keyframe-string-generator/">interactive splines and Bezier curves</a> (select Disco output format).</li>
161
+ <li>If you want to use Width/Height which are not multiples of 64, please change noise_type to 'Uniform', in Keyframes --> Noise.</li>
162
+ </ul>
163
+ <italic>If you liked this extension, please <a style="color:SteelBlue" href="https://github.com/deforum-art/deforum-for-automatic1111-webui">give it a star on GitHub</a>!</italic> 😊
164
+ <ul style="list-style-type:circle; margin-left:1em">
165
+ <li>This program comes with <strong>ABSOLUTELY NO WARRANTY</strong>. This is free software, and you are welcome to redistribute it under certain conditions. See more information in the <a style="color:SteelBlue" href="https://github.com/deforum-art/sd-webui-deforum/blob/automatic1111-webui/LICENSE">LICENSE</a></li>
166
+ </ul>
167
+ """
168
+ def get_frame_interpolation_info_html():
169
+ return """
170
+ Use <a href="https://github.com/megvii-research/ECCV2022-RIFE">RIFE</a> / <a href="https://film-net.github.io/">FILM</a> Frame Interpolation to smooth out, slow-mo (or both) any video.</p>
171
+ <p style="margin-top:1em">
172
+ Supported engines:
173
+ <ul style="list-style-type:circle; margin-left:1em; margin-bottom:1em">
174
+ <li>RIFE v4.6 and FILM.</li>
175
+ </ul>
176
+ </p>
177
+ <p style="margin-top:1em">
178
+ Important notes:
179
+ <ul style="list-style-type:circle; margin-left:1em; margin-bottom:1em">
180
+ <li>Frame Interpolation will *not* run if any of the following are enabled: 'Store frames in ram' / 'Skip video for run all'.</li>
181
+ <li>Audio (if provided) will *not* be transferred to the interpolated video if Slow-Mo is enabled.</li>
182
+ <li>'add_soundtrack' and 'soundtrack_path' aren't being honoured in "Interpolate an existing video" mode. Original vid audio will be used instead with the same slow-mo rules above.</li>
183
+ <li>In "Interpolate existing pics" mode, FPS is determined *only* by output FPS slider. Audio will be added if requested even with slow-mo "enabled", as it does *nothing* in this mode.</li>
184
+ </ul>
185
+ </p>
186
+ """
187
+ def get_frames_to_video_info_html():
188
+ return """
189
+ <p style="margin-top:0em">
190
+ Important Notes:
191
+ <ul style="list-style-type:circle; margin-left:1em; margin-bottom:0.25em">
192
+ <li>Enter relative to webui folder or Full-Absolute path, and make sure it ends with something like this: '20230124234916_%09d.png', just replace 20230124234916 with your batch ID. The %09d is important, don't forget it!</li>
193
+ <li>In the filename, '%09d' represents the 9 counting numbers, For '20230124234916_000000001.png', use '20230124234916_%09d.png'</li>
194
+ <li>If non-deforum frames, use the correct number of counting digits. For files like 'bunnies-0000.jpg', you'd use 'bunnies-%04d.jpg'</li>
195
+ </ul>
196
+ """
197
+ def get_gradio_html(section_name):
198
+ if section_name.lower() == 'hybrid_video':
199
+ return get_hybrid_info_html()
200
+ elif section_name.lower() == 'composable_masks':
201
+ return get_composable_masks_info_html()
202
+ elif section_name.lower() == 'parseq':
203
+ return get_parseq_info_html()
204
+ elif section_name.lower() == 'prompts':
205
+ return get_prompts_info_html()
206
+ elif section_name.lower() == 'guided_imgs':
207
+ return get_guided_imgs_info_html()
208
+ elif section_name.lower() == 'main':
209
+ return get_main_info_html()
210
+ elif section_name.lower() == 'frame_interpolation':
211
+ return get_frame_interpolation_info_html()
212
+ elif section_name.lower() == 'frames_to_video':
213
+ return get_frames_to_video_info_html()
214
+ else:
215
+ return None
216
+
217
+ mask_fill_choices=['fill', 'original', 'latent noise', 'latent nothing']
218
+
scripts/deforum_helpers/deforum_controlnet.py ADDED
@@ -0,0 +1,336 @@
 
 
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ # This helper script is responsible for ControlNet/Deforum integration
19
+ # https://github.com/Mikubill/sd-webui-controlnet — controlnet repo
20
+
21
+ import os
22
+ import gradio as gr
23
+ import scripts
24
+ from PIL import Image
25
+ import numpy as np
26
+ import importlib
27
+ from modules import scripts
28
+ from .deforum_controlnet_gradio import hide_ui_by_cn_status, hide_file_textboxes, ToolButton
29
+ from .general_utils import count_files_in_folder, clean_gradio_path_strings # TODO: do it another way
30
+ from .video_audio_utilities import vid2frames, convert_image
31
+ from .animation_key_frames import ControlNetKeys
32
+ from .load_images import load_image
33
+
34
+ cnet = None
35
+ # number of CN model tabs to show in the deforum gui
36
+ num_of_models = 5
37
+
38
+ def find_controlnet():
39
+ global cnet
40
+ if cnet: return cnet
41
+ try:
42
+ cnet = importlib.import_module('extensions.sd-webui-controlnet.scripts.external_code', 'external_code')
43
+ except:
44
+ try:
45
+ cnet = importlib.import_module('extensions-builtin.sd-webui-controlnet.scripts.external_code', 'external_code')
46
+ except:
47
+ pass
48
+ if cnet:
49
+ print(f"\033[0;32m*Deforum ControlNet support: enabled*\033[0m")
50
+ return cnet
51
+ return None
52
+
53
+ def controlnet_infotext():
54
+ return """Requires the <a style='color:SteelBlue;' target='_blank' href='https://github.com/Mikubill/sd-webui-controlnet'>ControlNet</a> extension to be installed.</p>
55
+ <p>If Deforum crashes due to CN updates, go <a style='color:Orange;' target='_blank' href='https://github.com/Mikubill/sd-webui-controlnet/issues'>here</a> and report your problem.</p>
56
+ """
57
+
58
+ def is_controlnet_enabled(controlnet_args):
59
+ for i in range(1, num_of_models + 1):
60
+ if getattr(controlnet_args, f'cn_{i}_enabled', False):
61
+ return True
62
+ return False
63
+
64
+ def setup_controlnet_ui_raw():
65
+ cnet = find_controlnet()
66
+ cn_models = cnet.get_models()
67
+ cn_preprocessors = cnet.get_modules()
68
+
69
+ cn_modules = cnet.get_modules_detail()
70
+ preprocessor_sliders_config = {}
71
+
72
+ for config_name, config_values in cn_modules.items():
73
+ sliders = config_values.get('sliders', [])
74
+ preprocessor_sliders_config[config_name] = sliders
75
+
76
+ model_free_preprocessors = ["reference_only", "reference_adain", "reference_adain+attn"]
77
+ flag_preprocessor_resolution = "Preprocessor Resolution"
78
+
79
+ def build_sliders(module, pp):
80
+ grs = []
81
+ if module not in preprocessor_sliders_config:
82
+ grs += [
83
+ gr.update(label=flag_preprocessor_resolution, value=512, minimum=64, maximum=2048, step=1, visible=not pp, interactive=not pp),
84
+ gr.update(visible=False, interactive=False),
85
+ gr.update(visible=False, interactive=False),
86
+ gr.update(visible=True)
87
+ ]
88
+ else:
89
+ for slider_config in preprocessor_sliders_config[module]:
90
+ if isinstance(slider_config, dict):
91
+ visible = True
92
+ if slider_config['name'] == flag_preprocessor_resolution:
93
+ visible = not pp
94
+ grs.append(gr.update(
95
+ label=slider_config['name'],
96
+ value=slider_config['value'],
97
+ minimum=slider_config['min'],
98
+ maximum=slider_config['max'],
99
+ step=slider_config['step'] if 'step' in slider_config else 1,
100
+ visible=visible,
101
+ interactive=visible))
102
+ else:
103
+ grs.append(gr.update(visible=False, interactive=False))
104
+ while len(grs) < 3:
105
+ grs.append(gr.update(visible=False, interactive=False))
106
+ grs.append(gr.update(visible=True))
107
+ if module in model_free_preprocessors:
108
+ grs += [gr.update(visible=False, value='None'), gr.update(visible=False)]
109
+ else:
110
+ grs += [gr.update(visible=True), gr.update(visible=True)]
111
+ return grs
112
+
113
+ refresh_symbol = '\U0001f504' # 🔄
114
+ switch_values_symbol = '\U000021C5' # ⇅
115
+ model_dropdowns = []
116
+ infotext_fields = []
117
+
118
+ def create_model_in_tab_ui(cn_id):
119
+ with gr.Row():
120
+ enabled = gr.Checkbox(label="Enable", value=False, interactive=True)
121
+ pixel_perfect = gr.Checkbox(label="Pixel Perfect", value=False, visible=False, interactive=True)
122
+ low_vram = gr.Checkbox(label="Low VRAM", value=False, visible=False, interactive=True)
123
+ overwrite_frames = gr.Checkbox(label='Overwrite input frames', value=True, visible=False, interactive=True)
124
+ with gr.Row(visible=False) as mod_row:
125
+ module = gr.Dropdown(cn_preprocessors, label=f"Preprocessor", value="none", interactive=True)
126
+ model = gr.Dropdown(cn_models, label=f"Model", value="None", interactive=True)
127
+ refresh_models = ToolButton(value=refresh_symbol)
128
+ refresh_models.click(refresh_all_models, model, model)
129
+ with gr.Row(visible=False) as weight_row:
130
+ weight = gr.Textbox(label="Weight schedule", lines=1, value='0:(1)', interactive=True)
131
+ with gr.Row(visible=False) as start_cs_row:
132
+ guidance_start = gr.Textbox(label="Starting Control Step schedule", lines=1, value='0:(0.0)', interactive=True)
133
+ with gr.Row(visible=False) as end_cs_row:
134
+ guidance_end = gr.Textbox(label="Ending Control Step schedule", lines=1, value='0:(1.0)', interactive=True)
135
+ model_dropdowns.append(model)
136
+ with gr.Column(visible=False) as advanced_column:
137
+ processor_res = gr.Slider(label="Annotator resolution", value=64, minimum=64, maximum=2048, interactive=False)
138
+ threshold_a = gr.Slider(label="Threshold A", value=64, minimum=64, maximum=1024, interactive=False)
139
+ threshold_b = gr.Slider(label="Threshold B", value=64, minimum=64, maximum=1024, interactive=False)
140
+ with gr.Row(visible=False) as vid_path_row:
141
+ vid_path = gr.Textbox(value='', label="ControlNet Input Video/ Image Path", interactive=True)
142
+ with gr.Row(visible=False) as mask_vid_path_row: # invisible temporarily since 26-04-23 until masks are fixed
143
+ mask_vid_path = gr.Textbox(value='', label="ControlNet Mask Video/ Image Path (*NOT WORKING, kept in UI for CN's devs testing!*)", interactive=True)
144
+ with gr.Row(visible=False) as control_mode_row:
145
+ control_mode = gr.Radio(choices=["Balanced", "My prompt is more important", "ControlNet is more important"], value="Balanced", label="Control Mode", interactive=True)
146
+ with gr.Row(visible=False) as env_row:
147
+ resize_mode = gr.Radio(choices=["Outer Fit (Shrink to Fit)", "Inner Fit (Scale to Fit)", "Just Resize"], value="Inner Fit (Scale to Fit)", label="Resize Mode", interactive=True)
148
+ with gr.Row(visible=False) as control_loopback_row:
149
+ loopback_mode = gr.Checkbox(label="LoopBack mode", value=False, interactive=True)
150
+ hide_output_list = [pixel_perfect, low_vram, mod_row, module, weight_row, start_cs_row, end_cs_row, env_row, overwrite_frames, vid_path_row, control_mode_row, mask_vid_path_row,
151
+ control_loopback_row] # add mask_vid_path_row when masks are working again
152
+ for cn_output in hide_output_list:
153
+ enabled.change(fn=hide_ui_by_cn_status, inputs=enabled, outputs=cn_output)
154
+ module.change(build_sliders, inputs=[module, pixel_perfect], outputs=[processor_res, threshold_a, threshold_b, advanced_column, model, refresh_models])
155
+ # hide vid/image input fields
156
+ loopback_outs = [vid_path_row, mask_vid_path_row]
157
+ for loopback_output in loopback_outs:
158
+ loopback_mode.change(fn=hide_file_textboxes, inputs=loopback_mode, outputs=loopback_output)
159
+ # handle pixel perfect ui changes
160
+ pixel_perfect.change(build_sliders, inputs=[module, pixel_perfect], outputs=[processor_res, threshold_a, threshold_b, advanced_column, model, refresh_models])
161
+ infotext_fields.extend([
162
+ (module, f"ControlNet Preprocessor"),
163
+ (model, f"ControlNet Model"),
164
+ (weight, f"ControlNet Weight"),
165
+ ])
166
+
167
+ return {key: value for key, value in locals().items() if key in [
168
+ "enabled", "pixel_perfect", "low_vram", "module", "model", "weight",
169
+ "guidance_start", "guidance_end", "processor_res", "threshold_a", "threshold_b", "resize_mode", "control_mode",
170
+ "overwrite_frames", "vid_path", "mask_vid_path", "loopback_mode"
171
+ ]}
172
+
173
+ def refresh_all_models(*inputs):
174
+ cn_models = cnet.get_models(update=True)
175
+ dd = inputs[0]
176
+ selected = dd if dd in cn_models else "None"
177
+ return gr.Dropdown.update(value=selected, choices=cn_models)
178
+
179
+ with gr.Tabs():
180
+ model_params = {}
181
+ for i in range(1, num_of_models + 1):
182
+ with gr.Tab(f"CN Model {i}"):
183
+ model_params[i] = create_model_in_tab_ui(i)
184
+
185
+ for key, value in model_params[i].items():
186
+ locals()[f"cn_{i}_{key}"] = value
187
+
188
+ return locals()
189
+
190
+ def setup_controlnet_ui():
191
+ if not find_controlnet():
192
+ gr.HTML("""<a style='target='_blank' href='https://github.com/Mikubill/sd-webui-controlnet'>ControlNet not found. Please install it :)</a>""", elem_id='controlnet_not_found_html_msg')
193
+ return {}
194
+
195
+ try:
196
+ return setup_controlnet_ui_raw()
197
+ except Exception as e:
198
+ print(f"'ControlNet UI setup failed with error: '{e}'!")
199
+ gr.HTML(f"""
200
+ Failed to setup ControlNet UI, check the reason in your commandline log. Please, downgrade your CN extension to <a style='color:Orange;' target='_blank' href='https://github.com/Mikubill/sd-webui-controlnet/archive/c9340671d6d59e5a79fc404f78f747f969f87374.zip'>c9340671d6d59e5a79fc404f78f747f969f87374</a> or report the problem <a style='color:Orange;' target='_blank' href='https://github.com/Mikubill/sd-webui-controlnet/issues'>here</a>.
201
+ """, elem_id='controlnet_not_found_html_msg')
202
+ return {}
203
+
204
+ def controlnet_component_names():
205
+ if not find_controlnet():
206
+ return []
207
+
208
+ return [f'cn_{i}_{component}' for i in range(1, num_of_models + 1) for component in [
209
+ 'overwrite_frames', 'vid_path', 'mask_vid_path', 'enabled',
210
+ 'low_vram', 'pixel_perfect',
211
+ 'module', 'model', 'weight', 'guidance_start', 'guidance_end',
212
+ 'processor_res', 'threshold_a', 'threshold_b', 'resize_mode', 'control_mode', 'loopback_mode'
213
+ ]]
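# Editor's note (illustrative): with num_of_models = 5 this yields 5 * 17 = 85
# component names ('cn_1_overwrite_frames', 'cn_1_vid_path', ..., 'cn_5_loopback_mode'),
# matching the per-model locals exposed by setup_controlnet_ui_raw().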
214
+
215
+ def process_with_controlnet(p, args, anim_args, loop_args, controlnet_args, root, is_img2img=True, frame_idx=0):
216
+ CnSchKeys = ControlNetKeys(anim_args, controlnet_args)
217
+
218
+ def read_cn_data(cn_idx):
219
+ cn_mask_np, cn_image_np = None, None
220
+ # Loopback mode ENABLED:
221
+ if getattr(controlnet_args, f'cn_{cn_idx}_loopback_mode'):
222
+ # On very first frame, check if use init enabled, and if init image is provided
223
+ if frame_idx == 0 and args.use_init and args.init_image is not None:
224
+ cn_image_np = load_image(args.init_image)
225
+ # convert to uint8 for compatibility with CN
226
+ cn_image_np = np.array(cn_image_np).astype('uint8')
227
+ # Not first frame, use previous img (init_sample)
228
+ elif frame_idx > 0 and args.init_sample:
229
+ cn_image_np = np.array(args.init_sample).astype('uint8')
230
+ else: # loopback mode is DISABLED
231
+ cn_inputframes = os.path.join(args.outdir, f'controlnet_{cn_idx}_inputframes') # set input frames folder path
232
+ if os.path.exists(cn_inputframes):
233
+ if count_files_in_folder(cn_inputframes) == 1:
234
+ cn_frame_path = os.path.join(cn_inputframes, "000000000.jpg")
235
+ print(f'Reading ControlNet *static* base frame at {cn_frame_path}')
236
+ else:
237
+ cn_frame_path = os.path.join(cn_inputframes, f"{frame_idx:09}.jpg")
238
+ print(f'Reading ControlNet {cn_idx} base frame #{frame_idx} at {cn_frame_path}')
239
+ if os.path.exists(cn_frame_path):
240
+ cn_image_np = np.array(Image.open(cn_frame_path).convert("RGB")).astype('uint8')
241
+ cn_maskframes = os.path.join(args.outdir, f'controlnet_{cn_idx}_maskframes') # set mask frames folder path
242
+ if os.path.exists(cn_maskframes):
243
+ if count_files_in_folder(cn_maskframes) == 1:
244
+ cn_mask_frame_path = os.path.join(cn_maskframes, "000000000.jpg")
245
+ print(f'Reading ControlNet *static* mask frame at {cn_mask_frame_path}')
246
+ else:
247
+ cn_mask_frame_path = os.path.join(args.outdir, f'controlnet_{cn_idx}_maskframes', f"{frame_idx:09}.jpg")
248
+ print(f'Reading ControlNet {cn_idx} mask frame #{frame_idx} at {cn_mask_frame_path}')
249
+ if os.path.exists(cn_mask_frame_path):
250
+ cn_mask_np = np.array(Image.open(cn_mask_frame_path).convert("RGB")).astype('uint8')
251
+
252
+ return cn_mask_np, cn_image_np
253
+
254
+ cnet = find_controlnet()
255
+ cn_data = [read_cn_data(i) for i in range(1, num_of_models + 1)]
256
+
257
+ # Check if any loopback_mode is set to True
258
+ any_loopback_mode = any(getattr(controlnet_args, f'cn_{i}_loopback_mode') for i in range(1, num_of_models + 1))
259
+
260
+ cn_inputframes_list = [os.path.join(args.outdir, f'controlnet_{i}_inputframes') for i in range(1, num_of_models + 1)]
261
+
262
+ if not any(os.path.exists(cn_inputframes) for cn_inputframes in cn_inputframes_list) and not any_loopback_mode:
263
+ print(f'\033[33mNeither the base nor the masking frames for ControlNet were found. Using the regular pipeline\033[0m')
264
+
265
+ p.scripts = scripts.scripts_img2img if is_img2img else scripts.scripts_txt2img
266
+
267
+ def create_cnu_dict(cn_args, prefix, img_np, mask_np, frame_idx, CnSchKeys):
268
+
269
+ keys = [
270
+ "enabled", "module", "model", "weight", "resize_mode", "control_mode", "low_vram", "pixel_perfect",
271
+ "processor_res", "threshold_a", "threshold_b", "guidance_start", "guidance_end"
272
+ ]
273
+ cnu = {k: getattr(cn_args, f"{prefix}_{k}") for k in keys}
274
+ model_num = int(prefix.split('_')[-1]) # Extract model number from prefix (e.g., "cn_1" -> 1)
275
+ if 1 <= model_num <= 5:
276
+ # if in loopmode and no init image (img_np, after processing in this case) provided, disable CN unit for the very first frame. Will be enabled in the next frame automatically
277
+ if getattr(cn_args, f"cn_{model_num}_loopback_mode") and frame_idx == 0 and img_np is None:
278
+ cnu['enabled'] = False
279
+ cnu['weight'] = getattr(CnSchKeys, f"cn_{model_num}_weight_schedule_series")[frame_idx]
280
+ cnu['guidance_start'] = getattr(CnSchKeys, f"cn_{model_num}_guidance_start_schedule_series")[frame_idx]
281
+ cnu['guidance_end'] = getattr(CnSchKeys, f"cn_{model_num}_guidance_end_schedule_series")[frame_idx]
282
+ cnu['image'] = {'image': img_np, 'mask': mask_np} if mask_np is not None else img_np
283
+
284
+ return cnu
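# --- Editor's illustrative sketch (hypothetical values, not part of the committed file) ---
# For prefix "cn_1" at frame_idx 10 with a weight schedule of "0:(0.5), 20:(1.0)"
# (assuming linear interpolation between keyframes), the dict returned above could
# look roughly like:
#     {'enabled': True, 'module': 'canny', 'model': 'control_v11p_sd15_canny',
#      'weight': 0.75, 'guidance_start': 0.0, 'guidance_end': 1.0,
#      'resize_mode': 'Inner Fit (Scale to Fit)', 'control_mode': 'Balanced',
#      'low_vram': False, 'pixel_perfect': False, 'processor_res': 512,
#      'threshold_a': 100, 'threshold_b': 200, 'image': <uint8 HxWx3 ndarray>}
# which is then splatted into cnet.ControlNetUnit(**...) further below.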
285
+
286
+ masks_np, images_np = zip(*cn_data)
287
+
288
+ cn_units = [cnet.ControlNetUnit(**create_cnu_dict(controlnet_args, f"cn_{i + 1}", img_np, mask_np, frame_idx, CnSchKeys))
289
+ for i, (img_np, mask_np) in enumerate(zip(images_np, masks_np))]
290
+
291
+ p.script_args = {"enabled": True}
292
+ cnet.update_cn_script_in_processing(p, cn_units, is_img2img=is_img2img, is_ui=False)
293
+
294
+ def process_controlnet_input_frames(args, anim_args, controlnet_args, video_path, mask_path, outdir_suffix, id):
295
+ if (video_path or mask_path) and getattr(controlnet_args, f'cn_{id}_enabled'):
296
+ frame_path = os.path.join(args.outdir, f'controlnet_{id}_{outdir_suffix}')
297
+ os.makedirs(frame_path, exist_ok=True)
298
+
299
+ accepted_image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')
300
+ if video_path and video_path.lower().endswith(accepted_image_extensions):
301
+ convert_image(video_path, os.path.join(frame_path, '000000000.jpg'))
302
+ print(f"Copied CN Model {id}'s single input image to inputframes folder!")
303
+ elif mask_path and mask_path.lower().endswith(accepted_image_extensions):
304
+ convert_image(mask_path, os.path.join(frame_path, '000000000.jpg'))
305
+ print(f"Copied CN Model {id}'s single input image to inputframes *mask* folder!")
306
+ else:
307
+ print(f'Unpacking ControlNet {id} {"video mask" if mask_path else "base video"}')
308
+ print(f"Exporting Video Frames to {frame_path}...")
309
+ vid2frames(
310
+ video_path=video_path or mask_path,
311
+ video_in_frame_path=frame_path,
312
+ n=1 if anim_args.animation_mode != 'Video Input' else anim_args.extract_nth_frame,
313
+ overwrite=getattr(controlnet_args, f'cn_{id}_overwrite_frames'),
314
+ extract_from_frame=0 if anim_args.animation_mode != 'Video Input' else anim_args.extract_from_frame,
315
+ extract_to_frame=(anim_args.max_frames - 1) if anim_args.animation_mode != 'Video Input' else anim_args.extract_to_frame,
316
+ numeric_files_output=True
317
+ )
318
+ print(f"Loading {anim_args.max_frames} input frames from {frame_path} and saving video frames to {args.outdir}")
319
+ print(f'ControlNet {id} {"video mask" if mask_path else "base video"} unpacked!')
320
+
321
+ def unpack_controlnet_vids(args, anim_args, controlnet_args):
322
+ # this func gets called from render.py once for an entire animation run -->
323
+ # tries to trigger an extraction of CN input frames (regular + masks) from video or image
324
+ for i in range(1, num_of_models + 1):
325
+ # LoopBack mode is enabled, no need to extract a video or copy an init image
326
+ if getattr(controlnet_args, f'cn_{i}_loopback_mode'):
327
+ print(f"ControlNet #{i} is in LoopBack mode, skipping video/ image extraction stage.")
328
+ continue
329
+ vid_path = clean_gradio_path_strings(getattr(controlnet_args, f'cn_{i}_vid_path', None))
330
+ mask_path = clean_gradio_path_strings(getattr(controlnet_args, f'cn_{i}_mask_vid_path', None))
331
+
332
+ if vid_path: # Process base video, if available
333
+ process_controlnet_input_frames(args, anim_args, controlnet_args, vid_path, None, 'inputframes', i)
334
+
335
+ if mask_path: # Process mask video, if available
336
+ process_controlnet_input_frames(args, anim_args, controlnet_args, None, mask_path, 'maskframes', i)
scripts/deforum_helpers/deforum_controlnet_gradio.py ADDED
@@ -0,0 +1,88 @@
 
 
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import gradio as gr
19
+ # print (cnet_1.get_modules())
20
+
21
+ # *** TODO: re-enable table printing! disabled only temp! 13-04-23 ***
22
+ # table = Table(title="ControlNet params",padding=0, box=box.ROUNDED)
23
+
24
+ # TODO: auto infer the names and the values for the table
25
+ # field_names = []
26
+ # field_names += ["module", "model", "weight", "inv", "guide_start", "guide_end", "guess", "resize", "rgb_bgr", "proc res", "thr a", "thr b"]
27
+ # for field_name in field_names:
28
+ # table.add_column(field_name, justify="center")
29
+
30
+ # cn_model_name = str(controlnet_args.cn_1_model)
31
+
32
+ # rows = []
33
+ # rows += [controlnet_args.cn_1_module, cn_model_name[len('control_'):] if 'control_' in cn_model_name else cn_model_name, controlnet_args.cn_1_weight, controlnet_args.cn_1_invert_image, controlnet_args.cn_1_guidance_start, controlnet_args.cn_1_guidance_end, controlnet_args.cn_1_guess_mode, controlnet_args.cn_1_resize_mode, controlnet_args.cn_1_rgbbgr_mode, controlnet_args.cn_1_processor_res, controlnet_args.cn_1_threshold_a, controlnet_args.cn_1_threshold_b]
34
+ # rows = [str(x) for x in rows]
35
+
36
+ # table.add_row(*rows)
37
+ # console.print(table)
38
+
39
+ def hide_ui_by_cn_status(choice):
40
+ return gr.update(visible=True) if choice else gr.update(visible=False)
41
+
42
+ def hide_file_textboxes(choice):
43
+ return gr.update(visible=False) if choice else gr.update(visible=True)
44
+
45
+ class ToolButton(gr.Button, gr.components.FormComponent):
46
+ """Small button with single emoji as text, fits inside gradio forms"""
47
+ def __init__(self, **kwargs):
48
+ super().__init__(variant="tool", **kwargs)
49
+
50
+ def get_block_name(self):
51
+ return "button"
52
+
53
+ model_free_preprocessors = ["reference_only", "reference_adain", "reference_adain+attn"]
54
+ flag_preprocessor_resolution = "Preprocessor Resolution"
55
+
56
+ def build_sliders(module, pp, preprocessor_sliders_config):
57
+ grs = []
58
+ if module not in preprocessor_sliders_config:
59
+ grs += [
60
+ gr.update(label=flag_preprocessor_resolution, value=512, minimum=64, maximum=2048, step=1, visible=not pp, interactive=not pp),
61
+ gr.update(visible=False, interactive=False),
62
+ gr.update(visible=False, interactive=False),
63
+ gr.update(visible=True)
64
+ ]
65
+ else:
66
+ for slider_config in preprocessor_sliders_config[module]:
67
+ if isinstance(slider_config, dict):
68
+ visible = True
69
+ if slider_config['name'] == flag_preprocessor_resolution:
70
+ visible = not pp
71
+ grs.append(gr.update(
72
+ label=slider_config['name'],
73
+ value=slider_config['value'],
74
+ minimum=slider_config['min'],
75
+ maximum=slider_config['max'],
76
+ step=slider_config['step'] if 'step' in slider_config else 1,
77
+ visible=visible,
78
+ interactive=visible))
79
+ else:
80
+ grs.append(gr.update(visible=False, interactive=False))
81
+ while len(grs) < 3:
82
+ grs.append(gr.update(visible=False, interactive=False))
83
+ grs.append(gr.update(visible=True))
84
+ if module in model_free_preprocessors:
85
+ grs += [gr.update(visible=False, value='None'), gr.update(visible=False)]
86
+ else:
87
+ grs += [gr.update(visible=True), gr.update(visible=True)]
88
+ return grs
scripts/deforum_helpers/deforum_tqdm.py ADDED
@@ -0,0 +1,99 @@
 
 
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ from math import ceil
20
+ import tqdm
21
+ from modules.shared import progress_print_out, opts, cmd_opts
22
+
23
+ class DeforumTQDM:
24
+ def __init__(self, args, anim_args, parseq_args, video_args):
25
+ self._tqdm = None
26
+ self._args = args
27
+ self._anim_args = anim_args
28
+ self._parseq_args = parseq_args
29
+ self._video_args = video_args
30
+
31
+ def reset(self):
32
+ from .animation_key_frames import DeformAnimKeys
33
+ from .parseq_adapter import ParseqAnimKeys
34
+ deforum_total = 0
35
+ # FIXME: get only amount of steps
36
+ use_parseq = self._parseq_args.parseq_manifest is not None and self._parseq_args.parseq_manifest.strip()
37
+ keys = DeformAnimKeys(self._anim_args) if not use_parseq else ParseqAnimKeys(self._parseq_args, self._anim_args, self._video_args, mute=True)
38
+
39
+ start_frame = 0
40
+ if self._anim_args.resume_from_timestring:
41
+ for tmp in os.listdir(self._args.outdir):
42
+ filename = tmp.split("_")
43
+ # don't use saved depth maps to count number of frames
44
+ if self._anim_args.resume_timestring in filename and "depth" not in filename:
45
+ start_frame += 1
46
+ start_frame = start_frame - 1
47
+ using_vid_init = self._anim_args.animation_mode == 'Video Input'
48
+ turbo_steps = 1 if using_vid_init else int(self._anim_args.diffusion_cadence)
49
+ if self._anim_args.resume_from_timestring:
50
+ last_frame = start_frame - 1
51
+ if turbo_steps > 1:
52
+ last_frame -= last_frame % turbo_steps
53
+ if turbo_steps > 1:
54
+ turbo_next_frame_idx = last_frame
55
+ turbo_prev_frame_idx = turbo_next_frame_idx
56
+ start_frame = last_frame + turbo_steps
57
+ frame_idx = start_frame
58
+ had_first = False
59
+ while frame_idx < self._anim_args.max_frames:
60
+ strength = keys.strength_schedule_series[frame_idx]
61
+ if not had_first and self._args.use_init and self._args.init_image is not None and self._args.init_image != '':
62
+ deforum_total += int(ceil(self._args.steps * (1 - strength)))
63
+ had_first = True
64
+ elif not had_first:
65
+ deforum_total += self._args.steps
66
+ had_first = True
67
+ else:
68
+ deforum_total += int(ceil(self._args.steps * (1 - strength)))
69
+
70
+ if turbo_steps > 1:
71
+ frame_idx += turbo_steps
72
+ else:
73
+ frame_idx += 1
74
+
75
+ self._tqdm = tqdm.tqdm(
76
+ desc="Deforum progress",
77
+ total=deforum_total,
78
+ position=1,
79
+ file=progress_print_out
80
+ )
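# Editor's note (illustrative arithmetic): with steps=20, a constant strength of
# 0.6, no init image, cadence=2 and max_frames=100, the loop above visits frames
# 0, 2, ..., 98; the first frame costs the full 20 steps and the remaining 49
# cost ceil(20 * (1 - 0.6)) = 8 each, so deforum_total = 20 + 49 * 8 = 412.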
81
+
82
+ def update(self):
83
+ if not opts.multiple_tqdm or cmd_opts.disable_console_progressbars:
84
+ return
85
+ if self._tqdm is None:
86
+ self.reset()
87
+ self._tqdm.update()
88
+
89
+ def updateTotal(self, new_total):
90
+ if not opts.multiple_tqdm or cmd_opts.disable_console_progressbars:
91
+ return
92
+ if self._tqdm is None:
93
+ self.reset()
94
+ self._tqdm.total = new_total
95
+
96
+ def clear(self):
97
+ if self._tqdm is not None:
98
+ self._tqdm.close()
99
+ self._tqdm = None
scripts/deforum_helpers/deprecation_utils.py ADDED
@@ -0,0 +1,99 @@
 
 
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ # This file is used to map deprecated setting names in a dictionary
19
+ # and print a message containing the old and the new names
20
+
21
+ deprecation_map = {
22
+ "histogram_matching": None,
23
+ "flip_2d_perspective": "enable_perspective_flip",
24
+ "skip_video_for_run_all": "skip_video_creation",
25
+ "color_coherence": [
26
+ ("Match Frame 0 HSV", "HSV", False),
27
+ ("Match Frame 0 LAB", "LAB", False),
28
+ ("Match Frame 0 RGB", "RGB", False),
29
+ # ,("removed_value", None, True) # for removed values, if we'll need in the future
30
+ ],
31
+ "hybrid_composite": [
32
+ (False, "None", False),
33
+ (True, "Normal", False),
34
+ ],
35
+ "optical_flow_redo_generation": [
36
+ (False, "None", False),
37
+ (True, "DIS Fine", False),
38
+ ],
39
+ "optical_flow_cadence": [
40
+ (False, "None", False),
41
+ (True, "DIS Fine", False),
42
+ ],
43
+ "cn_1_resize_mode": [
44
+ ("Envelope (Outer Fit)", "Outer Fit (Shrink to Fit)", False),
45
+ ("Scale to Fit (Inner Fit)", "Inner Fit (Scale to Fit)", False),
46
+ ],
47
+ "cn_2_resize_mode": [
48
+ ("Envelope (Outer Fit)", "Outer Fit (Shrink to Fit)", False),
49
+ ("Scale to Fit (Inner Fit)", "Inner Fit (Scale to Fit)", False),
50
+ ],
51
+ "cn_3_resize_mode": [
52
+ ("Envelope (Outer Fit)", "Outer Fit (Shrink to Fit)", False),
53
+ ("Scale to Fit (Inner Fit)", "Inner Fit (Scale to Fit)", False),
54
+ ],
55
+ "use_zoe_depth": ("depth_algorithm", [("True", "Zoe+AdaBins (old)"), ("False", "Midas+AdaBins (old)")]),
56
+ }
57
+
58
+ def dynamic_num_to_schedule_formatter(old_value):
59
+ return f"0:({old_value})"
60
+
61
+ for i in range(1, 6): # 5 CN models in total
62
+ deprecation_map[f"cn_{i}_weight"] = dynamic_num_to_schedule_formatter
63
+ deprecation_map[f"cn_{i}_guidance_start"] = dynamic_num_to_schedule_formatter
64
+ deprecation_map[f"cn_{i}_guidance_end"] = dynamic_num_to_schedule_formatter
65
+
66
+ def handle_deprecated_settings(settings_json):
67
+ # Set legacy_colormatch mode to True when importing old files, so results are backwards-compatible. Print a message about it too
68
+ if 'legacy_colormatch' not in settings_json:
69
+ settings_json['legacy_colormatch'] = True
70
+ print('\033[33mlegacy_colormatch is missing from settings file, so we are setting it to *True* for backwards compatibility. You are welcome to test your file with that setting being disabled for better color coherence.\033[0m')
71
+ print("")
72
+ for setting_name, deprecation_info in deprecation_map.items():
73
+ if setting_name in settings_json:
74
+ if deprecation_info is None:
75
+ print(f"WARNING: Setting '{setting_name}' has been removed. It will be discarded and the default value used instead!")
76
+ elif isinstance(deprecation_info, tuple):
77
+ new_setting_name, value_map = deprecation_info
78
+ old_value = str(settings_json.pop(setting_name)) # Convert the boolean value to a string for comparison
79
+ new_value = next((v for k, v in value_map if k == old_value), None)
80
+ if new_value is not None:
81
+ print(f"WARNING: Setting '{setting_name}' has been renamed to '{new_setting_name}' with value '{new_value}'. The saved settings file will reflect the change")
82
+ settings_json[new_setting_name] = new_value
83
+ elif callable(deprecation_info):
84
+ old_value = settings_json[setting_name]
85
+ if isinstance(old_value, (int, float)):
86
+ new_value = deprecation_info(old_value)
87
+ print(f"WARNING: Value '{old_value}' for setting '{setting_name}' has been replaced with '{new_value}'. The saved settings file will reflect the change")
88
+ settings_json[setting_name] = new_value
89
+ elif isinstance(deprecation_info, str):
90
+ print(f"WARNING: Setting '{setting_name}' has been renamed to '{deprecation_info}'. The saved settings file will reflect the change")
91
+ settings_json[deprecation_info] = settings_json.pop(setting_name)
92
+ elif isinstance(deprecation_info, list):
93
+ for old_value, new_value, is_removed in deprecation_info:
94
+ if settings_json[setting_name] == old_value:
95
+ if is_removed:
96
+ print(f"WARNING: Value '{old_value}' for setting '{setting_name}' has been removed. It will be discarded and the default value used instead!")
97
+ else:
98
+ print(f"WARNING: Value '{old_value}' for setting '{setting_name}' has been replaced with '{new_value}'. The saved settings file will reflect the change")
99
+ settings_json[setting_name] = new_value
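Note (illustration): a quick sketch of how the map above is applied by handle_deprecated_settings: string entries rename a key, list entries remap listed values, callable entries turn plain numbers into schedules, and tuple entries move the value to a new key. The settings values below are invented for the example:

settings = {
    "skip_video_for_run_all": False,         # str entry: key renamed to skip_video_creation
    "color_coherence": "Match Frame 0 LAB",  # list entry: value remapped to "LAB"
    "cn_1_weight": 0.8,                      # callable entry: number expanded to the schedule "0:(0.8)"
    "use_zoe_depth": True,                   # tuple entry: moved to depth_algorithm="Zoe+AdaBins (old)"
}
handle_deprecated_settings(settings)
# settings now also contains legacy_colormatch=True, added for backwards compatibility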
scripts/deforum_helpers/depth.py ADDED
@@ -0,0 +1,160 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import gc
19
+ import cv2
20
+ import numpy as np
21
+ import torch
22
+ from PIL import Image
23
+ from einops import rearrange, repeat
24
+ from modules import devices
25
+ from modules.shared import cmd_opts
26
+ from .depth_adabins import AdaBinsModel
27
+ from .depth_leres import LeReSDepth
28
+ from .depth_midas import MidasDepth
29
+ from .depth_zoe import ZoeDepth
30
+ from .general_utils import debug_print
31
+
32
+ class DepthModel:
33
+ _instance = None
34
+
35
+ def __new__(cls, *args, **kwargs):
36
+ keep_in_vram = kwargs.get('keep_in_vram', False)
37
+ depth_algorithm = kwargs.get('depth_algorithm', 'Midas-3-Hybrid')
38
+ Width, Height = kwargs.get('Width', 512), kwargs.get('Height', 512)
39
+ midas_weight = kwargs.get('midas_weight', 0.2)
40
+ model_switched = cls._instance and cls._instance.depth_algorithm != depth_algorithm
41
+ resolution_changed = cls._instance and (cls._instance.Width != Width or cls._instance.Height != Height)
42
+ zoe_algorithm = 'zoe' in depth_algorithm.lower()
43
+ model_deleted = cls._instance and cls._instance.should_delete
44
+
45
+ should_reload = (cls._instance is None or model_deleted or model_switched or (zoe_algorithm and resolution_changed))
46
+
47
+ if should_reload:
48
+ cls._instance = super().__new__(cls)
49
+ cls._instance._initialize(models_path=args[0], device=args[1], half_precision=not cmd_opts.no_half, keep_in_vram=keep_in_vram, depth_algorithm=depth_algorithm, Width=Width, Height=Height, midas_weight=midas_weight)
50
+ elif cls._instance.should_delete and keep_in_vram:
51
+ cls._instance._initialize(models_path=args[0], device=args[1], half_precision=not cmd_opts.no_half, keep_in_vram=keep_in_vram, depth_algorithm=depth_algorithm, Width=Width, Height=Height, midas_weight=midas_weight)
52
+ cls._instance.should_delete = not keep_in_vram
53
+ return cls._instance
54
+
55
+ def _initialize(self, models_path, device, half_precision=not cmd_opts.no_half, keep_in_vram=False, depth_algorithm='Midas-3-Hybrid', Width=512, Height=512, midas_weight=1.0):
56
+ self.models_path = models_path
57
+ self.device = device
58
+ self.half_precision = half_precision
59
+ self.keep_in_vram = keep_in_vram
60
+ self.depth_algorithm = depth_algorithm
61
+ self.Width, self.Height = Width, Height
62
+ self.midas_weight = midas_weight
63
+ self.depth_min, self.depth_max = 1000, -1000
64
+ self.adabins_helper = None
65
+ self._initialize_model()
66
+
67
+ def _initialize_model(self):
68
+ depth_algo = self.depth_algorithm.lower()
69
+ if depth_algo.startswith('zoe'):
70
+ self.zoe_depth = ZoeDepth(self.Width, self.Height)
71
+ if depth_algo == 'zoe+adabins (old)':
72
+ self.adabins_model = AdaBinsModel(self.models_path, keep_in_vram=self.keep_in_vram)
73
+ self.adabins_helper = self.adabins_model.adabins_helper
74
+ elif depth_algo == 'leres':
75
+ self.leres_depth = LeReSDepth(width=448, height=448, models_path=self.models_path, checkpoint_name='res101.pth', backbone='resnext101')
76
+ elif depth_algo == 'adabins':
77
+ self.adabins_model = AdaBinsModel(self.models_path, keep_in_vram=self.keep_in_vram)
78
+ self.adabins_helper = self.adabins_model.adabins_helper
79
+ elif depth_algo.startswith('midas'):
80
+ self.midas_depth = MidasDepth(self.models_path, self.device, half_precision=self.half_precision, midas_model_type=self.depth_algorithm)
81
+ if depth_algo == 'midas+adabins (old)':
82
+ self.adabins_model = AdaBinsModel(self.models_path, keep_in_vram=self.keep_in_vram)
83
+ self.adabins_helper = self.adabins_model.adabins_helper
84
+ else:
85
+ raise Exception(f"Unknown depth_algorithm: {self.depth_algorithm}")
86
+
87
+ def predict(self, prev_img_cv2, midas_weight, half_precision) -> torch.Tensor:
88
+
89
+ img_pil = Image.fromarray(cv2.cvtColor(prev_img_cv2.astype(np.uint8), cv2.COLOR_RGB2BGR))
90
+
91
+ if self.depth_algorithm.lower().startswith('zoe'):
92
+ depth_tensor = self.zoe_depth.predict(img_pil).to(self.device)
93
+ if self.depth_algorithm.lower() == 'zoe+adabins (old)' and midas_weight < 1.0:
94
+ use_adabins, adabins_depth = AdaBinsModel._instance.predict(img_pil, prev_img_cv2)
95
+ if use_adabins: # if there was no error in getting the adabins depth, align midas with adabins
96
+ depth_tensor = self.blend_and_align_with_adabins(depth_tensor, adabins_depth, midas_weight)
97
+ elif self.depth_algorithm.lower() == 'leres':
98
+ depth_tensor = self.leres_depth.predict(prev_img_cv2.astype(np.float32) / 255.0)
99
+ elif self.depth_algorithm.lower() == 'adabins':
100
+ use_adabins, adabins_depth = AdaBinsModel._instance.predict(img_pil, prev_img_cv2)
101
+ depth_tensor = torch.tensor(adabins_depth)
102
+ if use_adabins is False:
103
+ raise Exception("Error getting depth from AdaBins") # TODO: fallback to something else maybe?
104
+ elif self.depth_algorithm.lower().startswith('midas'):
105
+ depth_tensor = self.midas_depth.predict(prev_img_cv2, half_precision)
106
+ if self.depth_algorithm.lower() == 'midas+adabins (old)' and midas_weight < 1.0:
107
+ use_adabins, adabins_depth = AdaBinsModel._instance.predict(img_pil, prev_img_cv2)
108
+ if use_adabins: # if there was no error in getting the adabins depth, align midas with adabins
109
+ depth_tensor = self.blend_and_align_with_adabins(depth_tensor, adabins_depth, midas_weight)
110
+ else: # Unknown!
111
+ raise Exception(f"Unknown depth_algorithm passed to depth.predict function: {self.depth_algorithm}")
112
+
113
+ return depth_tensor
114
+
115
+ def blend_and_align_with_adabins(self, depth_tensor, adabins_depth, midas_weight):
116
+ depth_tensor = torch.subtract(50.0, depth_tensor) / 19.0 # align midas depth with adabins depth. Original alignment code from Disco Diffusion
117
+ blended_depth_map = (depth_tensor.cpu().numpy() * midas_weight + adabins_depth * (1.0 - midas_weight))
118
+ depth_tensor = torch.from_numpy(np.expand_dims(blended_depth_map, axis=0)).squeeze().to(self.device)
119
+ debug_print(f"Blended Midas Depth with AdaBins Depth")
120
+ return depth_tensor
121
+
122
+ def to(self, device):
123
+ self.device = device
124
+ if self.depth_algorithm.lower().startswith('zoe'):
125
+ self.zoe_depth.zoe.to(device)
126
+ elif self.depth_algorithm.lower() == 'leres':
127
+ self.leres_depth.to(device)
128
+ elif self.depth_algorithm.lower().startswith('midas'):
129
+ self.midas_depth.to(device)
130
+ if hasattr(self, 'adabins_model'):
131
+ self.adabins_model.to(device)
132
+ gc.collect()
133
+ torch.cuda.empty_cache()
134
+
135
+ def to_image(self, depth: torch.Tensor):
136
+ depth = depth.cpu().numpy()
137
+ depth = np.expand_dims(depth, axis=0) if len(depth.shape) == 2 else depth
138
+ self.depth_min, self.depth_max = min(self.depth_min, depth.min()), max(self.depth_max, depth.max())
139
+ denom = max(1e-8, self.depth_max - self.depth_min)
140
+ temp = rearrange((depth - self.depth_min) / denom * 255, 'c h w -> h w c')
141
+ return Image.fromarray(repeat(temp, 'h w 1 -> h w c', c=3).astype(np.uint8))
142
+
143
+ def save(self, filename: str, depth: torch.Tensor):
144
+ self.to_image(depth).save(filename)
145
+
146
+ def delete_model(self):
147
+ for attr in ['zoe_depth', 'leres_depth']:
148
+ if hasattr(self, attr):
149
+ getattr(self, attr).delete()
150
+ delattr(self, attr)
151
+
152
+ if hasattr(self, 'midas_depth'):
153
+ del self.midas_depth
154
+
155
+ if hasattr(self, 'adabins_model'):
156
+ self.adabins_model.delete_model()
157
+
158
+ gc.collect()
159
+ torch.cuda.empty_cache()
160
+ devices.torch_gc()
scripts/deforum_helpers/depth_adabins.py ADDED
@@ -0,0 +1,79 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import torch
19
+ import numpy as np
20
+ from PIL import Image
21
+ import torchvision.transforms.functional as TF
22
+ from .general_utils import download_file_with_checksum
23
+ from infer import InferenceHelper
24
+
25
+ class AdaBinsModel:
26
+ _instance = None
27
+
28
+ def __new__(cls, *args, **kwargs):
29
+ keep_in_vram = kwargs.get('keep_in_vram', False)
30
+ if cls._instance is None:
31
+ cls._instance = super().__new__(cls)
32
+ cls._instance._initialize(*args, keep_in_vram=keep_in_vram)
33
+ return cls._instance
34
+
35
+ def _initialize(self, models_path, keep_in_vram=False):
36
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
37
+ self.keep_in_vram = keep_in_vram
38
+ self.adabins_helper = None
39
+
40
+ download_file_with_checksum(url='https://github.com/hithereai/deforum-for-automatic1111-webui/releases/download/AdaBins/AdaBins_nyu.pt', expected_checksum='643db9785c663aca72f66739427642726b03acc6c4c1d3755a4587aa2239962746410d63722d87b49fc73581dbc98ed8e3f7e996ff7b9c0d56d0fbc98e23e41a', dest_folder=models_path, dest_filename='AdaBins_nyu.pt')
41
+
42
+ self.adabins_helper = InferenceHelper(models_path=models_path, dataset='nyu', device=self.device)
43
+
44
+ def predict(self, img_pil, prev_img_cv2):
45
+ w, h = prev_img_cv2.shape[1], prev_img_cv2.shape[0]
46
+ adabins_depth = np.array([])
47
+ use_adabins = True
48
+ MAX_ADABINS_AREA, MIN_ADABINS_AREA = 500000, 448 * 448
49
+
50
+ image_pil_area, resized = w * h, False
51
+
52
+ if image_pil_area not in range(MIN_ADABINS_AREA, MAX_ADABINS_AREA + 1):
53
+ scale = ((MAX_ADABINS_AREA if image_pil_area > MAX_ADABINS_AREA else MIN_ADABINS_AREA) / image_pil_area) ** 0.5
54
+ depth_input = img_pil.resize((int(w * scale), int(h * scale)), Image.LANCZOS if image_pil_area > MAX_ADABINS_AREA else Image.BICUBIC)
55
+ print(f"AdaBins depth resized to {depth_input.width}x{depth_input.height}")
56
+ resized = True
57
+ else:
58
+ depth_input = img_pil
59
+
60
+ try:
61
+ with torch.no_grad():
62
+ _, adabins_depth = self.adabins_helper.predict_pil(depth_input)
63
+ if resized:
64
+ adabins_depth = TF.resize(torch.from_numpy(adabins_depth), torch.Size([h, w]), interpolation=TF.InterpolationMode.BICUBIC).cpu().numpy()
65
+ adabins_depth = adabins_depth.squeeze()
66
+ except Exception as e:
67
+ print("AdaBins exception encountered. Falling back to pure MiDaS/Zoe (only if running in Legacy Midas/Zoe+AdaBins mode)")
68
+ use_adabins = False
69
+ torch.cuda.empty_cache()
70
+
71
+ return use_adabins, adabins_depth
72
+
73
+ def to(self, device):
74
+ self.device = device
75
+ if self.adabins_helper is not None:
76
+ self.adabins_helper.to(device)
77
+
78
+ def delete_model(self):
79
+ del self.adabins_helper
scripts/deforum_helpers/depth_leres.py ADDED
@@ -0,0 +1,72 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import torch
19
+ import cv2
20
+ import os
21
+ import numpy as np
22
+ import torchvision.transforms as transforms
23
+ from .general_utils import download_file_with_checksum
24
+ from leres.lib.multi_depth_model_woauxi import RelDepthModel
25
+ from leres.lib.net_tools import load_ckpt
26
+
27
+ class LeReSDepth:
28
+ def __init__(self, width=448, height=448, models_path=None, checkpoint_name='res101.pth', backbone='resnext101'):
29
+ self.width = width
30
+ self.height = height
31
+ self.models_path = models_path
32
+ self.checkpoint_name = checkpoint_name
33
+ self.backbone = backbone
34
+
35
+ download_file_with_checksum(url='https://cloudstor.aarnet.edu.au/plus/s/lTIJF4vrvHCAI31/download', expected_checksum='7fdc870ae6568cb28d56700d0be8fc45541e09cea7c4f84f01ab47de434cfb7463cacae699ad19fe40ee921849f9760dedf5e0dec04a62db94e169cf203f55b1', dest_folder=models_path, dest_filename=self.checkpoint_name)
36
+
37
+ self.depth_model = RelDepthModel(backbone=self.backbone)
38
+ self.depth_model.eval()
39
+ self.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
40
+ self.depth_model.to(self.DEVICE)
41
+ load_ckpt(os.path.join(self.models_path, self.checkpoint_name), self.depth_model, None, None)
42
+
43
+ @staticmethod
44
+ def scale_torch(img):
45
+ if len(img.shape) == 2:
46
+ img = img[np.newaxis, :, :]
47
+ if img.shape[2] == 3:
48
+ transform = transforms.Compose([transforms.ToTensor(),
49
+ transforms.Normalize((0.485, 0.456, 0.406) , (0.229, 0.224, 0.225))])
50
+ img = transform(img)
51
+ else:
52
+ img = img.astype(np.float32)
53
+ img = torch.from_numpy(img)
54
+ return img
55
+
56
+ def predict(self, image):
57
+ resized_image = cv2.resize(image, (self.width, self.height))
58
+ img_torch = self.scale_torch(resized_image)[None, :, :, :]
59
+ pred_depth = self.depth_model.inference(img_torch).cpu().numpy().squeeze()
60
+ pred_depth_ori = cv2.resize(pred_depth, (image.shape[1], image.shape[0]))
61
+ return torch.from_numpy(pred_depth_ori).unsqueeze(0).to(self.DEVICE)
62
+
63
+ def save_raw_depth(self, depth, filepath):
64
+ depth_normalized = (depth / depth.max() * 60000).astype(np.uint16)
65
+ cv2.imwrite(filepath, depth_normalized)
66
+
67
+ def to(self, device):
68
+ self.DEVICE = device
69
+ self.depth_model = self.depth_model.to(device)
70
+
71
+ def delete(self):
72
+ del self.depth_model
scripts/deforum_helpers/depth_midas.py ADDED
@@ -0,0 +1,92 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ import cv2
20
+ import torch
21
+ import numpy as np
22
+ from .general_utils import download_file_with_checksum
23
+ from midas.dpt_depth import DPTDepthModel
24
+ from midas.transforms import Resize, NormalizeImage, PrepareForNet
25
+ import torchvision.transforms as T
26
+
27
+ class MidasDepth:
28
+ def __init__(self, models_path, device, half_precision=True, midas_model_type='Midas-3-Hybrid'):
29
+ if midas_model_type.lower() == 'midas-3.1-beitlarge':
30
+ self.midas_model_filename = 'dpt_beit_large_512.pt'
31
+ self.midas_model_checksum='66cbb00ea7bccd6e43d3fd277bd21002d8d8c2c5c487e5fcd1e1d70c691688a19122418b3ddfa94e62ab9f086957aa67bbec39afe2b41c742aaaf0699ee50b33'
32
+ self.midas_model_url = 'https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_512.pt'
33
+ self.resize_px = 512
34
+ self.backbone = 'beitl16_512'
35
+ else:
36
+ self.midas_model_filename = 'dpt_large-midas-2f21e586.pt'
37
+ self.midas_model_checksum = 'fcc4829e65d00eeed0a38e9001770676535d2e95c8a16965223aba094936e1316d569563552a852d471f310f83f597e8a238987a26a950d667815e08adaebc06'
38
+ self.midas_model_url = 'https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt'
39
+ self.resize_px = 384
40
+ self.backbone = 'vitl16_384'
41
+ self.device = device
42
+ self.normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
43
+ self.midas_transform = T.Compose([
44
+ Resize(self.resize_px, self.resize_px, resize_target=None, keep_aspect_ratio=True, ensure_multiple_of=32,
45
+ resize_method="minimal", image_interpolation_method=cv2.INTER_CUBIC),
46
+ self.normalization,
47
+ PrepareForNet()
48
+ ])
49
+
50
+ download_file_with_checksum(url=self.midas_model_url, expected_checksum=self.midas_model_checksum, dest_folder=models_path, dest_filename=self.midas_model_filename)
51
+
52
+ self.load_midas_model(models_path, self.midas_model_filename)
53
+ if half_precision:
54
+ self.midas_model = self.midas_model.half()
55
+
56
+ def load_midas_model(self, models_path, midas_model_filename):
57
+ model_file = os.path.join(models_path, midas_model_filename)
58
+ print(f"Loading MiDaS model from {midas_model_filename}...")
59
+ self.midas_model = DPTDepthModel(
60
+ path=model_file,
61
+ backbone=self.backbone,
62
+ non_negative=True,
63
+ )
64
+ self.midas_model.eval().to(self.device, memory_format=torch.channels_last if self.device == torch.device("cuda") else None)
65
+
66
+ def predict(self, prev_img_cv2, half_precision):
67
+ img_midas = prev_img_cv2.astype(np.float32) / 255.0
68
+ img_midas_input = self.midas_transform({"image": img_midas})["image"]
69
+ sample = torch.from_numpy(img_midas_input).float().to(self.device).unsqueeze(0)
70
+
71
+ if self.device.type == "cuda" or self.device.type == "mps":
72
+ sample = sample.to(memory_format=torch.channels_last)
73
+ if half_precision:
74
+ sample = sample.half()
75
+
76
+ with torch.no_grad():
77
+ midas_depth = self.midas_model.forward(sample)
78
+ midas_depth = torch.nn.functional.interpolate(
79
+ midas_depth.unsqueeze(1),
80
+ size=img_midas.shape[:2],
81
+ mode="bicubic",
82
+ align_corners=False,
83
+ ).squeeze().cpu().numpy()
84
+
85
+ torch.cuda.empty_cache()
86
+ depth_tensor = torch.from_numpy(np.expand_dims(midas_depth, axis=0)).squeeze().to(self.device)
87
+
88
+ return depth_tensor
89
+
90
+ def to(self, device):
91
+ self.device = device
92
+ self.midas_model = self.midas_model.to(device, memory_format=torch.channels_last if device == torch.device("cuda") else None)
scripts/deforum_helpers/depth_zoe.py ADDED
@@ -0,0 +1,47 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import torch
19
+ from zoedepth.models.builder import build_model
20
+ from zoedepth.utils.config import get_config
21
+
22
+ class ZoeDepth:
23
+ def __init__(self, width=512, height=512):
24
+ conf = get_config("zoedepth_nk", "infer")
25
+ conf.img_size = [width, height]
26
+ self.model_zoe = build_model(conf)
27
+ self.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
28
+ self.zoe = self.model_zoe.to(self.DEVICE)
29
+ self.width = width
30
+ self.height = height
31
+
32
+ def predict(self, image):
33
+ self.zoe.core.prep.resizer._Resize__width = self.width
34
+ self.zoe.core.prep.resizer._Resize__height = self.height
35
+ depth_tensor = self.zoe.infer_pil(image, output_type="tensor")
36
+ return depth_tensor
37
+
38
+ def to(self, device):
39
+ self.DEVICE = device
40
+ self.zoe = self.model_zoe.to(device)
41
+
42
+ def save_raw_depth(self, depth, filepath):
43
+ depth.save(filepath, format='PNG', mode='I;16')
44
+
45
+ def delete(self):
46
+ del self.model_zoe
47
+ del self.zoe
scripts/deforum_helpers/frame_interpolation.py ADDED
@@ -0,0 +1,239 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ from pathlib import Path
20
+ from rife.inference_video import run_rife_new_video_infer
21
+ from .video_audio_utilities import get_quick_vid_info, vid2frames, media_file_has_audio, extract_number, ffmpeg_stitch_video
22
+ from film_interpolation.film_inference import run_film_interp_infer
23
+ from .general_utils import duplicate_pngs_from_folder, checksum, convert_images_from_list
24
+ from modules.shared import opts
25
+
26
+ DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False)
27
+
28
+ # gets 'RIFE v4.3', returns: 'RIFE43'
29
+ def extract_rife_name(string):
30
+ parts = string.split()
31
+ if len(parts) != 2 or parts[0] != "RIFE" or (parts[1][0] != "v" or not parts[1][1:].replace('.','').isdigit()):
32
+ raise ValueError("Input string should contain exactly 2 words: the first should be 'RIFE' and the second should start with 'v' followed by a version number, e.g. 'RIFE v4.3'")
33
+ return "RIFE"+parts[1][1:].replace('.','')
34
+
35
+ # This function usually gets a filename, and converts it to a legal linux/windows *folder* name
36
+ def clean_folder_name(string):
37
+ illegal_chars = "/\\<>:\"|?*.,\" "
38
+ translation_table = str.maketrans(illegal_chars, "_"*len(illegal_chars))
39
+ return string.translate(translation_table)
40
+
41
+ def set_interp_out_fps(interp_x, slow_x_enabled, slom_x, in_vid_fps):
42
+ if interp_x == 'Disabled' or in_vid_fps in ('---', None, '', 'None'):
43
+ return '---'
44
+
45
+ fps = float(in_vid_fps) * int(interp_x)
46
+ # if slom_x != -1:
47
+ if slow_x_enabled:
48
+ fps /= int(slom_x)
49
+ return int(fps) if fps.is_integer() else fps
50
+
51
+ # get uploaded video frame count, fps, and return 3 values for the gradio UI: in fcount, in fps, out fps (using the set_interp_out_fps function above)
52
+ def gradio_f_interp_get_fps_and_fcount(vid_path, interp_x, slow_x_enabled, slom_x):
53
+ if vid_path is None:
54
+ return '---', '---', '---'
55
+ fps, fcount, resolution = get_quick_vid_info(vid_path.name)
56
+ expected_out_fps = set_interp_out_fps(interp_x, slow_x_enabled, slom_x, fps)
57
+ return (str(round(fps,2)) if fps is not None else '---', (round(fcount,2)) if fcount is not None else '---', round(expected_out_fps,2))
58
+
59
+ # handle call to interpolate an uploaded video from gradio button in args.py (the function that calls this func is named 'upload_vid_to_rife')
60
+ def process_interp_vid_upload_logic(file, engine, x_am, sl_enabled, sl_am, keep_imgs, f_location, f_crf, f_preset, in_vid_fps, f_models_path, vid_file_name):
61
+
62
+ print("got a request to *frame interpolate* an existing video.")
63
+
64
+ _, _, resolution = get_quick_vid_info(file.name)
65
+ folder_name = clean_folder_name(Path(vid_file_name).stem)
66
+ outdir = opts.outdir_samples or os.path.join(os.getcwd(), 'outputs')
67
+ outdir_no_tmp = outdir + f'/frame-interpolation/{folder_name}'
68
+ i = 1
69
+ while os.path.exists(outdir_no_tmp):
70
+ outdir_no_tmp = f"{outdir}/frame-interpolation/{folder_name}_{i}"
71
+ i += 1
72
+
73
+ outdir = os.path.join(outdir_no_tmp, 'tmp_input_frames')
74
+ os.makedirs(outdir, exist_ok=True)
75
+
76
+ vid2frames(video_path=file.name, video_in_frame_path=outdir, overwrite=True, extract_from_frame=0, extract_to_frame=-1, numeric_files_output=True, out_img_format='png')
77
+
78
+ # check if the uploaded vid has an audio stream. If it doesn't, set audio param to None so that ffmpeg won't try to add non-existing audio to final video.
79
+ audio_file_to_pass = None
80
+ if media_file_has_audio(file.name, f_location):
81
+ audio_file_to_pass = file.name
82
+
83
+ process_video_interpolation(frame_interpolation_engine=engine, frame_interpolation_x_amount=x_am, frame_interpolation_slow_mo_enabled = sl_enabled,frame_interpolation_slow_mo_amount=sl_am, orig_vid_fps=in_vid_fps, deforum_models_path=f_models_path, real_audio_track=audio_file_to_pass, raw_output_imgs_path=outdir, img_batch_id=None, ffmpeg_location=f_location, ffmpeg_crf=f_crf, ffmpeg_preset=f_preset, keep_interp_imgs=keep_imgs, orig_vid_name=folder_name, resolution=resolution)
84
+
85
+ # handle params before talking with the actual interpolation module (rifee/film, more to be added)
86
+ def process_video_interpolation(frame_interpolation_engine, frame_interpolation_x_amount, frame_interpolation_slow_mo_enabled, frame_interpolation_slow_mo_amount, orig_vid_fps, deforum_models_path, real_audio_track, raw_output_imgs_path, img_batch_id, ffmpeg_location, ffmpeg_crf, ffmpeg_preset, keep_interp_imgs, orig_vid_name, resolution, dont_change_fps=False, srt_path=None):
87
+
88
+ is_random_pics_run = dont_change_fps
89
+ fps = float(orig_vid_fps) * (1 if is_random_pics_run else frame_interpolation_x_amount)
90
+ fps /= int(frame_interpolation_slow_mo_amount) if frame_interpolation_slow_mo_enabled and not is_random_pics_run else 1
91
+
92
+ # disable audio-adding by setting real_audio_track to None if slow-mo is enabled
93
+ if real_audio_track is not None and frame_interpolation_slow_mo_enabled:
94
+ real_audio_track = None
95
+
96
+ # disable subtitles by setting srt_path to None if slow-mo is enabled
97
+ if srt_path is not None and frame_interpolation_slow_mo_enabled:
98
+ srt_path = None
99
+
100
+ if frame_interpolation_engine == 'None':
101
+ return
102
+ elif frame_interpolation_engine.startswith("RIFE"):
103
+ # make sure interp_x is valid and in range
104
+ if frame_interpolation_x_amount not in range(2, 11):
105
+ raise Error("frame_interpolation_x_amount must be between 2x and 10x")
106
+
107
+ # set UHD to True if res' is 2K or higher
108
+ if resolution:
109
+ UHD = resolution[0] >= 2048 and resolution[1] >= 2048
110
+ else:
111
+ UHD = False
112
+ # e.g. from "RIFE v2.3" to "RIFE23"
113
+ actual_model_folder_name = extract_rife_name(frame_interpolation_engine)
114
+
115
+ # run actual rife interpolation and video stitching etc - the whole suite
116
+ run_rife_new_video_infer(interp_x_amount=frame_interpolation_x_amount, slow_mo_enabled = frame_interpolation_slow_mo_enabled, slow_mo_x_amount=frame_interpolation_slow_mo_amount, model=actual_model_folder_name, fps=fps, deforum_models_path=deforum_models_path, audio_track=real_audio_track, raw_output_imgs_path=raw_output_imgs_path, img_batch_id=img_batch_id, ffmpeg_location=ffmpeg_location, ffmpeg_crf=ffmpeg_crf, ffmpeg_preset=ffmpeg_preset, keep_imgs=keep_interp_imgs, orig_vid_name=orig_vid_name, UHD=UHD, srt_path=srt_path)
117
+ elif frame_interpolation_engine == 'FILM':
118
+ prepare_film_inference(deforum_models_path=deforum_models_path, x_am=frame_interpolation_x_amount, sl_enabled=frame_interpolation_slow_mo_enabled, sl_am=frame_interpolation_slow_mo_amount, keep_imgs=keep_interp_imgs, raw_output_imgs_path=raw_output_imgs_path, img_batch_id=img_batch_id, f_location=ffmpeg_location, f_crf=ffmpeg_crf, f_preset=ffmpeg_preset, fps=fps, audio_track=real_audio_track, orig_vid_name=orig_vid_name, is_random_pics_run=is_random_pics_run, srt_path=srt_path)
119
+ else:
120
+ print("Unknown Frame Interpolation engine chosen. Doing nothing.")
121
+ return
122
+
123
+ def prepare_film_inference(deforum_models_path, x_am, sl_enabled, sl_am, keep_imgs, raw_output_imgs_path, img_batch_id, f_location, f_crf, f_preset, fps, audio_track, orig_vid_name, is_random_pics_run, srt_path=None):
124
+ import shutil
125
+
126
+ parent_folder = os.path.dirname(raw_output_imgs_path)
127
+ grandparent_folder = os.path.dirname(parent_folder)
128
+ if orig_vid_name is not None:
129
+ interp_vid_path = os.path.join(parent_folder, str(orig_vid_name) +'_FILM_x' + str(x_am))
130
+ else:
131
+ interp_vid_path = os.path.join(raw_output_imgs_path, str(img_batch_id) +'_FILM_x' + str(x_am))
132
+
133
+ film_model_name = 'film_net_fp16.pt'
134
+ film_model_folder = os.path.join(deforum_models_path,'film_interpolation')
135
+ film_model_path = os.path.join(film_model_folder, film_model_name) # actual full path to the film .pt model file
136
+ output_interp_imgs_folder = os.path.join(raw_output_imgs_path, 'interpolated_frames_film')
137
+ # set custom name depending on if we interpolate after a run, or interpolate a video (related/unrelated to deforum, we don't know) directly from within the interpolation tab
138
+ # interpolated_path = os.path.join(args.raw_output_imgs_path, 'interpolated_frames_rife')
139
+ if orig_vid_name is not None: # interpolating a video/ set of pictures (deforum or unrelated)
140
+ custom_interp_path = "{}_{}".format(output_interp_imgs_folder, orig_vid_name)
141
+ else: # interpolating after a deforum run:
142
+ custom_interp_path = "{}_{}".format(output_interp_imgs_folder, img_batch_id)
143
+
144
+ # interp_vid_path = os.path.join(raw_output_imgs_path, str(img_batch_id) + '_FILM_x' + str(x_am))
145
+ img_path_for_ffmpeg = os.path.join(custom_interp_path, "frame_%09d.png")
146
+
147
+ if sl_enabled:
148
+ interp_vid_path = interp_vid_path + '_slomo_x' + str(sl_am)
149
+ interp_vid_path = interp_vid_path + '.mp4'
150
+
151
+ # In this folder we temporarily keep the original frames (converted/ copy-pasted and img format depends on scenario)
152
+ temp_convert_raw_png_path = os.path.join(raw_output_imgs_path, "tmp_film_folder")
153
+ if is_random_pics_run: # pass dummy so it just copy-paste the imgs instead of re-writing them
154
+ total_frames = duplicate_pngs_from_folder(raw_output_imgs_path, temp_convert_raw_png_path, img_batch_id, 'DUMMY')
155
+ else: # re-write pics as png to avert a problem with mixed 24-bit and 32-bit outputs from the same animation run
156
+ total_frames = duplicate_pngs_from_folder(raw_output_imgs_path, temp_convert_raw_png_path, img_batch_id, None)
157
+ check_and_download_film_model('film_net_fp16.pt', film_model_folder) # TODO: split this part
158
+
159
+ # get number of in-between-frames to provide to FILM - mimics how RIFE works, we should get the same amount of total frames in the end
160
+ film_in_between_frames_count = calculate_frames_to_add(total_frames, x_am)
161
+ # Run actual FILM inference
162
+ run_film_interp_infer(
163
+ model_path = film_model_path,
164
+ input_folder = temp_convert_raw_png_path,
165
+ save_folder = custom_interp_path, # output folder is created in the infer part
166
+ inter_frames = film_in_between_frames_count)
167
+
168
+ add_soundtrack = 'None'
169
+ if audio_track is not None:
170
+ add_soundtrack = 'File'
171
+
172
+ print("*Passing interpolated frames to ffmpeg...*")
173
+ exception_raised = False
174
+ try:
175
+ ffmpeg_stitch_video(ffmpeg_location=f_location, fps=fps, outmp4_path=interp_vid_path, stitch_from_frame=0, stitch_to_frame=999999999, imgs_path=img_path_for_ffmpeg, add_soundtrack=add_soundtrack, audio_path=audio_track, crf=f_crf, preset=f_preset, srt_path=srt_path)
176
+ except Exception as e:
177
+ exception_raised = True
178
+ print(f"An error occurred while stitching the video: {e}")
179
+
180
+ if orig_vid_name and (keep_imgs or exception_raised):
181
+ shutil.move(custom_interp_path, parent_folder)
182
+ if not keep_imgs and not exception_raised:
183
+ if fps <= 450: # keep interp frames automatically if out_vid fps is above 450
184
+ shutil.rmtree(custom_interp_path, ignore_errors=True)
185
+ # delete duplicated raw non-interpolated frames
186
+ shutil.rmtree(temp_convert_raw_png_path, ignore_errors=True)
187
+ # remove folder with raw (non-interpolated) vid input frames in case of input VID and not PNGs
188
+ if orig_vid_name:
189
+ shutil.rmtree(raw_output_imgs_path, ignore_errors=True)
190
+
191
+ def check_and_download_film_model(model_name, model_dest_folder):
192
+ from basicsr.utils.download_util import load_file_from_url
193
+ if model_name == 'film_net_fp16.pt':
194
+ model_dest_path = os.path.join(model_dest_folder, model_name)
195
+ download_url = 'https://github.com/hithereai/frame-interpolation-pytorch/releases/download/film_net_fp16.pt/film_net_fp16.pt'
196
+ film_model_hash = '0a823815b111488ac2b7dd7fe6acdd25d35a22b703e8253587764cf1ee3f8f93676d24154d9536d2ce5bc3b2f102fb36dfe0ca230dfbe289d5cd7bde5a34ec12'
197
+ else: # Unknown FILM model
198
+ raise Exception("Got a request to download an unknown FILM model. Can't proceed.")
199
+ if os.path.exists(model_dest_path):
200
+ return
201
+ try:
202
+ os.makedirs(model_dest_folder, exist_ok=True)
203
+ # download film model from url
204
+ load_file_from_url(download_url, model_dest_folder)
205
+ # verify checksum
206
+ if checksum(model_dest_path) != film_model_hash:
207
+ raise Exception(f"Error while downloading {model_name}. Please download from: {download_url}, and put in: {model_dest_folder}")
208
+ except Exception as e:
209
+ raise Exception(f"Error while downloading {model_name}. Please download from: {download_url}, and put in: {model_dest_folder}")
210
+
211
+ # get film no. of frames to add after each pic from tot frames in interp_x values
212
+ def calculate_frames_to_add(total_frames, interp_x):
213
+ frames_to_add = (total_frames * interp_x - total_frames) / (total_frames - 1)
214
+ return int(round(frames_to_add))
215
+
216
+ def process_interp_pics_upload_logic(pic_list, engine, x_am, sl_enabled, sl_am, keep_imgs, f_location, f_crf, f_preset, fps, f_models_path, resolution, add_soundtrack, audio_track):
217
+ pic_path_list = [pic.name for pic in pic_list]
218
+ print(f"got a request to *frame interpolate* a set of {len(pic_list)} images.")
219
+ folder_name = clean_folder_name(Path(pic_list[0].orig_name).stem)
220
+ outdir_no_tmp = os.path.join(os.getcwd(), 'outputs', 'frame-interpolation', folder_name)
221
+ i = 1
222
+ while os.path.exists(outdir_no_tmp):
223
+ outdir_no_tmp = os.path.join(os.getcwd(), 'outputs', 'frame-interpolation', folder_name + '_' + str(i))
224
+ i += 1
225
+
226
+ outdir = os.path.join(outdir_no_tmp, 'tmp_input_frames')
227
+ os.makedirs(outdir, exist_ok=True)
228
+
229
+ convert_images_from_list(paths=pic_path_list, output_dir=outdir,format='png')
230
+
231
+ audio_file_to_pass = None
232
+ # todo? add handling of vid input sound? if needed at all...
233
+ if add_soundtrack == 'File':
234
+ audio_file_to_pass = audio_track
235
+ # todo: upgrade function so it takes a url and checks if the audio really exists before passing it on? not crucial as ffmpeg softly falls back if needed
236
+ # if media_file_has_audio(audio_track, f_location):
237
+
238
+ # pass param so it won't duplicate the images at all as we already do it in here?!
239
+ process_video_interpolation(frame_interpolation_engine=engine, frame_interpolation_x_amount=x_am, frame_interpolation_slow_mo_enabled = sl_enabled,frame_interpolation_slow_mo_amount=sl_am, orig_vid_fps=fps, deforum_models_path=f_models_path, real_audio_track=audio_file_to_pass, raw_output_imgs_path=outdir, img_batch_id=None, ffmpeg_location=f_location, ffmpeg_crf=f_crf, ffmpeg_preset=f_preset, keep_interp_imgs=keep_imgs, orig_vid_name=folder_name, resolution=resolution, dont_change_fps=True)
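Note (illustration): unlike RIFE, FILM is driven by a count of in-between frames per original pair rather than a multiplier, so calculate_frames_to_add back-solves that count from the requested total. A worked example:

total_frames, interp_x = 100, 3
frames_to_add = (total_frames * interp_x - total_frames) / (total_frames - 1)  # 200 / 99, about 2.02
print(int(round(frames_to_add)))  # 2 in-between frames per pair -> 100 + 99 * 2 = 298, close to the requested 300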
scripts/deforum_helpers/general_utils.py ADDED
@@ -0,0 +1,145 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ import shutil
20
+ import hashlib
21
+ from modules.shared import opts
22
+ from basicsr.utils.download_util import load_file_from_url
23
+
24
+ def debug_print(message):
25
+ DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False)
26
+ if DEBUG_MODE:
27
+ print(message)
28
+
29
+ def checksum(filename, hash_factory=hashlib.blake2b, chunk_num_blocks=128):
30
+ h = hash_factory()
31
+ with open(filename,'rb') as f:
32
+ while chunk := f.read(chunk_num_blocks*h.block_size):
33
+ h.update(chunk)
34
+ return h.hexdigest()
35
+
36
+ def get_os():
37
+ import platform
38
+ return {"Windows": "Windows", "Linux": "Linux", "Darwin": "Mac"}.get(platform.system(), "Unknown")
39
+
40
+ # used in src/rife/inference_video.py and more, soon
41
+ def duplicate_pngs_from_folder(from_folder, to_folder, img_batch_id, orig_vid_name):
42
+ import cv2
43
+ # TODO: don't copy-paste at all if the input is a video (currently it copy-pastes, and if the input is a deforum run it also converts, to make sure no errors arise because of 24/32-bit depth differences)
44
+ temp_convert_raw_png_path = os.path.join(from_folder, to_folder)
45
+ os.makedirs(temp_convert_raw_png_path, exist_ok=True)
46
+
47
+ frames_handled = 0
48
+ for f in os.listdir(from_folder):
49
+ if ('png' in f or 'jpg' in f) and '-' not in f and '_depth_' not in f and ((img_batch_id is not None and f.startswith(img_batch_id) or img_batch_id is None)):
50
+ frames_handled +=1
51
+ original_img_path = os.path.join(from_folder, f)
52
+ if orig_vid_name is not None:
53
+ shutil.copy(original_img_path, temp_convert_raw_png_path)
54
+ else:
55
+ image = cv2.imread(original_img_path)
56
+ new_path = os.path.join(temp_convert_raw_png_path, f)
57
+ cv2.imwrite(new_path, image, [cv2.IMWRITE_PNG_COMPRESSION, 0])
58
+ return frames_handled
59
+
60
+ def convert_images_from_list(paths, output_dir, format):
61
+ import os
62
+ from PIL import Image
63
+ # Ensure that the output directory exists
64
+ os.makedirs(output_dir, exist_ok=True)
65
+
66
+ # Loop over all input images
67
+ for i, path in enumerate(paths):
68
+ # Open the image
69
+ with Image.open(path) as img:
70
+ # Generate the output filename
71
+ filename = f"{i+1:09d}.{format}"
72
+ # Save the image to the output directory
73
+ img.save(os.path.join(output_dir, filename))
74
+
75
+ def get_deforum_version():
76
+ from modules import extensions as mext
77
+ try:
78
+ for ext in mext.extensions:
79
+ if ext.name in ["deforum", "deforum-for-automatic1111-webui", "sd-webui-deforum"] and ext.enabled:
80
+ ext.read_info_from_repo() # need this call to get exten info on ui-launch, not to be removed
81
+ return ext.version
82
+ return "Unknown"
83
+ except:
84
+ return "Unknown"
85
+
86
+ def custom_placeholder_format(value_dict, placeholder_match):
87
+ key = placeholder_match.group(1).lower()
88
+ value = value_dict.get(key, key) or "_"
89
+ if isinstance(value, dict) and value:
90
+ first_key = list(value.keys())[0]
91
+ value = str(value[first_key][0]) if isinstance(value[first_key], list) and value[first_key] else str(value[first_key])
92
+ return str(value)[:50]
93
+
94
+ def test_long_path_support(base_folder_path):
95
+ long_folder_name = 'A' * 300
96
+ long_path = os.path.join(base_folder_path, long_folder_name)
97
+ try:
98
+ os.makedirs(long_path)
99
+ shutil.rmtree(long_path)
100
+ return True
101
+ except OSError:
102
+ return False
103
+
104
+ def get_max_path_length(base_folder_path):
105
+ if get_os() == 'Windows':
106
+ return (32767 if test_long_path_support(base_folder_path) else 260) - len(base_folder_path) - 1
107
+ return 4096 - len(base_folder_path) - 1
108
+
109
+ def substitute_placeholders(template, arg_list, base_folder_path):
110
+ import re
111
+ # Find and update timestring values if resume_from_timestring is True
112
+ resume_from_timestring = next((arg_obj.resume_from_timestring for arg_obj in arg_list if hasattr(arg_obj, 'resume_from_timestring')), False)
113
+ resume_timestring = next((arg_obj.resume_timestring for arg_obj in arg_list if hasattr(arg_obj, 'resume_timestring')), None)
114
+
115
+ if resume_from_timestring and resume_timestring:
116
+ for arg_obj in arg_list:
117
+ if hasattr(arg_obj, 'timestring'):
118
+ arg_obj.timestring = resume_timestring
119
+
120
+ max_length = get_max_path_length(base_folder_path)
121
+ values = {attr.lower(): getattr(arg_obj, attr)
122
+ for arg_obj in arg_list
123
+ for attr in dir(arg_obj) if not callable(getattr(arg_obj, attr)) and not attr.startswith('__')}
124
+ formatted_string = re.sub(r"{(\w+)}", lambda m: custom_placeholder_format(values, m), template)
125
+ formatted_string = re.sub(r'[<>:"/\\|?*\s,]', '_', formatted_string)
126
+ return formatted_string[:max_length]
127
+
128
+ def count_files_in_folder(folder_path):
129
+ import glob
130
+ file_pattern = folder_path + "/*"
131
+ file_count = len(glob.glob(file_pattern))
132
+ return file_count
133
+
134
+ def clean_gradio_path_strings(input_str):
135
+ if isinstance(input_str, str) and input_str.startswith('"') and input_str.endswith('"'):
136
+ return input_str[1:-1]
137
+ else:
138
+ return input_str
139
+
140
+ def download_file_with_checksum(url, expected_checksum, dest_folder, dest_filename):
141
+ expected_full_path = os.path.join(dest_folder, dest_filename)
142
+ if not os.path.exists(expected_full_path) and not os.path.isdir(expected_full_path):
143
+ load_file_from_url(url=url, model_dir=dest_folder, file_name=dest_filename, progress=True)
144
+ if checksum(expected_full_path) != expected_checksum:
145
+ raise Exception(f"Error while downloading {dest_filename}.\nPlease manually download from: {url}\nAnd place it in: {dest_folder}")
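Note (illustration): substitute_placeholders fills {placeholder} tokens in an output-folder template from the attributes of the passed arg objects, falls back to the key name when an attribute is missing, and replaces path-illegal characters with underscores. A sketch with invented values, using SimpleNamespace as a stand-in for the real args objects:

from types import SimpleNamespace

args = SimpleNamespace(seed=42, sampler="Euler a", resume_from_timestring=False)
anim_args = SimpleNamespace(animation_mode="3D", max_frames=120)
template = "{timestring}_{animation_mode}_{seed}_{sampler}"
print(substitute_placeholders(template, [args, anim_args], "/tmp/outputs"))
# -> "timestring_3D_42_Euler_a"  (unknown key falls back to the key name, the space becomes '_')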
scripts/deforum_helpers/generate.py ADDED
@@ -0,0 +1,324 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ from PIL import Image
19
+ import math
20
+ import json
21
+ import itertools
22
+ import requests
23
+ import numexpr
24
+ from modules import processing, sd_models
25
+ from modules.shared import sd_model, state, cmd_opts
26
+ from .deforum_controlnet import is_controlnet_enabled, process_with_controlnet
27
+ from .prompt import split_weighted_subprompts
28
+ from .load_images import load_img, prepare_mask, check_mask_for_errors
29
+ from .webui_sd_pipeline import get_webui_sd_pipeline
30
+ from .rich import console
31
+ from .defaults import get_samplers_list
32
+ from .prompt import check_is_number
33
+
34
+ def load_mask_latent(mask_input, shape):
35
+ # mask_input (str or PIL Image.Image): Path to the mask image or a PIL Image object
36
+ # shape (list-like len(4)): shape of the image to match, usually latent_image.shape
37
+
38
+ if isinstance(mask_input, str): # mask input is probably a file name
39
+ if mask_input.startswith('http://') or mask_input.startswith('https://'):
40
+ mask_image = Image.open(requests.get(mask_input, stream=True).raw).convert('RGBA')
41
+ else:
42
+ mask_image = Image.open(mask_input).convert('RGBA')
43
+ elif isinstance(mask_input, Image.Image):
44
+ mask_image = mask_input
45
+ else:
46
+ raise Exception("mask_input must be a PIL image or a file name")
47
+
48
+ mask_w_h = (shape[-1], shape[-2])
49
+ mask = mask_image.resize(mask_w_h, resample=Image.LANCZOS)
50
+ mask = mask.convert("L")
51
+ return mask
52
+
53
+ def isJson(myjson):
54
+ try:
55
+ json.loads(myjson)
56
+ except ValueError as e:
57
+ return False
58
+ return True
59
+
60
+ # Add pairwise implementation here not to upgrade
61
+ # the whole python to 3.10 just for one function
62
+ def pairwise_repl(iterable):
63
+ a, b = itertools.tee(iterable)
64
+ next(b, None)
65
+ return zip(a, b)
66
+
67
+ def generate(args, keys, anim_args, loop_args, controlnet_args, root, frame=0, sampler_name=None):
68
+ if state.interrupted:
69
+ return None
70
+
71
+ if args.reroll_blank_frames == 'ignore':
72
+ return generate_inner(args, keys, anim_args, loop_args, controlnet_args, root, frame, sampler_name)
73
+
74
+ image, caught_vae_exception = generate_with_nans_check(args, keys, anim_args, loop_args, controlnet_args, root, frame, sampler_name)
75
+
76
+ if caught_vae_exception or not image.getbbox():
77
+ patience = args.reroll_patience
78
+ print("Blank frame detected! If you don't have the NSFW filter enabled, this may be due to a glitch!")
79
+ if args.reroll_blank_frames == 'reroll':
80
+ while caught_vae_exception or not image.getbbox():
81
+ print("Rerolling with +1 seed...")
82
+ args.seed += 1
83
+ image, caught_vae_exception = generate_with_nans_check(args, keys, anim_args, loop_args, controlnet_args, root, frame, sampler_name)
84
+ patience -= 1
85
+ if patience == 0:
86
+ print(f"Rerolling with +1 seed failed for {args.reroll_patience} iterations! Try setting webui's precision to 'full' and if it fails, please report this to the devs! Interrupting...")
87
+ state.interrupted = True
88
+ state.current_image = image
89
+ return None
90
+ elif args.reroll_blank_frames == 'interrupt':
91
+ print("Interrupting to save your eyes...")
92
+ state.interrupted = True
93
+ state.current_image = image
94
+ return None
95
+ return image
96
+
97
+ def generate_with_nans_check(args, keys, anim_args, loop_args, controlnet_args, root, frame=0, sampler_name=None):
98
+ if cmd_opts.disable_nan_check:
99
+ image = generate_inner(args, keys, anim_args, loop_args, controlnet_args, root, frame, sampler_name)
100
+ else:
101
+ try:
102
+ image = generate_inner(args, keys, anim_args, loop_args, controlnet_args, root, frame, sampler_name)
103
+ except Exception as e:
104
+ if "A tensor with all NaNs was produced in VAE." in repr(e):
105
+ print(e)
106
+ return None, True
107
+ else:
108
+ raise e
109
+ return image, False
110
+
111
+ def generate_inner(args, keys, anim_args, loop_args, controlnet_args, root, frame=0, sampler_name=None):
112
+ # Setup the pipeline
113
+ p = get_webui_sd_pipeline(args, root)
114
+ p.prompt, p.negative_prompt = split_weighted_subprompts(args.prompt, frame, anim_args.max_frames)
115
+
116
+ if not args.use_init and args.strength > 0 and args.strength_0_no_init:
117
+ args.strength = 0
118
+ processed = None
119
+ mask_image = None
120
+ init_image = None
121
+ image_init0 = None
122
+
123
+ if loop_args.use_looper and anim_args.animation_mode in ['2D', '3D']:
124
+ args.strength = loop_args.imageStrength
125
+ tweeningFrames = loop_args.tweeningFrameSchedule
126
+ blendFactor = .07
127
+ colorCorrectionFactor = loop_args.colorCorrectionFactor
128
+ jsonImages = json.loads(loop_args.imagesToKeyframe)
129
+ # find which image to show
130
+ parsedImages = {}
131
+ frameToChoose = 0
132
+ max_f = anim_args.max_frames - 1
133
+
134
+ for key, value in jsonImages.items():
135
+ if check_is_number(key): # default case 0:(1 + t %5), 30:(5-t%2)
136
+ parsedImages[key] = value
137
+ else: # math on the left hand side case 0:(1 + t %5), maxKeyframes/2:(5-t%2)
138
+ parsedImages[int(numexpr.evaluate(key))] = value
139
+
140
+ framesToImageSwapOn = list(map(int, list(parsedImages.keys())))
141
+
142
+ for swappingFrame in framesToImageSwapOn[1:]:
143
+ frameToChoose += (frame >= int(swappingFrame))
144
+
145
+ # find which frame to do our swapping on for tweening
146
+ skipFrame = 25
147
+ for fs, fe in pairwise_repl(framesToImageSwapOn):
148
+ if fs <= frame <= fe:
149
+ skipFrame = fe - fs
150
+
151
+ if frame % skipFrame <= tweeningFrames: # number of tweening frames
152
+ blendFactor = loop_args.blendFactorMax - loop_args.blendFactorSlope * math.cos((frame % tweeningFrames) / (tweeningFrames / 2))
153
+ init_image2, _ = load_img(list(jsonImages.values())[frameToChoose],
154
+ shape=(args.W, args.H),
155
+ use_alpha_as_mask=args.use_alpha_as_mask)
156
+ image_init0 = list(jsonImages.values())[0]
157
+
158
+ else: # they passed in a single init image
159
+ image_init0 = args.init_image
160
+
161
+ available_samplers = get_samplers_list()
162
+ if sampler_name is not None:
163
+ if sampler_name in available_samplers.keys():
164
+ p.sampler_name = available_samplers[sampler_name]
165
+ else:
166
+ raise RuntimeError(f"Sampler name '{sampler_name}' is invalid. Please check the available sampler list in the 'Run' tab")
167
+
168
+ if args.checkpoint is not None:
169
+ info = sd_models.get_closet_checkpoint_match(args.checkpoint)
170
+ if info is None:
171
+ raise RuntimeError(f"Unknown checkpoint: {args.checkpoint}")
172
+ sd_models.reload_model_weights(info=info)
173
+
174
+ if args.init_sample is not None:
175
+ # TODO: cleanup init_sample remains later
176
+ img = args.init_sample
177
+ init_image = img
178
+ image_init0 = img
179
+ if loop_args.use_looper and isJson(loop_args.imagesToKeyframe) and anim_args.animation_mode in ['2D', '3D']:
180
+ init_image = Image.blend(init_image, init_image2, blendFactor)
181
+ correction_colors = Image.blend(init_image, init_image2, colorCorrectionFactor)
182
+ p.color_corrections = [processing.setup_color_correction(correction_colors)]
183
+
184
+ # this is the first pass
185
+ elif (loop_args.use_looper and anim_args.animation_mode in ['2D', '3D']) or (args.use_init and args.init_image is not None and args.init_image != ''):
186
+ init_image, mask_image = load_img(image_init0, # initial init image
187
+ shape=(args.W, args.H),
188
+ use_alpha_as_mask=args.use_alpha_as_mask)
189
+
190
+ else:
191
+
192
+ if anim_args.animation_mode != 'Interpolation':
193
+ print(f"Not using an init image (doing pure txt2img)")
194
+ p_txt = processing.StableDiffusionProcessingTxt2Img(
195
+ sd_model=sd_model,
196
+ outpath_samples=root.tmp_deforum_run_duplicated_folder,
197
+ outpath_grids=root.tmp_deforum_run_duplicated_folder,
198
+ prompt=p.prompt,
199
+ styles=p.styles,
200
+ negative_prompt=p.negative_prompt,
201
+ seed=p.seed,
202
+ subseed=p.subseed,
203
+ subseed_strength=p.subseed_strength,
204
+ seed_resize_from_h=p.seed_resize_from_h,
205
+ seed_resize_from_w=p.seed_resize_from_w,
206
+ sampler_name=p.sampler_name,
207
+ batch_size=p.batch_size,
208
+ n_iter=p.n_iter,
209
+ steps=p.steps,
210
+ cfg_scale=p.cfg_scale,
211
+ width=p.width,
212
+ height=p.height,
213
+ restore_faces=p.restore_faces,
214
+ tiling=p.tiling,
215
+ enable_hr=False,
216
+ denoising_strength=0,
217
+ )
218
+
219
+ print_combined_table(args, anim_args, p_txt, keys, frame) # print dynamic table to cli
220
+
221
+ if is_controlnet_enabled(controlnet_args):
222
+ process_with_controlnet(p_txt, args, anim_args, loop_args, controlnet_args, root, is_img2img=False, frame_idx=frame)
223
+
224
+ processed = processing.process_images(p_txt)
225
+
226
+ if processed is None:
227
+ # Mask functions
228
+ if args.use_mask:
229
+ mask_image = args.mask_image
230
+ mask = prepare_mask(args.mask_file if mask_image is None else mask_image,
231
+ (args.W, args.H),
232
+ args.mask_brightness_adjust,
233
+ args.mask_contrast_adjust)
234
+ p.inpainting_mask_invert = args.invert_mask
235
+ p.inpainting_fill = args.fill
236
+ p.inpaint_full_res = args.full_res_mask
237
+ p.inpaint_full_res_padding = args.full_res_mask_padding
238
+ # prevent the loaded mask from throwing errors in Image operations (and in the webui crop-and-resize pipeline) if it is completely black
239
+ # doing this after contrast and brightness adjustments to ensure that mask is not passed as black or blank
240
+ mask = check_mask_for_errors(mask, args.invert_mask)
241
+ args.noise_mask = mask
242
+ else:
243
+ mask = None
244
+
245
+ assert not ((mask is not None and args.use_mask and args.overlay_mask) and (
246
+ args.init_sample is None and init_image is None)), "Need an init image when use_mask == True and overlay_mask == True"
247
+
248
+ p.init_images = [init_image]
249
+ p.image_mask = mask
250
+ p.image_cfg_scale = args.pix2pix_img_cfg_scale
251
+
252
+ print_combined_table(args, anim_args, p, keys, frame) # print dynamic table to cli
253
+
254
+ if is_controlnet_enabled(controlnet_args):
255
+ process_with_controlnet(p, args, anim_args, loop_args, controlnet_args, root, is_img2img=True, frame_idx=frame)
256
+
257
+ processed = processing.process_images(p)
258
+
259
+ if root.initial_info is None:
260
+ root.initial_info = processed.info
261
+
262
+ if root.first_frame is None:
263
+ root.first_frame = processed.images[0]
264
+
265
+ results = processed.images[0]
266
+
267
+ return results
268
+
269
+ def print_combined_table(args, anim_args, p, keys, frame_idx):
270
+ from rich.table import Table
271
+ from rich import box
272
+
273
+ table = Table(padding=0, box=box.ROUNDED)
274
+
275
+ field_names1 = ["Steps", "CFG"]
276
+ if anim_args.animation_mode != 'Interpolation':
277
+ field_names1.append("Denoise")
278
+ field_names1 += ["Subseed", "Subs. str"] * (anim_args.enable_subseed_scheduling)
279
+ field_names1 += ["Sampler"] * anim_args.enable_sampler_scheduling
280
+ field_names1 += ["Checkpoint"] * anim_args.enable_checkpoint_scheduling
281
+
282
+ for field_name in field_names1:
283
+ table.add_column(field_name, justify="center")
284
+
285
+ rows1 = [str(p.steps), str(p.cfg_scale)]
286
+ if anim_args.animation_mode != 'Interpolation':
287
+ rows1.append(f"{p.denoising_strength:.5g}" if p.denoising_strength is not None else "None")
288
+
289
+ rows1 += [str(p.subseed), f"{p.subseed_strength:.5g}"] * anim_args.enable_subseed_scheduling
290
+ rows1 += [p.sampler_name] * anim_args.enable_sampler_scheduling
291
+ rows1 += [str(args.checkpoint)] * anim_args.enable_checkpoint_scheduling
292
+
293
+ rows2 = []
294
+ if anim_args.animation_mode not in ['Video Input', 'Interpolation']:
295
+ if anim_args.animation_mode == '2D':
296
+ field_names2 = ["Angle", "Zoom"]
297
+ else:
298
+ field_names2 = []
299
+ field_names2 += ["Tr X", "Tr Y"]
300
+ if anim_args.animation_mode == '3D':
301
+ field_names2 += ["Tr Z", "Ro X", "Ro Y", "Ro Z"]
302
+ if anim_args.aspect_ratio_schedule.replace(" ", "") != '0:(1)':
303
+ field_names2 += ["Asp. Ratio"]
304
+ if anim_args.enable_perspective_flip:
305
+ field_names2 += ["Pf T", "Pf P", "Pf G", "Pf F"]
306
+
307
+ for field_name in field_names2:
308
+ table.add_column(field_name, justify="center")
309
+
310
+ if anim_args.animation_mode == '2D':
311
+ rows2 += [f"{keys.angle_series[frame_idx]:.5g}", f"{keys.zoom_series[frame_idx]:.5g}"]
312
+ rows2 += [f"{keys.translation_x_series[frame_idx]:.5g}", f"{keys.translation_y_series[frame_idx]:.5g}"]
313
+
314
+ if anim_args.animation_mode == '3D':
315
+ rows2 += [f"{keys.translation_z_series[frame_idx]:.5g}", f"{keys.rotation_3d_x_series[frame_idx]:.5g}",
316
+ f"{keys.rotation_3d_y_series[frame_idx]:.5g}", f"{keys.rotation_3d_z_series[frame_idx]:.5g}"]
317
+ if anim_args.aspect_ratio_schedule.replace(" ", "") != '0:(1)':
318
+ rows2 += [f"{keys.aspect_ratio_series[frame_idx]:.5g}"]
319
+ if anim_args.enable_perspective_flip:
320
+ rows2 += [f"{keys.perspective_flip_theta_series[frame_idx]:.5g}", f"{keys.perspective_flip_phi_series[frame_idx]:.5g}",
321
+ f"{keys.perspective_flip_gamma_series[frame_idx]:.5g}", f"{keys.perspective_flip_fv_series[frame_idx]:.5g}"]
322
+
323
+ table.add_row(*rows1, *rows2)
324
+ console.print(table)
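For context on how the pieces above fit together: generate_with_nans_check only reports whether the VAE produced NaNs, while the reroll/interrupt branch near the top of this file decides what to do about a blank frame. A minimal, self-contained sketch of that retry pattern follows; fake_generate, the seed bump, and max_rerolls are illustrative stand-ins, not the extension's actual API.

import random

def fake_generate(seed):
    # stand-in for generate_with_nans_check(); returns (image, nans_found)
    nans_found = random.random() < 0.3  # pretend ~30% of samples NaN out
    return (None, True) if nans_found else (f"image@seed{seed}", False)

def generate_with_reroll(seed, reroll_policy="reroll", max_rerolls=3):
    image, nans = fake_generate(seed)
    tries_left = max_rerolls
    while nans and reroll_policy == "reroll" and tries_left > 0:
        seed += 1                       # pick a fresh seed and retry
        image, nans = fake_generate(seed)
        tries_left -= 1
    return None if nans else image      # None signals "interrupt or skip" to the caller

print(generate_with_reroll(42))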
scripts/deforum_helpers/gradio_funcs.py ADDED
@@ -0,0 +1,214 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import gradio as gr
19
+ import modules.paths as ph
20
+ from .general_utils import get_os
21
+ from .upscaling import process_ncnn_upscale_vid_upload_logic
22
+ from .video_audio_utilities import extract_number, get_quick_vid_info, get_ffmpeg_params
23
+ from .frame_interpolation import process_interp_vid_upload_logic, process_interp_pics_upload_logic
24
+ from .vid2depth import process_depth_vid_upload_logic
25
+
26
+ f_models_path = ph.models_path + '/Deforum'
27
+
28
+ # START gradio-to-frame-interpolation / upscaling functions
29
+ def upload_vid_to_interpolate(file, engine, x_am, sl_enabled, sl_am, keep_imgs, in_vid_fps):
30
+ # print msg and do nothing if vid not uploaded or interp_x not provided
31
+ if not file or engine == 'None':
32
+ return print("Please upload a video and set a proper value for 'Interp X'. Can't interpolate x0 times :)")
33
+ f_location, f_crf, f_preset = get_ffmpeg_params()
34
+
35
+ process_interp_vid_upload_logic(file, engine, x_am, sl_enabled, sl_am, keep_imgs, f_location, f_crf, f_preset, in_vid_fps, f_models_path, file.orig_name)
36
+
37
+ def upload_pics_to_interpolate(pic_list, engine, x_am, sl_enabled, sl_am, keep_imgs, fps, add_audio, audio_track):
38
+ from PIL import Image
39
+
40
+ if pic_list is None or len(pic_list) < 2:
41
+ return print("Please upload at least 2 pics for interpolation.")
42
+ f_location, f_crf, f_preset = get_ffmpeg_params()
43
+ # make sure all uploaded pics have the same resolution
44
+ pic_sizes = [Image.open(picture_path.name).size for picture_path in pic_list]
45
+ if len(set(pic_sizes)) != 1:
46
+ return print("All uploaded pics need to be of the same Width and Height / resolution.")
47
+
48
+ resolution = pic_sizes[0]
49
+
50
+ process_interp_pics_upload_logic(pic_list, engine, x_am, sl_enabled, sl_am, keep_imgs, f_location, f_crf, f_preset, fps, f_models_path, resolution, add_audio, audio_track)
51
+
52
+ def ncnn_upload_vid_to_upscale(vid_path, in_vid_fps, in_vid_res, out_vid_res, upscale_model, upscale_factor, keep_imgs):
53
+ if vid_path is None:
54
+ print("Please upload a video :)")
55
+ return
56
+ f_location, f_crf, f_preset = get_ffmpeg_params()
57
+ current_user = get_os()
58
+ process_ncnn_upscale_vid_upload_logic(vid_path, in_vid_fps, in_vid_res, out_vid_res, f_models_path, upscale_model, upscale_factor, keep_imgs, f_location, f_crf, f_preset, current_user)
59
+
60
+ def upload_vid_to_depth(vid_to_depth_chosen_file, mode, thresholding, threshold_value, threshold_value_max, adapt_block_size, adapt_c, invert, end_blur, midas_weight_vid2depth, depth_keep_imgs):
61
+ # print msg and do nothing if vid not uploaded
62
+ if not vid_to_depth_chosen_file:
63
+ return print("Please upload a video :()")
64
+ f_location, f_crf, f_preset = get_ffmpeg_params()
65
+
66
+ process_depth_vid_upload_logic(vid_to_depth_chosen_file, mode, thresholding, threshold_value, threshold_value_max, adapt_block_size, adapt_c, invert, end_blur, midas_weight_vid2depth,
67
+ vid_to_depth_chosen_file.orig_name, depth_keep_imgs, f_location, f_crf, f_preset, f_models_path)
68
+
69
+ # END gradio-to-frame-interpolation / upscaling functions
70
+
71
+ def change_visibility_from_skip_video(choice):
72
+ return gr.update(visible=False) if choice else gr.update(visible=True)
73
+
74
+ def update_r_upscale_factor(choice):
75
+ return gr.update(value='x4', choices=['x4']) if choice != 'realesr-animevideov3' else gr.update(value='x2', choices=['x2', 'x3', 'x4'])
76
+
77
+ def change_perlin_visibility(choice):
78
+ return gr.update(visible=choice == "perlin")
79
+
80
+ def legacy_3d_mode(choice):
81
+ return gr.update(visible=choice.lower() in ["midas+adabins (old)", 'zoe+adabins (old)'])
82
+
83
+ def change_color_coherence_image_path_visibility(choice):
84
+ return gr.update(visible=choice == "Image")
85
+
86
+ def change_color_coherence_video_every_N_frames_visibility(choice):
87
+ return gr.update(visible=choice == "Video Input")
88
+
89
+ def change_seed_iter_visibility(choice):
90
+ return gr.update(visible=choice == "iter")
91
+
92
+ def change_seed_schedule_visibility(choice):
93
+ return gr.update(visible=choice == "schedule")
94
+
95
+ def disable_pers_flip_accord(choice):
96
+ return gr.update(visible=True) if choice in ['2D', '3D'] else gr.update(visible=False)
97
+
98
+ def per_flip_handle(anim_mode, per_f_enabled):
99
+ if anim_mode in ['2D', '3D'] and per_f_enabled:
100
+ return gr.update(visible=True)
101
+ return gr.update(visible=False)
102
+
103
+ def change_max_frames_visibility(choice):
104
+ return gr.update(visible=choice != "Video Input")
105
+
106
+ def change_diffusion_cadence_visibility(choice):
107
+ return gr.update(visible=choice not in ['Video Input', 'Interpolation'])
108
+
109
+ def disble_3d_related_stuff(choice):
110
+ return gr.update(visible=False) if choice != '3D' else gr.update(visible=True)
111
+
112
+ def only_show_in_non_3d_mode(choice):
113
+ return gr.update(visible=False) if choice == '3D' else gr.update(visible=True)
114
+
115
+ def enable_2d_related_stuff(choice):
116
+ return gr.update(visible=True) if choice == '2D' else gr.update(visible=False)
117
+
118
+ def disable_by_interpolation(choice):
119
+ return gr.update(visible=False) if choice in ['Interpolation'] else gr.update(visible=True)
120
+
121
+ def disable_by_video_input(choice):
122
+ return gr.update(visible=False) if choice in ['Video Input'] else gr.update(visible=True)
123
+
124
+ def hide_if_none(choice):
125
+ return gr.update(visible=choice != "None")
126
+
127
+ def change_gif_button_visibility(choice):
128
+ if choice is None or choice == "":
129
+ return gr.update(visible=True)
130
+ return gr.update(visible=False, value=False) if int(choice) > 30 else gr.update(visible=True)
131
+
132
+ def hide_if_false(choice):
133
+ return gr.update(visible=True) if choice else gr.update(visible=False)
134
+
135
+ def hide_if_true(choice):
136
+ return gr.update(visible=False) if choice else gr.update(visible=True)
137
+
138
+ def disable_by_hybrid_composite_dynamic(choice, comp_mask_type):
139
+ if choice in ['Normal', 'Before Motion', 'After Generation']:
140
+ if comp_mask_type != 'None':
141
+ return gr.update(visible=True)
142
+ return gr.update(visible=False)
143
+
144
+ def disable_by_non_optical_flow(choice):
145
+ return gr.update(visible=False) if choice != 'Optical Flow' else gr.update(visible=True)
146
+
147
+ # Upscaling Gradio UI related funcs
148
+ def vid_upscale_gradio_update_stats(vid_path, upscale_factor):
149
+ if not vid_path:
150
+ return '---', '---', '---', '---'
151
+ factor = extract_number(upscale_factor)
152
+ fps, fcount, resolution = get_quick_vid_info(vid_path.name)
153
+ in_res_str = f"{resolution[0]}*{resolution[1]}"
154
+ out_res_str = f"{resolution[0] * factor}*{resolution[1] * factor}"
155
+ return fps, fcount, in_res_str, out_res_str
156
+
157
+ def update_upscale_out_res(in_res, upscale_factor):
158
+ if not in_res:
159
+ return '---'
160
+ factor = extract_number(upscale_factor)
161
+ w, h = [int(x) * factor for x in in_res.split('*')]
162
+ return f"{w}*{h}"
163
+
164
+ def update_upscale_out_res_by_model_name(in_res, upscale_model_name):
165
+ if not upscale_model_name or in_res == '---':
166
+ return '---'
167
+ factor = 2 if upscale_model_name == 'realesr-animevideov3' else 4
168
+ return f"{int(in_res.split('*')[0]) * factor}*{int(in_res.split('*')[1]) * factor}"
169
+
170
+ def hide_optical_flow_cadence(cadence_value):
171
+ return gr.update(visible=True) if cadence_value > 1 else gr.update(visible=False)
172
+
173
+ def hide_interp_by_interp_status(choice):
174
+ return gr.update(visible=False) if choice == 'None' else gr.update(visible=True)
175
+
176
+ def change_interp_x_max_limit(engine_name, current_value):
177
+ if engine_name == 'FILM':
178
+ return gr.update(maximum=300)
179
+ elif current_value > 10:
180
+ return gr.update(maximum=10, value=2)
181
+ return gr.update(maximum=10)
182
+
183
+ def hide_interp_stats(choice):
184
+ return gr.update(visible=True) if choice is not None else gr.update(visible=False)
185
+
186
+ def show_hybrid_html_msg(choice):
187
+ return gr.update(visible=True) if choice not in ['2D', '3D'] else gr.update(visible=False)
188
+
189
+ def change_hybrid_tab_status(choice):
190
+ return gr.update(visible=True) if choice in ['2D', '3D'] else gr.update(visible=False)
191
+
192
+ def show_leres_html_msg(choice):
193
+ return gr.update(visible=True) if choice.lower() == 'leres' else gr.update(visible=False)
194
+
195
+ def show_when_ddim(sampler_name):
196
+ return gr.update(visible=True) if sampler_name.lower() == 'ddim' else gr.update(visible=False)
197
+
198
+ def show_when_ancestral_samplers(sampler_name):
199
+ return gr.update(visible=True) if sampler_name.lower() in ['euler a', 'dpm++ 2s a', 'dpm2 a', 'dpm2 a karras', 'dpm++ 2s a karras'] else gr.update(visible=False)
200
+
201
+ def change_css(checkbox_status):
202
+ if checkbox_status:
203
+ display = "block"
204
+ else:
205
+ display = "none"
206
+
207
+ html_template = f'''
208
+ <style>
209
+ #tab_deforum_interface .svelte-e8n7p6, #f_interp_accord {{
210
+ display: {display} !important;
211
+ }}
212
+ </style>
213
+ '''
214
+ return html_template
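These helpers all return gr.update(...) objects and are meant to be bound to Gradio change events elsewhere in the extension's UI code. A rough wiring sketch using two of the functions above is shown below; the component names, choices, and defaults are made up for illustration.

import gradio as gr

with gr.Blocks() as demo:
    seed_behavior = gr.Dropdown(["iter", "fixed", "schedule"], value="iter", label="Seed behavior")
    seed_iter_n = gr.Number(value=1, label="Seed iter N", visible=True)
    seed_schedule = gr.Textbox(value="0:(s)", label="Seed schedule", visible=False)
    # each change of the dropdown toggles the matching control's visibility
    seed_behavior.change(fn=change_seed_iter_visibility, inputs=seed_behavior, outputs=seed_iter_n)
    seed_behavior.change(fn=change_seed_schedule_visibility, inputs=seed_behavior, outputs=seed_schedule)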
scripts/deforum_helpers/human_masking.py ADDED
@@ -0,0 +1,87 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os, cv2
19
+ import torch
20
+ from pathlib import Path
21
+ from multiprocessing import freeze_support
22
+
23
+ def extract_frames(input_video_path, output_imgs_path):
24
+ # Open the video file
25
+ vidcap = cv2.VideoCapture(input_video_path)
26
+
27
+ # Get the total number of frames in the video
28
+ frame_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
29
+
30
+ # Create the output directory if it does not exist
31
+ os.makedirs(output_imgs_path, exist_ok=True)
32
+
33
+ # Extract the frames
34
+ for i in range(frame_count):
35
+ success, image = vidcap.read()
36
+ if success:
37
+ cv2.imwrite(os.path.join(output_imgs_path, f"frame{i}.png"), image)
38
+ print(f"{frame_count} frames extracted and saved to {output_imgs_path}")
39
+
40
+ def video2humanmasks(input_frames_path, output_folder_path, output_type, fps):
41
+ # freeze support is needed for video outputting
42
+ freeze_support()
43
+
44
+ # check if input path exists and is a directory
45
+ if not os.path.exists(input_frames_path) or not os.path.isdir(input_frames_path):
46
+ raise ValueError("Invalid input path: {}".format(input_frames_path))
47
+
48
+ # check if output path exists and is a directory
49
+ if not os.path.exists(output_folder_path) or not os.path.isdir(output_folder_path):
50
+ raise ValueError("Invalid output path: {}".format(output_folder_path))
51
+
52
+ # check if output_type is valid
53
+ valid_output_types = ["video", "pngs", "both"]
54
+ if output_type.lower() not in valid_output_types:
55
+ raise ValueError("Invalid output type: {}. Must be one of {}".format(output_type, valid_output_types))
56
+
57
+ # try to predict where torch cache lives, so we can try and fetch models from cache in the next step
58
+ predicted_torch_model_cache_path = os.path.join(Path.home(), ".cache", "torch", "hub", "hithereai_RobustVideoMatting_master")
59
+ predicted_rvm_cache_testfile = os.path.join(predicted_torch_model_cache_path, "hubconf.py")
60
+
61
+ # try to fetch the models from cache, and only if they can't be found, download from the internet (to enable offline usage)
62
+ try:
63
+ # Try to fetch the models from cache
64
+ convert_video = torch.hub.load(predicted_torch_model_cache_path, "converter", source='local')
65
+ model = torch.hub.load(predicted_torch_model_cache_path, "resnet50", source='local').cuda()
66
+ except:
67
+ # Download from the internet if not found in cache
68
+ convert_video = torch.hub.load("hithereai/RobustVideoMatting", "converter")
69
+ model = torch.hub.load("hithereai/RobustVideoMatting", "resnet50").cuda()
70
+
71
+ output_alpha_vid_path = os.path.join(output_folder_path, "human_masked_video.mp4")
72
+ # extract human masks from the input folder's imgs.
73
+ # in this step PNGs will be extracted only if output_type is set to PNGs. Otherwise a video will be made, and in the case of Both, the video will be extracted in the next step to PNGs
74
+ convert_video(
75
+ model,
76
+ input_source=input_frames_path, # full path of the folder that contains all of the extracted input imgs
77
+ output_type='video' if output_type.upper() in ("VIDEO", "BOTH") else 'png_sequence',
78
+ output_alpha=output_alpha_vid_path if output_type.upper() in ("VIDEO", "BOTH") else output_folder_path,
79
+ output_video_mbps=4,
80
+ output_video_fps=fps,
81
+ downsample_ratio=None, # None for auto
82
+ seq_chunk=12, # Process n frames at once for better parallelism
83
+ progress=True # show extraction progress
84
+ )
85
+
86
+ if output_type.lower() == "both":
87
+ extract_frames(output_alpha_vid_path, output_folder_path)
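A rough usage sketch for video2humanmasks, assuming input frames have already been extracted to a folder; the paths and fps below are placeholders, not values taken from the extension.

# hypothetical paths; both directories must already exist
input_frames = "outputs/run01/inputframes"
masks_out = "outputs/run01/human_masks"

# with output_type="both": writes human_masked_video.mp4 into masks_out,
# then dumps per-frame PNG masks from that video
video2humanmasks(input_frames, masks_out, output_type="both", fps=12.5)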
scripts/deforum_helpers/hybrid_video.py ADDED
@@ -0,0 +1,611 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ import pathlib
20
+ import random
21
+ import cv2
22
+ import numpy as np
23
+ import PIL
24
+ from PIL import Image, ImageChops, ImageOps, ImageEnhance
25
+ from scipy.ndimage.filters import gaussian_filter
26
+ from .consistency_check import make_consistency
27
+ from .human_masking import video2humanmasks
28
+ from .load_images import load_image
29
+ from .video_audio_utilities import vid2frames, get_quick_vid_info, get_frame_name
30
+
31
+ def delete_all_imgs_in_folder(folder_path):
32
+ files = list(pathlib.Path(folder_path).glob('*.jpg'))
33
+ files.extend(list(pathlib.Path(folder_path).glob('*.png')))
34
+ for f in files: os.remove(f)
35
+
36
+ def hybrid_generation(args, anim_args, root):
37
+ video_in_frame_path = os.path.join(args.outdir, 'inputframes')
38
+ hybrid_frame_path = os.path.join(args.outdir, 'hybridframes')
39
+ human_masks_path = os.path.join(args.outdir, 'human_masks')
40
+
41
+ # create hybridframes folder whether using init_image or inputframes
42
+ os.makedirs(hybrid_frame_path, exist_ok=True)
43
+
44
+ if anim_args.hybrid_generate_inputframes:
45
+ # create folders for the video input frames and optional hybrid frames to live in
46
+ os.makedirs(video_in_frame_path, exist_ok=True)
47
+
48
+ # delete frames if overwrite = true
49
+ if anim_args.overwrite_extracted_frames:
50
+ delete_all_imgs_in_folder(hybrid_frame_path)
51
+
52
+ # save the video frames from input video
53
+ print(f"Video to extract: {anim_args.video_init_path}")
54
+ print(f"Extracting video (1 every {anim_args.extract_nth_frame}) frames to {video_in_frame_path}...")
55
+ video_fps = vid2frames(video_path=anim_args.video_init_path, video_in_frame_path=video_in_frame_path, n=anim_args.extract_nth_frame, overwrite=anim_args.overwrite_extracted_frames, extract_from_frame=anim_args.extract_from_frame, extract_to_frame=anim_args.extract_to_frame)
56
+
57
+ # extract alpha masks of humans from the extracted input video imgs
58
+ if anim_args.hybrid_generate_human_masks != "None":
59
+ # create a folder for the human masks imgs to live in
60
+ print("Checking/creating a folder for the human masks")
61
+ os.makedirs(human_masks_path, exist_ok=True)
62
+
63
+ # delete frames if overwrite = true
64
+ if anim_args.overwrite_extracted_frames:
65
+ delete_all_imgs_in_folder(human_masks_path)
66
+
67
+ # in case generate_input_frames isn't selected, we won't have the video fps since vid2frames wasn't called, so check the video fps here instead
68
+ if not anim_args.hybrid_generate_inputframes:
69
+ _, video_fps, _ = get_quick_vid_info(anim_args.video_init_path)
70
+
71
+ # calculate the correct fps of the masked video according to the original video fps and 'extract_nth_frame'
72
+ output_fps = video_fps/anim_args.extract_nth_frame
73
+
74
+ # generate the actual alpha masks from the input imgs
75
+ print("Extracting alpha human masks from the input frames")
76
+ video2humanmasks(video_in_frame_path, human_masks_path, anim_args.hybrid_generate_human_masks, output_fps)
77
+
78
+ # get sorted list of inputfiles
79
+ inputfiles = sorted(pathlib.Path(video_in_frame_path).glob('*.jpg'))
80
+
81
+ if not anim_args.hybrid_use_init_image:
82
+ # determine max frames from length of input frames
83
+ anim_args.max_frames = len(inputfiles)
84
+ print(f"Using {anim_args.max_frames} input frames from {video_in_frame_path}...")
85
+
86
+ # use first frame as init
87
+ if anim_args.hybrid_use_first_frame_as_init_image:
88
+ for f in inputfiles:
89
+ args.init_image = str(f)
90
+ args.use_init = True
91
+ print(f"Using init_image from video: {args.init_image}")
92
+ break
93
+
94
+ return args, anim_args, inputfiles
95
+
96
+ def hybrid_composite(args, anim_args, frame_idx, prev_img, depth_model, hybrid_comp_schedules, root):
97
+ video_frame = os.path.join(args.outdir, 'inputframes', get_frame_name(anim_args.video_init_path) + f"{frame_idx:09}.jpg")
98
+ video_depth_frame = os.path.join(args.outdir, 'hybridframes', get_frame_name(anim_args.video_init_path) + f"_vid_depth{frame_idx:09}.jpg")
99
+ depth_frame = os.path.join(args.outdir, f"{args.timestring}_depth_{frame_idx-1:09}.png")
100
+ mask_frame = os.path.join(args.outdir, 'hybridframes', get_frame_name(anim_args.video_init_path) + f"_mask{frame_idx:09}.jpg")
101
+ comp_frame = os.path.join(args.outdir, 'hybridframes', get_frame_name(anim_args.video_init_path) + f"_comp{frame_idx:09}.jpg")
102
+ prev_frame = os.path.join(args.outdir, 'hybridframes', get_frame_name(anim_args.video_init_path) + f"_prev{frame_idx:09}.jpg")
103
+ prev_img = cv2.cvtColor(prev_img, cv2.COLOR_BGR2RGB)
104
+ prev_img_hybrid = Image.fromarray(prev_img)
105
+ if anim_args.hybrid_use_init_image:
106
+ video_image = load_image(args.init_image)
107
+ else:
108
+ video_image = Image.open(video_frame)
109
+ video_image = video_image.resize((args.W, args.H), PIL.Image.LANCZOS)
110
+ hybrid_mask = None
111
+
112
+ # composite mask types
113
+ if anim_args.hybrid_comp_mask_type == 'Depth': # get depth from last generation
114
+ hybrid_mask = Image.open(depth_frame)
115
+ elif anim_args.hybrid_comp_mask_type == 'Video Depth': # get video depth
116
+ video_depth = depth_model.predict(np.array(video_image), anim_args.midas_weight, root.half_precision)
117
+ depth_model.save(video_depth_frame, video_depth)
118
+ hybrid_mask = Image.open(video_depth_frame)
119
+ elif anim_args.hybrid_comp_mask_type == 'Blend': # create blend mask image
120
+ hybrid_mask = Image.blend(ImageOps.grayscale(prev_img_hybrid), ImageOps.grayscale(video_image), hybrid_comp_schedules['mask_blend_alpha'])
121
+ elif anim_args.hybrid_comp_mask_type == 'Difference': # create difference mask image
122
+ hybrid_mask = ImageChops.difference(ImageOps.grayscale(prev_img_hybrid), ImageOps.grayscale(video_image))
123
+
124
+ # optionally invert mask, if mask type is defined
125
+ if anim_args.hybrid_comp_mask_inverse and anim_args.hybrid_comp_mask_type != "None":
126
+ hybrid_mask = ImageOps.invert(hybrid_mask)
127
+
128
+ # if a mask type is selected, make composition
129
+ if hybrid_mask is None:
130
+ hybrid_comp = video_image
131
+ else:
132
+ # ensure grayscale
133
+ hybrid_mask = ImageOps.grayscale(hybrid_mask)
134
+ # equalization before
135
+ if anim_args.hybrid_comp_mask_equalize in ['Before', 'Both']:
136
+ hybrid_mask = ImageOps.equalize(hybrid_mask)
137
+ # contrast
138
+ hybrid_mask = ImageEnhance.Contrast(hybrid_mask).enhance(hybrid_comp_schedules['mask_contrast'])
139
+ # auto contrast with cutoffs lo/hi
140
+ if anim_args.hybrid_comp_mask_auto_contrast:
141
+ hybrid_mask = autocontrast_grayscale(np.array(hybrid_mask), hybrid_comp_schedules['mask_auto_contrast_cutoff_low'], hybrid_comp_schedules['mask_auto_contrast_cutoff_high'])
142
+ hybrid_mask = Image.fromarray(hybrid_mask)
143
+ hybrid_mask = ImageOps.grayscale(hybrid_mask)
144
+ if anim_args.hybrid_comp_save_extra_frames:
145
+ hybrid_mask.save(mask_frame)
146
+ # equalization after
147
+ if anim_args.hybrid_comp_mask_equalize in ['After', 'Both']:
148
+ hybrid_mask = ImageOps.equalize(hybrid_mask)
149
+ # do compositing and save
150
+ hybrid_comp = Image.composite(prev_img_hybrid, video_image, hybrid_mask)
151
+ if anim_args.hybrid_comp_save_extra_frames:
152
+ hybrid_comp.save(comp_frame)
153
+
154
+ # final blend of composite with prev_img, or just a blend if no composite is selected
155
+ hybrid_blend = Image.blend(prev_img_hybrid, hybrid_comp, hybrid_comp_schedules['alpha'])
156
+ if anim_args.hybrid_comp_save_extra_frames:
157
+ hybrid_blend.save(prev_frame)
158
+
159
+ prev_img = cv2.cvtColor(np.array(hybrid_blend), cv2.COLOR_RGB2BGR)
160
+
161
+ # restore to np array and return
162
+ return args, prev_img
163
+
164
+ def get_matrix_for_hybrid_motion(frame_idx, dimensions, inputfiles, hybrid_motion):
165
+ print(f"Calculating {hybrid_motion} RANSAC matrix for frames {frame_idx} to {frame_idx+1}")
166
+ img1 = cv2.cvtColor(get_resized_image_from_filename(str(inputfiles[frame_idx]), dimensions), cv2.COLOR_BGR2GRAY)
167
+ img2 = cv2.cvtColor(get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions), cv2.COLOR_BGR2GRAY)
168
+ M = get_transformation_matrix_from_images(img1, img2, hybrid_motion)
169
+ return M
170
+
171
+ def get_matrix_for_hybrid_motion_prev(frame_idx, dimensions, inputfiles, prev_img, hybrid_motion):
172
+ print(f"Calculating {hybrid_motion} RANSAC matrix for frames {frame_idx} to {frame_idx+1}")
173
+ # first handle invalid images by returning default matrix
174
+ height, width = prev_img.shape[:2]
175
+ if height == 0 or width == 0 or prev_img.dtype != np.uint8:
176
+ return get_hybrid_motion_default_matrix(hybrid_motion)
177
+ else:
178
+ prev_img_gray = cv2.cvtColor(prev_img, cv2.COLOR_BGR2GRAY)
179
+ img = cv2.cvtColor(get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions), cv2.COLOR_BGR2GRAY)
180
+ M = get_transformation_matrix_from_images(prev_img_gray, img, hybrid_motion)
181
+ return M
182
+
183
+ def get_flow_for_hybrid_motion(frame_idx, dimensions, inputfiles, hybrid_frame_path, prev_flow, method, raft_model, consistency_check=True, consistency_blur=0, do_flow_visualization=False):
184
+ print(f"Calculating {method} optical flow {'w/consistency mask' if consistency_check else ''} for frames {frame_idx} to {frame_idx+1}")
185
+ i1 = get_resized_image_from_filename(str(inputfiles[frame_idx]), dimensions)
186
+ i2 = get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions)
187
+ if consistency_check:
188
+ flow, reliable_flow = get_reliable_flow_from_images(i1, i2, method, raft_model, prev_flow, consistency_blur) # forward flow w/backward consistency check
189
+ if do_flow_visualization: save_flow_mask_visualization(frame_idx, reliable_flow, hybrid_frame_path)
190
+ else:
191
+ flow = get_flow_from_images(i1, i2, method, raft_model, prev_flow) # old single flow forward
192
+ if do_flow_visualization: save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_frame_path)
193
+ return flow
194
+
195
+ def get_flow_for_hybrid_motion_prev(frame_idx, dimensions, inputfiles, hybrid_frame_path, prev_flow, prev_img, method, raft_model, consistency_check=True, consistency_blur=0, do_flow_visualization=False):
196
+ print(f"Calculating {method} optical flow {'w/consistency mask' if consistency_check else ''} for frames {frame_idx} to {frame_idx+1}")
197
+ reliable_flow = None
198
+ # first handle invalid images by returning default flow
199
+ height, width = prev_img.shape[:2]
200
+ if height == 0 or width == 0:
201
+ flow = get_hybrid_motion_default_flow(dimensions)
202
+ else:
203
+ i1 = prev_img.astype(np.uint8)
204
+ i2 = get_resized_image_from_filename(str(inputfiles[frame_idx+1]), dimensions)
205
+ if consistency_check:
206
+ flow, reliable_flow = get_reliable_flow_from_images(i1, i2, method, raft_model, prev_flow, consistency_blur) # forward flow w/backward consistency check
207
+ if do_flow_visualization: save_flow_mask_visualization(frame_idx, reliable_flow, hybrid_frame_path)
208
+ else:
209
+ flow = get_flow_from_images(i1, i2, method, raft_model, prev_flow)
210
+ if do_flow_visualization: save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_frame_path)
211
+ return flow
212
+
213
+ def get_reliable_flow_from_images(i1, i2, method, raft_model, prev_flow, consistency_blur, reliability=0):
214
+ flow_forward = get_flow_from_images(i1, i2, method, raft_model, prev_flow)
215
+ flow_backward = get_flow_from_images(i2, i1, method, raft_model, None)
216
+ reliable_flow = make_consistency(flow_forward, flow_backward, edges_unreliable=False)
217
+ if consistency_blur > 0:
218
+ reliable_flow = custom_gaussian_blur(reliable_flow.astype(np.float32), 1, consistency_blur)
219
+ return filter_flow(flow_forward, reliable_flow, consistency_blur, reliability), reliable_flow
220
+
221
+ def custom_gaussian_blur(input_array, blur_size, sigma):
222
+ return gaussian_filter(input_array, sigma=(sigma, sigma, 0), order=0, mode='constant', cval=0.0, truncate=blur_size)
223
+
224
+ def filter_flow(flow, reliable_flow, reliability=0.5, consistency_blur=0):
225
+ # reliability from reliable flow: -0.75 is bad, 0 is neutral/outside the frame, 1 is great
226
+ # Create a mask from the first channel of the reliable_flow array
227
+ mask = reliable_flow[..., 0]
228
+
229
+ # to set everything to 1 or 0 based on reliability
230
+ # mask = np.where(mask >= reliability, 1, 0)
231
+
232
+ # Expand the mask to match the shape of the forward_flow array
233
+ mask = np.repeat(mask[..., np.newaxis], flow.shape[2], axis=2)
234
+
235
+ # Apply the mask to the flow
236
+ return flow * mask
237
+
238
+ def image_transform_ransac(image_cv2, M, hybrid_motion, depth=None):
239
+ if hybrid_motion == "Perspective":
240
+ return image_transform_perspective(image_cv2, M, depth)
241
+ else: # Affine
242
+ return image_transform_affine(image_cv2, M, depth)
243
+
244
+ def image_transform_optical_flow(img, flow, flow_factor):
245
+ # if flow factor not normal, calculate flow factor
246
+ if flow_factor != 1:
247
+ flow = flow * flow_factor
248
+ # flow is reversed, so you need to reverse it:
249
+ flow = -flow
250
+ h, w = img.shape[:2]
251
+ flow[:, :, 0] += np.arange(w)
252
+ flow[:, :, 1] += np.arange(h)[:,np.newaxis]
253
+ return remap(img, flow)
254
+
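image_transform_optical_flow above warps a frame by adding the pixel grid to the (negated, scaled) flow and remapping. The coordinate-grid trick on its own looks like the sketch below; the toy image, the constant flow, and the direct cv2.remap call are illustrative and do not go through the module's remap() helper, which also pads the border first.

import numpy as np
import cv2

# toy 64x64 image with a white square, and a constant flow of +3 px in x
img = np.zeros((64, 64, 3), np.uint8)
img[20:40, 20:40] = 255
flow = np.zeros((64, 64, 2), np.float32)
flow[..., 0] = 3.0

# cv2.remap expects absolute sample coordinates, so add the pixel grid to the flow,
# the same trick image_transform_optical_flow uses before calling remap()
map_xy = flow.copy()
map_xy[..., 0] += np.arange(64, dtype=np.float32)
map_xy[..., 1] += np.arange(64, dtype=np.float32)[:, None]

# each output pixel (x, y) is sampled from (x + dx, y + dy) in the source image
warped = cv2.remap(img, map_xy, None, cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)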
255
+ def image_transform_affine(image_cv2, M, depth=None):
256
+ if depth is None:
257
+ return cv2.warpAffine(
258
+ image_cv2,
259
+ M,
260
+ (image_cv2.shape[1],image_cv2.shape[0]),
261
+ borderMode=cv2.BORDER_REFLECT_101
262
+ )
263
+ else: # NEED TO IMPLEMENT THE FOLLOWING FUNCTION
264
+ return depth_based_affine_warp(
265
+ image_cv2,
266
+ depth,
267
+ M
268
+ )
269
+
270
+ def image_transform_perspective(image_cv2, M, depth=None):
271
+ if depth is None:
272
+ return cv2.warpPerspective(
273
+ image_cv2,
274
+ M,
275
+ (image_cv2.shape[1], image_cv2.shape[0]),
276
+ borderMode=cv2.BORDER_REFLECT_101
277
+ )
278
+ else: # NEED TO IMPLEMENT THE FOLLOWING FUNCTION
279
+ return render_3d_perspective(
280
+ image_cv2,
281
+ depth,
282
+ M
283
+ )
284
+
285
+ def get_hybrid_motion_default_matrix(hybrid_motion):
286
+ if hybrid_motion == "Perspective":
287
+ arr = np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
288
+ else:
289
+ arr = np.array([[1., 0., 0.], [0., 1., 0.]])
290
+ return arr
291
+
292
+ def get_hybrid_motion_default_flow(dimensions):
293
+ cols, rows = dimensions
294
+ flow = np.zeros((rows, cols, 2), np.float32)
295
+ return flow
296
+
297
+ def get_transformation_matrix_from_images(img1, img2, hybrid_motion, confidence=0.75):
298
+ # Create SIFT detector and feature extractor
299
+ sift = cv2.SIFT_create()
300
+
301
+ # Detect keypoints and compute descriptors
302
+ kp1, des1 = sift.detectAndCompute(img1, None)
303
+ kp2, des2 = sift.detectAndCompute(img2, None)
304
+
305
+ # Create BFMatcher object and match descriptors
306
+ bf = cv2.BFMatcher()
307
+ matches = bf.knnMatch(des1, des2, k=2)
308
+
309
+ # Apply ratio test to filter good matches
310
+ good_matches = []
311
+ for m, n in matches:
312
+ if m.distance < confidence * n.distance:
313
+ good_matches.append(m)
314
+
315
+ if len(good_matches) <= 8:
316
+ return get_hybrid_motion_default_matrix(hybrid_motion)
317
+
318
+ # Convert keypoints to numpy arrays
319
+ src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
320
+ dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
321
+
322
+ if len(src_pts) <= 8 or len(dst_pts) <= 8:
323
+ return get_hybrid_motion_default_matrix(hybrid_motion)
324
+ elif hybrid_motion == "Perspective": # Perspective transformation (3x3)
325
+ transformation_matrix, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
326
+ return transformation_matrix
327
+ else: # Affine - rigid transformation (no skew 3x2)
328
+ transformation_rigid_matrix, rigid_mask = cv2.estimateAffinePartial2D(src_pts, dst_pts)
329
+ return transformation_rigid_matrix
330
+
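A usage sketch for the RANSAC helper above: estimate the frame-to-frame motion from two grayscale frames and apply it with warpAffine. The file names are placeholders for two real consecutive frames, and the snippet assumes the Affine branch (a 2x3 matrix).

import cv2

# placeholder frame paths; replace with two consecutive extracted frames
prev_bgr = cv2.imread("frame000000001.jpg")
next_bgr = cv2.imread("frame000000002.jpg")
prev_gray = cv2.cvtColor(prev_bgr, cv2.COLOR_BGR2GRAY)
next_gray = cv2.cvtColor(next_bgr, cv2.COLOR_BGR2GRAY)

M = get_transformation_matrix_from_images(prev_gray, next_gray, "Affine")
# warp the previous color frame by the estimated rigid motion
warped = cv2.warpAffine(prev_bgr, M, (prev_bgr.shape[1], prev_bgr.shape[0]),
                        borderMode=cv2.BORDER_REFLECT_101)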
331
+ def get_flow_from_images(i1, i2, method, raft_model, prev_flow=None):
332
+ if method == "RAFT":
333
+ if raft_model is None:
334
+ raise Exception("RAFT Model not provided to get_flow_from_images function, cannot continue.")
335
+ return get_flow_from_images_RAFT(i1, i2, raft_model)
336
+ elif method == "DIS Medium":
337
+ return get_flow_from_images_DIS(i1, i2, 'medium', prev_flow)
338
+ elif method == "DIS Fine":
339
+ return get_flow_from_images_DIS(i1, i2, 'fine', prev_flow)
340
+ elif method == "DenseRLOF": # Unused - requires running opencv-contrib-python (full opencv) INSTEAD of opencv-python
341
+ return get_flow_from_images_Dense_RLOF(i1, i2, prev_flow)
342
+ elif method == "SF": # Unused - requires running opencv-contrib-python (full opencv) INSTEAD of opencv-python
343
+ return get_flow_from_images_SF(i1, i2, prev_flow)
344
+ elif method == "DualTVL1": # Unused - requires running opencv-contrib-python (full opencv) INSTEAD of opencv-python
345
+ return get_flow_from_images_DualTVL1(i1, i2, prev_flow)
346
+ elif method == "DeepFlow": # Unused - requires running opencv-contrib-python (full opencv) INSTEAD of opencv-python
347
+ return get_flow_from_images_DeepFlow(i1, i2, prev_flow)
348
+ elif method == "PCAFlow": # Unused - requires running opencv-contrib-python (full opencv) INSTEAD of opencv-python
349
+ return get_flow_from_images_PCAFlow(i1, i2, prev_flow)
350
+ elif method == "Farneback": # Farneback Normal:
351
+ return get_flow_from_images_Farneback(i1, i2, prev_flow)
352
+ # if we reached this point, something went wrong. raise an error:
353
+ raise RuntimeError(f"Invalid flow method name: '{method}'")
354
+
355
+ def get_flow_from_images_RAFT(i1, i2, raft_model):
356
+ flow = raft_model.predict(i1, i2)
357
+ return flow
358
+
359
+ def get_flow_from_images_DIS(i1, i2, preset, prev_flow):
360
+ # DIS PRESETS CHART KEY: finest scale, grad desc its, patch size
361
+ # DIS_MEDIUM: 1, 25, 8 | DIS_FAST: 2, 16, 8 | DIS_ULTRAFAST: 2, 12, 8
362
+ if preset == 'medium': preset_code = cv2.DISOPTICAL_FLOW_PRESET_MEDIUM
363
+ elif preset == 'fast': preset_code = cv2.DISOPTICAL_FLOW_PRESET_FAST
364
+ elif preset == 'ultrafast': preset_code = cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST
365
+ elif preset in ['slow','fine']: preset_code = None
366
+ i1 = cv2.cvtColor(i1, cv2.COLOR_BGR2GRAY)
367
+ i2 = cv2.cvtColor(i2, cv2.COLOR_BGR2GRAY)
368
+ dis = cv2.DISOpticalFlow_create(preset_code)
369
+ # custom presets
370
+ if preset == 'slow':
371
+ dis.setGradientDescentIterations(192)
372
+ dis.setFinestScale(1)
373
+ dis.setPatchSize(8)
374
+ dis.setPatchStride(4)
375
+ if preset == 'fine':
376
+ dis.setGradientDescentIterations(192)
377
+ dis.setFinestScale(0)
378
+ dis.setPatchSize(8)
379
+ dis.setPatchStride(4)
380
+ return dis.calc(i1, i2, prev_flow)
381
+
382
+ def get_flow_from_images_Dense_RLOF(i1, i2, last_flow=None):
383
+ return cv2.optflow.calcOpticalFlowDenseRLOF(i1, i2, flow = last_flow)
384
+
385
+ def get_flow_from_images_SF(i1, i2, last_flow=None, layers = 3, averaging_block_size = 2, max_flow = 4):
386
+ return cv2.optflow.calcOpticalFlowSF(i1, i2, layers, averaging_block_size, max_flow)
387
+
388
+ def get_flow_from_images_DualTVL1(i1, i2, prev_flow):
389
+ i1 = cv2.cvtColor(i1, cv2.COLOR_BGR2GRAY)
390
+ i2 = cv2.cvtColor(i2, cv2.COLOR_BGR2GRAY)
391
+ f = cv2.optflow.DualTVL1OpticalFlow_create()
392
+ return f.calc(i1, i2, prev_flow)
393
+
394
+ def get_flow_from_images_DeepFlow(i1, i2, prev_flow):
395
+ i1 = cv2.cvtColor(i1, cv2.COLOR_BGR2GRAY)
396
+ i2 = cv2.cvtColor(i2, cv2.COLOR_BGR2GRAY)
397
+ f = cv2.optflow.createOptFlow_DeepFlow()
398
+ return f.calc(i1, i2, prev_flow)
399
+
400
+ def get_flow_from_images_PCAFlow(i1, i2, prev_flow):
401
+ i1 = cv2.cvtColor(i1, cv2.COLOR_BGR2GRAY)
402
+ i2 = cv2.cvtColor(i2, cv2.COLOR_BGR2GRAY)
403
+ f = cv2.optflow.createOptFlow_PCAFlow()
404
+ return f.calc(i1, i2, prev_flow)
405
+
406
+ def get_flow_from_images_Farneback(i1, i2, preset="normal", last_flow=None, pyr_scale = 0.5, levels = 3, winsize = 15, iterations = 3, poly_n = 5, poly_sigma = 1.2, flags = 0):
407
+ flags = cv2.OPTFLOW_FARNEBACK_GAUSSIAN # Specify the operation flags
408
+ pyr_scale = 0.5 # The image scale (<1) to build pyramids for each image
409
+ if preset == "fine":
410
+ levels = 13 # The number of pyramid layers, including the initial image
411
+ winsize = 77 # The averaging window size
412
+ iterations = 13 # The number of iterations at each pyramid level
413
+ poly_n = 15 # The size of the pixel neighborhood used to find polynomial expansion in each pixel
414
+ poly_sigma = 0.8 # The standard deviation of the Gaussian used to smooth derivatives used as a basis for the polynomial expansion
415
+ else: # "normal"
416
+ levels = 5 # The number of pyramid layers, including the initial image
417
+ winsize = 21 # The averaging window size
418
+ iterations = 5 # The number of iterations at each pyramid level
419
+ poly_n = 7 # The size of the pixel neighborhood used to find polynomial expansion in each pixel
420
+ poly_sigma = 1.2 # The standard deviation of the Gaussian used to smooth derivatives used as a basis for the polynomial expansion
421
+ i1 = cv2.cvtColor(i1, cv2.COLOR_BGR2GRAY)
422
+ i2 = cv2.cvtColor(i2, cv2.COLOR_BGR2GRAY)
423
+ flags = 0 # flags = cv2.OPTFLOW_USE_INITIAL_FLOW
424
+ flow = cv2.calcOpticalFlowFarneback(i1, i2, last_flow, pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags)
425
+ return flow
426
+
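A quick sanity check of the Farneback helper above on synthetic frames, where a white square moves 3 px to the right. The frame sizes and the expected values are approximate illustrations, not guaranteed outputs.

import numpy as np

a = np.zeros((64, 64, 3), np.uint8); a[20:40, 20:40] = 255
b = np.zeros((64, 64, 3), np.uint8); b[20:40, 23:43] = 255

flow = get_flow_from_images_Farneback(a, b)  # defaults to the "normal" preset
print(flow.shape)    # (64, 64, 2)
print(flow[30, 30])  # roughly [3, 0] where the motion is well constrained near the square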
427
+ def save_flow_visualization(frame_idx, dimensions, flow, inputfiles, hybrid_frame_path):
428
+ flow_img_file = os.path.join(hybrid_frame_path, f"flow{frame_idx:09}.jpg")
429
+ flow_img = cv2.imread(str(inputfiles[frame_idx]))
430
+ flow_img = cv2.resize(flow_img, (dimensions[0], dimensions[1]), cv2.INTER_AREA)
431
+ flow_img = cv2.cvtColor(flow_img, cv2.COLOR_RGB2GRAY)
432
+ flow_img = cv2.cvtColor(flow_img, cv2.COLOR_GRAY2BGR)
433
+ flow_img = draw_flow_lines_in_grid_in_color(flow_img, flow)
434
+ flow_img = cv2.cvtColor(flow_img, cv2.COLOR_BGR2RGB)
435
+ cv2.imwrite(flow_img_file, flow_img)
436
+ print(f"Saved optical flow visualization: {flow_img_file}")
437
+
438
+ def save_flow_mask_visualization(frame_idx, reliable_flow, hybrid_frame_path, color=True):
439
+ flow_mask_img_file = os.path.join(hybrid_frame_path, f"flow_mask{frame_idx:09}.jpg")
440
+ if color:
441
+ # Normalize the reliable_flow array to the range [0, 255]
442
+ normalized_reliable_flow = (reliable_flow - reliable_flow.min()) / (reliable_flow.max() - reliable_flow.min()) * 255
443
+ # Change the data type to np.uint8
444
+ mask_image = normalized_reliable_flow.astype(np.uint8)
445
+ else:
446
+ # Extract the first channel of the reliable_flow array
447
+ first_channel = reliable_flow[..., 0]
448
+ # Normalize the first channel to the range [0, 255]
449
+ normalized_first_channel = (first_channel - first_channel.min()) / (first_channel.max() - first_channel.min()) * 255
450
+ # Change the data type to np.uint8
451
+ grayscale_image = normalized_first_channel.astype(np.uint8)
452
+ # Replicate the grayscale channel three times to form a BGR image
453
+ mask_image = np.stack((grayscale_image, grayscale_image, grayscale_image), axis=2)
454
+ cv2.imwrite(flow_mask_img_file, mask_image)
455
+ print(f"Saved mask flow visualization: {flow_mask_img_file}")
456
+
457
+ def reliable_flow_to_image(reliable_flow):
458
+ # Extract the first channel of the reliable_flow array
459
+ first_channel = reliable_flow[..., 0]
460
+ # Normalize the first channel to the range [0, 255]
461
+ normalized_first_channel = (first_channel - first_channel.min()) / (first_channel.max() - first_channel.min()) * 255
462
+ # Change the data type to np.uint8
463
+ grayscale_image = normalized_first_channel.astype(np.uint8)
464
+ # Replicate the grayscale channel three times to form a BGR image
465
+ bgr_image = np.stack((grayscale_image, grayscale_image, grayscale_image), axis=2)
466
+ return bgr_image
467
+
468
+ def draw_flow_lines_in_grid_in_color(img, flow, step=8, magnitude_multiplier=1, min_magnitude = 0, max_magnitude = 10000):
469
+ flow = flow * magnitude_multiplier
470
+ h, w = img.shape[:2]
471
+ y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2,-1).astype(int)
472
+ fx, fy = flow[y,x].T
473
+ lines = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2)
474
+ lines = np.int32(lines + 0.5)
475
+ vis = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
476
+ vis = cv2.cvtColor(vis, cv2.COLOR_GRAY2BGR)
477
+
478
+ mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1])
479
+ hsv = np.zeros((flow.shape[0], flow.shape[1], 3), dtype=np.uint8)
480
+ hsv[...,0] = ang*180/np.pi/2
481
+ hsv[...,1] = 255
482
+ hsv[...,2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
483
+ bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
484
+ vis = cv2.add(vis, bgr)
485
+
486
+ # Iterate through the lines
487
+ for (x1, y1), (x2, y2) in lines:
488
+ # Calculate the magnitude of the line
489
+ magnitude = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
490
+
491
+ # Only draw the line if it falls within the magnitude range
492
+ if min_magnitude <= magnitude <= max_magnitude:
493
+ b = int(bgr[y1, x1, 0])
494
+ g = int(bgr[y1, x1, 1])
495
+ r = int(bgr[y1, x1, 2])
496
+ color = (b, g, r)
497
+ cv2.arrowedLine(vis, (x1, y1), (x2, y2), color, thickness=1, tipLength=0.1)
498
+ return vis
499
+
500
+ def draw_flow_lines_in_color(img, flow, threshold=3, magnitude_multiplier=1, min_magnitude = 0, max_magnitude = 10000):
501
+ # h, w = img.shape[:2]
502
+ vis = img.copy() # Create a copy of the input image
503
+
504
+ # Find the locations in the flow field where the magnitude of the flow is greater than the threshold
505
+ mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1])
506
+ idx = np.where(mag > threshold)
507
+
508
+ # Create HSV image
509
+ hsv = np.zeros((flow.shape[0], flow.shape[1], 3), dtype=np.uint8)
510
+ hsv[...,0] = ang*180/np.pi/2
511
+ hsv[...,1] = 255
512
+ hsv[...,2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
513
+
514
+ # Convert HSV image to BGR
515
+ bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
516
+
517
+ # Add color from bgr
518
+ vis = cv2.add(vis, bgr)
519
+
520
+ # Draw an arrow at each of these locations to indicate the direction of the flow
521
+ for i, (y, x) in enumerate(zip(idx[0], idx[1])):
522
+ # Calculate the magnitude of the line
523
+ x2 = x + magnitude_multiplier * int(flow[y, x, 0])
524
+ y2 = y + magnitude_multiplier * int(flow[y, x, 1])
525
+ magnitude = np.sqrt((x2 - x)**2 + (y2 - y)**2)
526
+
527
+ # Only draw the line if it falls within the magnitude range
528
+ if min_magnitude <= magnitude <= max_magnitude:
529
+ if i % random.randint(100, 200) == 0:
530
+ b = int(bgr[y, x, 0])
531
+ g = int(bgr[y, x, 1])
532
+ r = int(bgr[y, x, 2])
533
+ color = (b, g, r)
534
+ cv2.arrowedLine(vis, (x, y), (x2, y2), color, thickness=1, tipLength=0.25)
535
+
536
+ return vis
537
+
538
+ def autocontrast_grayscale(image, low_cutoff=0, high_cutoff=100):
539
+ # Perform autocontrast on a grayscale np array image.
540
+ # Find the minimum and maximum values in the image
541
+ min_val = np.percentile(image, low_cutoff)
542
+ max_val = np.percentile(image, high_cutoff)
543
+
544
+ # Scale the image so that the minimum value is 0 and the maximum value is 255
545
+ image = 255 * (image - min_val) / (max_val - min_val)
546
+
547
+ # Clip values that fall outside the range [0, 255]
548
+ image = np.clip(image, 0, 255)
549
+
550
+ return image
551
+
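A small worked example of the percentile stretch performed by autocontrast_grayscale; the ramp image and the cutoff values are arbitrary.

import numpy as np

# low-contrast ramp between 100 and 150
img = np.linspace(100, 150, 64 * 64).reshape(64, 64)

stretched = autocontrast_grayscale(img, low_cutoff=2, high_cutoff=98)
print(img.min(), img.max())              # 100.0 150.0
print(stretched.min(), stretched.max())  # 0.0 255.0 after the stretch and clip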
552
+ def get_resized_image_from_filename(im, dimensions):
553
+ img = cv2.imread(im)
554
+ return cv2.resize(img, (dimensions[0], dimensions[1]), cv2.INTER_AREA)
555
+
556
+ def remap(img, flow):
557
+ border_mode = cv2.BORDER_REFLECT_101
558
+ h, w = img.shape[:2]
559
+ displacement = int(h * 0.25), int(w * 0.25)
560
+ larger_img = cv2.copyMakeBorder(img, displacement[0], displacement[0], displacement[1], displacement[1], border_mode)
561
+ lh, lw = larger_img.shape[:2]
562
+ larger_flow = extend_flow(flow, lw, lh)
563
+ remapped_img = cv2.remap(larger_img, larger_flow, None, cv2.INTER_LINEAR, border_mode)
564
+ output_img = center_crop_image(remapped_img, w, h)
565
+ return output_img
566
+
567
+ def center_crop_image(img, w, h):
568
+ y, x, _ = img.shape
569
+ width_indent = int((x - w) / 2)
570
+ height_indent = int((y - h) / 2)
571
+ cropped_img = img[height_indent:y-height_indent, width_indent:x-width_indent]
572
+ return cropped_img
573
+
574
+ def extend_flow(flow, w, h):
575
+ # Get the shape of the original flow image
576
+ flow_h, flow_w = flow.shape[:2]
577
+ # Calculate the position of the image in the new image
578
+ x_offset = int((w - flow_w) / 2)
579
+ y_offset = int((h - flow_h) / 2)
580
+ # Generate the X and Y grids
581
+ x_grid, y_grid = np.meshgrid(np.arange(w), np.arange(h))
582
+ # Create the new flow image and set it to the X and Y grids
583
+ new_flow = np.dstack((x_grid, y_grid)).astype(np.float32)
584
+ # Shift the values of the original flow by the size of the border
585
+ flow[:,:,0] += x_offset
586
+ flow[:,:,1] += y_offset
587
+ # Overwrite the middle of the grid with the original flow
588
+ new_flow[y_offset:y_offset+flow_h, x_offset:x_offset+flow_w, :] = flow
589
+ # Return the extended image
590
+ return new_flow
591
+
592
+ def abs_flow_to_rel_flow(flow, width, height):
593
+ fx, fy = flow[:,:,0], flow[:,:,1]
594
+ max_flow_x = np.max(np.abs(fx))
595
+ max_flow_y = np.max(np.abs(fy))
596
+ max_flow = max(max_flow_x, max_flow_y)
597
+
598
+ rel_fx = fx / (max_flow * width)
599
+ rel_fy = fy / (max_flow * height)
600
+ return np.dstack((rel_fx, rel_fy))
601
+
602
+ def rel_flow_to_abs_flow(rel_flow, width, height):
603
+ rel_fx, rel_fy = rel_flow[:,:,0], rel_flow[:,:,1]
604
+
605
+ max_flow_x = np.max(np.abs(rel_fx * width))
606
+ max_flow_y = np.max(np.abs(rel_fy * height))
607
+ max_flow = max(max_flow_x, max_flow_y)
608
+
609
+ fx = rel_fx * (max_flow * width)
610
+ fy = rel_fy * (max_flow * height)
611
+ return np.dstack((fx, fy))
scripts/deforum_helpers/image_sharpening.py ADDED
@@ -0,0 +1,39 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import cv2
19
+ import numpy as np
20
+
21
+ def unsharp_mask(img, kernel_size=(5, 5), sigma=1.0, amount=1.0, threshold=0, mask=None):
22
+ if amount == 0:
23
+ return img
24
+ # Return a sharpened version of the image, using an unsharp mask.
25
+ # If mask is not None, only areas under mask are handled
26
+ blurred = cv2.GaussianBlur(img, kernel_size, sigma)
27
+ sharpened = float(amount + 1) * img - float(amount) * blurred
28
+ sharpened = np.maximum(sharpened, np.zeros(sharpened.shape))
29
+ sharpened = np.minimum(sharpened, 255 * np.ones(sharpened.shape))
30
+ sharpened = sharpened.round().astype(np.uint8)
31
+ if threshold > 0:
32
+ low_contrast_mask = np.absolute(img - blurred) < threshold
33
+ np.copyto(sharpened, img, where=low_contrast_mask)
34
+ if mask is not None:
35
+ mask = np.array(mask)
36
+ masked_sharpened = cv2.bitwise_and(sharpened, sharpened, mask=mask)
37
+ masked_img = cv2.bitwise_and(img, img, mask=255-mask)
38
+ sharpened = cv2.add(masked_img, masked_sharpened)
39
+ return sharpened
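A short usage sketch of the unsharp mask above on a synthetic soft image; the kernel size, sigma, and amount are arbitrary values chosen for illustration.

import numpy as np
import cv2

# synthetic soft-edged test image: a blurred white square on black
img = np.zeros((128, 128, 3), np.uint8)
img[40:90, 40:90] = 255
soft = cv2.GaussianBlur(img, (15, 15), 4.0)

# re-sharpen the edges; amount=0 would return the input untouched,
# and passing mask= limits sharpening to the white areas of the mask
sharp = unsharp_mask(soft, kernel_size=(5, 5), sigma=1.0, amount=1.5, threshold=0)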
scripts/deforum_helpers/load_images.py ADDED
@@ -0,0 +1,113 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import requests
19
+ import os
20
+ from PIL import Image
21
+ import socket
22
+ import torchvision.transforms.functional as TF
23
+ from .general_utils import clean_gradio_path_strings
24
+
25
+ def load_img(path : str, shape=None, use_alpha_as_mask=False):
26
+ # use_alpha_as_mask: Read the alpha channel of the image as the mask image
27
+ image = load_image(path)
28
+ image = image.convert('RGBA') if use_alpha_as_mask else image.convert('RGB')
29
+ image = image.resize(shape, resample=Image.LANCZOS) if shape is not None else image
30
+
31
+ mask_image = None
32
+ if use_alpha_as_mask:
33
+ # Split alpha channel into a mask_image
34
+ red, green, blue, alpha = Image.Image.split(image) # not interested in R G or B, just in the alpha channel
35
+ mask_image = alpha.convert('L')
36
+ image = image.convert('RGB')
37
+
38
+ # check using init image alpha as mask if mask is not blank
39
+ extrema = mask_image.getextrema()
40
+ if (extrema == (0,0)) or extrema == (255,255):
41
+ print("use_alpha_as_mask==True: Using the alpha channel from the init image as a mask, but the alpha channel is blank.")
42
+ print("ignoring alpha as mask.")
43
+ mask_image = None
44
+
45
+ return image, mask_image
46
+
47
+ def load_image(image_path :str):
48
+ image_path = clean_gradio_path_strings(image_path)
49
+ image = None
50
+ if image_path.startswith('http://') or image_path.startswith('https://'):
51
+ try:
52
+ host = socket.gethostbyname("www.google.com")
53
+ s = socket.create_connection((host, 80), 2)
54
+ s.close()
55
+ except:
56
+ raise ConnectionError("There is no active internet connection available (couldn't connect to google.com as a network test) - please use *local* masks and init files only.")
57
+ try:
58
+ response = requests.get(image_path, stream=True)
59
+ except requests.exceptions.RequestException as e:
60
+ raise ConnectionError("Failed to download image due to no internet connection. Error: {}".format(e))
61
+ if response.status_code == 404 or response.status_code != 200:
62
+ raise ConnectionError("Init image url or mask image url is not valid")
63
+ image = Image.open(response.raw).convert('RGB')
64
+ else:
65
+ if not os.path.exists(image_path):
66
+ raise RuntimeError("Init image path or mask image path is not valid")
67
+ image = Image.open(image_path).convert('RGB')
68
+
69
+ return image
70
+
71
+ def prepare_mask(mask_input, mask_shape, mask_brightness_adjust=1.0, mask_contrast_adjust=1.0):
72
+ """
73
+ prepares mask for use in webui
74
+ """
75
+ if isinstance(mask_input, Image.Image):
76
+ mask = mask_input
77
+ else :
78
+ mask = load_image(mask_input)
79
+ mask = mask.resize(mask_shape, resample=Image.LANCZOS)
80
+ if mask_brightness_adjust != 1:
81
+ mask = TF.adjust_brightness(mask, mask_brightness_adjust)
82
+ if mask_contrast_adjust != 1:
83
+ mask = TF.adjust_contrast(mask, mask_contrast_adjust)
84
+ mask = mask.convert('L')
85
+ return mask
86
+
87
+ # "check_mask_for_errors" may have prevented errors in composable masks,
88
+ # but it CAUSES errors on any frame where it's all black.
89
+ # Bypassing the check below until we have a better fix.
90
+ # This may break composable masks, but it makes ACTUAL masks usable.
91
+ def check_mask_for_errors(mask_input, invert_mask=False):
92
+ extrema = mask_input.getextrema()
93
+ if (invert_mask):
94
+ if extrema == (255,255):
95
+ print("after inverting mask will be blank. ignoring mask")
96
+ return None
97
+ elif extrema == (0,0):
98
+ print("mask is blank. ignoring mask")
99
+ return None
100
+ else:
101
+ return mask_input
102
+
103
+ def get_mask(args):
104
+ return prepare_mask(args.mask_file, (args.W, args.H), mask_contrast_adjust=args.mask_contrast_adjust, mask_brightness_adjust=args.mask_brightness_adjust)
105
+
106
+ def get_mask_from_file(mask_file, args):
107
+ return prepare_mask(mask_file, (args.W, args.H), mask_contrast_adjust=args.mask_contrast_adjust, mask_brightness_adjust=args.mask_brightness_adjust)
108
+
109
+ def blank_if_none(mask, w, h, mode):
110
+ return Image.new(mode, (w, h), (0)) if mask is None else mask
111
+
112
+ def none_if_blank(mask):
113
+ return None if mask.getextrema() == (0,0) else mask
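
A brief sketch of the loaders above, with placeholder file names; load_images.py pulls in torchvision and another Deforum helper, so this is meant to run from the extension's scripts directory inside a working install. load_img resizes the init image and optionally splits a non-blank alpha channel into a mask, while prepare_mask resizes and brightness/contrast-adjusts an explicit mask and returns it as a grayscale ('L') PIL image:

from deforum_helpers.load_images import load_img, prepare_mask

# init image at the render resolution; the alpha channel becomes the mask, or None if it is blank/absent
init_image, alpha_mask = load_img("init.png", shape=(512, 512), use_alpha_as_mask=True)

# explicit mask file, resized and adjusted; keyword names match the prepare_mask signature above
mask = prepare_mask("mask.png", (512, 512), mask_brightness_adjust=1.0, mask_contrast_adjust=1.0)
print(init_image.size, mask.mode)  # (512, 512) L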
scripts/deforum_helpers/masks.py ADDED
@@ -0,0 +1,57 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ import cv2
20
+ import gc
21
+ import numpy as np
22
+ from PIL import Image, ImageOps
23
+ from .video_audio_utilities import get_frame_name
24
+ from .load_images import load_image
25
+
26
+ def do_overlay_mask(args, anim_args, img, frame_idx, is_bgr_array=False):
27
+ if is_bgr_array:
28
+ img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
29
+ img = Image.fromarray(img)
30
+
31
+ if anim_args.use_mask_video:
32
+ current_mask = Image.open(os.path.join(args.outdir, 'maskframes', get_frame_name(anim_args.video_mask_path) + f"{frame_idx:09}.jpg"))
33
+ current_frame = Image.open(os.path.join(args.outdir, 'inputframes', get_frame_name(anim_args.video_init_path) + f"{frame_idx:09}.jpg"))
34
+ elif args.use_mask:
35
+ current_mask = args.mask_image if args.mask_image is not None else load_image(args.mask_file)
36
+ if args.init_image is None:
37
+ current_frame = img
38
+ else:
39
+ current_frame = load_image(args.init_image)
40
+
41
+ current_mask = current_mask.resize((args.W, args.H), Image.LANCZOS)
42
+ current_frame = current_frame.resize((args.W, args.H), Image.LANCZOS)
43
+ current_mask = ImageOps.grayscale(current_mask)
44
+
45
+ if args.invert_mask:
46
+ current_mask = ImageOps.invert(current_mask)
47
+
48
+ img = Image.composite(img, current_frame, current_mask)
49
+
50
+ if is_bgr_array:
51
+ img = np.array(img)
52
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
53
+
54
+ del(current_mask, current_frame)
55
+ gc.collect()
56
+
57
+ return img
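
A hypothetical sketch of calling do_overlay_mask above directly, using the static-mask branch (use_mask_video off) inside a Deforum-enabled environment. With these placeholder paths, white areas of the mask keep the generated frame and black areas are filled from the init image; SimpleNamespace stands in for the usual args/anim_args objects, mirroring how the unit tests later in this diff build their arguments:

from types import SimpleNamespace
from PIL import Image
from deforum_helpers.masks import do_overlay_mask

args = SimpleNamespace(W=512, H=512, use_mask=True, invert_mask=False,
                       mask_image=None, mask_file="mask.png", init_image="init.png")
anim_args = SimpleNamespace(use_mask_video=False)

frame = Image.open("frame.png")  # a generated frame at the render resolution; PIL path, so is_bgr_array stays False
composited = do_overlay_mask(args, anim_args, frame, frame_idx=0, is_bgr_array=False)
composited.save("frame_masked.png")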
scripts/deforum_helpers/noise.py ADDED
@@ -0,0 +1,89 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import torch
19
+ from torch.nn.functional import interpolate
20
+ import numpy as np
21
+ from PIL import ImageOps
22
+ import math
23
+ from .animation import sample_to_cv2
24
+ import cv2
25
+ from modules.shared import opts
26
+
27
+ DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False)
28
+
29
+ deforum_noise_gen = torch.Generator(device='cpu')
30
+
31
+ # 2D Perlin noise in PyTorch https://gist.github.com/vadimkantorov/ac1b097753f217c5c11bc2ff396e0a57
32
+ def rand_perlin_2d(shape, res, fade = lambda t: 6*t**5 - 15*t**4 + 10*t**3):
33
+ delta = (res[0] / shape[0], res[1] / shape[1])
34
+ d = (shape[0] // res[0], shape[1] // res[1])
35
+
36
+ grid = torch.stack(torch.meshgrid(torch.arange(0, res[0], delta[0]), torch.arange(0, res[1], delta[1]), indexing='ij'), dim = -1) % 1
37
+ angles = 2*math.pi*torch.rand(res[0]+1, res[1]+1, generator=deforum_noise_gen)
38
+ gradients = torch.stack((torch.cos(angles), torch.sin(angles)), dim = -1)
39
+
40
+ tile_grads = lambda slice1, slice2: gradients[slice1[0]:slice1[1], slice2[0]:slice2[1]].repeat_interleave(d[0], 0).repeat_interleave(d[1], 1)
41
+ dot = lambda grad, shift: (torch.stack((grid[:shape[0],:shape[1],0] + shift[0], grid[:shape[0],:shape[1], 1] + shift[1] ), dim = -1) * grad[:shape[0], :shape[1]]).sum(dim = -1)
42
+
43
+ n00 = dot(tile_grads([0, -1], [0, -1]), [0, 0])
44
+ n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
45
+ n01 = dot(tile_grads([0, -1],[1, None]), [0, -1])
46
+ n11 = dot(tile_grads([1, None], [1, None]), [-1,-1])
47
+ t = fade(grid[:shape[0], :shape[1]])
48
+ return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]), torch.lerp(n01, n11, t[..., 0]), t[..., 1])
49
+
50
+ def rand_perlin_2d_octaves(shape, res, octaves=1, persistence=0.5):
51
+ noise = torch.zeros(shape)
52
+ frequency = 1
53
+ amplitude = 1
54
+ for _ in range(int(octaves)):
55
+ noise += amplitude * rand_perlin_2d(shape, (frequency*res[0], frequency*res[1]))
56
+ frequency *= 2
57
+ amplitude *= persistence
58
+ return noise
59
+
60
+ def condition_noise_mask(noise_mask, invert_mask = False):
61
+ if invert_mask:
62
+ noise_mask = ImageOps.invert(noise_mask)
63
+ noise_mask = np.array(noise_mask.convert("L"))
64
+ noise_mask = noise_mask.astype(np.float32) / 255.0
65
+ noise_mask = np.around(noise_mask, decimals=0)
66
+ noise_mask = torch.from_numpy(noise_mask)
67
+ #noise_mask = torch.round(noise_mask)
68
+ return noise_mask
69
+
70
+ def add_noise(sample, noise_amt: float, seed: int, noise_type: str, noise_args, noise_mask = None, invert_mask = False):
71
+ deforum_noise_gen.manual_seed(seed) # Reproducibility
72
+ perlin_w = sample.shape[0]
73
+ perlin_h = sample.shape[1]
74
+ perlin_w, perlin_h = map(lambda x: x - x % 64, (perlin_w, perlin_h)) # rescale perlin to multiples of 64
75
+ sample2dshape = (perlin_w, perlin_h)
76
+ noise = torch.randn((sample.shape[2], perlin_w, perlin_h), generator=deforum_noise_gen) # White noise
77
+ if noise_type == 'perlin':
78
+ # rand_perlin_2d_octaves is between -1 and 1, so we need to shift it to be between 0 and 1
79
+ # print(sample.shape)
80
+ noise = noise * ((rand_perlin_2d_octaves(sample2dshape, (int(noise_args[0]), int(noise_args[1])), octaves=noise_args[2], persistence=noise_args[3]) + torch.ones(sample2dshape)) / 2)
81
+ noise = interpolate(noise.unsqueeze(1), size=(sample.shape[0], sample.shape[1])).squeeze(1) # rescale perlin back to the target resolution
82
+ if noise_mask is not None:
83
+ noise_mask = condition_noise_mask(noise_mask, invert_mask)
84
+ noise_to_add = sample_to_cv2(noise * noise_mask)
85
+ else:
86
+ noise_to_add = sample_to_cv2(noise)
87
+ sample = cv2.addWeighted(sample, 1-noise_amt, noise_to_add, noise_amt, 0)
88
+
89
+ return sample
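
A hedged sketch of add_noise above. Because noise.py imports modules.shared, it can only be exercised inside a running WebUI process (for example from another extension or a console attached to it); the frame is a synthetic uint8 BGR array, and the perlin noise_args follow the (res_x, res_y, octaves, persistence) order consumed by the function:

import numpy as np
from deforum_helpers.noise import add_noise  # resolvable once the extension's scripts path is active

frame = np.full((512, 512, 3), 127, dtype=np.uint8)  # both dimensions are multiples of 64, so no perlin cropping

noisy = add_noise(frame, noise_amt=0.08, seed=42, noise_type='perlin', noise_args=(4, 4, 2, 0.5))
print(noisy.shape, noisy.dtype)  # (512, 512, 3) uint8: 8% noise blended in, reproducible via the seed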
scripts/deforum_helpers/parseq_adapter.py ADDED
@@ -0,0 +1,210 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import copy
19
+ import json
20
+ import logging
21
+ import operator
22
+ from operator import itemgetter
23
+ import numpy as np
24
+ import pandas as pd
25
+ import requests
26
+ from .animation_key_frames import DeformAnimKeys
27
+ from .rich import console
28
+
29
+ logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
30
+
31
+ class ParseqAnimKeys():
32
+ def __init__(self, parseq_args, anim_args, video_args, mute=False):
33
+
34
+ # Resolve manifest either directly from supplied value
35
+ # or via supplied URL
36
+ manifestOrUrl = parseq_args.parseq_manifest.strip()
37
+ if (manifestOrUrl.startswith('http')):
38
+ logging.info(f"Loading Parseq manifest from URL: {manifestOrUrl}")
39
+ try:
40
+ body = requests.get(manifestOrUrl).text
41
+ logging.debug(f"Loaded remote manifest: {body}")
42
+ self.parseq_json = json.loads(body)
43
+
44
+ # Add the parseq manifest without the detailed frame data to parseq_args.
45
+ # This ensures it will be saved in the settings file, so that you can always
46
+ # see exactly what parseq prompts and keyframes were used, even if what the URL
47
+ # points to changes.
48
+ parseq_args.fetched_parseq_manifest_summary = copy.deepcopy(self.parseq_json)
49
+ if parseq_args.fetched_parseq_manifest_summary['rendered_frames']:
50
+ del parseq_args.fetched_parseq_manifest_summary['rendered_frames']
51
+ if parseq_args.fetched_parseq_manifest_summary['rendered_frames_meta']:
52
+ del parseq_args.fetched_parseq_manifest_summary['rendered_frames_meta']
53
+
54
+ except Exception as e:
55
+ logging.error(f"Unable to load Parseq manifest from URL: {manifestOrUrl}")
56
+ raise e
57
+ else:
58
+ self.parseq_json = json.loads(manifestOrUrl)
59
+
60
+ self.default_anim_keys = DeformAnimKeys(anim_args)
61
+ self.rendered_frames = self.parseq_json['rendered_frames']
62
+ self.max_frame = self.get_max('frame')
63
+ self.required_frames = anim_args.max_frames
64
+ # TODO these values are currently only used to emit a subtle warning. User must ensure the output FPS set in parseq
65
+ # matches the one set in Deforum to avoid unexpected results.
66
+ # In the future we may wish to override video_args.fps value with the one from parseq.
67
+ self.required_fps = video_args.fps
68
+ self.config_output_fps = self.parseq_json['options']['output_fps']
69
+
70
+ if not mute:
71
+ self.print_parseq_table()
72
+
73
+ count_defined_frames = len(self.rendered_frames)
74
+ expected_defined_frames = self.max_frame+1 # frames are 0-indexed
75
+ if (expected_defined_frames != count_defined_frames):
76
+ logging.warning(f"There may be duplicated or missing frame data in the Parseq input: expected {expected_defined_frames} frames including frame 0 because the highest frame number is {self.max_frame}, but there are {count_defined_frames} frames defined.")
77
+
78
+ # Parseq treats input values as absolute values. So if you want to
79
+ # progressively rotate 180 degrees over 4 frames, you specify: 45, 90, 135, 180.
80
+ # However, many animation parameters are relative to the previous frame if there is enough
81
+ # loopback strength. So if you want to rotate 180 degrees over 4 frames, the animation engine expects:
82
+ # 45, 45, 45, 45. Therefore, for such parameters, we use the fact that Parseq supplies delta values.
83
+ optional_delta = '_delta' if parseq_args.parseq_use_deltas else ''
84
+ self.angle_series = self.parseq_to_anim_series('angle' + optional_delta)
85
+ self.zoom_series = self.parseq_to_anim_series('zoom' + optional_delta)
86
+ self.translation_x_series = self.parseq_to_anim_series('translation_x' + optional_delta)
87
+ self.translation_y_series = self.parseq_to_anim_series('translation_y' + optional_delta)
88
+ self.translation_z_series = self.parseq_to_anim_series('translation_z' + optional_delta)
89
+ self.rotation_3d_x_series = self.parseq_to_anim_series('rotation_3d_x' + optional_delta)
90
+ self.rotation_3d_y_series = self.parseq_to_anim_series('rotation_3d_y' + optional_delta)
91
+ self.rotation_3d_z_series = self.parseq_to_anim_series('rotation_3d_z' + optional_delta)
92
+ self.perspective_flip_theta_series = self.parseq_to_anim_series('perspective_flip_theta' + optional_delta)
93
+ self.perspective_flip_phi_series = self.parseq_to_anim_series('perspective_flip_phi' + optional_delta)
94
+ self.perspective_flip_gamma_series = self.parseq_to_anim_series('perspective_flip_gamma' + optional_delta)
95
+
96
+ # Non-motion animation args
97
+ self.perspective_flip_fv_series = self.parseq_to_anim_series('perspective_flip_fv')
98
+ self.noise_schedule_series = self.parseq_to_anim_series('noise')
99
+ self.strength_schedule_series = self.parseq_to_anim_series('strength')
100
+ self.sampler_schedule_series = self.parseq_to_anim_series('sampler_schedule')
101
+ self.contrast_schedule_series = self.parseq_to_anim_series('contrast')
102
+ self.cfg_scale_schedule_series = self.parseq_to_anim_series('scale')
103
+ self.steps_schedule_series = self.parseq_to_anim_series("steps_schedule")
104
+ self.seed_schedule_series = self.parseq_to_anim_series('seed')
105
+ self.fov_series = self.parseq_to_anim_series('fov')
106
+ self.near_series = self.parseq_to_anim_series('near')
107
+ self.far_series = self.parseq_to_anim_series('far')
108
+ self.prompts = self.parseq_to_anim_series('deforum_prompt') # formatted as "{positive} --neg {negative}"
109
+ self.subseed_schedule_series = self.parseq_to_anim_series('subseed')
110
+ self.subseed_strength_schedule_series = self.parseq_to_anim_series('subseed_strength')
111
+ self.kernel_schedule_series = self.parseq_to_anim_series('antiblur_kernel')
112
+ self.sigma_schedule_series = self.parseq_to_anim_series('antiblur_sigma')
113
+ self.amount_schedule_series = self.parseq_to_anim_series('antiblur_amount')
114
+ self.threshold_schedule_series = self.parseq_to_anim_series('antiblur_threshold')
115
+
116
+ def print_parseq_table(self):
117
+ from rich.table import Table
118
+ from rich import box
119
+ table = Table(padding=0, box=box.ROUNDED, show_lines=True)
120
+ table.add_column("", style="white bold")
121
+ table.add_column("Parseq", style="cyan")
122
+ table.add_column("Deforum", style="green")
123
+
124
+ table.add_row("Fields", '\n'.join(self.managed_fields()), '\n'.join(self.unmanaged_fields()))
125
+ table.add_row("Prompts", "✅" if self.manages_prompts() else "❌", "✅" if not self.manages_prompts() else "❌")
126
+ table.add_row("Frames", str(len(self.rendered_frames)), str(self.required_frames) + (" ⚠️" if self.required_frames != len(self.rendered_frames) else ""))
127
+ table.add_row("FPS", str(self.config_output_fps), str(self.required_fps) + (" ⚠️" if self.required_fps != self.config_output_fps else ""))
128
+
129
+ console.print("\nUse this table to validate your Parseq & Deforum setup:")
130
+ console.print(table)
131
+
132
+ def manages_prompts(self):
133
+ return 'deforum_prompt' in self.rendered_frames[0].keys()
134
+
135
+ def managed_fields(self):
136
+ return [field for field in self.rendered_frames[0].keys()
137
+ if (field not in ['frame', 'deforum_prompt']
138
+ and not field.endswith('_delta')
139
+ and not field.endswith('_pc'))]
140
+
141
+ def unmanaged_fields(self):
142
+ managed_fields = self.managed_fields()
143
+ all_fields = [self.strip_suffixes(property) for property, _ in vars(self.default_anim_keys).items() if property not in ['fi'] and not property.startswith('_')]
144
+ return [field for field in all_fields if field not in managed_fields]
145
+
146
+
147
+ def get_max(self, seriesName):
148
+ return max(self.rendered_frames, key=itemgetter(seriesName))[seriesName]
149
+
150
+ def parseq_to_anim_series(self, seriesName):
151
+
152
+ # Check if the value is present in the first frame of the JSON data. If not, assume it's undefined.
153
+ # The Parseq contract is that the first frame (at least) must define values for all fields.
154
+ try:
155
+ if self.rendered_frames[0][seriesName] is not None:
156
+ logging.debug(f"Found {seriesName} in first frame of Parseq data. Assuming it's defined.")
157
+ except KeyError:
158
+ return None
159
+
160
+ key_frame_series = pd.Series([np.nan for a in range(self.required_frames)])
161
+
162
+ for frame in self.rendered_frames:
163
+ frame_idx = frame['frame']
164
+ if frame_idx < self.required_frames:
165
+ if not np.isnan(key_frame_series[frame_idx]):
166
+ logging.warning(f"Duplicate frame definition {frame_idx} detected for data {seriesName}. Latest wins.")
167
+ key_frame_series[frame_idx] = frame[seriesName]
168
+
169
+ # If the animation will have more frames than Parseq defines,
170
+ # duplicate final value to match the required frame count.
171
+ while (frame_idx < self.required_frames):
172
+ key_frame_series[frame_idx] = operator.itemgetter(-1)(self.rendered_frames)[seriesName]
173
+ frame_idx += 1
174
+
175
+ return key_frame_series
176
+
177
+ # fallback to anim_args if the series is not defined in the Parseq data
178
+ def __getattribute__(inst, name):
179
+ try:
180
+ definedField = super(ParseqAnimKeys, inst).__getattribute__(name)
181
+ except AttributeError:
182
+ # No field with this name has been explicitly extracted from the JSON data.
183
+ # It must be a new parameter. Let's see if it's in the raw JSON.
184
+
185
+ parseqName = inst.strip_suffixes(name)
186
+
187
+ # returns None if not defined in Parseq JSON data
188
+ definedField = inst.parseq_to_anim_series(parseqName)
189
+ if (definedField is not None):
190
+ # add the field to the instance so we don't compute it again.
191
+ setattr(inst, name, definedField)
192
+
193
+ if (definedField is not None):
194
+ return definedField
195
+ else:
196
+ logging.debug(f"Data for {name} not defined in Parseq data. Falling back to standard Deforum values.")
197
+ return getattr(inst.default_anim_keys, name)
198
+
199
+
200
+ # parseq doesn't use _series, _schedule or _schedule_series suffixes in the
201
+ # JSON data - remove them.
202
+ def strip_suffixes(self, name):
203
+ strippableSuffixes = ['_series', '_schedule']
204
+ parseqName = name
205
+ while any(parseqName.endswith(suffix) for suffix in strippableSuffixes):
206
+ for suffix in strippableSuffixes:
207
+ if parseqName.endswith(suffix):
208
+ parseqName = parseqName[:-len(suffix)]
209
+ return parseqName
210
+
scripts/deforum_helpers/parseq_adapter_test.py ADDED
@@ -0,0 +1,157 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ ##
19
+ # From /scripts directory, run like: python -m unittest deforum_helpers.parseq_adapter_test
20
+ ##
21
+
22
+ import unittest
23
+ from .parseq_adapter import ParseqAnimKeys
24
+ from .animation_key_frames import DeformAnimKeys
25
+ from unittest.mock import patch
26
+ from unittest.mock import MagicMock, PropertyMock
27
+
28
+ from types import SimpleNamespace
29
+
30
+ class TestParseqAnimKeys(unittest.TestCase):
31
+
32
+ @patch('deforum_helpers.parseq_adapter.DeformAnimKeys')
33
+ def test_withprompt(self, mock_deformanimkeys):
34
+ parseq_args = SimpleNamespace(parseq_use_deltas=True, parseq_manifest="""
35
+ {
36
+ "options": {
37
+ "output_fps": 30
38
+ },
39
+ "rendered_frames": [
40
+ {
41
+ "frame": 0,
42
+ "deforum_prompt": "blah"
43
+ },
44
+ {
45
+ "frame": 1,
46
+ "deforum_prompt": "blah"
47
+ }
48
+ ]
49
+ }
50
+ """)
51
+ anim_args = SimpleNamespace(max_frames=2)
52
+ video_args = SimpleNamespace(fps=30)
53
+ parseq_anim_keys = ParseqAnimKeys(parseq_args, anim_args, video_args)
54
+ self.assertTrue(parseq_anim_keys.manages_prompts())
55
+
56
+
57
+ @patch('deforum_helpers.parseq_adapter.DeformAnimKeys')
58
+ def test_withoutprompt(self, mock_deformanimkeys):
59
+ parseq_args = SimpleNamespace(parseq_use_deltas=True, parseq_manifest="""
60
+ {
61
+ "options": {
62
+ "output_fps": 30
63
+ },
64
+ "rendered_frames": [
65
+ {
66
+ "frame": 0
67
+ },
68
+ {
69
+ "frame": 1
70
+ }
71
+ ]
72
+ }
73
+ """)
74
+ anim_args = SimpleNamespace(max_frames=2)
75
+ video_args = SimpleNamespace(fps=30)
76
+ parseq_anim_keys = ParseqAnimKeys(parseq_args, anim_args, video_args)
77
+ self.assertFalse(parseq_anim_keys.manages_prompts())
78
+
79
+ @patch('deforum_helpers.parseq_adapter.DeformAnimKeys')
80
+ def test_usedelta(self, mock_deformanimkeys):
81
+ parseq_args = SimpleNamespace(parseq_use_deltas=True, parseq_manifest="""
82
+ {
83
+ "options": {
84
+ "output_fps": 30
85
+ },
86
+ "rendered_frames": [
87
+ {
88
+ "frame": 0,
89
+ "angle": 90,
90
+ "angle_delta": 90
91
+ },
92
+ {
93
+ "frame": 1,
94
+ "angle": 180,
95
+ "angle_delta": 90
96
+ }
97
+ ]
98
+ }
99
+ """)
100
+ anim_args = SimpleNamespace(max_frames=2)
101
+ video_args = SimpleNamespace(fps=30)
102
+ parseq_anim_keys = ParseqAnimKeys(parseq_args, anim_args, video_args)
103
+ self.assertEqual(parseq_anim_keys.angle_series[1], 90)
104
+
105
+ @patch('deforum_helpers.parseq_adapter.DeformAnimKeys')
106
+ def test_usenondelta(self, mock_deformanimkeys):
107
+ parseq_args = SimpleNamespace(parseq_use_deltas=False, parseq_manifest="""
108
+ {
109
+ "options": {
110
+ "output_fps": 30
111
+ },
112
+ "rendered_frames": [
113
+ {
114
+ "frame": 0,
115
+ "angle": 90,
116
+ "angle_delta": 90
117
+ },
118
+ {
119
+ "frame": 1,
120
+ "angle": 180,
121
+ "angle_delta": 90
122
+ }
123
+ ]
124
+ }
125
+ """)
126
+ anim_args = SimpleNamespace(max_frames=2)
127
+ video_args = SimpleNamespace(fps=30)
128
+ parseq_anim_keys = ParseqAnimKeys(parseq_args, anim_args, video_args)
129
+ self.assertEqual(parseq_anim_keys.angle_series[1], 180)
130
+
131
+ @patch('deforum_helpers.parseq_adapter.DeformAnimKeys')
132
+ def test_fallbackonundefined(self, mock_deformanimkeys):
133
+ parseq_args = SimpleNamespace(parseq_use_deltas=False, parseq_manifest="""
134
+ {
135
+ "options": {
136
+ "output_fps": 30
137
+ },
138
+ "rendered_frames": [
139
+ {
140
+ "frame": 0
141
+ },
142
+ {
143
+ "frame": 1
144
+ }
145
+ ]
146
+ }
147
+ """)
148
+
149
+ anim_args = SimpleNamespace(max_frames=1)
150
+ video_args = SimpleNamespace(fps=20)
151
+ parseq_anim_keys = ParseqAnimKeys(parseq_args, anim_args, video_args)
152
+ #TODO - this is a hacky check to make sure we're falling back to the mock.
153
+ #There must be a better way to inject an expected value via patch and check for that...
154
+ self.assertRegex(str(parseq_anim_keys.angle_series[0]), r'MagicMock')
155
+
156
+ if __name__ == '__main__':
157
+ unittest.main()
scripts/deforum_helpers/prompt.py ADDED
@@ -0,0 +1,161 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import re
19
+ import numexpr
20
+
21
+ def check_is_number(value):
22
+ float_pattern = r'^(?=.)([+-]?([0-9]*)(\.([0-9]+))?)$'
23
+ return re.match(float_pattern, value)
24
+
25
+ def parse_weight(match, frame = 0, max_frames = 0)->float:
26
+ w_raw = match.group("weight")
27
+ max_f = max_frames # this line must stay: numexpr references max_f even though it looks unused
28
+ if w_raw is None:
29
+ return 1
30
+ if check_is_number(w_raw):
31
+ return float(w_raw)
32
+ else:
33
+ t = frame
34
+ if len(w_raw) < 3:
35
+ print('the value inside the backticks is too short to be a math expression')
36
+ return 1
37
+ return float(numexpr.evaluate(w_raw[1:-1]))
38
+
39
+ def split_weighted_subprompts(text, frame = 0, max_frames = 0):
40
+ """
41
+ splits the prompt based on deforum webui implementation, moved from generate.py
42
+ """
43
+ math_parser = re.compile("""
44
+ (?P<weight>(
45
+ `[\S\s]*?`# a math function wrapped in `-characters
46
+ ))
47
+ """, re.VERBOSE)
48
+
49
+ parsed_prompt = re.sub(math_parser, lambda m: str(parse_weight(m, frame)), text)
50
+
51
+ negative_prompts = []
52
+ positive_prompts = []
53
+
54
+ prompt_split = parsed_prompt.split("--neg")
55
+ if len(prompt_split) > 1:
56
+ positive_prompts, negative_prompts = parsed_prompt.split("--neg") #TODO: add --neg to vanilla Deforum for compat
57
+ else:
58
+ positive_prompts = prompt_split[0]
59
+ negative_prompts = ""
60
+
61
+ return positive_prompts, negative_prompts
62
+
63
+ def interpolate_prompts(animation_prompts, max_frames):
64
+ import numpy as np
65
+ import pandas as pd
66
+ # Get prompts sorted by keyframe
67
+ max_f = max_frames
68
+ parsed_animation_prompts = {}
69
+ for key, value in animation_prompts.items():
70
+ if check_is_number(key):# default case 0:(1 + t %5), 30:(5-t%2)
71
+ parsed_animation_prompts[key] = value
72
+ else:# math on the left hand side case 0:(1 + t %5), maxKeyframes/2:(5-t%2)
73
+ parsed_animation_prompts[int(numexpr.evaluate(key))] = value
74
+
75
+ sorted_prompts = sorted(parsed_animation_prompts.items(), key=lambda item: int(item[0]))
76
+
77
+ # Setup container for interpolated prompts
78
+ prompt_series = pd.Series([np.nan for a in range(max_frames)])
79
+
80
+ # For every keyframe prompt except the last
81
+ for i in range(0,len(sorted_prompts)-1):
82
+ # Get current and next keyframe
83
+ current_frame = int(sorted_prompts[i][0])
84
+ next_frame = int(sorted_prompts[i+1][0])
85
+
86
+ # Ensure there's no weird ordering issues or duplication in the animation prompts
87
+ # (unlikely because we sort above, and the json parser will strip dupes)
88
+ if current_frame>=next_frame:
89
+ print(f"WARNING: Sequential prompt keyframes {i}:{current_frame} and {i+1}:{next_frame} are not monotonously increasing; skipping interpolation.")
90
+ continue
91
+
92
+ # Get current and next keyframes' positive and negative prompts (if any)
93
+ current_prompt = sorted_prompts[i][1]
94
+ next_prompt = sorted_prompts[i+1][1]
95
+ current_positive, current_negative, *_ = current_prompt.split("--neg") + [""]  # default the negative to an empty string so the len() checks below never see None
96
+ next_positive, next_negative, *_ = next_prompt.split("--neg") + [""]
97
+ # Calculate how much to shift the weight from current to next prompt at each frame
98
+ weight_step = 1/(next_frame-current_frame)
99
+
100
+ # Apply weighted prompt interpolation for each frame between current and next keyframe
101
+ # using the syntax: prompt1 :weight1 AND prompt1 :weight2 --neg nprompt1 :weight1 AND nprompt1 :weight2
102
+ # (See: https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#composable-diffusion )
103
+ for f in range(current_frame,next_frame):
104
+ next_weight = weight_step * (f-current_frame)
105
+ current_weight = 1 - next_weight
106
+
107
+ # We will build the prompt incrementally depending on which prompts are present
108
+ prompt_series[f] = ''
109
+
110
+ # Cater for the case where neither, either or both current & next have positive prompts:
111
+ if current_positive:
112
+ prompt_series[f] += f" ({current_positive}):{current_weight}"
113
+ if current_positive and next_positive:
114
+ prompt_series[f] += f" AND "
115
+ if next_positive:
116
+ prompt_series[f] += f" ({next_positive}):{next_weight}"
117
+
118
+ # Cater for the case where neither, either or both current & next have negative prompts:
119
+ if len(current_negative) > 1 or len(next_negative) > 1:
120
+ prompt_series[f] += " --neg "
121
+ if len(current_negative) > 1:
122
+ prompt_series[f] += f" ({current_negative}):{current_weight}"
123
+ if len(current_negative) > 1 and len(next_negative) > 1:
124
+ prompt_series[f] += f" AND "
125
+ if len(next_negative) > 1:
126
+ prompt_series[f] += f" ({next_negative}):{next_weight}"
127
+
128
+ # Set explicitly declared keyframe prompts (overwriting interpolated values at the keyframe idx). This ensures:
129
+ # - That final prompt is set, and
130
+ # - Gives us a chance to emit warnings if any keyframe prompts are already using composable diffusion
131
+ for i, prompt in parsed_animation_prompts.items():
132
+ prompt_series[int(i)] = prompt
133
+ if ' AND ' in prompt:
134
+ print(f"WARNING: keyframe {i}'s prompt is using composable diffusion (aka the 'AND' keyword). This will cause unexpected behaviour with interpolation.")
135
+
136
+ # Return the filled series, in case max_frames is greater than the last keyframe or any ranges were skipped.
137
+ return prompt_series.ffill().bfill()
138
+
139
+ def prepare_prompt(prompt_series, max_frames, seed, frame_idx):
140
+ max_f = max_frames - 1
141
+ pattern = r'`.*?`'
142
+ regex = re.compile(pattern)
143
+ prompt_parsed = prompt_series
144
+ for match in regex.finditer(prompt_parsed):
145
+ matched_string = match.group(0)
146
+ parsed_string = matched_string.replace('t', f'{frame_idx}').replace("max_f" , f"{max_f}").replace('`','')
147
+ parsed_value = numexpr.evaluate(parsed_string)
148
+ prompt_parsed = prompt_parsed.replace(matched_string, str(parsed_value))
149
+
150
+ prompt_to_print, *after_neg = prompt_parsed.strip().split("--neg")
151
+ prompt_to_print = prompt_to_print.strip()
152
+ after_neg = "".join(after_neg).strip()
153
+
154
+ print(f"\033[32mSeed: \033[0m{seed}")
155
+ print(f"\033[35mPrompt: \033[0m{prompt_to_print}")
156
+ if after_neg and after_neg.strip():
157
+ print(f"\033[91mNeg Prompt: \033[0m{after_neg}")
158
+ prompt_to_print += f"--neg {after_neg}"
159
+
160
+ # set value back into the prompt
161
+ return prompt_to_print
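
prompt.py only depends on re, numexpr, numpy and pandas, so interpolate_prompts above can be tried standalone. A short sketch, run from the extension's scripts directory (the same convention as the unit test earlier in this diff): two keyframes are blended over five frames, and the in-between frames use the webui composable-diffusion 'AND' syntax described in the comments:

from deforum_helpers.prompt import interpolate_prompts

animation_prompts = {
    "0": "a forest at dawn --neg blurry",
    "4": "a forest at night --neg blurry",
}
series = interpolate_prompts(animation_prompts, max_frames=5)
for frame_idx, prompt in series.items():
    print(frame_idx, prompt)
# frames 0 and 4 keep their keyframe prompts verbatim; frames 1-3 mix the two positives
# (and the two negatives) with current/next weights of 0.75/0.25, 0.5/0.5 and 0.25/0.75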
scripts/deforum_helpers/render.py ADDED
@@ -0,0 +1,627 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ import pandas as pd
20
+ import cv2
21
+ import numpy as np
22
+ import numexpr
23
+ import gc
24
+ import random
25
+ import PIL
26
+ import time
27
+ from PIL import Image, ImageOps
28
+ from .generate import generate, isJson
29
+ from .noise import add_noise
30
+ from .animation import anim_frame_warp
31
+ from .animation_key_frames import DeformAnimKeys, LooperAnimKeys
32
+ from .video_audio_utilities import get_frame_name, get_next_frame
33
+ from .depth import DepthModel
34
+ from .colors import maintain_colors
35
+ from .parseq_adapter import ParseqAnimKeys
36
+ from .seed import next_seed
37
+ from .image_sharpening import unsharp_mask
38
+ from .load_images import get_mask, load_img, load_image, get_mask_from_file
39
+ from .hybrid_video import (
40
+ hybrid_generation, hybrid_composite, get_matrix_for_hybrid_motion, get_matrix_for_hybrid_motion_prev, get_flow_for_hybrid_motion,get_flow_for_hybrid_motion_prev, image_transform_ransac, image_transform_optical_flow, get_flow_from_images, abs_flow_to_rel_flow, rel_flow_to_abs_flow)
41
+ from .save_images import save_image
42
+ from .composable_masks import compose_mask_with_check
43
+ from .settings import save_settings_from_animation_run
44
+ from .deforum_controlnet import unpack_controlnet_vids, is_controlnet_enabled
45
+ from .subtitle_handler import init_srt_file, write_frame_subtitle, format_animation_params
46
+ from .resume import get_resume_vars
47
+ from .masks import do_overlay_mask
48
+ from .prompt import prepare_prompt
49
+ from modules.shared import opts, cmd_opts, state, sd_model
50
+ from modules import lowvram, devices, sd_hijack
51
+ from .RAFT import RAFT
52
+
53
+ def render_animation(args, anim_args, video_args, parseq_args, loop_args, controlnet_args, root):
54
+
55
+ if opts.data.get("deforum_save_gen_info_as_srt", False): # create .srt file and set timeframe mechanism using FPS
56
+ srt_filename = os.path.join(args.outdir, f"{args.timestring}.srt")
57
+ srt_frame_duration = init_srt_file(srt_filename, video_args.fps)
58
+
59
+ if anim_args.animation_mode in ['2D','3D']:
60
+ # handle hybrid video generation
61
+ if anim_args.hybrid_composite != 'None' or anim_args.hybrid_motion in ['Affine', 'Perspective', 'Optical Flow']:
62
+ args, anim_args, inputfiles = hybrid_generation(args, anim_args, root)
63
+ # path required by hybrid functions, even if hybrid_comp_save_extra_frames is False
64
+ hybrid_frame_path = os.path.join(args.outdir, 'hybridframes')
65
+ # initialize prev_flow
66
+ if anim_args.hybrid_motion == 'Optical Flow':
67
+ prev_flow = None
68
+
69
+ if loop_args.use_looper:
70
+ print("Using Guided Images mode: seed_behavior will be set to 'schedule' and 'strength_0_no_init' to False")
71
+ if args.strength == 0:
72
+ raise RuntimeError("Strength needs to be greater than 0 in Init tab")
73
+ args.strength_0_no_init = False
74
+ args.seed_behavior = "schedule"
75
+ if not isJson(loop_args.init_images):
76
+ raise RuntimeError("The images set for use with keyframe-guidance are not in a proper JSON format")
77
+
78
+ # handle controlnet video input frames generation
79
+ if is_controlnet_enabled(controlnet_args):
80
+ unpack_controlnet_vids(args, anim_args, controlnet_args)
81
+
82
+ # use parseq if manifest is provided
83
+ use_parseq = parseq_args.parseq_manifest != None and parseq_args.parseq_manifest.strip()
84
+ # expand key frame strings to values
85
+ keys = DeformAnimKeys(anim_args, args.seed) if not use_parseq else ParseqAnimKeys(parseq_args, anim_args, video_args)
86
+ loopSchedulesAndData = LooperAnimKeys(loop_args, anim_args, args.seed)
87
+
88
+ # create output folder for the batch
89
+ os.makedirs(args.outdir, exist_ok=True)
90
+ print(f"Saving animation frames to:\n{args.outdir}")
91
+
92
+ # save settings.txt file for the current run
93
+ save_settings_from_animation_run(args, anim_args, parseq_args, loop_args, controlnet_args, video_args, root)
94
+
95
+ # resume from timestring
96
+ if anim_args.resume_from_timestring:
97
+ args.timestring = anim_args.resume_timestring
98
+
99
+ # Always enable pseudo-3d with parseq. No need for an extra toggle:
100
+ # Whether it's used or not in practice is defined by the schedules
101
+ if use_parseq:
102
+ anim_args.flip_2d_perspective = True
103
+
104
+ # expand prompts out to per-frame
105
+ if use_parseq and keys.manages_prompts():
106
+ prompt_series = keys.prompts
107
+ else:
108
+ prompt_series = pd.Series([np.nan for a in range(anim_args.max_frames)])
109
+ for i, prompt in root.animation_prompts.items():
110
+ if str(i).isdigit():
111
+ prompt_series[int(i)] = prompt
112
+ else:
113
+ prompt_series[int(numexpr.evaluate(i))] = prompt
114
+ prompt_series = prompt_series.ffill().bfill()
115
+
116
+ # check for video inits
117
+ using_vid_init = anim_args.animation_mode == 'Video Input'
118
+
119
+ # load depth model for 3D
120
+ predict_depths = (anim_args.animation_mode == '3D' and anim_args.use_depth_warping) or anim_args.save_depth_maps
121
+ predict_depths = predict_depths or (anim_args.hybrid_composite and anim_args.hybrid_comp_mask_type in ['Depth','Video Depth'])
122
+ if predict_depths:
123
+ keep_in_vram = opts.data.get("deforum_keep_3d_models_in_vram")
124
+
125
+ device = ('cpu' if cmd_opts.lowvram or cmd_opts.medvram else root.device)
126
+ depth_model = DepthModel(root.models_path, device, root.half_precision, keep_in_vram=keep_in_vram, depth_algorithm=anim_args.depth_algorithm, Width=args.W, Height=args.H, midas_weight=anim_args.midas_weight)
127
+
128
+ # depth-based hybrid composite mask requires saved depth maps
129
+ if anim_args.hybrid_composite != 'None' and anim_args.hybrid_comp_mask_type =='Depth':
130
+ anim_args.save_depth_maps = True
131
+ else:
132
+ depth_model = None
133
+ anim_args.save_depth_maps = False
134
+
135
+ raft_model = None
136
+ load_raft = (anim_args.optical_flow_cadence == "RAFT" and int(anim_args.diffusion_cadence) > 1) or \
137
+ (anim_args.hybrid_motion == "Optical Flow" and anim_args.hybrid_flow_method == "RAFT") or \
138
+ (anim_args.optical_flow_redo_generation == "RAFT")
139
+ if load_raft:
140
+ print("Loading RAFT model...")
141
+ raft_model = RAFT()
142
+
143
+ # state for interpolating between diffusion steps
144
+ turbo_steps = 1 if using_vid_init else int(anim_args.diffusion_cadence)
145
+ turbo_prev_image, turbo_prev_frame_idx = None, 0
146
+ turbo_next_image, turbo_next_frame_idx = None, 0
147
+
148
+ # initialize vars
149
+ prev_img = None
150
+ color_match_sample = None
151
+ start_frame = 0
152
+
153
+ # resume animation (requires at least two frames - see function)
154
+ if anim_args.resume_from_timestring:
155
+ # determine last frame and frame to start on
156
+ prev_frame, next_frame, prev_img, next_img = get_resume_vars(
157
+ folder=args.outdir,
158
+ timestring=anim_args.resume_timestring,
159
+ cadence=turbo_steps
160
+ )
161
+
162
+ # set up turbo step vars
163
+ if turbo_steps > 1:
164
+ turbo_prev_image, turbo_prev_frame_idx = prev_img, prev_frame
165
+ turbo_next_image, turbo_next_frame_idx = next_img, next_frame
166
+
167
+ # advance start_frame to next frame
168
+ start_frame = next_frame + 1
169
+
170
+ frame_idx = start_frame
171
+
172
+ # reset the mask vals as they are overwritten in the compose_mask algorithm
173
+ mask_vals = {}
174
+ noise_mask_vals = {}
175
+
176
+ mask_vals['everywhere'] = Image.new('1', (args.W, args.H), 1)
177
+ noise_mask_vals['everywhere'] = Image.new('1', (args.W, args.H), 1)
178
+
179
+ mask_image = None
180
+
181
+ if args.use_init and args.init_image != None and args.init_image != '':
182
+ _, mask_image = load_img(args.init_image,
183
+ shape=(args.W, args.H),
184
+ use_alpha_as_mask=args.use_alpha_as_mask)
185
+ mask_vals['video_mask'] = mask_image
186
+ noise_mask_vals['video_mask'] = mask_image
187
+
188
+ # Grab the first frame masks since they won't be provided until the next frame
189
+ # Video mask overrides the init image mask, also, won't be searching for init_mask if use_mask_video is set
190
+ # Made to solve https://github.com/deforum-art/deforum-for-automatic1111-webui/issues/386
191
+ if anim_args.use_mask_video:
192
+
193
+ args.mask_file = get_mask_from_file(get_next_frame(args.outdir, anim_args.video_mask_path, frame_idx, True), args)
194
+ args.noise_mask = get_mask_from_file(get_next_frame(args.outdir, anim_args.video_mask_path, frame_idx, True), args)
195
+
196
+ mask_vals['video_mask'] = get_mask_from_file(get_next_frame(args.outdir, anim_args.video_mask_path, frame_idx, True), args)
197
+ noise_mask_vals['video_mask'] = get_mask_from_file(get_next_frame(args.outdir, anim_args.video_mask_path, frame_idx, True), args)
198
+ elif mask_image is None and args.use_mask:
199
+ mask_vals['video_mask'] = get_mask(args)
200
+ noise_mask_vals['video_mask'] = get_mask(args) # TODO?: add a different default noise mask
201
+
202
+ # get color match for 'Image' color coherence only once, before loop
203
+ if anim_args.color_coherence == 'Image':
204
+ color_match_sample = load_image(anim_args.color_coherence_image_path)
205
+ color_match_sample = color_match_sample.resize((args.W, args.H), PIL.Image.LANCZOS)
206
+ color_match_sample = cv2.cvtColor(np.array(color_match_sample), cv2.COLOR_RGB2BGR)
207
+
208
+ #Webui
209
+ state.job_count = anim_args.max_frames
210
+
211
+ while frame_idx < anim_args.max_frames:
212
+ #Webui
213
+
214
+ state.job = f"frame {frame_idx + 1}/{anim_args.max_frames}"
215
+ state.job_no = frame_idx + 1
216
+
217
+ if state.skipped:
218
+ print("\n** PAUSED **")
219
+ state.skipped = False
220
+ while not state.skipped:
221
+ time.sleep(0.1)
222
+ print("** RESUMING **")
223
+
224
+ print(f"\033[36mAnimation frame: \033[0m{frame_idx}/{anim_args.max_frames} ")
225
+
226
+ noise = keys.noise_schedule_series[frame_idx]
227
+ strength = keys.strength_schedule_series[frame_idx]
228
+ scale = keys.cfg_scale_schedule_series[frame_idx]
229
+ contrast = keys.contrast_schedule_series[frame_idx]
230
+ kernel = int(keys.kernel_schedule_series[frame_idx])
231
+ sigma = keys.sigma_schedule_series[frame_idx]
232
+ amount = keys.amount_schedule_series[frame_idx]
233
+ threshold = keys.threshold_schedule_series[frame_idx]
234
+ cadence_flow_factor = keys.cadence_flow_factor_schedule_series[frame_idx]
235
+ redo_flow_factor = keys.redo_flow_factor_schedule_series[frame_idx]
236
+ hybrid_comp_schedules = {
237
+ "alpha": keys.hybrid_comp_alpha_schedule_series[frame_idx],
238
+ "mask_blend_alpha": keys.hybrid_comp_mask_blend_alpha_schedule_series[frame_idx],
239
+ "mask_contrast": keys.hybrid_comp_mask_contrast_schedule_series[frame_idx],
240
+ "mask_auto_contrast_cutoff_low": int(keys.hybrid_comp_mask_auto_contrast_cutoff_low_schedule_series[frame_idx]),
241
+ "mask_auto_contrast_cutoff_high": int(keys.hybrid_comp_mask_auto_contrast_cutoff_high_schedule_series[frame_idx]),
242
+ "flow_factor": keys.hybrid_flow_factor_schedule_series[frame_idx]
243
+ }
244
+ scheduled_sampler_name = None
245
+ scheduled_clipskip = None
246
+ scheduled_noise_multiplier = None
247
+ scheduled_ddim_eta = None
248
+ scheduled_ancestral_eta = None
249
+
250
+ mask_seq = None
251
+ noise_mask_seq = None
252
+ if anim_args.enable_steps_scheduling and keys.steps_schedule_series[frame_idx] is not None:
253
+ args.steps = int(keys.steps_schedule_series[frame_idx])
254
+ if anim_args.enable_sampler_scheduling and keys.sampler_schedule_series[frame_idx] is not None:
255
+ scheduled_sampler_name = keys.sampler_schedule_series[frame_idx].casefold()
256
+ if anim_args.enable_clipskip_scheduling and keys.clipskip_schedule_series[frame_idx] is not None:
257
+ scheduled_clipskip = int(keys.clipskip_schedule_series[frame_idx])
258
+ if anim_args.enable_noise_multiplier_scheduling and keys.noise_multiplier_schedule_series[frame_idx] is not None:
259
+ scheduled_noise_multiplier = float(keys.noise_multiplier_schedule_series[frame_idx])
260
+ if anim_args.enable_ddim_eta_scheduling and keys.ddim_eta_schedule_series[frame_idx] is not None:
261
+ scheduled_ddim_eta = float(keys.ddim_eta_schedule_series[frame_idx])
262
+ if anim_args.enable_ancestral_eta_scheduling and keys.ancestral_eta_schedule_series[frame_idx] is not None:
263
+ scheduled_ancestral_eta = float(keys.ancestral_eta_schedule_series[frame_idx])
264
+ if args.use_mask and keys.mask_schedule_series[frame_idx] is not None:
265
+ mask_seq = keys.mask_schedule_series[frame_idx]
266
+ if anim_args.use_noise_mask and keys.noise_mask_schedule_series[frame_idx] is not None:
267
+ noise_mask_seq = keys.noise_mask_schedule_series[frame_idx]
268
+
269
+ if args.use_mask and not anim_args.use_noise_mask:
270
+ noise_mask_seq = mask_seq
271
+
272
+ depth = None
273
+
274
+ if anim_args.animation_mode == '3D' and (cmd_opts.lowvram or cmd_opts.medvram):
275
+ # Unload the main checkpoint and load the depth model
276
+ lowvram.send_everything_to_cpu()
277
+ sd_hijack.model_hijack.undo_hijack(sd_model)
278
+ devices.torch_gc()
279
+ if predict_depths: depth_model.to(root.device)
280
+
281
+ if turbo_steps == 1 and opts.data.get("deforum_save_gen_info_as_srt"):
282
+ params_string = format_animation_params(keys, prompt_series, frame_idx)
283
+ write_frame_subtitle(srt_filename, frame_idx, srt_frame_duration, f"F#: {frame_idx}; Cadence: false; Seed: {args.seed}; {params_string}")
284
+ params_string = None
285
+
286
+ # emit in-between frames
287
+ if turbo_steps > 1:
288
+ tween_frame_start_idx = max(start_frame, frame_idx-turbo_steps)
289
+ cadence_flow = None
290
+ for tween_frame_idx in range(tween_frame_start_idx, frame_idx):
291
+ # update progress during cadence
292
+ state.job = f"frame {tween_frame_idx + 1}/{anim_args.max_frames}"
293
+ state.job_no = tween_frame_idx + 1
294
+ # cadence vars
295
+ tween = float(tween_frame_idx - tween_frame_start_idx + 1) / float(frame_idx - tween_frame_start_idx)
296
+ advance_prev = turbo_prev_image is not None and tween_frame_idx > turbo_prev_frame_idx
297
+ advance_next = tween_frame_idx > turbo_next_frame_idx
298
+
299
+ # optical flow cadence setup before animation warping
300
+ if anim_args.animation_mode in ['2D', '3D'] and anim_args.optical_flow_cadence != 'None':
301
+ if keys.strength_schedule_series[tween_frame_start_idx] > 0:
302
+ if cadence_flow is None and turbo_prev_image is not None and turbo_next_image is not None:
303
+ cadence_flow = get_flow_from_images(turbo_prev_image, turbo_next_image, anim_args.optical_flow_cadence, raft_model) / 2
304
+ turbo_next_image = image_transform_optical_flow(turbo_next_image, -cadence_flow, 1)
305
+
306
+ if opts.data.get("deforum_save_gen_info_as_srt"):
307
+ params_string = format_animation_params(keys, prompt_series, tween_frame_idx)
308
+ write_frame_subtitle(srt_filename, tween_frame_idx, srt_frame_duration, f"F#: {tween_frame_idx}; Cadence: {tween < 1.0}; Seed: {args.seed}; {params_string}")
309
+ params_string = None
310
+
311
+ print(f"Creating in-between {'' if cadence_flow is None else anim_args.optical_flow_cadence + ' optical flow '}cadence frame: {tween_frame_idx}; tween:{tween:0.2f};")
312
+
313
+ if depth_model is not None:
314
+ assert(turbo_next_image is not None)
315
+ depth = depth_model.predict(turbo_next_image, anim_args.midas_weight, root.half_precision)
316
+
317
+ if advance_prev:
318
+ turbo_prev_image, _ = anim_frame_warp(turbo_prev_image, args, anim_args, keys, tween_frame_idx, depth_model, depth=depth, device=root.device, half_precision=root.half_precision)
319
+ if advance_next:
320
+ turbo_next_image, _ = anim_frame_warp(turbo_next_image, args, anim_args, keys, tween_frame_idx, depth_model, depth=depth, device=root.device, half_precision=root.half_precision)
321
+
322
+ # hybrid video motion - warps turbo_prev_image or turbo_next_image to match motion
323
+ if tween_frame_idx > 0:
324
+ if anim_args.hybrid_motion in ['Affine', 'Perspective']:
325
+ if anim_args.hybrid_motion_use_prev_img:
326
+ matrix = get_matrix_for_hybrid_motion_prev(tween_frame_idx-1, (args.W, args.H), inputfiles, prev_img, anim_args.hybrid_motion)
327
+ if advance_prev:
328
+ turbo_prev_image = image_transform_ransac(turbo_prev_image, matrix, anim_args.hybrid_motion)
329
+ if advance_next:
330
+ turbo_next_image = image_transform_ransac(turbo_next_image, matrix, anim_args.hybrid_motion)
331
+ else:
332
+ matrix = get_matrix_for_hybrid_motion(tween_frame_idx-1, (args.W, args.H), inputfiles, anim_args.hybrid_motion)
333
+ if advance_prev:
334
+ turbo_prev_image = image_transform_ransac(turbo_prev_image, matrix, anim_args.hybrid_motion)
335
+ if advance_next:
336
+ turbo_next_image = image_transform_ransac(turbo_next_image, matrix, anim_args.hybrid_motion)
337
+ if anim_args.hybrid_motion in ['Optical Flow']:
338
+ if anim_args.hybrid_motion_use_prev_img:
339
+ flow = get_flow_for_hybrid_motion_prev(tween_frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, prev_img, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames)
340
+ if advance_prev:
341
+ turbo_prev_image = image_transform_optical_flow(turbo_prev_image, flow, hybrid_comp_schedules['flow_factor'])
342
+ if advance_next:
343
+ turbo_next_image = image_transform_optical_flow(turbo_next_image, flow, hybrid_comp_schedules['flow_factor'])
344
+ prev_flow = flow
345
+ else:
346
+ flow = get_flow_for_hybrid_motion(tween_frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames)
347
+ if advance_prev:
348
+ turbo_prev_image = image_transform_optical_flow(turbo_prev_image, flow, hybrid_comp_schedules['flow_factor'])
349
+ if advance_next:
350
+ turbo_next_image = image_transform_optical_flow(turbo_next_image, flow, hybrid_comp_schedules['flow_factor'])
351
+ prev_flow = flow
352
+
353
+ # do optical flow cadence after animation warping
354
+ if cadence_flow is not None:
355
+ cadence_flow = abs_flow_to_rel_flow(cadence_flow, args.W, args.H)
356
+ cadence_flow, _ = anim_frame_warp(cadence_flow, args, anim_args, keys, tween_frame_idx, depth_model, depth=depth, device=root.device, half_precision=root.half_precision)
357
+ cadence_flow_inc = rel_flow_to_abs_flow(cadence_flow, args.W, args.H) * tween
358
+ if advance_prev:
359
+ turbo_prev_image = image_transform_optical_flow(turbo_prev_image, cadence_flow_inc, cadence_flow_factor)
360
+ if advance_next:
361
+ turbo_next_image = image_transform_optical_flow(turbo_next_image, cadence_flow_inc, cadence_flow_factor)
362
+
363
+ turbo_prev_frame_idx = turbo_next_frame_idx = tween_frame_idx
364
+
365
+ if turbo_prev_image is not None and tween < 1.0:
366
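+ # linear cross-fade between the two warped keyframes; tween ramps toward 1.0 across the cadence gap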
+ img = turbo_prev_image*(1.0-tween) + turbo_next_image*tween
367
+ else:
368
+ img = turbo_next_image
369
+
370
+ # intercept and override to grayscale
371
+ if anim_args.color_force_grayscale:
372
+ img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2GRAY)
373
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
374
+
375
+ # overlay mask
376
+ if args.overlay_mask and (anim_args.use_mask_video or args.use_mask):
377
+ img = do_overlay_mask(args, anim_args, img, tween_frame_idx, True)
378
+
379
+ # get prev_img during cadence
380
+ prev_img = img
381
+
382
+ # current image update for cadence frames (left commented because it doesn't currently update the preview)
383
+ # state.current_image = Image.fromarray(cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB))
384
+
385
+ # saving cadence frames
386
+ filename = f"{args.timestring}_{tween_frame_idx:09}.png"
387
+ cv2.imwrite(os.path.join(args.outdir, filename), img)
388
+ if anim_args.save_depth_maps:
389
+ depth_model.save(os.path.join(args.outdir, f"{args.timestring}_depth_{tween_frame_idx:09}.png"), depth)
390
+
391
+ # get color match for video outside of prev_img conditional
392
+ hybrid_available = anim_args.hybrid_composite != 'None' or anim_args.hybrid_motion in ['Optical Flow', 'Affine', 'Perspective']
393
+ if anim_args.color_coherence == 'Video Input' and hybrid_available:
394
+ if int(frame_idx) % int(anim_args.color_coherence_video_every_N_frames) == 0:
395
+ prev_vid_img = Image.open(os.path.join(args.outdir, 'inputframes', get_frame_name(anim_args.video_init_path) + f"{frame_idx:09}.jpg"))
396
+ prev_vid_img = prev_vid_img.resize((args.W, args.H), PIL.Image.LANCZOS)
397
+ color_match_sample = np.asarray(prev_vid_img)
398
+ color_match_sample = cv2.cvtColor(color_match_sample, cv2.COLOR_RGB2BGR)
399
+
400
+ # after 1st frame, prev_img exists
401
+ if prev_img is not None:
402
+ # apply transforms to previous frame
403
+ prev_img, depth = anim_frame_warp(prev_img, args, anim_args, keys, frame_idx, depth_model, depth=None, device=root.device, half_precision=root.half_precision)
404
+
405
+ # do hybrid compositing before motion
406
+ if anim_args.hybrid_composite == 'Before Motion':
407
+ args, prev_img = hybrid_composite(args, anim_args, frame_idx, prev_img, depth_model, hybrid_comp_schedules, root)
408
+
409
+ # hybrid video motion - warps prev_img to match motion, usually to prepare for compositing
410
+ if anim_args.hybrid_motion in ['Affine', 'Perspective']:
411
+ if anim_args.hybrid_motion_use_prev_img:
412
+ matrix = get_matrix_for_hybrid_motion_prev(frame_idx-1, (args.W, args.H), inputfiles, prev_img, anim_args.hybrid_motion)
413
+ else:
414
+ matrix = get_matrix_for_hybrid_motion(frame_idx-1, (args.W, args.H), inputfiles, anim_args.hybrid_motion)
415
+ prev_img = image_transform_ransac(prev_img, matrix, anim_args.hybrid_motion)
416
+ if anim_args.hybrid_motion in ['Optical Flow']:
417
+ if anim_args.hybrid_motion_use_prev_img:
418
+ flow = get_flow_for_hybrid_motion_prev(frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, prev_img, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames)
419
+ else:
420
+ flow = get_flow_for_hybrid_motion(frame_idx-1, (args.W, args.H), inputfiles, hybrid_frame_path, prev_flow, anim_args.hybrid_flow_method, raft_model, anim_args.hybrid_flow_consistency, anim_args.hybrid_consistency_blur, anim_args.hybrid_comp_save_extra_frames)
421
+ prev_img = image_transform_optical_flow(prev_img, flow, hybrid_comp_schedules['flow_factor'])
422
+ prev_flow = flow
423
+
424
+ # do hybrid compositing after motion (normal)
425
+ if anim_args.hybrid_composite == 'Normal':
426
+ args, prev_img = hybrid_composite(args, anim_args, frame_idx, prev_img, depth_model, hybrid_comp_schedules, root)
427
+
428
+ # apply color matching
429
+ if anim_args.color_coherence != 'None':
430
+ if color_match_sample is None:
431
+ color_match_sample = prev_img.copy()
432
+ else:
433
+ prev_img = maintain_colors(prev_img, color_match_sample, anim_args.color_coherence)
434
+
435
+ # intercept and override to grayscale
436
+ if anim_args.color_force_grayscale:
437
+ prev_img = cv2.cvtColor(prev_img, cv2.COLOR_BGR2GRAY)
438
+ prev_img = cv2.cvtColor(prev_img, cv2.COLOR_GRAY2BGR)
439
+
440
+ # apply scaling
441
+ contrast_image = (prev_img * contrast).round().astype(np.uint8)
442
+ # anti-blur
443
+ if amount > 0:
444
+ contrast_image = unsharp_mask(contrast_image, (kernel, kernel), sigma, amount, threshold, mask_image if args.use_mask else None)
445
+ # apply frame noising
446
+ if args.use_mask or anim_args.use_noise_mask:
447
+ args.noise_mask = compose_mask_with_check(root, args, noise_mask_seq, noise_mask_vals, Image.fromarray(cv2.cvtColor(contrast_image, cv2.COLOR_BGR2RGB)))
448
+ noised_image = add_noise(contrast_image, noise, args.seed, anim_args.noise_type,
449
+ (anim_args.perlin_w, anim_args.perlin_h, anim_args.perlin_octaves, anim_args.perlin_persistence),
450
+ args.noise_mask, args.invert_mask)
451
+
452
+ # use transformed previous frame as init for current
453
+ args.use_init = True
454
+ args.init_sample = Image.fromarray(cv2.cvtColor(noised_image, cv2.COLOR_BGR2RGB))
455
+ args.strength = max(0.0, min(1.0, strength))
456
+
457
+ args.scale = scale
458
+
459
+ # Pix2Pix Image CFG Scale - does *nothing* with non-pix2pix checkpoints
460
+ args.pix2pix_img_cfg_scale = float(keys.pix2pix_img_cfg_scale_series[frame_idx])
461
+
462
+ # grab prompt for current frame
463
+ args.prompt = prompt_series[frame_idx]
464
+
465
+ if args.seed_behavior == 'schedule' or use_parseq:
466
+ args.seed = int(keys.seed_schedule_series[frame_idx])
467
+
468
+ if anim_args.enable_checkpoint_scheduling:
469
+ args.checkpoint = keys.checkpoint_schedule_series[frame_idx]
470
+ else:
471
+ args.checkpoint = None
472
+
473
+ #SubSeed scheduling
474
+ if anim_args.enable_subseed_scheduling:
475
+ args.subseed = int(keys.subseed_schedule_series[frame_idx])
476
+ args.subseed_strength = float(keys.subseed_strength_schedule_series[frame_idx])
477
+
478
+ if use_parseq:
479
+ args.seed_enable_extras = True
480
+ anim_args.enable_subseed_scheduling = True
481
+ args.subseed = int(keys.subseed_schedule_series[frame_idx])
482
+ args.subseed_strength = keys.subseed_strength_schedule_series[frame_idx]
483
+
484
+ # set value back into the prompt - prepare and report prompt and seed
485
+ args.prompt = prepare_prompt(args.prompt, anim_args.max_frames, args.seed, frame_idx)
486
+
487
+ # grab init image for current frame
488
+ if using_vid_init:
489
+ init_frame = get_next_frame(args.outdir, anim_args.video_init_path, frame_idx, False)
490
+ print(f"Using video init frame {init_frame}")
491
+ args.init_image = init_frame
492
+ args.strength = max(0.0, min(1.0, strength))
493
+ if anim_args.use_mask_video:
494
+ args.mask_file = get_mask_from_file(get_next_frame(args.outdir, anim_args.video_mask_path, frame_idx, True), args)
495
+ args.noise_mask = get_mask_from_file(get_next_frame(args.outdir, anim_args.video_mask_path, frame_idx, True), args)
496
+
497
+ mask_vals['video_mask'] = get_mask_from_file(get_next_frame(args.outdir, anim_args.video_mask_path, frame_idx, True), args)
498
+
499
+ if args.use_mask:
500
+ args.mask_image = compose_mask_with_check(root, args, mask_seq, mask_vals, args.init_sample) if args.init_sample is not None else None # we need it only after the first frame anyway
501
+
502
+ # setting up some arguments for the looper
503
+ loop_args.imageStrength = loopSchedulesAndData.image_strength_schedule_series[frame_idx]
504
+ loop_args.blendFactorMax = loopSchedulesAndData.blendFactorMax_series[frame_idx]
505
+ loop_args.blendFactorSlope = loopSchedulesAndData.blendFactorSlope_series[frame_idx]
506
+ loop_args.tweeningFrameSchedule = loopSchedulesAndData.tweening_frames_schedule_series[frame_idx]
507
+ loop_args.colorCorrectionFactor = loopSchedulesAndData.color_correction_factor_series[frame_idx]
508
+ loop_args.use_looper = loopSchedulesAndData.use_looper
509
+ loop_args.imagesToKeyframe = loopSchedulesAndData.imagesToKeyframe
510
+
511
+ if 'img2img_fix_steps' in opts.data and opts.data["img2img_fix_steps"]: # disable "with img2img do exactly x steps" from general setting, as it *ruins* deforum animations
512
+ opts.data["img2img_fix_steps"] = False
513
+ if scheduled_clipskip is not None:
514
+ opts.data["CLIP_stop_at_last_layers"] = scheduled_clipskip
515
+ if scheduled_noise_multiplier is not None:
516
+ opts.data["initial_noise_multiplier"] = scheduled_noise_multiplier
517
+ if scheduled_ddim_eta is not None:
518
+ opts.data["eta_ddim"] = scheduled_ddim_eta
519
+ if scheduled_ancestral_eta is not None:
520
+ opts.data["eta_ancestral"] = scheduled_ancestral_eta
521
+
522
+ if anim_args.animation_mode == '3D' and (cmd_opts.lowvram or cmd_opts.medvram):
523
+ if predict_depths: depth_model.to('cpu')
524
+ devices.torch_gc()
525
+ lowvram.setup_for_low_vram(sd_model, cmd_opts.medvram)
526
+ sd_hijack.model_hijack.hijack(sd_model)
527
+
528
+ # optical flow redo before generation
529
+ if anim_args.optical_flow_redo_generation != 'None' and prev_img is not None and strength > 0:
530
+ print(f"Optical flow redo is diffusing and warping using {anim_args.optical_flow_redo_generation} optical flow before generation.")
531
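+ # stash the scheduled seed: the throwaway generation below runs on a random seed, and the real seed is restored before the final generation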
+ stored_seed = args.seed
532
+ args.seed = random.randint(0, 2**32 - 1)
533
+ disposable_image = generate(args, keys, anim_args, loop_args, controlnet_args, root, frame_idx, sampler_name=scheduled_sampler_name)
534
+ disposable_image = cv2.cvtColor(np.array(disposable_image), cv2.COLOR_RGB2BGR)
535
+ disposable_flow = get_flow_from_images(prev_img, disposable_image, anim_args.optical_flow_redo_generation, raft_model)
536
+ disposable_image = cv2.cvtColor(disposable_image, cv2.COLOR_BGR2RGB)
537
+ disposable_image = image_transform_optical_flow(disposable_image, disposable_flow, redo_flow_factor)
538
+ args.seed = stored_seed
539
+ args.init_sample = Image.fromarray(disposable_image)
540
+ del(disposable_image,disposable_flow,stored_seed)
541
+ gc.collect()
542
+
543
+ # diffusion redo
544
+ if int(anim_args.diffusion_redo) > 0 and prev_img is not None and strength > 0:
545
+ stored_seed = args.seed
546
+ for n in range(0,int(anim_args.diffusion_redo)):
547
+ print(f"Redo generation {n+1} of {int(anim_args.diffusion_redo)} before final generation")
548
+ args.seed = random.randint(0, 2**32 - 1)
549
+ disposable_image = generate(args, keys, anim_args, loop_args, controlnet_args, root, frame_idx, sampler_name=scheduled_sampler_name)
550
+ disposable_image = cv2.cvtColor(np.array(disposable_image), cv2.COLOR_RGB2BGR)
551
+ # color match on last one only
552
+ if n == int(anim_args.diffusion_redo) - 1:
553
+ disposable_image = maintain_colors(prev_img, color_match_sample, anim_args.color_coherence)
554
+ args.seed = stored_seed
555
+ args.init_sample = Image.fromarray(cv2.cvtColor(disposable_image, cv2.COLOR_BGR2RGB))
556
+ del(disposable_image, stored_seed)
557
+ gc.collect()
558
+
559
+ # generation
560
+ image = generate(args, keys, anim_args, loop_args, controlnet_args, root, frame_idx, sampler_name=scheduled_sampler_name)
561
+
562
+ if image is None:
563
+ break
564
+
565
+ # do hybrid video after generation
566
+ if frame_idx > 0 and anim_args.hybrid_composite == 'After Generation':
567
+ image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
568
+ args, image = hybrid_composite(args, anim_args, frame_idx, image, depth_model, hybrid_comp_schedules, root)
569
+ image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
570
+
571
+ # color matching on the first frame is done after generation; the color match sample was collected earlier, so we do an extra pass here to avoid the corruption introduced by color matching the first output
572
+ if frame_idx == 0 and (anim_args.color_coherence == 'Image' or (anim_args.color_coherence == 'Video Input' and hybrid_available)):
573
+ image = maintain_colors(cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR), color_match_sample, anim_args.color_coherence)
574
+ image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
575
+ elif color_match_sample is not None and anim_args.color_coherence != 'None' and not anim_args.legacy_colormatch:
576
+ image = maintain_colors(cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR), color_match_sample, anim_args.color_coherence)
577
+ image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
578
+
579
+ # intercept and override to grayscale
580
+ if anim_args.color_force_grayscale:
581
+ image = ImageOps.grayscale(image)
582
+ image = ImageOps.colorize(image, black="black", white="white")
583
+
584
+ # overlay mask
585
+ if args.overlay_mask and (anim_args.use_mask_video or args.use_mask):
586
+ image = do_overlay_mask(args, anim_args, image, frame_idx)
587
+
588
+ # on strength 0, set color match to generation
589
+ if ((not anim_args.legacy_colormatch and not args.use_init) or (anim_args.legacy_colormatch and strength == 0)) and anim_args.color_coherence not in ['Image', 'Video Input']:
590
+ color_match_sample = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
591
+
592
+ opencv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
593
+ if not using_vid_init:
594
+ prev_img = opencv_image
595
+
596
+ if turbo_steps > 1:
597
+ turbo_prev_image, turbo_prev_frame_idx = turbo_next_image, turbo_next_frame_idx
598
+ turbo_next_image, turbo_next_frame_idx = opencv_image, frame_idx
599
+ frame_idx += turbo_steps
600
+ else:
601
+ filename = f"{args.timestring}_{frame_idx:09}.png"
602
+ save_image(image, 'PIL', filename, args, video_args, root)
603
+
604
+ if anim_args.save_depth_maps:
605
+ if cmd_opts.lowvram or cmd_opts.medvram:
606
+ lowvram.send_everything_to_cpu()
607
+ sd_hijack.model_hijack.undo_hijack(sd_model)
608
+ devices.torch_gc()
609
+ depth_model.to(root.device)
610
+ depth = depth_model.predict(opencv_image, anim_args.midas_weight, root.half_precision)
611
+ depth_model.save(os.path.join(args.outdir, f"{args.timestring}_depth_{frame_idx:09}.png"), depth)
612
+ if cmd_opts.lowvram or cmd_opts.medvram:
613
+ depth_model.to('cpu')
614
+ devices.torch_gc()
615
+ lowvram.setup_for_low_vram(sd_model, cmd_opts.medvram)
616
+ sd_hijack.model_hijack.hijack(sd_model)
617
+ frame_idx += 1
618
+
619
+ state.current_image = image
620
+
621
+ args.seed = next_seed(args)
622
+
623
+ if predict_depths and not keep_in_vram:
624
+ depth_model.delete_model() # handles adabins too
625
+
626
+ if load_raft:
627
+ raft_model.delete_model()
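The tween loop above repeatedly warps frames along dense optical-flow fields (image_transform_optical_flow, cadence_flow_inc, hybrid flow). As a rough, self-contained sketch of that general technique (warp_by_flow is a hypothetical helper, not the extension's implementation, and sign conventions vary between flow estimators):

import cv2
import numpy as np

def warp_by_flow(img, flow, flow_factor=1.0):
    # displace every pixel along a (scaled) dense flow field via cv2.remap
    h, w = flow.shape[:2]
    scaled = flow.astype(np.float32) * flow_factor
    grid_x, grid_y = np.meshgrid(np.arange(w, dtype=np.float32), np.arange(h, dtype=np.float32))
    map_x = grid_x + scaled[..., 0]
    map_y = grid_y + scaled[..., 1]
    return cv2.remap(img, map_x, map_y, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT)

# made-up data: warp a blank frame by a uniform 3-pixel horizontal flow
frame = np.zeros((64, 64, 3), dtype=np.uint8)
flow = np.zeros((64, 64, 2), dtype=np.float32)
flow[..., 0] = 3.0
warped = warp_by_flow(frame, flow)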
scripts/deforum_helpers/render_modes.py ADDED
@@ -0,0 +1,175 @@
1
+ # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2
+ # Copyright (C) 2023 Artem Khrapov (kabachuha) and Deforum team listed in AUTHORS.md
3
+ #
4
+ # This program is free software: you can redistribute it and/or modify
5
+ # it under the terms of the GNU Affero General Public License as published by
6
+ # the Free Software Foundation, version 3 of the License.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU Affero General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU Affero General Public License
14
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
+
16
+ # Contact the dev team: https://discord.gg/deforum
17
+
18
+ import os
19
+ import time
20
+ import pathlib
21
+ import re
22
+ import numexpr
23
+ from modules.shared import opts, state
24
+ from .render import render_animation
25
+ from .seed import next_seed
26
+ from .video_audio_utilities import vid2frames
27
+ from .prompt import interpolate_prompts
28
+ from .generate import generate
29
+ from .animation_key_frames import DeformAnimKeys
30
+ from .parseq_adapter import ParseqAnimKeys
31
+ from .save_images import save_image
32
+ from .settings import save_settings_from_animation_run
33
+
34
+ def render_input_video(args, anim_args, video_args, parseq_args, loop_args, controlnet_args, root):
35
+ # create a folder for the video input frames to live in
36
+ video_in_frame_path = os.path.join(args.outdir, 'inputframes')
37
+ os.makedirs(video_in_frame_path, exist_ok=True)
38
+
39
+ # save the video frames from input video
40
+ print(f"Exporting Video Frames (1 every {anim_args.extract_nth_frame}) frames to {video_in_frame_path}...")
41
+ vid2frames(video_path = anim_args.video_init_path, video_in_frame_path=video_in_frame_path, n=anim_args.extract_nth_frame, overwrite=anim_args.overwrite_extracted_frames, extract_from_frame=anim_args.extract_from_frame, extract_to_frame=anim_args.extract_to_frame)
42
+
43
+ # determine max frames from length of input frames
44
+ anim_args.max_frames = len([f for f in pathlib.Path(video_in_frame_path).glob('*.jpg')])
45
+ args.use_init = True
46
+ print(f"Loading {anim_args.max_frames} input frames from {video_in_frame_path} and saving video frames to {args.outdir}")
47
+
48
+ if anim_args.use_mask_video:
49
+ # create a folder for the mask video input frames to live in
50
+ mask_in_frame_path = os.path.join(args.outdir, 'maskframes')
51
+ os.makedirs(mask_in_frame_path, exist_ok=True)
52
+
53
+ # save the video frames from mask video
54
+ print(f"Exporting Video Frames (1 every {anim_args.extract_nth_frame}) frames to {mask_in_frame_path}...")
55
+ vid2frames(video_path=anim_args.video_mask_path,video_in_frame_path=mask_in_frame_path, n=anim_args.extract_nth_frame, overwrite=anim_args.overwrite_extracted_frames, extract_from_frame=anim_args.extract_from_frame, extract_to_frame=anim_args.extract_to_frame)
56
+ max_mask_frames = len([f for f in pathlib.Path(mask_in_frame_path).glob('*.jpg')])
57
+
58
+ # limit max frames if there are fewer frames in the video mask than in the input video
59
+ if max_mask_frames < anim_args.max_frames:
60
+ anim_args.max_frames = max_mask_frames
61
+ print("Video mask contains fewer frames than the init video, so max frames is limited to the number of mask frames.")
62
+ args.use_mask = True
63
+ args.overlay_mask = True
64
+
65
+ render_animation(args, anim_args, video_args, parseq_args, loop_args, controlnet_args, root)
66
+
67
+ # Modified copy of the above that allows using a masking video without an init video.
68
+ def render_animation_with_video_mask(args, anim_args, video_args, parseq_args, loop_args, controlnet_args, root):
69
+ # create a folder for the video input frames to live in
70
+ mask_in_frame_path = os.path.join(args.outdir, 'maskframes')
71
+ os.makedirs(mask_in_frame_path, exist_ok=True)
72
+
73
+ # save the video frames from mask video
74
+ print(f"Exporting Video Frames (1 every {anim_args.extract_nth_frame}) frames to {mask_in_frame_path}...")
75
+ vid2frames(video_path=anim_args.video_mask_path, video_in_frame_path=mask_in_frame_path, n=anim_args.extract_nth_frame, overwrite=anim_args.overwrite_extracted_frames, extract_from_frame=anim_args.extract_from_frame, extract_to_frame=anim_args.extract_to_frame)
76
+ args.use_mask = True
77
+ #args.overlay_mask = True
78
+
79
+ # determine max frames from length of input frames
80
+ anim_args.max_frames = len([f for f in pathlib.Path(mask_in_frame_path).glob('*.jpg')])
81
+ #args.use_init = True
82
+ print(f"Loading {anim_args.max_frames} input frames from {mask_in_frame_path} and saving video frames to {args.outdir}")
83
+
84
+ render_animation(args, anim_args, video_args, parseq_args, loop_args, controlnet_args, root)
85
+
86
+ def get_parsed_value(value, frame_idx, max_f):
87
+ pattern = r'`.*?`'
88
+ regex = re.compile(pattern)
89
+ parsed_value = value
90
+ for match in regex.finditer(parsed_value):
91
+ matched_string = match.group(0)
92
+ parsed_string = matched_string.replace('t', f'{frame_idx}').replace("max_f" , f"{max_f}").replace('`','')
93
+ value = numexpr.evaluate(parsed_string)
94
+ parsed_value = parsed_value.replace(matched_string, str(value))
95
+ return parsed_value
96
+
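For illustration only, with made-up schedule text and frame numbers: get_parsed_value substitutes the current frame index (t) and the total frame count (max_f) into each backtick block and evaluates it with numexpr, so a prompt or schedule string can vary over time. Note that the plain string replace means any literal letter t inside a backtick block gets substituted too.

# with frame_idx=30 and max_f=120 the backtick block becomes
# numexpr.evaluate("0.5 + 0.4*30/120"), i.e. 0.6
get_parsed_value("strength `0.5 + 0.4*t/max_f`", 30, 120)   # -> "strength 0.6"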
97
+ def render_interpolation(args, anim_args, video_args, parseq_args, loop_args, controlnet_args, root):
98
+
99
+ # use parseq if manifest is provided
100
+ use_parseq = parseq_args.parseq_manifest is not None and parseq_args.parseq_manifest.strip()
101
+
102
+ # expand key frame strings to values
103
+ keys = DeformAnimKeys(anim_args) if not use_parseq else ParseqAnimKeys(parseq_args, anim_args, video_args)
104
+
105
+ # create output folder for the batch
106
+ os.makedirs(args.outdir, exist_ok=True)
107
+ print(f"Saving interpolation animation frames to {args.outdir}")
108
+
109
+ # save settings.txt file for the current run
110
+ save_settings_from_animation_run(args, anim_args, parseq_args, loop_args, controlnet_args, video_args, root)
111
+
112
+ # Compute interpolated prompts
113
+ if use_parseq and keys.manages_prompts():
114
+ print("Parseq prompts are assumed to already be interpolated - not doing any additional prompt interpolation")
115
+ prompt_series = keys.prompts
116
+ else:
117
+ print("Generating interpolated prompts for all frames")
118
+ prompt_series = interpolate_prompts(root.animation_prompts, anim_args.max_frames)
119
+
120
+ state.job_count = anim_args.max_frames
121
+ frame_idx = 0
122
+ # INTERPOLATION MODE
123
+ while frame_idx < anim_args.max_frames:
124
+ # print data to cli
125
+ prompt_to_print = get_parsed_value(prompt_series[frame_idx].strip(), frame_idx, anim_args.max_frames)
126
+
127
+ if prompt_to_print.endswith("--neg"):
128
+ prompt_to_print = prompt_to_print[:-5]
129
+ print(f"\033[36mInterpolation frame: \033[0m{frame_idx}/{anim_args.max_frames} ")
130
+ print(f"\033[32mSeed: \033[0m{args.seed}")
131
+ print(f"\033[35mPrompt: \033[0m{prompt_to_print}")
132
+
133
+ state.job = f"frame {frame_idx + 1}/{anim_args.max_frames}"
134
+ state.job_no = frame_idx + 1
135
+
136
+ if state.interrupted:
137
+ break
138
+ if state.skipped:
139
+ print("\n** PAUSED **")
140
+ state.skipped = False
141
+ while not state.skipped:
142
+ time.sleep(0.1)
143
+ print("** RESUMING **")
144
+
145
+ # grab inputs for current frame generation
146
+ args.prompt = prompt_to_print
147
+ args.scale = keys.cfg_scale_schedule_series[frame_idx]
148
+ args.pix2pix_img_cfg_scale = keys.pix2pix_img_cfg_scale_series[frame_idx]
149
+
150
+ scheduled_sampler_name = keys.sampler_schedule_series[frame_idx].casefold() if anim_args.enable_sampler_scheduling and keys.sampler_schedule_series[frame_idx] is not None else None
151
+ args.steps = int(keys.steps_schedule_series[frame_idx]) if anim_args.enable_steps_scheduling and keys.steps_schedule_series[frame_idx] is not None else args.steps
152
+ scheduled_clipskip = int(keys.clipskip_schedule_series[frame_idx]) if anim_args.enable_clipskip_scheduling and keys.clipskip_schedule_series[frame_idx] is not None else None
153
+ args.checkpoint = keys.checkpoint_schedule_series[frame_idx] if anim_args.enable_checkpoint_scheduling else None
154
+ if anim_args.enable_subseed_scheduling:
155
+ args.subseed = int(keys.subseed_schedule_series[frame_idx])
156
+ args.subseed_strength = keys.subseed_strength_schedule_series[frame_idx]
157
+ else:
158
+ args.subseed, args.subseed_strength = keys.subseed_schedule_series[frame_idx], keys.subseed_strength_schedule_series[frame_idx]
159
+ if use_parseq:
160
+ anim_args.enable_subseed_scheduling = True
161
+ args.subseed, args.subseed_strength = int(keys.subseed_schedule_series[frame_idx]), keys.subseed_strength_schedule_series[frame_idx]
162
+ args.seed = int(keys.seed_schedule_series[frame_idx]) if args.seed_behavior == 'schedule' or use_parseq else args.seed
163
+ opts.data["CLIP_stop_at_last_layers"] = scheduled_clipskip if scheduled_clipskip is not None else opts.data["CLIP_stop_at_last_layers"]
164
+
165
+ image = generate(args, keys, anim_args, loop_args, controlnet_args, root, frame_idx, sampler_name=scheduled_sampler_name)
166
+ filename = f"{args.timestring}_{frame_idx:09}.png"
167
+
168
+ save_image(image, 'PIL', filename, args, video_args, root)
169
+
170
+ state.current_image = image
171
+
172
+ if args.seed_behavior != 'schedule':
173
+ args.seed = next_seed(args)
174
+
175
+ frame_idx += 1