diff --git "a/FateZero/colab_fatezero.ipynb" "b/FateZero/colab_fatezero.ipynb"
new file mode 100644
--- /dev/null
+++ "b/FateZero/colab_fatezero.ipynb"
@@ -0,0 +1,528 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "fZ_xQvU70UQc"
+   },
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ChenyangQiQi/FateZero/blob/main/colab_fatezero.ipynb)\n",
+    "\n",
+    "# FateZero: Fusing Attentions for Zero-shot Text-based Video Editing\n",
+    "\n",
+    "[Chenyang Qi](https://chenyangqiqi.github.io/), [Xiaodong Cun](http://vinthony.github.io/), [Yong Zhang](https://yzhang2016.github.io), [Chenyang Lei](https://chenyanglei.github.io/), [Xintao Wang](https://xinntao.github.io/), [Ying Shan](https://scholar.google.com/citations?hl=zh-CN&user=4oXBp9UAAAAJ), and [Qifeng Chen](https://cqf.io)\n",
+    "\n",
+    "\n",
+    "[![Project Website](https://img.shields.io/badge/Project-Website-orange)](https://fate-zero-edit.github.io/)\n",
+    "[![arXiv](https://img.shields.io/badge/arXiv-2303.09535-b31b1b.svg)](https://arxiv.org/abs/2303.09535)\n",
+    "[![GitHub](https://img.shields.io/github/stars/ChenyangQiQi/FateZero?style=social)](https://github.com/ChenyangQiQi/FateZero)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "XU7NuMAA2drw",
+    "outputId": "82c4a90d-0ed6-4ad5-c74d-0a0ed3d98bbe"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Tesla T4, 15360 MiB, 15101 MiB\n"
+     ]
+    }
+   ],
+   "source": [
+    "#@markdown Check the type of GPU and the VRAM available.\n",
+    "!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "D1PRgre3Gt5U",
+    "outputId": "ac1db329-a373-4c82-9b0d-77f4e5cb7140"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cloning into '/content/FateZero'...\n",
+      "remote: Enumerating objects: 332, done.\n",
+      "remote: Counting objects: 100% (53/53), done.\n",
+      "remote: Compressing objects: 100% (7/7), done.\n",
+      "remote: Total 332 (delta 50), reused 47 (delta 46), pack-reused 279\n",
+      "Receiving objects: 100% (332/332), 34.21 MiB | 14.26 MiB/s, done.\n",
+      "Resolving deltas: 100% (157/157), done.\n",
+      "/content/FateZero\n",
+      "  Preparing metadata (setup.py) ... done\n",
+      "  Building wheel for lit (setup.py) ... done\n",
+      "  Preparing metadata (setup.py) ... done\n",
+      "  Preparing metadata (setup.py) ... done\n",
+      "  Building wheel for antlr4-python3-runtime (setup.py) ... done\n",
+      "  Building wheel for ffmpy (setup.py) ... done\n"
+     ]
+    }
+   ],
+   "source": [
+    "#@title Install requirements\n",
+    "\n",
+    "!git clone https://github.com/ChenyangQiQi/FateZero /content/FateZero\n",
+    "%cd /content/FateZero\n",
+    "# %pip install -r requirements.txt\n",
+    "%pip install -q -U --pre triton\n",
+    "%pip install -q diffusers[torch]==0.11.1 transformers==4.26.0 bitsandbytes==0.35.4 \\\n",
+    "decord accelerate omegaconf einops ftfy gradio imageio-ffmpeg xformers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "m6I6kZNG3Inb",
+    "outputId": "f3bcb6eb-a79c-4810-d575-e926c8e7564f"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Updated git hooks.\n",
+      "Git LFS initialized.\n",
+      "Cloning into 'ckpt/CompVis/stable-diffusion-v1-4'...\n",
+      "remote: Enumerating objects: 738, done.\n",
+      "remote: Counting objects: 100% (12/12), done.\n",
+      "remote: Compressing objects: 100% (12/12), done.\n",
+      "remote: Total 738 (delta 3), reused 1 (delta 0), pack-reused 726\n",
+      "Receiving objects: 100% (738/738), 682.52 KiB | 954.00 KiB/s, done.\n",
+      "Resolving deltas: 100% (123/123), done.\n",
+      "Filtering content: 100% (8/8), 10.20 GiB | 63.59 MiB/s, done.\n",
+      "[*] MODEL_NAME=./ckpt/CompVis/stable-diffusion-v1-4\n"
+     ]
+    }
+   ],
+   "source": [
+    "#@title Download the pretrained model\n",
+    "\n",
+    "#@markdown Name/path of the initial model.\n",
+    "MODEL_NAME = \"CompVis/stable-diffusion-v1-4\" #@param {type:\"string\"}\n",
+    "\n",
+    "#@markdown Tick if the model should be downloaded from a remote repo; untick it if the model is loaded from a local path.\n",
+    "download_pretrained_model = True #@param {type:\"boolean\"}\n",
+    "if download_pretrained_model:\n",
+    "    !git lfs install\n",
+    "    !git clone https://huggingface.co/$MODEL_NAME ckpt/$MODEL_NAME\n",
+    "    MODEL_NAME = f\"./ckpt/{MODEL_NAME}\"\n",
+    "print(f\"[*] MODEL_NAME={MODEL_NAME}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "qn5ILIyDJIcX"
+   },
+   "source": [
+    "# **Usage**\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "i4L2yDXGflaC"
+   },
+   "source": [
+    "## FateZero editing with low resource cost\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "fXZs1veYIMMw",
+    "outputId": "c665eaba-ef12-498e-d173-6432e977fc07"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "save new config to config/car-turn.yaml\n"
+     ]
+    }
+   ],
+   "source": [
+    "#@markdown Edit the config\n",
+    "\n",
+    "#@markdown More details of the configuration will be given soon.\n",
+    "\n",
+    "from omegaconf import OmegaConf\n",
+    "\n",
+    "VIDEO_FILE = 'data/car-turn' #@param {type:\"string\"}\n",
+    "\n",
+    "VIDEO_ID = VIDEO_FILE.split('/')[-1]\n",
+    "\n",
+    "RESULT_DIR = 'result/' + VIDEO_ID\n",
+    "\n",
+    "CONFIG_NAME = 'config/' + VIDEO_ID + '.yaml'\n",
+    "\n",
+    "source_prompt = \"a silver jeep driving down a curvy road in the countryside\" #@param {type:\"string\"}\n",
+    "edit_prompt = \"watercolor painting of a silver jeep driving down a curvy road in the countryside\" #@param {type:\"string\"}\n",
+    "EMPHASIS_WORD = \"watercolor\" #@param {type:\"string\"}\n",
+    "EMPHASIS_VALUE = 10 #@param {type:\"number\"}\n",
+    "video_length = 8 #@param {type:\"number\"}\n",
+    "INVERSION_STEP = 8 #@param {type:\"number\"}\n",
+    "REPLACE_STRENGTH = 0.8 #@param {type:\"slider\", min:0, max:1, step:0.1}\n",
+    "STORE_ATTENTION_ON_DISK = False #@param {type:\"boolean\"}\n",
+    "width = 512\n",
+    "height = 512\n",
+    "\n",
+    "config = {\n",
+    "    \"pretrained_model_path\": MODEL_NAME,\n",
+    "    \"logdir\": RESULT_DIR,\n",
+    "    \"train_dataset\": {\n",
+    "        \"path\": VIDEO_FILE,\n",
+    "        \"prompt\": source_prompt,\n",
+    "        \"n_sample_frame\": video_length,\n",
+    "        \"sampling_rate\": 1,\n",
+    "        \"stride\": 80,\n",
+    "        \"offset\": {\n",
+    "            \"left\": 0,\n",
+    "            \"right\": 0,\n",
+    "            \"top\": 0,\n",
+    "            \"bottom\": 0,\n",
+    "        }\n",
+    "    },\n",
+    "    \"validation_sample_logger_config\": {\n",
+    "        \"use_train_latents\": True,\n",
+    "        \"use_inversion_attention\": True,\n",
+    "        \"guidance_scale\": 7.5,\n",
+    "        \"prompts\": [\n",
+    "            source_prompt,\n",
+    "            edit_prompt,\n",
+    "        ],\n",
+    "        \"p2p_config\": [\n",
+    "            {\n",
+    "                \"cross_replace_steps\": {\n",
+    "                    \"default_\": REPLACE_STRENGTH\n",
+    "                },\n",
+    "                \"self_replace_steps\": REPLACE_STRENGTH,\n",
+    "                \"masked_self_attention\": True,\n",
+    "                \"bend_th\": [2, 2],\n",
+    "                \"is_replace_controller\": False\n",
+    "            },\n",
+    "            {\n",
+    "                \"cross_replace_steps\": {\n",
+    "                    \"default_\": REPLACE_STRENGTH\n",
+    "                },\n",
+    "                \"self_replace_steps\": REPLACE_STRENGTH,\n",
+    "                \"eq_params\": {\n",
+    "                    \"words\": [EMPHASIS_WORD],\n",
+    "                    \"values\": [EMPHASIS_VALUE]\n",
+    "                },\n",
+    "                \"use_inversion_attention\": True,\n",
+    "                \"is_replace_controller\": False\n",
+    "            }\n",
+    "        ],\n",
+    "        \"clip_length\": \"${..train_dataset.n_sample_frame}\",\n",
+    "        \"sample_seeds\": [0],\n",
+    "        \"num_inference_steps\": INVERSION_STEP,\n",
+    "        \"prompt2prompt_edit\": True\n",
+    "    },\n",
+    "    \"disk_store\": STORE_ATTENTION_ON_DISK,\n",
+    "    \"model_config\": {\n",
+    "        \"lora\": 160,\n",
+    "        \"SparseCausalAttention_index\": ['mid'],\n",
+    "        \"least_sc_channel\": 640\n",
+    "    },\n",
+    "    \"test_pipeline_config\": {\n",
+    "        \"target\": \"video_diffusion.pipelines.p2pDDIMSpatioTemporalPipeline.p2pDDIMSpatioTemporalPipeline\",\n",
+    "        \"num_inference_steps\": \"${..validation_sample_logger.num_inference_steps}\"\n",
+    "    },\n",
+    "    \"epsilon\": 1e-5,\n",
+    "    \"train_steps\": 10,\n",
+    "    \"seed\": 0,\n",
+    "    \"learning_rate\": 1e-5,\n",
+    "    \"train_temporal_conv\": False,\n",
+    "    \"guidance_scale\": \"${validation_sample_logger_config.guidance_scale}\"\n",
+    "}\n",
+    "\n",
+    "OmegaConf.save(config, CONFIG_NAME)\n",
+    "print('save new config to', CONFIG_NAME)"
+   ]
+  },
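+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "(Optional) Sanity-check the saved config before running. The minimal sketch below simply reloads the YAML with OmegaConf and prints it; `CONFIG_NAME` comes from the cell above, and `resolve=False` keeps interpolations such as `${..train_dataset.n_sample_frame}` visible as written instead of resolving them."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#@markdown (Optional) Reload and print the saved YAML.\n",
+    "from omegaconf import OmegaConf\n",
+    "\n",
+    "cfg = OmegaConf.load(CONFIG_NAME)\n",
+    "# resolve=False leaves ${...} interpolations unresolved for inspection\n",
+    "print(OmegaConf.to_yaml(cfg, resolve=False))"
+   ]
+  },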
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "jjcSXTp-u-Eg",
+    "outputId": "194d964e-08dc-4d3d-c0fd-7e56ed2eb187"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2023-03-22 09:04:20.819710: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+      "2023-03-22 09:04:24.565385: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
+      "2023-03-22 09:04:24.565750: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
+      "2023-03-22 09:04:24.565782: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
+      "The following values were not passed to `accelerate launch` and had defaults used instead:\n",
+      "\t`--num_processes` was set to a value of `1`\n",
+      "\t`--num_machines` was set to a value of `1`\n",
+      "\t`--mixed_precision` was set to a value of `'no'`\n",
+      "\t`--dynamo_backend` was set to a value of `'no'`\n",
+      "To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.\n",
+      "2023-03-22 09:04:31.342590: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
+      "2023-03-22 09:04:31.342704: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
+      "2023-03-22 09:04:31.342734: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
+      "The config attributes {'scaling_factor': 0.18215} were passed to AutoencoderKL, but are not expected and will be ignored. Please verify your config.json configuration file.\n",
+      "use fp16\n",
+      "Number of attention layer registered 32\n",
+      " Invert clean image to noise latents by DDIM and Unet\n",
+      "100% 8/8 [00:25<00:00, 3.19s/it]\n",
+      "IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (3328, 307) to (3328, 320) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n",
+      "Number of attention layer registered 32\n",
+      "Generating sample images: 0% 0/2 [00:00<?, ?it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "!accelerate launch test_fatezero.py --config=$CONFIG_NAME"
+   ]
+  },
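+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "(Optional) The run writes everything under the `logdir` from the config (`result/<video id>`), with edited clips in its `sample/` subfolder. The minimal sketch below just lists the videos that were produced; `RESULT_DIR` comes from the config cell."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#@markdown (Optional) List the videos produced by the run above.\n",
+    "import glob\n",
+    "\n",
+    "# samples are saved under <logdir>/sample/ by the run above\n",
+    "for path in sorted(glob.glob(RESULT_DIR + '/**/*.mp4', recursive=True)):\n",
+    "    print(path)"
+   ]
+  },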
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <video width=400 controls>\n",
+       "        <source src=\"data:video/mp4;base64,...\" type=\"video/mp4\">\n",
+       "    </video>\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from IPython.display import HTML\n",
+    "from base64 import b64encode\n",
+    "import os, sys\n",
+    "import glob\n",
+    "\n",
+    "# get the most recent result\n",
+    "mp4_name = sorted(glob.glob('./result/*/sample/step_0.mp4'))[-1]\n",
+    "\n",
+    "print(mp4_name)\n",
+    "mp4 = open('{}'.format(mp4_name), 'rb').read()\n",
+    "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
+    "\n",
+    "print('Display animation: {}'.format(mp4_name), file=sys.stderr)\n",
+    "display(HTML(\"\"\"\n",
+    "    <video width=400 controls>\n",
+    "        <source src=\"%s\" type=\"video/mp4\">\n",
+    "    </video>\n",
+    "    \"\"\" % data_url))"
+   ]
+  },
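+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "(Optional) Colab storage is wiped when the session ends. As a hedged sketch that is not part of the original pipeline, the `result/` folder can be copied to Google Drive:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#@markdown (Optional) Copy results to Google Drive to keep them after the session ends.\n",
+    "from google.colab import drive\n",
+    "\n",
+    "drive.mount('/content/drive')\n",
+    "!mkdir -p /content/drive/MyDrive/FateZero\n",
+    "!cp -r ./result /content/drive/MyDrive/FateZero/"
+   ]
+  },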
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "cBb3wTEXfhRo"
+   },
+   "source": [
+    "## Edit your video"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "mQR2cjDZV9tu"
+   },
+   "outputs": [],
+   "source": [
+    "#@markdown Upload your video (.mp4) by running this cell, or skip it to use the default data.\n",
+    "\n",
+    "import os\n",
+    "from google.colab import files\n",
+    "import shutil\n",
+    "from IPython.display import HTML\n",
+    "from base64 import b64encode\n",
+    "\n",
+    "uploaded = files.upload()\n",
+    "for filename in uploaded.keys():\n",
+    "    dst_path = os.path.join(\"data\", filename)\n",
+    "    shutil.move(filename, dst_path)\n",
+    "\n",
+    "file_id = dst_path.replace('.mp4', '')\n",
+    "\n",
+    "! mkdir -p $file_id\n",
+    "! ffmpeg -hide_banner -loglevel error -i $dst_path -vf \"scale=512:512,fps=25\" $file_id/%05d.png\n",
+    "\n",
+    "mp4 = open('{}'.format(dst_path), 'rb').read()\n",
+    "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
+    "\n",
+    "display(HTML(\"\"\"\n",
+    "    <video width=400 controls>\n",
+    "        <source src=\"%s\" type=\"video/mp4\">\n",
+    "    </video>\n",
+    "    \"\"\" % data_url))\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "provenance": []
+  },
+  "gpuClass": "standard",
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "2.7.18 (default, Jul  1 2022, 12:27:04) \n[GCC 9.4.0]"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}