diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..efc57177347a87f71011ab9aedea1ecaf08667c6 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,20 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+EvalDataset/clips/bear/output_video.mp4 filter=lfs diff=lfs merge=lfs -text
+EvalDataset/clips/bear/output_video_gray.mp4 filter=lfs diff=lfs merge=lfs -text
+EvalDataset/clips/boat/output_video_gray.mp4 filter=lfs diff=lfs merge=lfs -text
+EvalDataset/clips/cows/output_video.mp4 filter=lfs diff=lfs merge=lfs -text
+EvalDataset/clips/cows/output_video_gray.mp4 filter=lfs diff=lfs merge=lfs -text
+EvalDataset/clips/dog/output_video.mp4 filter=lfs diff=lfs merge=lfs -text
+EvalDataset/clips/flamingo/output_video_gray.mp4 filter=lfs diff=lfs merge=lfs -text
+EvalDataset/ref/goat/0000.jpg filter=lfs diff=lfs merge=lfs -text
+EvalDataset/ref/hockey/0000.jpg filter=lfs diff=lfs merge=lfs -text
+EvalDataset/ref/horsejump-high/0000.jpg filter=lfs diff=lfs merge=lfs -text
+EvalDataset/ref/motorbike/0000.jpg filter=lfs diff=lfs merge=lfs -text
+EvalDataset/ref/surf/0000.jpg filter=lfs diff=lfs merge=lfs -text
+examples/bear/video.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/cows/video.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/flamingo/video.mp4 filter=lfs diff=lfs merge=lfs -text
+gradio_cached_examples/13/output/003c3114319372a78bf2f812ebaf0041afa280fb/output_video.mp4 filter=lfs diff=lfs merge=lfs -text
+gradio_cached_examples/13/output/7969adca8ae38cb3b38ff8e7bb54688d942c7bc8/output_video.mp4 filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..6a5b7bb32ac229eeab8cead3a765a2d91f5b9406
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,137 @@
+checkpoints/
+wandb/
+.vscode
+.DS_Store
+*ckpt*/
+# Custom
+*.pt
+data/local
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/README.md b/README.md
index e4715eb76d75c0d1ef4dc50d21e869234a70ba14..cf98d63d5fb06482fac1aba313ae5e3f220c715c 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,6 @@
---
title: ViTExCo
-emoji: ๐
-colorFrom: gray
-colorTo: green
+app_file: app.py
sdk: gradio
sdk_version: 3.40.1
-app_file: app.py
-pinned: false
---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
diff --git a/UI.py b/UI.py
new file mode 100644
index 0000000000000000000000000000000000000000..033046d4e8709d171221bc145df3422cfeed9e64
--- /dev/null
+++ b/UI.py
@@ -0,0 +1,81 @@
+import streamlit as st
+from PIL import Image
+import torchvision.transforms as transforms
+from streamlit_image_comparison import image_comparison
+import numpy as np
+import torch
+import torchvision
+
+######################################### Utils ########################################
+# File extensions (without the leading dot) offered by the uploader and matched by check_type().
+video_extensions = ["mp4"]
+image_extensions = ["png", "jpg"]
+
+
+def check_type(file_name: str):
+ for image_extension in image_extensions:
+ if file_name.endswith(image_extension):
+ return "image"
+ for video_extension in video_extensions:
+ if file_name.endswith(video_extension):
+ return "video"
+ return None
+
+
+# Preprocessing pipeline: resize to 256x256, convert to a tensor, then normalize per channel.
+# NOTE(review): no other line visible in this file reads `transform`; the image branch of the
+# UI calls torchvision.transforms.functional.normalize directly — confirm it is still needed.
+transform = transforms.Compose(
+ [transforms.Resize((256, 256)), transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]
+)
+
+
+###################################### Load model ######################################
+@st.cache_resource
+def load_model():
+ model = torchvision.models.segmentation.deeplabv3_resnet101(pretrained=True)
+ model.eval()
+ return model
+
+
+# Obtain the model returned by load_model() before any widget is drawn.
+model = load_model()
+########################################## UI ##########################################
+st.title("Colorization")
+
+# The uploader only offers the extensions listed in image_extensions and video_extensions.
+uploaded_file = st.file_uploader("Upload grayscale image or video", type=image_extensions + video_extensions)
+if uploaded_file:
+ # Image
+ if check_type(file_name=uploaded_file.name) == "image":
+ # Pixel values are cast to float32 but not rescaled on this line.
+ image = np.array(Image.open(uploaded_file), dtype=np.float32)
+
+ # permute(2, 0, 1) moves the last axis first, so `image` has to be a 3-D array;
+ # unsqueeze(0) then adds a leading batch axis.
+ # NOTE(review): mean/std look like the usual ImageNet statistics for inputs in [0, 1],
+ # while `image` presumably still holds 0-255 values, and a single-channel (2-D) upload
+ # would presumably fail at permute — confirm the intended preprocessing.
+ input_tensor = torchvision.transforms.functional.normalize(
+ torch.tensor(image).permute(2, 0, 1),
+ mean=[0.485, 0.456, 0.406],
+ std=[0.229, 0.224, 0.225],
+ ).unsqueeze(0)
+ process_button = st.button("Process")
+ if process_button:
+ with st.spinner("Tแปซ tแปซ coi..."):
+ prediction = model(input_tensor)
+ # Take the first batch item of the "out" entry and move its first axis last.
+ segment = prediction["out"][0].permute(1, 2, 0)
+ segment = segment.detach().numpy()
+
+ # NOTE(review): the model comes from torchvision.models.segmentation (see load_model),
+ # so `segment` is a segmentation output even though it is labelled "Colorized" below —
+ # looks like a placeholder for the real colorization model; confirm.
+ st.image(segment)
+ st.image(image)
+
+ # Side-by-side slider comparing the input array with the model output.
+ image_comparison(
+ img1=image,
+ img2=np.array(segment),
+ label1="Grayscale",
+ label2="Colorized",
+ make_responsive=True,
+ show_labels=True,
+ )
+ # Video
+ # NOTE(review): presumably this `else` pairs with the image check above. The uploaded
+ # video is not read; a fixed YouTube URL is shown instead.
+ else:
+ # video = open(uploaded_file.name)
+ st.video("https://youtu.be/dQw4w9WgXcQ")
+
+# NOTE(review): as visible here the string below holds only whitespace; since it is rendered
+# with unsafe_allow_html=True it presumably once contained markup — confirm nothing was lost.
+hide_menu_style = """
+
+ """
+st.markdown(hide_menu_style, unsafe_allow_html=True)
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..79efaf4e23a61a2f6b7eb9318b89b04c58d96b73
--- /dev/null
+++ b/app.py
@@ -0,0 +1,215 @@
+import numpy as np
+import shutil
+import os
+import argparse
+import torch
+import glob
+from tqdm import tqdm
+from PIL import Image
+from collections import OrderedDict
+from src.models.vit.config import load_config
+import torchvision.transforms as transforms
+import cv2
+from skimage import io
+
+from src.models.CNN.ColorVidNet import GeneralColorVidNet
+from src.models.vit.embed import GeneralEmbedModel
+from src.models.CNN.NonlocalNet import GeneralWarpNet
+from src.models.CNN.FrameColor import frame_colorization
+from src.utils import (
+ RGB2Lab,
+ ToTensor,
+ Normalize,
+ uncenter_l,
+ tensor_lab2rgb,
+ SquaredPadding,
+ UnpaddingSquare
+)
+
+import gradio as gr
+
+def load_params(ckpt_file):
+ params = torch.load(ckpt_file, map_location=device)
+ new_params = []
+ for key, value in params.items():
+ new_params.append((key, value))
+ return OrderedDict(new_params)
+
+def custom_transform(transforms, img):
+ for transform in transforms:
+ if isinstance(transform, SquaredPadding):
+ img,padding=transform(img, return_paddings=True)
+ else:
+ img = transform(img)
+ return img.to(device), padding
+
+def save_frames(predicted_rgb, video_name, frame_name):
+ if predicted_rgb is not None:
+ predicted_rgb = np.clip(predicted_rgb, 0, 255).astype(np.uint8)
+ # frame_path_parts = frame_path.split(os.sep)
+ # if os.path.exists(os.path.join(OUTPUT_RESULT_PATH, frame_path_parts[-2])):
+ # shutil.rmtree(os.path.join(OUTPUT_RESULT_PATH, frame_path_parts[-2]))
+ # os.makedirs(os.path.join(OUTPUT_RESULT_PATH, frame_path_parts[-2]), exist_ok=True)
+ predicted_rgb = np.transpose(predicted_rgb, (1,2,0))
+ pil_img = Image.fromarray(predicted_rgb)
+ pil_img.save(os.path.join(OUTPUT_RESULT_PATH, video_name, frame_name))
+
+def extract_frames_from_video(video_path):
+ cap = cv2.VideoCapture(video_path)
+ fps = cap.get(cv2.CAP_PROP_FPS)
+
+ # remove if exists folder
+ output_frames_path = os.path.join(INPUT_VIDEO_FRAMES_PATH, os.path.basename(video_path))
+ if os.path.exists(output_frames_path):
+ shutil.rmtree(output_frames_path)
+
+ # make new folder
+ os.makedirs(output_frames_path)
+
+ currentframe = 0
+ frame_path_list = []
+ while(True):
+
+ # reading from frame
+ ret,frame = cap.read()
+
+ if ret:
+ name = os.path.join(output_frames_path, f'{currentframe:09d}.jpg')
+ frame_path_list.append(name)
+ cv2.imwrite(name, frame)
+ currentframe += 1
+ else:
+ break
+
+ cap.release()
+ cv2.destroyAllWindows()
+
+ return frame_path_list, fps
+
+def combine_frames_from_folder(frames_list_path, fps = 30):
+ frames_list = glob.glob(f'{frames_list_path}/*.jpg')
+ frames_list.sort()
+
+ sample_shape = cv2.imread(frames_list[0]).shape
+
+ output_video_path = os.path.join(frames_list_path, 'output_video.mp4')
+ out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (sample_shape[1], sample_shape[0]))
+ for filename in frames_list:
+ img = cv2.imread(filename)
+ out.write(img)
+
+ out.release()
+ return output_video_path
+
+
+def upscale_image(I_current_rgb, I_current_ab_predict):
+ H, W = I_current_rgb.size
+ high_lab_transforms = [
+ SquaredPadding(target_size=max(H,W)),
+ RGB2Lab(),
+ ToTensor(),
+ Normalize()
+ ]
+ # current_frame_pil_rgb = Image.fromarray(np.clip(I_current_rgb.squeeze(0).permute(1,2,0).cpu().numpy() * 255, 0, 255).astype('uint8'))
+ high_lab_current, paddings = custom_transform(high_lab_transforms, I_current_rgb)
+ high_lab_current = torch.unsqueeze(high_lab_current,dim=0).to(device)
+ high_l_current = high_lab_current[:, 0:1, :, :]
+ high_ab_current = high_lab_current[:, 1:3, :, :]
+ upsampler = torch.nn.Upsample(scale_factor=max(H,W)/224,mode="bilinear")
+ high_ab_predict = upsampler(I_current_ab_predict)
+ I_predict_rgb = tensor_lab2rgb(torch.cat((uncenter_l(high_l_current), high_ab_predict), dim=1))
+ upadded = UnpaddingSquare()
+ I_predict_rgb = upadded(I_predict_rgb, paddings)
+ return I_predict_rgb
+
+def colorize_video(video_path, ref_np):
+ """Colorize a video frame by frame from a reference image and return the result's path.
+
+ Reads the module-level names device, transforms, embed_net, nonlocal_net, colornet and
+ OUTPUT_RESULT_PATH, which are assigned in the __main__ block of this file.
+
+ Args:
+ video_path: Path passed to extract_frames_from_video.
+ ref_np: Array passed to Image.fromarray to build the reference image.
+
+ Returns:
+ The path returned by combine_frames_from_folder for the folder of colorized frames.
+ """
+ # Split the video into JPEG frames on disk; fps is reused when re-encoding.
+ frames_list, fps = extract_frames_from_video(video_path)
+ 
+ frame_ref = Image.fromarray(ref_np).convert("RGB")
+ I_last_lab_predict = None
+ # Run the reference image through the same transform list as the frames.
+ IB_lab, IB_paddings = custom_transform(transforms, frame_ref)
+ IB_lab = IB_lab.unsqueeze(0).to(device)
+ # Channel 0 is treated as L and channels 1:3 as ab; IB_l and IB_ab are not read again below.
+ IB_l = IB_lab[:, 0:1, :, :]
+ IB_ab = IB_lab[:, 1:3, :, :]
+ 
+ # Reference features are computed once and reused for every frame.
+ with torch.no_grad():
+ I_reference_lab = IB_lab
+ I_reference_l = I_reference_lab[:, 0:1, :, :]
+ I_reference_ab = I_reference_lab[:, 1:3, :, :]
+ I_reference_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_reference_l), I_reference_ab), dim=1)).to(device)
+ features_B = embed_net(I_reference_rgb)
+ 
+ # The second-to-last path component of a frame path is the folder the frames were
+ # extracted into. NOTE(review): frames_list[0] raises IndexError when no frame was
+ # extracted — confirm whether empty/unreadable videos need handling.
+ video_path_parts = frames_list[0].split(os.sep)
+ 
+ # Recreate the output folder for this video from scratch.
+ if os.path.exists(os.path.join(OUTPUT_RESULT_PATH, video_path_parts[-2])):
+ shutil.rmtree(os.path.join(OUTPUT_RESULT_PATH, video_path_parts[-2]))
+ os.makedirs(os.path.join(OUTPUT_RESULT_PATH, video_path_parts[-2]), exist_ok=True)
+ 
+ for frame_path in tqdm(frames_list):
+ curr_frame = Image.open(frame_path).convert("RGB")
+ IA_lab, IA_paddings = custom_transform(transforms, curr_frame)
+ IA_lab = IA_lab.unsqueeze(0).to(device)
+ # Only the L slice is used below; IA_ab is not read again.
+ IA_l = IA_lab[:, 0:1, :, :]
+ IA_ab = IA_lab[:, 1:3, :, :]
+ 
+ # The first frame has no previous prediction, so an all-zero tensor stands in for it.
+ if I_last_lab_predict is None:
+ I_last_lab_predict = torch.zeros_like(IA_lab).to(device)
+ 
+ with torch.no_grad():
+ I_current_lab = IA_lab
+ I_current_ab_predict, _ = frame_colorization(
+ IA_l,
+ I_reference_lab,
+ I_last_lab_predict,
+ features_B,
+ embed_net,
+ nonlocal_net,
+ colornet,
+ luminance_noise=0,
+ temperature=1e-10,
+ joint_training=False
+ )
+ # Keep this frame's L plus predicted ab as the "previous prediction" for the next frame.
+ I_last_lab_predict = torch.cat((IA_l, I_current_ab_predict), dim=1)
+ 
+ # IA_predict_rgb = tensor_lab2rgb(torch.cat((uncenter_l(IA_l), I_current_ab_predict), dim=1))
+ # Recombine the predicted ab channels with the original-resolution frame.
+ IA_predict_rgb = upscale_image(curr_frame, I_current_ab_predict)
+ #IA_predict_rgb = torch.nn.functional.upsample_bilinear(IA_predict_rgb, scale_factor=2)
+ # Scale by 255 before saving; save_frames clips to 0..255 and casts to uint8.
+ save_frames(IA_predict_rgb.squeeze(0).cpu().numpy() * 255, video_path_parts[-2], os.path.basename(frame_path))
+ # Encode the saved frames at the source frame rate and return the video's path.
+ return combine_frames_from_folder(os.path.join(OUTPUT_RESULT_PATH, video_path_parts[-2]), fps)
+
+# Script entry point. The names assigned here are module-level globals that the functions
+# above read (device, INPUT_VIDEO_FRAMES_PATH, OUTPUT_RESULT_PATH, the three networks and
+# transforms), so they exist only when this file is run as a script.
+if __name__ == '__main__':
+ # Init global variables
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ INPUT_VIDEO_FRAMES_PATH = 'inputs'
+ OUTPUT_RESULT_PATH = 'outputs'
+ weight_path = 'checkpoints'
+ 
+ embed_net=GeneralEmbedModel(pretrained_model="swin-tiny", device=device).to(device)
+ nonlocal_net = GeneralWarpNet(feature_channel=128).to(device)
+ colornet=GeneralColorVidNet(7).to(device)
+ 
+ # Inference only: put all three networks in eval mode.
+ embed_net.eval()
+ nonlocal_net.eval()
+ colornet.eval()
+ 
+ # Load weights
+ # NOTE(review): loading embed_net.pth is commented out, so embed_net presumably keeps
+ # whatever GeneralEmbedModel initialises for "swin-tiny" — confirm this is intended.
+ # embed_net_params = load_params(os.path.join(weight_path, "embed_net.pth"))
+ nonlocal_net_params = load_params(os.path.join(weight_path, "nonlocal_net.pth"))
+ colornet_params = load_params(os.path.join(weight_path, "colornet.pth"))
+ 
+ # embed_net.load_state_dict(embed_net_params, strict=True)
+ nonlocal_net.load_state_dict(nonlocal_net_params, strict=True)
+ colornet.load_state_dict(colornet_params, strict=True)
+ 
+ # Transform list applied to the reference image and to every frame in colorize_video.
+ # This rebinds the module-level name `transforms`, which the file's imports bind to
+ # torchvision.transforms.
+ transforms = [SquaredPadding(target_size=224),
+ RGB2Lab(),
+ ToTensor(),
+ Normalize()]
+ 
+ # Pair example videos with reference images by sorted glob order; this lines up only if
+ # every examples/<name>/ folder contributes exactly one .mp4 and one .jpg.
+ examples = [[vid, ref] for vid, ref in zip(sorted(glob.glob('examples/*/*.mp4')), sorted(glob.glob('examples/*/*.jpg')))]
+ demo = gr.Interface(colorize_video,
+ inputs=[gr.Video(), gr.Image()],
+ outputs="playable_video",
+ examples=examples,
+ cache_examples=True)
+ demo.launch()
diff --git a/checkpoints/colornet.pth b/checkpoints/colornet.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5a11ecf6fde0aade0a82c4c412145681a168a863
--- /dev/null
+++ b/checkpoints/colornet.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5257ae325e292cd5fb2eff47095e1c4e4815455bd5fb6dc5ed2ee2b923172875
+size 131239411
diff --git a/checkpoints/embed_net.pth b/checkpoints/embed_net.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0439349777f69682d5b01e03b96659ad64c817c9
--- /dev/null
+++ b/checkpoints/embed_net.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc711755a75c43025dabe9407cbd11d164eaa9e21f26430d0c16c7493410d902
+size 110352261
diff --git a/checkpoints/nonlocal_net.pth b/checkpoints/nonlocal_net.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ee2aae80a5637970c868178d95a16d491e3b4f7e
--- /dev/null
+++ b/checkpoints/nonlocal_net.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b94c6990f20088bc3cc3fe0b29a6d52e6e746b915c506f0cd349fc6ad6197e72
+size 73189765
diff --git a/cmd.txt b/cmd.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b080a831c60ebd081c316bd8f1ac1709173272d5
--- /dev/null
+++ b/cmd.txt
@@ -0,0 +1,21 @@
+# Single-process training run.
+# The W&B API key is taken from the WANDB_API_KEY environment variable rather than being
+# committed with the command; export it before running. The key that used to be hard-coded
+# here should be treated as exposed and revoked.
+python train.py --video_data_root_list datasets/images/images \
+ --flow_data_root_list datasets/flow_fp16/flow_fp16 \
+ --mask_data_root_list datasets/pgm/pgm \
+ --data_root_imagenet datasets/imgnet \
+ --annotation_file_path datasets/final_annot.csv \
+ --imagenet_pairs_file datasets/pairs.txt \
+ --gpu_ids 0 \
+ --workers 12 \
+ --batch_size 2 \
+ --real_reference_probability 0.99 \
+ --weight_contextual 1 \
+ --weight_perceptual 0.1 \
+ --weight_smoothness 5 \
+ --weight_gan 0.9 \
+ --weight_consistent 0.1 \
+ --use_wandb True \
+ --wandb_token "$WANDB_API_KEY" \
+ --wandb_name "vit_tiny_patch16_384_nofeat" \
+ --checkpoint_step 500 \
+ --epoch_train_discriminator 3 \
+ --epoch 20
\ No newline at end of file
diff --git a/cmd_ddp.txt b/cmd_ddp.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0d8e8d82c32b11f646692aa97bd2a76e9a3d51c3
--- /dev/null
+++ b/cmd_ddp.txt
@@ -0,0 +1,20 @@
+!torchrun --nnodes=1 --nproc_per_node=2 train_ddp.py --video_data_root_list $video_data_root_list \
+ --flow_data_root_list $flow_data_root_list \
+ --mask_data_root_list $mask_data_root_list \
+ --data_root_imagenet $data_root_imagenet \
+ --annotation_file_path $annotation_file_path \
+ --imagenet_pairs_file $imagenet_pairs_file \
+ --gpu_ids "0,1" \
+ --workers 2 \
+ --batch_size 2 \
+ --real_reference_probability 0.99 \
+ --weight_contextual 1 \
+ --weight_perceptual 0.1 \
+ --weight_smoothness 5 \
+ --weight_gan 0.9 \
+ --weight_consistent 0.1 \
+ --wandb_token "165e7148081f263b423722115e2ad40fa5339ecf" \
+ --wandb_name "vit_tiny_patch16_384_nofeat" \
+ --checkpoint_step 2000 \
+ --epoch_train_discriminator 2 \
+ --epoch 10
\ No newline at end of file
diff --git a/docs/.gitignore b/docs/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/examples.zip b/examples.zip
new file mode 100644
index 0000000000000000000000000000000000000000..f3a62a9625eab85ad8a0d79f6b954a3fec8bd98b
--- /dev/null
+++ b/examples.zip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd4531bd3abdec6df90efb0d19fadd54284bdc70d5edfff19752a205159eb4db
+size 6955837
diff --git a/examples/bear/ref.jpg b/examples/bear/ref.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..20e3da29f586ff4adafddd1e5bd7e4a8da4ba9b0
Binary files /dev/null and b/examples/bear/ref.jpg differ
diff --git a/examples/bear/video.mp4 b/examples/bear/video.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..2e3f77af2d48b9ab40bc5b1818c2bc816c703ac4
--- /dev/null
+++ b/examples/bear/video.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb4cec5064873a4616f78bdb653830683a4842b2a5cfd0665b395cff4d120d04
+size 1263445
diff --git a/examples/boat/ref.jpg b/examples/boat/ref.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a824073c110425a5d071317722fe63833294beea
Binary files /dev/null and b/examples/boat/ref.jpg differ
diff --git a/examples/boat/video.mp4 b/examples/boat/video.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..77d209492bcea9cb76ece5be5979b55b6f98da6c
Binary files /dev/null and b/examples/boat/video.mp4 differ
diff --git a/examples/cows/ref.jpg b/examples/cows/ref.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0e5bd05c7878ed97be25c87fc733e39e12226a96
Binary files /dev/null and b/examples/cows/ref.jpg differ
diff --git a/examples/cows/video.mp4 b/examples/cows/video.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..7809e9e5d985dfd63967e74d2a2d109028aec8c7
--- /dev/null
+++ b/examples/cows/video.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ac08603d719cd7a8d71fac76c9318d3e8f1e516e9b3c2a06323a0e4e78f6410
+size 2745681
diff --git a/examples/flamingo/ref.jpg b/examples/flamingo/ref.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0a10d911be10a92e54a669cc4ba917bc3d683db1
Binary files /dev/null and b/examples/flamingo/ref.jpg differ
diff --git a/examples/flamingo/video.mp4 b/examples/flamingo/video.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..8b0e90ba000fa95f8915d28770bd5c9e7606bad0
--- /dev/null
+++ b/examples/flamingo/video.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a103fd4991a00e419e5236b885fe9d220704ba0a6ac794c87aaa3f62a4f1561
+size 1239570
diff --git a/gradio_cached_examples/13/log.csv b/gradio_cached_examples/13/log.csv
new file mode 100644
index 0000000000000000000000000000000000000000..fb995e3b2badc50a4ca445b09f4718bf1ccd0826
--- /dev/null
+++ b/gradio_cached_examples/13/log.csv
@@ -0,0 +1,5 @@
+output,flag,username,timestamp
+/content/ViTExCo/gradio_cached_examples/13/output/003c3114319372a78bf2f812ebaf0041afa280fb/output_video.mp4,,,2023-08-15 09:45:37.897615
+/content/ViTExCo/gradio_cached_examples/13/output/e6d6153dedeb9fec586b3241311cc49dbc17bc85/output_video.mp4,,,2023-08-15 09:46:01.048997
+/content/ViTExCo/gradio_cached_examples/13/output/7969adca8ae38cb3b38ff8e7bb54688d942c7bc8/output_video.mp4,,,2023-08-15 09:46:34.503322
+/content/ViTExCo/gradio_cached_examples/13/output/74c76e483235b7e80665e32d7fcdcc3da2be7644/output_video.mp4,,,2023-08-15 09:46:58.088903
diff --git a/gradio_cached_examples/13/output/003c3114319372a78bf2f812ebaf0041afa280fb/output_video.mp4 b/gradio_cached_examples/13/output/003c3114319372a78bf2f812ebaf0041afa280fb/output_video.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..ece03d2d27a95954b0d80802385976754e90505c
--- /dev/null
+++ b/gradio_cached_examples/13/output/003c3114319372a78bf2f812ebaf0041afa280fb/output_video.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5ab666998e14fb00281a90f8801753eca001a432641ae2770007a8336b4c64e
+size 1213824
diff --git a/gradio_cached_examples/13/output/74c76e483235b7e80665e32d7fcdcc3da2be7644/output_video.mp4 b/gradio_cached_examples/13/output/74c76e483235b7e80665e32d7fcdcc3da2be7644/output_video.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..96013646f3a76c9161c88f081db6dbec0eff5ff1
Binary files /dev/null and b/gradio_cached_examples/13/output/74c76e483235b7e80665e32d7fcdcc3da2be7644/output_video.mp4 differ
diff --git a/gradio_cached_examples/13/output/7969adca8ae38cb3b38ff8e7bb54688d942c7bc8/output_video.mp4 b/gradio_cached_examples/13/output/7969adca8ae38cb3b38ff8e7bb54688d942c7bc8/output_video.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..1eb87f5afa098667ef5ccb41aabbdafe2b54d211
--- /dev/null
+++ b/gradio_cached_examples/13/output/7969adca8ae38cb3b38ff8e7bb54688d942c7bc8/output_video.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c367dab34e596f7f0fed34c7e2384525de2ba1824b410d0770bdbd17bc9e72a
+size 1793060
diff --git a/gradio_cached_examples/13/output/e6d6153dedeb9fec586b3241311cc49dbc17bc85/output_video.mp4 b/gradio_cached_examples/13/output/e6d6153dedeb9fec586b3241311cc49dbc17bc85/output_video.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..92b58e5f1f571227c24fbc04291189de1d4f86f1
Binary files /dev/null and b/gradio_cached_examples/13/output/e6d6153dedeb9fec586b3241311cc49dbc17bc85/output_video.mp4 differ
diff --git a/inputs/video.mp4/000000000.jpg b/inputs/video.mp4/000000000.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4cc97d06e4db87df257e66be4e35f703188cfbca
Binary files /dev/null and b/inputs/video.mp4/000000000.jpg differ
diff --git a/inputs/video.mp4/000000001.jpg b/inputs/video.mp4/000000001.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b6f6d43bc315ae3b7a6536058b394e68f1e4d131
Binary files /dev/null and b/inputs/video.mp4/000000001.jpg differ
diff --git a/inputs/video.mp4/000000002.jpg b/inputs/video.mp4/000000002.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..112bb9a5194fc47550310c22601cdc8acc5324aa
Binary files /dev/null and b/inputs/video.mp4/000000002.jpg differ
diff --git a/inputs/video.mp4/000000003.jpg b/inputs/video.mp4/000000003.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..91b0c77d598bba7334bf6ee5714d5a2748e71891
Binary files /dev/null and b/inputs/video.mp4/000000003.jpg differ
diff --git a/inputs/video.mp4/000000004.jpg b/inputs/video.mp4/000000004.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..91f8dfe650925ff56ee28a2582e849bb044620fb
Binary files /dev/null and b/inputs/video.mp4/000000004.jpg differ
diff --git a/inputs/video.mp4/000000005.jpg b/inputs/video.mp4/000000005.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5913826729f81711e929c4555f82f20150b05d27
Binary files /dev/null and b/inputs/video.mp4/000000005.jpg differ
diff --git a/inputs/video.mp4/000000006.jpg b/inputs/video.mp4/000000006.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ee8b1e30810dfb65d1b9c82b990b22f78f1e4525
Binary files /dev/null and b/inputs/video.mp4/000000006.jpg differ
diff --git a/inputs/video.mp4/000000007.jpg b/inputs/video.mp4/000000007.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..af964d773c8a0932bb1c9339cb782067f21a3305
Binary files /dev/null and b/inputs/video.mp4/000000007.jpg differ
diff --git a/inputs/video.mp4/000000008.jpg b/inputs/video.mp4/000000008.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6af0fb9c7735eb8119b8b329d16883afa75663d8
Binary files /dev/null and b/inputs/video.mp4/000000008.jpg differ
diff --git a/inputs/video.mp4/000000009.jpg b/inputs/video.mp4/000000009.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9a0e45801d3d57beb7fca1337e7f23e95c64039d
Binary files /dev/null and b/inputs/video.mp4/000000009.jpg differ
diff --git a/inputs/video.mp4/000000010.jpg b/inputs/video.mp4/000000010.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fda58171652b283c51adb2868cdb731da0b74e5a
Binary files /dev/null and b/inputs/video.mp4/000000010.jpg differ
diff --git a/inputs/video.mp4/000000011.jpg b/inputs/video.mp4/000000011.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c73537a50ed92aca64d1828f9174d45d90898472
Binary files /dev/null and b/inputs/video.mp4/000000011.jpg differ
diff --git a/inputs/video.mp4/000000012.jpg b/inputs/video.mp4/000000012.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7b710e7c7b7e2cb8e35b0417020c4ae5772ea5a4
Binary files /dev/null and b/inputs/video.mp4/000000012.jpg differ
diff --git a/inputs/video.mp4/000000013.jpg b/inputs/video.mp4/000000013.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bb34aff550b49adc1e4cc174581a08c13d0b4492
Binary files /dev/null and b/inputs/video.mp4/000000013.jpg differ
diff --git a/inputs/video.mp4/000000014.jpg b/inputs/video.mp4/000000014.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6bb2f0c8b367da7ad427677cd4d8eb3e8f43e117
Binary files /dev/null and b/inputs/video.mp4/000000014.jpg differ
diff --git a/inputs/video.mp4/000000015.jpg b/inputs/video.mp4/000000015.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..12040c05ed1b713d522e62f8838de3225e28d4af
Binary files /dev/null and b/inputs/video.mp4/000000015.jpg differ
diff --git a/inputs/video.mp4/000000016.jpg b/inputs/video.mp4/000000016.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..efbc2dd46d9fec13f4a5bf7958e751c23ffdac98
Binary files /dev/null and b/inputs/video.mp4/000000016.jpg differ
diff --git a/inputs/video.mp4/000000017.jpg b/inputs/video.mp4/000000017.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1eb25e126212f109ec29909e57447c6f49952717
Binary files /dev/null and b/inputs/video.mp4/000000017.jpg differ
diff --git a/inputs/video.mp4/000000018.jpg b/inputs/video.mp4/000000018.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..42381e6e1460e5e88f05ce68fc3f68c69765627b
Binary files /dev/null and b/inputs/video.mp4/000000018.jpg differ
diff --git a/inputs/video.mp4/000000019.jpg b/inputs/video.mp4/000000019.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..566c8f9b7ea64cf1b6242111884d01a4e454db3e
Binary files /dev/null and b/inputs/video.mp4/000000019.jpg differ
diff --git a/inputs/video.mp4/000000020.jpg b/inputs/video.mp4/000000020.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bee6c3f5458d99336456933d018b204b7785c091
Binary files /dev/null and b/inputs/video.mp4/000000020.jpg differ
diff --git a/inputs/video.mp4/000000021.jpg b/inputs/video.mp4/000000021.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d7d2a61c8a02e8057c4edc88bb3e9fb3dcca51b5
Binary files /dev/null and b/inputs/video.mp4/000000021.jpg differ
diff --git a/inputs/video.mp4/000000022.jpg b/inputs/video.mp4/000000022.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..da9c6514e545a6ff9e9b12a5bb04e38eef6b7304
Binary files /dev/null and b/inputs/video.mp4/000000022.jpg differ
diff --git a/inputs/video.mp4/000000023.jpg b/inputs/video.mp4/000000023.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ee8aab46fdb09d1a0ab95f3d9b21f4183f21dbd8
Binary files /dev/null and b/inputs/video.mp4/000000023.jpg differ
diff --git a/inputs/video.mp4/000000024.jpg b/inputs/video.mp4/000000024.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1873ebfa50ab03e60c9e7c7e3e5844f0ee5e2e24
Binary files /dev/null and b/inputs/video.mp4/000000024.jpg differ
diff --git a/inputs/video.mp4/000000025.jpg b/inputs/video.mp4/000000025.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e778b7c95d06c59017b8abe7a5ac846fc89363b7
Binary files /dev/null and b/inputs/video.mp4/000000025.jpg differ
diff --git a/inputs/video.mp4/000000026.jpg b/inputs/video.mp4/000000026.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ae32acdc1fe9a5d3615ce3bdc753b6cc64c79c68
Binary files /dev/null and b/inputs/video.mp4/000000026.jpg differ
diff --git a/inputs/video.mp4/000000027.jpg b/inputs/video.mp4/000000027.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..907df4995041d7223bc6615d943329e367ee7867
Binary files /dev/null and b/inputs/video.mp4/000000027.jpg differ
diff --git a/inputs/video.mp4/000000028.jpg b/inputs/video.mp4/000000028.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0aa11d05d704ac7351bb7162b529fafd20c5b78f
Binary files /dev/null and b/inputs/video.mp4/000000028.jpg differ
diff --git a/inputs/video.mp4/000000029.jpg b/inputs/video.mp4/000000029.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..efa8771390695e90c440a773b59171d9047bc3c2
Binary files /dev/null and b/inputs/video.mp4/000000029.jpg differ
diff --git a/inputs/video.mp4/000000030.jpg b/inputs/video.mp4/000000030.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3923d3700b136d6d4579d48f52e5ab0c0300a245
Binary files /dev/null and b/inputs/video.mp4/000000030.jpg differ
diff --git a/inputs/video.mp4/000000031.jpg b/inputs/video.mp4/000000031.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6afc4b41ae471e648a721762e7186f75f7a43b28
Binary files /dev/null and b/inputs/video.mp4/000000031.jpg differ
diff --git a/inputs/video.mp4/000000032.jpg b/inputs/video.mp4/000000032.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0fd34d50f09ab35e807256fecdcaefef27b44d25
Binary files /dev/null and b/inputs/video.mp4/000000032.jpg differ
diff --git a/inputs/video.mp4/000000033.jpg b/inputs/video.mp4/000000033.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..03232e86e8609f96c38c7d328033369e51d3d88f
Binary files /dev/null and b/inputs/video.mp4/000000033.jpg differ
diff --git a/inputs/video.mp4/000000034.jpg b/inputs/video.mp4/000000034.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f97f5675959149ce18bc7f54a0cdda29c02d1468
Binary files /dev/null and b/inputs/video.mp4/000000034.jpg differ
diff --git a/inputs/video.mp4/000000035.jpg b/inputs/video.mp4/000000035.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..09d805df2de7d99283cf0a07192baa87cd10450c
Binary files /dev/null and b/inputs/video.mp4/000000035.jpg differ
diff --git a/inputs/video.mp4/000000036.jpg b/inputs/video.mp4/000000036.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5cd30a2c04127ce8825261153c203556cc90f07a
Binary files /dev/null and b/inputs/video.mp4/000000036.jpg differ
diff --git a/inputs/video.mp4/000000037.jpg b/inputs/video.mp4/000000037.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9344ed2b97c59dc1681f78fd23eebe54af87366f
Binary files /dev/null and b/inputs/video.mp4/000000037.jpg differ
diff --git a/inputs/video.mp4/000000038.jpg b/inputs/video.mp4/000000038.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c0daadedffbedd7a23743ad17fd7081d89003b37
Binary files /dev/null and b/inputs/video.mp4/000000038.jpg differ
diff --git a/inputs/video.mp4/000000039.jpg b/inputs/video.mp4/000000039.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f1e3e7bfd9315b18641a6a616f2f69de1749ad8b
Binary files /dev/null and b/inputs/video.mp4/000000039.jpg differ
diff --git a/inputs/video.mp4/000000040.jpg b/inputs/video.mp4/000000040.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..02f677b5587e23ba970557d62617fe547362bd01
Binary files /dev/null and b/inputs/video.mp4/000000040.jpg differ
diff --git a/inputs/video.mp4/000000041.jpg b/inputs/video.mp4/000000041.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..181eaa715d53bce66d2afd22beb9a20af1e64e6b
Binary files /dev/null and b/inputs/video.mp4/000000041.jpg differ
diff --git a/inputs/video.mp4/000000042.jpg b/inputs/video.mp4/000000042.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..11c824ebc4cd07e0961c0c1109c8e6efbe82eb99
Binary files /dev/null and b/inputs/video.mp4/000000042.jpg differ
diff --git a/inputs/video.mp4/000000043.jpg b/inputs/video.mp4/000000043.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bd05645e9aa42d95f1adcea4d3d9ba96d066f656
Binary files /dev/null and b/inputs/video.mp4/000000043.jpg differ
diff --git a/inputs/video.mp4/000000044.jpg b/inputs/video.mp4/000000044.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b5e62559a8b28a0d49bd432ba52f0633eb6d35a1
Binary files /dev/null and b/inputs/video.mp4/000000044.jpg differ
diff --git a/inputs/video.mp4/000000045.jpg b/inputs/video.mp4/000000045.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fab42e8efb1a691b41d0a4ebe5c624ddefa795ee
Binary files /dev/null and b/inputs/video.mp4/000000045.jpg differ
diff --git a/inputs/video.mp4/000000046.jpg b/inputs/video.mp4/000000046.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..912d88e420982b0e538af19356dee2663f492383
Binary files /dev/null and b/inputs/video.mp4/000000046.jpg differ
diff --git a/inputs/video.mp4/000000047.jpg b/inputs/video.mp4/000000047.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..22e05f974fe69f63c7fa4806a01292fe13aa98dc
Binary files /dev/null and b/inputs/video.mp4/000000047.jpg differ
diff --git a/inputs/video.mp4/000000048.jpg b/inputs/video.mp4/000000048.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3e82d5c7434437cf04e025ab5edf28eb304bbc86
Binary files /dev/null and b/inputs/video.mp4/000000048.jpg differ
diff --git a/inputs/video.mp4/000000049.jpg b/inputs/video.mp4/000000049.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..baebb51c5bc15f2886d1802614d5c54af4859476
Binary files /dev/null and b/inputs/video.mp4/000000049.jpg differ
diff --git a/inputs/video.mp4/000000050.jpg b/inputs/video.mp4/000000050.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a996b5e82f47ded7955f9657408fb21b22273b31
Binary files /dev/null and b/inputs/video.mp4/000000050.jpg differ
diff --git a/inputs/video.mp4/000000051.jpg b/inputs/video.mp4/000000051.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5e240319908f48f00fa3d2461f00f896f7533067
Binary files /dev/null and b/inputs/video.mp4/000000051.jpg differ
diff --git a/inputs/video.mp4/000000052.jpg b/inputs/video.mp4/000000052.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c0f303c8b34bc2ae23df6e8086b2d4eede04ba36
Binary files /dev/null and b/inputs/video.mp4/000000052.jpg differ
diff --git a/inputs/video.mp4/000000053.jpg b/inputs/video.mp4/000000053.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5622e5da0ee7f0924cc4d02f3221c659949b7ed8
Binary files /dev/null and b/inputs/video.mp4/000000053.jpg differ
diff --git a/inputs/video.mp4/000000054.jpg b/inputs/video.mp4/000000054.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d6a1fcd16c2cbd4e39958a484f96f02bf9c2076d
Binary files /dev/null and b/inputs/video.mp4/000000054.jpg differ
diff --git a/inputs/video.mp4/000000055.jpg b/inputs/video.mp4/000000055.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7927a7194f76a09b2404d26bb6c1349f243ccfe7
Binary files /dev/null and b/inputs/video.mp4/000000055.jpg differ
diff --git a/inputs/video.mp4/000000056.jpg b/inputs/video.mp4/000000056.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e938edd8e6432eab4455fa37f2649fda3680e196
Binary files /dev/null and b/inputs/video.mp4/000000056.jpg differ
diff --git a/inputs/video.mp4/000000057.jpg b/inputs/video.mp4/000000057.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7d833168bd8cc9172cc7d0deeb2d1d814b01627d
Binary files /dev/null and b/inputs/video.mp4/000000057.jpg differ
diff --git a/inputs/video.mp4/000000058.jpg b/inputs/video.mp4/000000058.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..97b5b45abd786421f448e67d07778f6261df3afe
Binary files /dev/null and b/inputs/video.mp4/000000058.jpg differ
diff --git a/inputs/video.mp4/000000059.jpg b/inputs/video.mp4/000000059.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c8f37fc6e7d86389d2011727db202c7964b25755
Binary files /dev/null and b/inputs/video.mp4/000000059.jpg differ
diff --git a/inputs/video.mp4/000000060.jpg b/inputs/video.mp4/000000060.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d3c66d1c1b3f151939342d383beb164db2b10d83
Binary files /dev/null and b/inputs/video.mp4/000000060.jpg differ
diff --git a/inputs/video.mp4/000000061.jpg b/inputs/video.mp4/000000061.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f3f78930f5d7b883547b93762649dbf6ae86497f
Binary files /dev/null and b/inputs/video.mp4/000000061.jpg differ
diff --git a/inputs/video.mp4/000000062.jpg b/inputs/video.mp4/000000062.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ab763c94bb69bc648a159e45e1d4c58001358cc2
Binary files /dev/null and b/inputs/video.mp4/000000062.jpg differ
diff --git a/inputs/video.mp4/000000063.jpg b/inputs/video.mp4/000000063.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..722ea065f58d5f2ac413cdf0008e45e7e40311b8
Binary files /dev/null and b/inputs/video.mp4/000000063.jpg differ
diff --git a/inputs/video.mp4/000000064.jpg b/inputs/video.mp4/000000064.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..34397f9173ca48037cbceda2fd51e850af2d6be1
Binary files /dev/null and b/inputs/video.mp4/000000064.jpg differ
diff --git a/inputs/video.mp4/000000065.jpg b/inputs/video.mp4/000000065.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a8ee4c73736120debe6f6d9cd75288bffb388d5f
Binary files /dev/null and b/inputs/video.mp4/000000065.jpg differ
diff --git a/inputs/video.mp4/000000066.jpg b/inputs/video.mp4/000000066.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ae6fcd88c7d87b84cd9d3e63ff8c23f7972c4cd6
Binary files /dev/null and b/inputs/video.mp4/000000066.jpg differ
diff --git a/inputs/video.mp4/000000067.jpg b/inputs/video.mp4/000000067.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f8bb0780ae13173d52b8209f3f8316009668be00
Binary files /dev/null and b/inputs/video.mp4/000000067.jpg differ
diff --git a/inputs/video.mp4/000000068.jpg b/inputs/video.mp4/000000068.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f9f485376354d20c3330f3776db91b43366e71af
Binary files /dev/null and b/inputs/video.mp4/000000068.jpg differ
diff --git a/inputs/video.mp4/000000069.jpg b/inputs/video.mp4/000000069.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6d92d4b320630af838b48041acfde8352cd9cfd5
Binary files /dev/null and b/inputs/video.mp4/000000069.jpg differ
diff --git a/inputs/video.mp4/000000070.jpg b/inputs/video.mp4/000000070.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6c8c651e36b94f67b6478f28616b917d1be3d499
Binary files /dev/null and b/inputs/video.mp4/000000070.jpg differ
diff --git a/inputs/video.mp4/000000071.jpg b/inputs/video.mp4/000000071.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..dcb7a91e3ebf28fe475d132ac71001a8d8600e75
Binary files /dev/null and b/inputs/video.mp4/000000071.jpg differ
diff --git a/inputs/video.mp4/000000072.jpg b/inputs/video.mp4/000000072.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..76a9fae73d7656f48500a3f33e8ec38310762157
Binary files /dev/null and b/inputs/video.mp4/000000072.jpg differ
diff --git a/inputs/video.mp4/000000073.jpg b/inputs/video.mp4/000000073.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8a7b91f122c517b660f68e24aad3a5f0d375a37d
Binary files /dev/null and b/inputs/video.mp4/000000073.jpg differ
diff --git a/inputs/video.mp4/000000074.jpg b/inputs/video.mp4/000000074.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..34e15aa4967beb7054a660cbc80bd597eb9e8fa9
Binary files /dev/null and b/inputs/video.mp4/000000074.jpg differ
diff --git a/inputs/video.mp4/000000075.jpg b/inputs/video.mp4/000000075.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..14f82bf5a78fb5a38884e7a8c05fc9194f19a497
Binary files /dev/null and b/inputs/video.mp4/000000075.jpg differ
diff --git a/inputs/video.mp4/000000076.jpg b/inputs/video.mp4/000000076.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b2c71a14dc9db8e891e876b5f66c6713c5fb735e
Binary files /dev/null and b/inputs/video.mp4/000000076.jpg differ
diff --git a/inputs/video.mp4/000000077.jpg b/inputs/video.mp4/000000077.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bdc946208c0eb04e74ac69e853dc89917025c8e3
Binary files /dev/null and b/inputs/video.mp4/000000077.jpg differ
diff --git a/inputs/video.mp4/000000078.jpg b/inputs/video.mp4/000000078.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..16ed47aea63e56bb4c74fe19a4c05a48927fa72b
Binary files /dev/null and b/inputs/video.mp4/000000078.jpg differ
diff --git a/inputs/video.mp4/000000079.jpg b/inputs/video.mp4/000000079.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cfd370710874df663e90c7c39b6012c0d20e6979
Binary files /dev/null and b/inputs/video.mp4/000000079.jpg differ
diff --git a/notebooks/data_preparation.ipynb b/notebooks/data_preparation.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..b2bd1c1a7183b249d2b43097208c12e73fc9992f
--- /dev/null
+++ b/notebooks/data_preparation.ipynb
@@ -0,0 +1,2313 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import cv2\n",
+ "import glob, os\n",
+ "import matplotlib.pyplot as plt\n",
+ "import time\n",
+ "from tqdm import tqdm\n",
+ "\n",
+ "annot_folder = '/home/kuhaku/Code/FPT/VideoColorization/dataset/hollywood/annotations'\n",
+ "vid_folder = '/home/kuhaku/Code/FPT/VideoColorization/dataset/hollywood/videoclips'\n",
+ "saved_folder = '/home/kuhaku/Code/FPT/VideoColorization/dataset/train/input'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 59%|โโโโโโ | 278/475 [01:16<01:23, 2.35it/s][mpeg4 @ 0x562a0e578180] ac-tex damaged at 18 8\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 250\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] P cbpy damaged at 13 11\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 332\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 13 11\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 332\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 4 9\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 265\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 8 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 66\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] illegal mb_num in video packet (448 420) \n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] illegal mb_num in video packet (448 420) \n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 8 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 66\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] illegal mb_num in video packet (0 420) \n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] ac-tex damaged at 25 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 83\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] I cbpy damaged at 21 11\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 340\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 21 11\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 340\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 4 9\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 265\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 8 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 66\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] P cbpy damaged at 3 0\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 3\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 20 13\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 397\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] ac-tex damaged at 26 14\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 432\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] ac-tex damaged at 25 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 83\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] P cbpy damaged at 1 12\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 349\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 1 12\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 349\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 4 9\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 265\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 8 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 66\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] I cbpy damaged at 9 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 67\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 20 13\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 397\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] illegal mb_num in video packet (448 420) \n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 24 6\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 198\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 16 4\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 132\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 8 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 66\n",
+ "[mpeg4 @ 0x562a0e578180] I cbpy damaged at 24 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 82\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 1 12\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 349\n",
+ "[mpeg4 @ 0x562a0e578180] ac-tex damaged at 4 12\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 352\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 26 8\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 258\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 1 12\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 349\n",
+ "[mpeg4 @ 0x562a0e578180] ac-tex damaged at 7 12\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 355\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 24 6\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 198\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 8 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 66\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 16 4\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 132\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 16 4\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 132\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 16 4\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 132\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] ac-tex damaged at 25 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 83\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] P cbpy damaged at 9 12\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 357\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 9 12\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 357\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 4 9\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 265\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 8 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 66\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 20 13\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 397\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] P cbpy damaged at 1 12\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 349\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] P cbpy damaged at 1 12\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 349\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 24 6\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 198\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 16 4\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 132\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] P cbpy damaged at 25 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 83\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] mcbpc damaged at 8 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 66\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] marker does not match f_code\n",
+ "[mpeg4 @ 0x562a0e578180] P cbpy damaged at 27 2\n",
+ "[mpeg4 @ 0x562a0e578180] Error at MB: 85\n",
+ "100%|██████████| 475/475 [02:41<00:00, 2.95it/s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "vids = glob.glob(vid_folder + '/*.avi')\n",
+ "\n",
+ "for vid in tqdm(vids):\n",
+ " vid_name = os.path.basename(vid).split('.')[0]\n",
+ " vid_name = vid_name.lower().replace(' ', '_')\n",
+ " vid_name = vid_name.replace('(', '').replace(')', '')\n",
+ " vid_name = vid_name.replace(',', '').replace('?', '')\n",
+ " vid_name = vid_name.replace('!', '').replace('\\'', '')\n",
+ " vid_name = vid_name.replace('-', '')\n",
+ " vid_name = vid_name.replace('__', '_')\n",
+ " cap = cv2.VideoCapture(vid)\n",
+ " fps = cap.get(cv2.CAP_PROP_FPS)\n",
+ " total_frame = cap.get(cv2.CAP_PROP_FRAME_COUNT)\n",
+ " ret, frame = cap.read()\n",
+ " index = 0\n",
+ " while ret:\n",
+ " index += 1\n",
+ " if index % 2 == 0:\n",
+ " os.makedirs(os.path.join(saved_folder, vid_name), exist_ok=True)\n",
+ " saved_path = os.path.join(saved_folder, vid_name, \"frame{:06d}.jpg\".format(index))\n",
+ " cv2.imwrite(saved_path, frame)\n",
+ " ret, frame = cap.read()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import shutil"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 475/475 [02:45<00:00, 2.87it/s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "frames_vid_folder = glob.glob(saved_folder + '/*')\n",
+ "for frames_vid in tqdm(frames_vid_folder):\n",
+ " frames_list = glob.glob(frames_vid + '/*.jpg')\n",
+ " os.makedirs(os.path.join(frames_vid, 'gt'), exist_ok=True)\n",
+ " os.makedirs(os.path.join(frames_vid, 'gray'), exist_ok=True)\n",
+ " for frame in frames_list:\n",
+ " image = cv2.imread(frame)\n",
+ " gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
+ " cv2.imwrite(os.path.join(frames_vid, 'gray', os.path.basename(frame)), gray_image)\n",
+ " shutil.copyfile(src=frame, dst=os.path.join(frames_vid, 'gt', os.path.basename(frame)))\n",
+ " os.remove(frame)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vid_list = glob.glob(os.path.join(vid_folder, '*.avi'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Casablanca\n",
+ "# Double Indemnity\n",
+ "# Forrest Gump - 00776\n",
+ "# Forrest Gump - 00785\n",
+ "# Its A Wonderful Life\n",
+ "# Lost Weekend, The\n",
+ "# Naked City, The"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import cv2\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "import glob\n",
+ "frame_folder = '/home/kuhaku/Code/FPT/VideoColorization/dataset/hollywood/frames'\n",
+ "img_paths = glob.glob(os.path.join(frame_folder, '*.jpg'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "('/home/kuhaku/Code/FPT/VideoColorization/dataset/train/test/lost_weekend_the_01681/gt',\n",
+ " 'frame001716.jpg')"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_path = '/home/kuhaku/Code/FPT/VideoColorization/dataset/train/test/*/gt/*.jpg'\n",
+ "for path in glob.glob(test_path):\n",
+ " folder_name = path.split('/')[-3]\n",
+ " os.makedirs(os.path.join('/home/kuhaku/Code/FPT/VideoColorization/dataset/train/test_', folder_name), exist_ok=True)\n",
+ " shutil.copyfile(path, '/home/kuhaku/Code/FPT/VideoColorization/dataset/train/test_/' + folder_name + \"/\" + os.path.basename(path))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'/home/kuhaku/Code/FPT/VideoColorization/dataset/train/test_/double_indemnity_01977/frame000060.jpg'"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "'/home/kuhaku/Code/FPT/VideoColorization/dataset/train/test_/' + folder_name + \"/\" + os.path.basename(path)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'/home/kuhaku/Code/FPT/VideoColorization/dataset/train/test_/frame000060.jpg'"
+ ]
+ },
+ "execution_count": 43,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "'/home/kuhaku/Code/FPT/VideoColorization/dataset/train/test_/' + os.path.basename(path)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'/home/kuhaku/Code/FPT/VideoColorization/dataset/train/test/double_indemnity_01977/gt/frame000060.jpg'"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "VideoColorization",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.16"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/opt.txt b/opt.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4d96bb3d4d62a1462fc61abd8ea87c62e12339a8
--- /dev/null
+++ b/opt.txt
@@ -0,0 +1,46 @@
+train.py 2023-07-08 16:16:10
+------------------------------ Options -------------------------------
+annotation_file_path: dataset/annotation.csv
+batch_accum_size: 10
+batch_size: 2
+beta1: 0.5
+checkpoint_dir: checkpoints
+checkpoint_step: 500
+contextual_loss_direction: forward
+data_root_imagenet: imagenet
+data_root_imagenet_full: imagenet
+data_root_list: dataset
+domain_invariant: False
+epoch: 40
+gpu_ids: 0,1,2,3
+head_out_idx: 8,9,10,11
+ic: 7
+image_size: [384, 384]
+imagenet_pairs_file: imagenet_pairs.txt
+load_pretrained_model: False
+lr: 0.0001
+lr_gamma: 0.9
+lr_step: 2000
+luminance_noise: 2.0
+nonzero_placeholder_probability: 0.0
+permute_data: True
+real_reference_probability: 0.7
+resume: False
+resume_epoch: 0
+step_train_discriminator: 1
+use_dummy: True
+use_feature_transform: False
+use_wandb: False
+vit_version: vit_tiny_patch16_384
+wandb_name:
+wandb_token:
+weight_consistent: 0.05
+weight_contextual: 0.5
+weight_gan: 0.5
+weight_nonlocal_consistent: 0.0
+weight_nonlocal_smoothness: 0.0
+weight_perceptual: 0.02
+weight_smoothness: 5.0
+weigth_l1: 2.0
+workers: 0
+-------------------------------- End ---------------------------------
diff --git a/outputs/video.mp4/000000000.jpg b/outputs/video.mp4/000000000.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..19ec53d39120dee70d96204f8e126dec57d710e3
Binary files /dev/null and b/outputs/video.mp4/000000000.jpg differ
diff --git a/outputs/video.mp4/000000001.jpg b/outputs/video.mp4/000000001.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4d3aff5c7ea6209a2868c5be8eb51aa6a18e2e6f
Binary files /dev/null and b/outputs/video.mp4/000000001.jpg differ
diff --git a/outputs/video.mp4/000000002.jpg b/outputs/video.mp4/000000002.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ef47278965885062a9ffd7f4b9b133825c0ee9c7
Binary files /dev/null and b/outputs/video.mp4/000000002.jpg differ
diff --git a/outputs/video.mp4/000000003.jpg b/outputs/video.mp4/000000003.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b0c3011674fe7083010400398d9f0ac0913741c6
Binary files /dev/null and b/outputs/video.mp4/000000003.jpg differ
diff --git a/outputs/video.mp4/000000004.jpg b/outputs/video.mp4/000000004.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c27de8add41119a03b9f2888aa9e87b68ca9ee07
Binary files /dev/null and b/outputs/video.mp4/000000004.jpg differ
diff --git a/outputs/video.mp4/000000005.jpg b/outputs/video.mp4/000000005.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c3197d1a0d9a53db831103184492a1bbb98432c7
Binary files /dev/null and b/outputs/video.mp4/000000005.jpg differ
diff --git a/outputs/video.mp4/000000006.jpg b/outputs/video.mp4/000000006.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b49b974108a33c6e94045366585609d6e553fa6c
Binary files /dev/null and b/outputs/video.mp4/000000006.jpg differ
diff --git a/outputs/video.mp4/000000007.jpg b/outputs/video.mp4/000000007.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fac7b1c4af31ed44207ed8d29e95cbd282d615f6
Binary files /dev/null and b/outputs/video.mp4/000000007.jpg differ
diff --git a/outputs/video.mp4/000000008.jpg b/outputs/video.mp4/000000008.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2a74d0419518d7d0a4eb25b745e5fb9227669870
Binary files /dev/null and b/outputs/video.mp4/000000008.jpg differ
diff --git a/outputs/video.mp4/000000009.jpg b/outputs/video.mp4/000000009.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0f69da59b98032b58545cad2aa6cf67f6182bd04
Binary files /dev/null and b/outputs/video.mp4/000000009.jpg differ
diff --git a/outputs/video.mp4/000000010.jpg b/outputs/video.mp4/000000010.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b1eede07f04b7e139208265f9ce57eb09486b16c
Binary files /dev/null and b/outputs/video.mp4/000000010.jpg differ
diff --git a/outputs/video.mp4/000000011.jpg b/outputs/video.mp4/000000011.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..712e0fceccd6d5f6dec27492e9011f80acd38485
Binary files /dev/null and b/outputs/video.mp4/000000011.jpg differ
diff --git a/outputs/video.mp4/000000012.jpg b/outputs/video.mp4/000000012.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..319ba417642cec8d43ac132364e773ab4a0d4c5a
Binary files /dev/null and b/outputs/video.mp4/000000012.jpg differ
diff --git a/outputs/video.mp4/000000013.jpg b/outputs/video.mp4/000000013.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7794e3328cf6bfff81f27936657531dc27d8850f
Binary files /dev/null and b/outputs/video.mp4/000000013.jpg differ
diff --git a/outputs/video.mp4/000000014.jpg b/outputs/video.mp4/000000014.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0adbef39af14e16e2b9b96a2c06bfa605b20bc5a
Binary files /dev/null and b/outputs/video.mp4/000000014.jpg differ
diff --git a/outputs/video.mp4/000000015.jpg b/outputs/video.mp4/000000015.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6b618fbd36fe689b055d24ba0ff26bbc22c72639
Binary files /dev/null and b/outputs/video.mp4/000000015.jpg differ
diff --git a/outputs/video.mp4/000000016.jpg b/outputs/video.mp4/000000016.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..51223ba35c3d38fc7d418d853fab33b75e8038e7
Binary files /dev/null and b/outputs/video.mp4/000000016.jpg differ
diff --git a/outputs/video.mp4/000000017.jpg b/outputs/video.mp4/000000017.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f832a81397647d65f9b2923653c0eb51c3a11f1f
Binary files /dev/null and b/outputs/video.mp4/000000017.jpg differ
diff --git a/outputs/video.mp4/000000018.jpg b/outputs/video.mp4/000000018.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a3acc34281aa8017c687bb507f90bb8e62fafb58
Binary files /dev/null and b/outputs/video.mp4/000000018.jpg differ
diff --git a/outputs/video.mp4/000000019.jpg b/outputs/video.mp4/000000019.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b0a1d0e3ac999786f6116837d03f2d9e0f914511
Binary files /dev/null and b/outputs/video.mp4/000000019.jpg differ
diff --git a/outputs/video.mp4/000000020.jpg b/outputs/video.mp4/000000020.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..84bd6268ee736dc04efc705de6066b9c174f67b1
Binary files /dev/null and b/outputs/video.mp4/000000020.jpg differ
diff --git a/outputs/video.mp4/000000021.jpg b/outputs/video.mp4/000000021.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ede2dc61e2e8ec7a5512c959881e9ca7bc88f397
Binary files /dev/null and b/outputs/video.mp4/000000021.jpg differ
diff --git a/outputs/video.mp4/000000022.jpg b/outputs/video.mp4/000000022.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1969e4dce3b36e3e78913e46bd4c1a5b3a622722
Binary files /dev/null and b/outputs/video.mp4/000000022.jpg differ
diff --git a/outputs/video.mp4/000000023.jpg b/outputs/video.mp4/000000023.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cd7ba8c9a1bc591e04697faa71dd7ab51b63223c
Binary files /dev/null and b/outputs/video.mp4/000000023.jpg differ
diff --git a/outputs/video.mp4/000000024.jpg b/outputs/video.mp4/000000024.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..66646bcb85f86c46c73f37c94a9a7f099bf7a431
Binary files /dev/null and b/outputs/video.mp4/000000024.jpg differ
diff --git a/outputs/video.mp4/000000025.jpg b/outputs/video.mp4/000000025.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4aeff96345839321f004d9606c9163d6639223d9
Binary files /dev/null and b/outputs/video.mp4/000000025.jpg differ
diff --git a/outputs/video.mp4/000000026.jpg b/outputs/video.mp4/000000026.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0809ee2308763e738a25de7b6812a43f9549ff7a
Binary files /dev/null and b/outputs/video.mp4/000000026.jpg differ
diff --git a/outputs/video.mp4/000000027.jpg b/outputs/video.mp4/000000027.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..153167c612cbf41f4f9c178421a2d3d0893ef067
Binary files /dev/null and b/outputs/video.mp4/000000027.jpg differ
diff --git a/outputs/video.mp4/000000028.jpg b/outputs/video.mp4/000000028.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..333f594a41d806ec5cb3fece37ec9b1654fb2512
Binary files /dev/null and b/outputs/video.mp4/000000028.jpg differ
diff --git a/outputs/video.mp4/000000029.jpg b/outputs/video.mp4/000000029.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..95c517b4b3d204cbe4fa8115f601a3f155395ba7
Binary files /dev/null and b/outputs/video.mp4/000000029.jpg differ
diff --git a/outputs/video.mp4/000000030.jpg b/outputs/video.mp4/000000030.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a032c0a18f7e61a0050bc6bf54b82f742b368748
Binary files /dev/null and b/outputs/video.mp4/000000030.jpg differ
diff --git a/outputs/video.mp4/000000031.jpg b/outputs/video.mp4/000000031.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3c7d72fea69e21401bfc6ff062a002e0d03c051b
Binary files /dev/null and b/outputs/video.mp4/000000031.jpg differ
diff --git a/outputs/video.mp4/000000032.jpg b/outputs/video.mp4/000000032.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..463c3b58ffbf08a4af844ce65ded9788ca4f804c
Binary files /dev/null and b/outputs/video.mp4/000000032.jpg differ
diff --git a/outputs/video.mp4/000000033.jpg b/outputs/video.mp4/000000033.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2e009493404ac175a71aba81655567f2562ba3d2
Binary files /dev/null and b/outputs/video.mp4/000000033.jpg differ
diff --git a/outputs/video.mp4/000000034.jpg b/outputs/video.mp4/000000034.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..83dfdbeb3608a83d4a34ea1220f421f7f41d9b21
Binary files /dev/null and b/outputs/video.mp4/000000034.jpg differ
diff --git a/outputs/video.mp4/000000035.jpg b/outputs/video.mp4/000000035.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7f2182ab41fbf0d3d46b2e3b4462362ae814805c
Binary files /dev/null and b/outputs/video.mp4/000000035.jpg differ
diff --git a/outputs/video.mp4/000000036.jpg b/outputs/video.mp4/000000036.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9bc2285e7de66aac4d84dbd97bbcd006f06304d5
Binary files /dev/null and b/outputs/video.mp4/000000036.jpg differ
diff --git a/outputs/video.mp4/000000037.jpg b/outputs/video.mp4/000000037.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ce0313a81275c7a8270b92a0b21aede7113ec33f
Binary files /dev/null and b/outputs/video.mp4/000000037.jpg differ
diff --git a/outputs/video.mp4/000000038.jpg b/outputs/video.mp4/000000038.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0982fd130f1c7af275b6efc0b131a525a74df5ed
Binary files /dev/null and b/outputs/video.mp4/000000038.jpg differ
diff --git a/outputs/video.mp4/000000039.jpg b/outputs/video.mp4/000000039.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b54985a9e8cbca5c8d0e7b715deccf175788c761
Binary files /dev/null and b/outputs/video.mp4/000000039.jpg differ
diff --git a/outputs/video.mp4/000000040.jpg b/outputs/video.mp4/000000040.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..38ceb4820c9264ef2b2142716ad46ee786d8da01
Binary files /dev/null and b/outputs/video.mp4/000000040.jpg differ
diff --git a/outputs/video.mp4/000000041.jpg b/outputs/video.mp4/000000041.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..12f9df50ec311be69c8cd5b1b1b104c58760742b
Binary files /dev/null and b/outputs/video.mp4/000000041.jpg differ
diff --git a/outputs/video.mp4/000000042.jpg b/outputs/video.mp4/000000042.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..019e66c460607df003d094ea7ce41111457a70be
Binary files /dev/null and b/outputs/video.mp4/000000042.jpg differ
diff --git a/outputs/video.mp4/000000043.jpg b/outputs/video.mp4/000000043.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c1baa20275351a5c4d67d1efed26f0a4c84df770
Binary files /dev/null and b/outputs/video.mp4/000000043.jpg differ
diff --git a/outputs/video.mp4/000000044.jpg b/outputs/video.mp4/000000044.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..095997eafdf2e6e0b61d2fc2f5a151f4d3b72f25
Binary files /dev/null and b/outputs/video.mp4/000000044.jpg differ
diff --git a/outputs/video.mp4/000000045.jpg b/outputs/video.mp4/000000045.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a28d045a2c2df9a7bebc78953fe495a3ac2810d7
Binary files /dev/null and b/outputs/video.mp4/000000045.jpg differ
diff --git a/outputs/video.mp4/000000046.jpg b/outputs/video.mp4/000000046.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..96c994f5453830a3323dc202dbe38b387e5e2e24
Binary files /dev/null and b/outputs/video.mp4/000000046.jpg differ
diff --git a/outputs/video.mp4/000000047.jpg b/outputs/video.mp4/000000047.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1d92b67dea69f5577a8a9911e2d85e65e54c314f
Binary files /dev/null and b/outputs/video.mp4/000000047.jpg differ
diff --git a/outputs/video.mp4/000000048.jpg b/outputs/video.mp4/000000048.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..36259b7af0301faeb4037cadb2712a5e538f9ff8
Binary files /dev/null and b/outputs/video.mp4/000000048.jpg differ
diff --git a/outputs/video.mp4/000000049.jpg b/outputs/video.mp4/000000049.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..dec0d2defb622750389721eedd603f5db8612b1f
Binary files /dev/null and b/outputs/video.mp4/000000049.jpg differ
diff --git a/outputs/video.mp4/000000050.jpg b/outputs/video.mp4/000000050.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..83e7296ce1af30366dfb239fe2a6758a85dce4be
Binary files /dev/null and b/outputs/video.mp4/000000050.jpg differ
diff --git a/outputs/video.mp4/000000051.jpg b/outputs/video.mp4/000000051.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e27b49ed0d5217e62cc9726bdd83c8cb09ccbc23
Binary files /dev/null and b/outputs/video.mp4/000000051.jpg differ
diff --git a/outputs/video.mp4/000000052.jpg b/outputs/video.mp4/000000052.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cecf337d82635a99d81cb39bad0006419f43bdc7
Binary files /dev/null and b/outputs/video.mp4/000000052.jpg differ
diff --git a/outputs/video.mp4/000000053.jpg b/outputs/video.mp4/000000053.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..00e053fde3255e941022b4f4266952948a3be987
Binary files /dev/null and b/outputs/video.mp4/000000053.jpg differ
diff --git a/outputs/video.mp4/000000054.jpg b/outputs/video.mp4/000000054.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a549a0a51d8ad2e35bff3b5a5776a0e07fc3db56
Binary files /dev/null and b/outputs/video.mp4/000000054.jpg differ
diff --git a/outputs/video.mp4/000000055.jpg b/outputs/video.mp4/000000055.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a56b8a3bcbc373c1a939511debb3e02722c77b1f
Binary files /dev/null and b/outputs/video.mp4/000000055.jpg differ
diff --git a/outputs/video.mp4/000000056.jpg b/outputs/video.mp4/000000056.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6b497025c3b63e9b254fb27c0ecbf82a30e412fa
Binary files /dev/null and b/outputs/video.mp4/000000056.jpg differ
diff --git a/outputs/video.mp4/000000057.jpg b/outputs/video.mp4/000000057.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c94da8fefc6af06c269af15df3bbb0c1ef041502
Binary files /dev/null and b/outputs/video.mp4/000000057.jpg differ
diff --git a/outputs/video.mp4/000000058.jpg b/outputs/video.mp4/000000058.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..61c018494a223a1e79e3aa3e6da043152c487adc
Binary files /dev/null and b/outputs/video.mp4/000000058.jpg differ
diff --git a/outputs/video.mp4/000000059.jpg b/outputs/video.mp4/000000059.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bb012741f2e6e3a4dfbae3ac3cb7cd424da27bd1
Binary files /dev/null and b/outputs/video.mp4/000000059.jpg differ
diff --git a/outputs/video.mp4/000000060.jpg b/outputs/video.mp4/000000060.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ed76570fcbfae12561a66d50fdfb17ca3bee5638
Binary files /dev/null and b/outputs/video.mp4/000000060.jpg differ
diff --git a/outputs/video.mp4/000000061.jpg b/outputs/video.mp4/000000061.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..753eacfc795cd4eec02fbd5dc378f86657203007
Binary files /dev/null and b/outputs/video.mp4/000000061.jpg differ
diff --git a/outputs/video.mp4/000000062.jpg b/outputs/video.mp4/000000062.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2c57b31862f3add2f227951fc2f84e3dfbca2091
Binary files /dev/null and b/outputs/video.mp4/000000062.jpg differ
diff --git a/outputs/video.mp4/000000063.jpg b/outputs/video.mp4/000000063.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..256497c7673b0a28ec090d746d7b6d44b196c833
Binary files /dev/null and b/outputs/video.mp4/000000063.jpg differ
diff --git a/outputs/video.mp4/000000064.jpg b/outputs/video.mp4/000000064.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..287259d5fb359a2b789df00cf3d039f2affe87dd
Binary files /dev/null and b/outputs/video.mp4/000000064.jpg differ
diff --git a/outputs/video.mp4/000000065.jpg b/outputs/video.mp4/000000065.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ad5943dcfb4fb6a3d651fdd07b4d8ad539f04dd8
Binary files /dev/null and b/outputs/video.mp4/000000065.jpg differ
diff --git a/outputs/video.mp4/000000066.jpg b/outputs/video.mp4/000000066.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..41fa2f1430412d9464d36d9cf02a56bac0afd48a
Binary files /dev/null and b/outputs/video.mp4/000000066.jpg differ
diff --git a/outputs/video.mp4/000000067.jpg b/outputs/video.mp4/000000067.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a8078301ad450b7d1c859abd8ab207a307f5792f
Binary files /dev/null and b/outputs/video.mp4/000000067.jpg differ
diff --git a/outputs/video.mp4/000000068.jpg b/outputs/video.mp4/000000068.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3560785587d5c77901e5523b427d43e7f4a1709d
Binary files /dev/null and b/outputs/video.mp4/000000068.jpg differ
diff --git a/outputs/video.mp4/000000069.jpg b/outputs/video.mp4/000000069.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..28287c4fc4be43cedc6d6f42099d6a1713aefb14
Binary files /dev/null and b/outputs/video.mp4/000000069.jpg differ
diff --git a/outputs/video.mp4/000000070.jpg b/outputs/video.mp4/000000070.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5dbe1902dba31b81e0701d7448046cfc80bf3b0c
Binary files /dev/null and b/outputs/video.mp4/000000070.jpg differ
diff --git a/outputs/video.mp4/000000071.jpg b/outputs/video.mp4/000000071.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..57a52d8bc6037de60829045bc83b06ae49eb309c
Binary files /dev/null and b/outputs/video.mp4/000000071.jpg differ
diff --git a/outputs/video.mp4/000000072.jpg b/outputs/video.mp4/000000072.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b67e174bd8b889951c9dbb63cc8baaa5e041b35c
Binary files /dev/null and b/outputs/video.mp4/000000072.jpg differ
diff --git a/outputs/video.mp4/000000073.jpg b/outputs/video.mp4/000000073.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ddda3caa3bae3c62c18a5f842eb7697053eca2fa
Binary files /dev/null and b/outputs/video.mp4/000000073.jpg differ
diff --git a/outputs/video.mp4/000000074.jpg b/outputs/video.mp4/000000074.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f0069adf8b36fccd8c1a943b32195a1084cacd86
Binary files /dev/null and b/outputs/video.mp4/000000074.jpg differ
diff --git a/outputs/video.mp4/000000075.jpg b/outputs/video.mp4/000000075.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e9aed6377201d2ee9bd2cc6b7384fbcd1c0f775e
Binary files /dev/null and b/outputs/video.mp4/000000075.jpg differ
diff --git a/outputs/video.mp4/000000076.jpg b/outputs/video.mp4/000000076.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..55b91ea6f0912e3ac154a6ada278814fd38d1d9d
Binary files /dev/null and b/outputs/video.mp4/000000076.jpg differ
diff --git a/outputs/video.mp4/000000077.jpg b/outputs/video.mp4/000000077.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e3e88758f1d5effe787a62abbcc33feae8d2fbca
Binary files /dev/null and b/outputs/video.mp4/000000077.jpg differ
diff --git a/outputs/video.mp4/000000078.jpg b/outputs/video.mp4/000000078.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..80cda26da2ef7967f691951a43afd7b5f37ae852
Binary files /dev/null and b/outputs/video.mp4/000000078.jpg differ
diff --git a/outputs/video.mp4/000000079.jpg b/outputs/video.mp4/000000079.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..219824b7eca8bbce00957b8e227ce1e818d83c0f
Binary files /dev/null and b/outputs/video.mp4/000000079.jpg differ
diff --git a/outputs/video.mp4/output_video.mp4 b/outputs/video.mp4/output_video.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..96013646f3a76c9161c88f081db6dbec0eff5ff1
Binary files /dev/null and b/outputs/video.mp4/output_video.mp4 differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b3977df208962d721c7956cc71b459218f768762
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,10 @@
+einops==0.6.1
+pandas==2.0.2
+pip-chill==1.0.3
+scikit-image==0.21.0
+timm
+tqdm==4.65.0
+wandb==0.15.4
+numba
+opencv-python
+gradio==3.40.1
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/__pycache__/__init__.cpython-310.pyc b/src/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..37827d96d5a1d2d1bd495a5927813f9658acbb47
Binary files /dev/null and b/src/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/__pycache__/utils.cpython-310.pyc b/src/__pycache__/utils.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..599166c08a02a41e282830374647edce8e60e192
Binary files /dev/null and b/src/__pycache__/utils.cpython-310.pyc differ
diff --git a/src/data/__pycache__/functional.cpython-310.pyc b/src/data/__pycache__/functional.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ce5b70c6290bee94368cd6d9822e3482d67036f7
Binary files /dev/null and b/src/data/__pycache__/functional.cpython-310.pyc differ
diff --git a/src/data/dataloader.py b/src/data/dataloader.py
new file mode 100644
index 0000000000000000000000000000000000000000..73512b6e53ffa0774d1526d6570150dd0918182f
--- /dev/null
+++ b/src/data/dataloader.py
@@ -0,0 +1,320 @@
+import numpy as np
+import pandas as pd
+from src.utils import (
+ CenterPadCrop_numpy,
+ Distortion_with_flow_cpu,
+ Distortion_with_flow_gpu,
+ Normalize,
+ RGB2Lab,
+ ToTensor,
+ Normalize,
+ RGB2Lab,
+ ToTensor,
+ CenterPad,
+ read_flow,
+ SquaredPadding,
+ SquaredPaddingFlow,
+ ResizeFlow
+)
+import torch
+import torch.utils.data as data
+import torchvision.transforms as transforms
+from numpy import random
+import os
+from PIL import Image
+from scipy.ndimage.filters import gaussian_filter
+from scipy.ndimage import map_coordinates
+
+
+def image_loader(path):
+    """Read the image file at ``path`` and return it converted to an RGB PIL image."""
+    # Both the raw file handle and the PIL image are released when the
+    # ``with`` statement exits; the converted copy outlives them.
+    with open(path, "rb") as handle, Image.open(handle) as picture:
+        rgb_picture = picture.convert("RGB")
+    return rgb_picture
+
+
+class CenterCrop(object):
+    """
+    Center-crop a numpy array to a fixed ``(height, width)``.
+
+    The crop window is applied to the first two axes of the input; any
+    trailing axes (e.g. a channel axis) are kept whole.
+    """
+
+    def __init__(self, image_size):
+        # image_size is unpacked as (crop_height, crop_width).
+        self.h0, self.w0 = image_size
+
+    def __call__(self, input_numpy):
+        """Return the centered ``(h0, w0)`` window of ``input_numpy``.
+
+        The result is a slice (a view) of the input, not a copy.
+
+        Raises:
+            ValueError: if the input has fewer than two axes, or is smaller
+                than the crop size along either of its first two axes.
+        """
+        h, w = input_numpy.shape[:2]
+        if h < self.h0 or w < self.w0:
+            # A negative offset would otherwise make the slice wrap around
+            # and silently return an array of the wrong shape.
+            raise ValueError(
+                "input of size (%d, %d) is smaller than the crop size (%d, %d)" % (h, w, self.h0, self.w0)
+            )
+        top = (h - self.h0) // 2
+        left = (w - self.w0) // 2
+        # Slicing only the first two axes covers both the 2-D and the
+        # channel-last 3-D case without pre-allocating an output buffer.
+        return input_numpy[top : top + self.h0, left : left + self.w0]
+
+
+class VideosDataset(torch.utils.data.Dataset):
+    """Dataset of consecutive video-frame pairs listed in an annotation CSV.
+
+    Every CSV row is read positionally: video name, previous frame, current
+    frame, forward-flow file and mask file, followed by ``num_refs``
+    reference-image names that are resolved against ``imagenet_folder``.
+
+    Args:
+        video_data_root: directory holding one sub-directory of frames per video.
+        flow_data_root: directory holding one sub-directory of flow files per video.
+        mask_data_root: directory holding one sub-directory of mask images per video.
+        imagenet_folder: directory the reference-image names are relative to.
+        annotation_file_path: CSV file read with ``pandas.read_csv``.
+        image_size: target size; passed to the padding/resize helpers
+            (``image_size[0]`` is used for the square padding).
+        num_refs: number of reference columns to read from each row (>= 1).
+        image_transform: callable applied to every loaded image. The default
+            of ``None`` is not callable, so a transform must be supplied.
+        real_reference_probability: probability of returning the external
+            reference image instead of one of the two ground-truth frames.
+        nonzero_placeholder_probability: when a ground-truth frame is used as
+            reference, probability that the placeholder is the current frame
+            rather than zeros.
+
+    Raises:
+        ValueError: if ``num_refs`` is smaller than 1.
+    """
+
+    # Leading annotation columns that precede the reference-image names:
+    # video name, previous frame, current frame, forward flow, mask.
+    _NUM_FIXED_COLUMNS = 5
+
+    def __init__(
+        self,
+        video_data_root,
+        flow_data_root,
+        mask_data_root,
+        imagenet_folder,
+        annotation_file_path,
+        image_size,
+        num_refs=5,  # max = 20
+        image_transform=None,
+        real_reference_probability=1,
+        nonzero_placeholder_probability=0.5,
+    ):
+        if num_refs < 1:
+            raise ValueError("num_refs must be at least 1, got %r" % (num_refs,))
+
+        self.video_data_root = video_data_root
+        self.flow_data_root = flow_data_root
+        self.mask_data_root = mask_data_root
+        self.imagenet_folder = imagenet_folder
+        self.image_transform = image_transform
+        self.CenterPad = CenterPad(image_size)
+        self.ToTensor = ToTensor()
+        self.CenterCrop = transforms.CenterCrop(image_size)
+        self.SquaredPadding = SquaredPadding(image_size[0])
+        self.SquaredPaddingFlow = SquaredPaddingFlow()
+        self.ResizeFlow = ResizeFlow(image_size)
+        self.num_refs = num_refs
+
+        assert os.path.exists(self.video_data_root), "find no video dataroot"
+        assert os.path.exists(self.flow_data_root), "find no flow dataroot"
+        assert os.path.exists(self.imagenet_folder), "find no imagenet folder"
+        self.image_pairs = pd.read_csv(annotation_file_path)
+        self.real_len = len(self.image_pairs)
+        self.real_reference_probability = real_reference_probability
+        self.nonzero_placeholder_probability = nonzero_placeholder_probability
+        print("##### parsing image pairs in %s: %d pairs #####" % (video_data_root, self.__len__()))
+
+    def __getitem__(self, index):
+        """Load one training sample.
+
+        Returns a list of ten items: previous frame, current frame, chosen
+        reference image, forward flow, binary mask, placeholder tensor,
+        self-reference flag tensor, two name strings (video name concatenated
+        with the previous / current frame name) and the path of the external
+        reference that was drawn.
+
+        If anything fails while loading or transforming the sample, the error
+        is printed and a randomly chosen other sample is returned instead.
+
+        Raises:
+            ValueError: if the annotation row does not contain the five fixed
+                columns plus ``num_refs`` reference columns.
+        """
+        expected_columns = self._NUM_FIXED_COLUMNS + self.num_refs
+        row = self.image_pairs.iloc[index, :expected_columns].values.tolist()
+        if len(row) != expected_columns:
+            # Fail loudly outside the retry logic: a malformed annotation
+            # file would otherwise make every sample fail and retry forever.
+            raise ValueError(
+                "annotation row %r has %d columns, expected %d" % (index, len(row), expected_columns)
+            )
+        video_name, prev_frame, current_frame, flow_forward_name, mask_name = row[: self._NUM_FIXED_COLUMNS]
+        reference_names = row[self._NUM_FIXED_COLUMNS :]
+
+        video_path = os.path.join(self.video_data_root, video_name)
+        flow_path = os.path.join(self.flow_data_root, video_name)
+        mask_path = os.path.join(self.mask_data_root, video_name, mask_name)
+
+        prev_frame_path = os.path.join(video_path, prev_frame)
+        current_frame_path = os.path.join(video_path, current_frame)
+        reference_paths = [os.path.join(self.imagenet_folder, name) for name in reference_names]
+        flow_forward_path = os.path.join(flow_path, flow_forward_name)
+
+        try:
+            I1 = Image.open(prev_frame_path).convert("RGB")
+            I2 = Image.open(current_frame_path).convert("RGB")
+
+            # Ground-truth reference: one of the two frames of the pair.
+            I_reference_video = Image.open(random.choice([prev_frame_path, current_frame_path])).convert("RGB")
+            # External reference: one of the listed reference images.
+            reference_path = random.choice(reference_paths)
+            I_reference_video_real = Image.open(reference_path).convert("RGB")
+
+            flow_forward = read_flow(flow_forward_path)  # numpy
+
+            mask = Image.open(mask_path)  # PIL
+            # binary mask: values >= 240 become 1, everything else 0
+            mask = np.array(mask)
+            mask[mask < 240] = 0
+            mask[mask >= 240] = 1
+            mask = self.ToTensor(mask)
+
+            # transform
+            I1 = self.image_transform(I1)
+            I2 = self.image_transform(I2)
+            I_reference_video = self.image_transform(self.CenterPad(I_reference_video))
+            I_reference_video_real = self.image_transform(self.CenterPad(I_reference_video_real))
+            flow_forward = self.SquaredPaddingFlow(self.ResizeFlow(torch.tensor(flow_forward)))
+
+            mask = self.SquaredPadding(mask, return_pil=False, return_paddings=False)
+
+            if np.random.random() < self.real_reference_probability:
+                I_reference_output = I_reference_video_real  # Use reference from imagenet
+                placeholder = torch.zeros_like(I1)
+                self_ref_flag = torch.zeros_like(I1)
+            else:
+                I_reference_output = I_reference_video  # Use reference from ground truth
+                placeholder = I2 if np.random.random() < self.nonzero_placeholder_probability else torch.zeros_like(I1)
+                self_ref_flag = torch.ones_like(I1)
+
+            outputs = [
+                I1,
+                I2,
+                I_reference_output,
+                flow_forward,
+                mask,
+                placeholder,
+                self_ref_flag,
+                video_name + prev_frame,
+                video_name + current_frame,
+                reference_path,
+            ]
+
+        except Exception as e:
+            # Report the offending row. Indexing the DataFrame with
+            # ``self.image_pairs[index]`` would be a column lookup and raise
+            # KeyError from inside this handler, hiding the real error.
+            print("error in reading image pair: %s" % str(row))
+            print(e)
+            # Best effort: substitute a random other sample.
+            return self.__getitem__(np.random.randint(0, len(self.image_pairs)))
+        return outputs
+
+    def __len__(self):
+        """Return the number of annotation rows."""
+        return len(self.image_pairs)
+
+
+def parse_imgnet_images(pairs_file):
+    """Parse a ``|``-separated pairs file into a list of ``(image_a, image_b)`` tuples.
+
+    Each non-blank line contributes its first two ``|``-separated fields;
+    any further fields are ignored. Blank (or whitespace-only) lines are
+    skipped.
+
+    Raises:
+        IndexError: if a non-blank line contains no ``|`` separator.
+    """
+    pairs = []
+    with open(pairs_file, "r") as f:
+        # Iterate the file lazily instead of materialising every line first.
+        for line in f:
+            fields = line.strip().split("|")
+            if fields == [""]:
+                # Blank line: nothing to parse.
+                continue
+            pairs.append((fields[0], fields[1]))
+    return pairs
+
+
+class VideosDataset_ImageNet(data.Dataset):
+ """
+ Dataset that builds a synthetic two-frame sample from a pair of still images.
+
+ One image of the pair is used as the first frame; a copy of it, passed through
+ ``distortion_transform`` together with a random smooth displacement field, is
+ used as the second frame; the other image of the pair is the external reference.
+
+ NOTE(review): ``pairs_file`` is read with ``pd.read_csv`` and its default
+ delimiter, whereas ``parse_imgnet_images`` splits lines on "|" -- confirm which
+ format the pairs file actually uses.
+ NOTE(review): the ``None`` defaults of ``transforms_imagenet`` and
+ ``extra_reference_transform`` do not look usable: ``__getitem__`` iterates over
+ ``transforms_imagenet_raw`` and calls both composed transforms.
+ """
+ def __init__(
+ self,
+ imagenet_data_root,
+ pairs_file,
+ image_size,
+ transforms_imagenet=None,
+ distortion_level=3,
+ brightnessjitter=0,
+ nonzero_placeholder_probability=0.5,
+ extra_reference_transform=None,
+ real_reference_probability=1,
+ distortion_device='cpu'
+ ):
+ self.imagenet_data_root = imagenet_data_root
+ # Two unnamed columns, exposed as 'i1' and 'i2'.
+ self.image_pairs = pd.read_csv(pairs_file, names=['i1', 'i2'])
+ # The raw list is kept so __getitem__ can apply the transforms one at a time.
+ self.transforms_imagenet_raw = transforms_imagenet
+ self.extra_reference_transform = transforms.Compose(extra_reference_transform)
+ self.real_reference_probability = real_reference_probability
+ self.transforms_imagenet = transforms.Compose(transforms_imagenet)
+ self.image_size = image_size
+ self.real_len = len(self.image_pairs)
+ self.distortion_level = distortion_level
+ # Any value other than 'cpu' selects the GPU implementation.
+ self.distortion_transform = Distortion_with_flow_cpu() if distortion_device == 'cpu' else Distortion_with_flow_gpu()
+ self.brightnessjitter = brightnessjitter
+ self.flow_transform = transforms.Compose([CenterPadCrop_numpy(self.image_size), ToTensor()])
+ self.nonzero_placeholder_probability = nonzero_placeholder_probability
+ self.ToTensor = ToTensor()
+ self.Normalize = Normalize()
+ print("##### parsing imageNet pairs in %s: %d pairs #####" % (imagenet_data_root, self.__len__()))
+
+ def __getitem__(self, index):
+ # Returns a 10-item list: first frame, distorted second frame, chosen reference,
+ # forward flow, mask, placeholder, self-reference flag, the literal "holder",
+ # and the two image names of the pair (pb, then pa).
+ pa, pb = self.image_pairs.iloc[index].values.tolist()
+ # Swap the roles of the two images half of the time.
+ if np.random.random() > 0.5:
+ pa, pb = pb, pa
+
+ image_a_path = os.path.join(self.imagenet_data_root, pa)
+ image_b_path = os.path.join(self.imagenet_data_root, pb)
+
+ # Both frames and the self-reference start from the same image; only the
+ # external reference comes from the second image of the pair.
+ I1 = image_loader(image_a_path)
+ I2 = I1
+ I_reference_video = I1
+ I_reference_video_real = image_loader(image_b_path)
+ # print("i'm here get image 2")
+ # generate the flow
+ # Random strength in [0, distortion_level).
+ alpha = np.random.rand() * self.distortion_level
+ # Passed to gaussian_filter as its sigma argument.
+ distortion_range = 50
+ random_state = np.random.RandomState(None)
+ shape = self.image_size[0], self.image_size[1]
+ # dx: flow on the vertical direction; dy: flow on the horizontal direction
+ # Uniform noise in [-1, 1), smoothed and scaled by alpha * 1000.
+ forward_dx = (
+ gaussian_filter((random_state.rand(*shape) * 2 - 1), distortion_range, mode="constant", cval=0) * alpha * 1000
+ )
+ forward_dy = (
+ gaussian_filter((random_state.rand(*shape) * 2 - 1), distortion_range, mode="constant", cval=0) * alpha * 1000
+ )
+ # print("i'm here get image 3")
+ # Apply the transforms one by one so the image can be captured right before
+ # the RGB2Lab step.
+ # NOTE(review): I1_raw / I2_raw are only assigned when an RGB2Lab instance is
+ # in the list; they are read unconditionally further down.
+ for transform in self.transforms_imagenet_raw:
+ if type(transform) is RGB2Lab:
+ I1_raw = I1
+ I1 = transform(I1)
+ # Same pipeline for the second frame; presumably the distortion is applied
+ # once, at the RGB2Lab step -- confirm against the original indentation.
+ for transform in self.transforms_imagenet_raw:
+ if type(transform) is RGB2Lab:
+ I2 = self.distortion_transform(I2, forward_dx, forward_dy)
+ I2_raw = I2
+ I2 = transform(I2)
+ # print("i'm here get image 4")
+ # Add one random offset, scaled by brightnessjitter, to channel 0 of I2.
+ I2[0:1, :, :] = I2[0:1, :, :] + torch.randn(1) * self.brightnessjitter
+
+ I_reference_video = self.extra_reference_transform(I_reference_video)
+ for transform in self.transforms_imagenet_raw:
+ I_reference_video = transform(I_reference_video)
+
+ I_reference_video_real = self.transforms_imagenet(I_reference_video_real)
+ # print("i'm here get image 5")
+ # Last axis holds (dy, dx).
+ flow_forward_raw = np.stack((forward_dy, forward_dx), axis=-1)
+ flow_forward = self.flow_transform(flow_forward_raw)
+
+ # update the mask for the pixels on the border
+ grid_x, grid_y = np.meshgrid(np.arange(self.image_size[0]), np.arange(self.image_size[1]), indexing="ij")
+ grid = np.stack((grid_y, grid_x), axis=-1)
+ grid_warp = grid + flow_forward_raw
+ location_y = grid_warp[:, :, 0].flatten()
+ location_x = grid_warp[:, :, 1].flatten()
+ I2_raw = np.array(I2_raw).astype(float)
+ # Sample each channel of I2_raw at the displaced coordinates, passing -1 as
+ # map_coordinates' fill value (cval).
+ I21_r = map_coordinates(I2_raw[:, :, 0], np.stack((location_x, location_y)), cval=-1).reshape(
+ (self.image_size[0], self.image_size[1])
+ )
+ I21_g = map_coordinates(I2_raw[:, :, 1], np.stack((location_x, location_y)), cval=-1).reshape(
+ (self.image_size[0], self.image_size[1])
+ )
+ I21_b = map_coordinates(I2_raw[:, :, 2], np.stack((location_x, location_y)), cval=-1).reshape(
+ (self.image_size[0], self.image_size[1])
+ )
+ I21_raw = np.stack((I21_r, I21_g, I21_b), axis=2)
+ # Mask starts as all ones and is zeroed where all three sampled channels equal
+ # the -1 fill value, or where the summed absolute difference to I1_raw exceeds 50.
+ mask = np.ones((self.image_size[0], self.image_size[1]))
+ mask[(I21_raw[:, :, 0] == -1) & (I21_raw[:, :, 1] == -1) & (I21_raw[:, :, 2] == -1)] = 0
+ mask[abs(I21_raw - I1_raw).sum(axis=-1) > 50] = 0
+ mask = self.ToTensor(mask)
+ # print("i'm here get image 6")
+ # Choose between the external reference (zero placeholder and flag) and the
+ # self-reference (flag of ones, placeholder either I2 or zeros).
+ if np.random.random() < self.real_reference_probability:
+ I_reference_output = I_reference_video_real
+ placeholder = torch.zeros_like(I1)
+ self_ref_flag = torch.zeros_like(I1)
+ else:
+ I_reference_output = I_reference_video
+ placeholder = I2 if np.random.random() < self.nonzero_placeholder_probability else torch.zeros_like(I1)
+ self_ref_flag = torch.ones_like(I1)
+
+ # except Exception as e:
+ # if combo_path is not None:
+ # print("problem in ", combo_path)
+ # print("problem in, ", image_a_path)
+ # print(e)
+ # return self.__getitem__(np.random.randint(0, len(self.image_pairs)))
+ # print("i'm here get image 7")
+ return [I1, I2, I_reference_output, flow_forward, mask, placeholder, self_ref_flag, "holder", pb, pa]
+
+ def __len__(self):
+ # Number of rows read from the pairs file.
+ return len(self.image_pairs)
\ No newline at end of file
diff --git a/src/data/functional.py b/src/data/functional.py
new file mode 100644
index 0000000000000000000000000000000000000000..14aa7882d3dfca1ba6649d0b7fdb2c443e3b7f20
--- /dev/null
+++ b/src/data/functional.py
@@ -0,0 +1,84 @@
+from __future__ import division
+
+import torch
+import numbers
+import collections
+import numpy as np
+from PIL import Image, ImageOps
+
+
+def _is_pil_image(img):
+    """Return True when ``img`` is an instance of ``PIL.Image.Image``."""
+    is_pil = isinstance(img, Image.Image)
+    return is_pil
+
+
+def _is_tensor_image(img):
+    """Return True when ``img`` is a torch tensor with exactly three dimensions."""
+    if not torch.is_tensor(img):
+        return False
+    return img.ndimension() == 3
+
+
+def _is_numpy_image(img):
+    """Return True when ``img`` is a NumPy array with two or three dimensions."""
+    if not isinstance(img, np.ndarray):
+        return False
+    return img.ndim in (2, 3)
+
+
+def to_mytensor(pic):
+    """Convert an image-like object to a channel-first float tensor.
+
+    ``pic`` is turned into a NumPy array; a 2-D array gets a trailing channel
+    axis, and the result is transposed from (H, W, C) to (C, H, W). Values are
+    only cast to float, they are not rescaled.
+    """
+    array = np.array(pic)
+    if array.ndim == 2:
+        array = array[..., np.newaxis]
+    tensor = torch.from_numpy(array.transpose((2, 0, 1)))
+    if isinstance(tensor, torch.FloatTensor):
+        return tensor
+    return tensor.float()  # no normalize .div(255)
+
+
+def normalize(tensor, mean, std):
+    """Normalize a 3-D tensor in place as ``(tensor - mean) / std``.
+
+    When the first dimension has size 1, ``mean`` and ``std`` are applied to
+    the whole tensor; otherwise they are zipped with the slices along the
+    first dimension. The same (mutated) tensor object is returned.
+
+    Raises:
+        TypeError: If ``tensor`` is not a 3-D torch tensor.
+    """
+    if not _is_tensor_image(tensor):
+        raise TypeError("tensor is not a torch image.")
+    if tensor.size(0) != 1:
+        for channel, channel_mean, channel_std in zip(tensor, mean, std):
+            channel.sub_(channel_mean).div_(channel_std)
+        return tensor
+    tensor.sub_(mean).div_(std)
+    return tensor
+
+
+def resize(img, size, interpolation=Image.BILINEAR):
+    """Resize a PIL image to ``size``.
+
+    Args:
+        img (PIL Image): Image to be resized.
+        size (int or iterable of two ints): When an int, the shorter edge is
+            scaled to ``size`` and the other edge follows the aspect ratio.
+            Otherwise ``size`` is read as ``(h, w)`` and reversed into the
+            ``(w, h)`` order passed to ``img.resize``.
+        interpolation: Resampling filter forwarded to ``img.resize``.
+
+    Returns:
+        PIL Image: The resized image, or ``img`` itself when its shorter edge
+        already equals an int ``size``.
+
+    Raises:
+        TypeError: If ``img`` is not a PIL image, or ``size`` is neither an
+            int nor an iterable of length two.
+    """
+    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
+    # ``collections.abc``. Imported locally so the block is self-contained.
+    from collections.abc import Iterable
+
+    if not _is_pil_image(img):
+        raise TypeError("img should be PIL Image. Got {}".format(type(img)))
+    if not isinstance(size, int) and (not isinstance(size, Iterable) or len(size) != 2):
+        raise TypeError("Got inappropriate size arg: {}".format(size))
+
+    if not isinstance(size, int):
+        return img.resize(size[::-1], interpolation)
+
+    w, h = img.size
+    if (w <= h and w == size) or (h <= w and h == size):
+        return img
+    if w < h:
+        ow = size
+        oh = int(round(size * h / w))
+    else:
+        oh = size
+        ow = int(round(size * w / h))
+    return img.resize((ow, oh), interpolation)
+
+
+def pad(img, padding, fill=0):
+    """Add a border around a PIL image with ``ImageOps.expand``.
+
+    Args:
+        img (PIL Image): Image to be padded.
+        padding (number or tuple): Border width; a tuple must have 2 or 4
+            elements.
+        fill (number, str or tuple): Fill value forwarded to
+            ``ImageOps.expand``.
+
+    Returns:
+        PIL Image: The padded image.
+
+    Raises:
+        TypeError: If ``img`` is not a PIL image or ``padding``/``fill`` have
+            an unsupported type.
+        ValueError: If ``padding`` is a sequence whose length is not 2 or 4.
+    """
+    # ``collections.Sequence`` was removed in Python 3.10; the ABC lives in
+    # ``collections.abc``. Imported locally so the block is self-contained.
+    from collections.abc import Sequence
+
+    if not _is_pil_image(img):
+        raise TypeError("img should be PIL Image. Got {}".format(type(img)))
+
+    if not isinstance(padding, (numbers.Number, tuple)):
+        raise TypeError("Got inappropriate padding arg")
+    if not isinstance(fill, (numbers.Number, str, tuple)):
+        raise TypeError("Got inappropriate fill arg")
+
+    if isinstance(padding, Sequence) and len(padding) not in [2, 4]:
+        raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " + "{} element tuple".format(len(padding)))
+
+    return ImageOps.expand(img, border=padding, fill=fill)
+
+
+def crop(img, i, j, h, w):
+    """Crop a PIL image to the box whose top-left corner is row ``i``, column ``j``.
+
+    The box spans ``h`` rows and ``w`` columns and is passed to ``img.crop``
+    as ``(left, upper, right, lower)``.
+
+    Raises:
+        TypeError: If ``img`` is not a PIL image.
+    """
+    if _is_pil_image(img):
+        left, upper = j, i
+        box = (left, upper, left + w, upper + h)
+        return img.crop(box)
+
+    raise TypeError("img should be PIL Image. Got {}".format(type(img)))
diff --git a/src/data/transforms.py b/src/data/transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..aafd87a4cfad80c2ea0729257ce8d98fe0b9b423
--- /dev/null
+++ b/src/data/transforms.py
@@ -0,0 +1,348 @@
+from __future__ import division
+
+import collections
+import numbers
+import random
+
+import torch
+from PIL import Image
+from skimage import color
+
+import src.data.functional as F
+
+# Only names actually defined in this module are exported; listing a missing
+# name makes ``from src.data.transforms import *`` raise AttributeError.
+__all__ = [
+    "Compose",
+    "Concatenate",
+    "ToTensor",
+    "Normalize",
+    "Resize",
+    "CenterCrop",
+    "RandomCrop",
+    "RandomHorizontalFlip",
+    "RGB2Lab",
+]
+
+
+def CustomFunc(inputs, func, *args, **kwargs):
+    """Apply ``func`` to every element of a sample and return the results.
+
+    ``inputs[0]`` to ``inputs[5]`` are transformed individually; ``inputs[6]``
+    to ``inputs[10]`` are each treated as a pair whose two items are
+    transformed and stored as a two-element list. Extra positional and
+    keyword arguments are forwarded to every ``func`` call.
+
+    Returns:
+        list: Six transformed items followed by five ``[first, second]`` lists.
+    """
+    transformed = [func(inputs[position], *args, **kwargs) for position in range(6)]
+
+    for offset in range(5):
+        pair = inputs[6 + offset]
+        first = func(pair[0], *args, **kwargs)
+        second = func(pair[1], *args, **kwargs)
+        transformed.append([first, second])
+
+    return transformed
+
+
+class Compose(object):
+    """Chain several transforms into a single callable.
+
+    Args:
+        transforms (list of callables): Transforms applied in list order; the
+            output of one is the input of the next.
+
+    Example:
+        >>> transforms.Compose([
+        >>>     transforms.CenterCrop(10),
+        >>>     transforms.ToTensor(),
+        >>> ])
+    """
+
+    def __init__(self, transforms):
+        self.transforms = transforms
+
+    def __call__(self, inputs):
+        """Feed ``inputs`` through every transform and return the final result."""
+        result = inputs
+        for transform in self.transforms:
+            result = transform(result)
+        return result
+
+
+class Concatenate(object):
+    """Flatten a sample into an 8-tuple of tensors.
+
+    Input:  a sequence whose first six items are
+            [im_l, im_ab, warp_ba, warp_aba, im_glb_ab, bgr_mc_im], followed by
+            five ``[err_ba, err_ab]`` pairs at indices 6..10.
+
+    Output: (im_l, cerr_ba, warp_ba, warp_aba, im_glb_ab, bgr_mc_im, im_ab, cerr_ab)
+            where ``cerr_ba``/``cerr_ab`` concatenate the five per-layer
+            tensors along dimension 0 and ``bgr_mc_im`` has its first-axis
+            entries reordered with index ``[2, 1, 0]``.
+    """
+
+    def __call__(self, inputs):
+        layers = [inputs[6 + offset] for offset in range(5)]
+        cerr_ba = torch.cat([pair[0] for pair in layers], 0)
+        cerr_ab = torch.cat([pair[1] for pair in layers], 0)
+
+        # Reverse the order of the three leading channels.
+        bgr_mc_im = inputs[5][[2, 1, 0], ...]
+
+        return (inputs[0], cerr_ba, inputs[2], inputs[3], inputs[4], bgr_mc_im, inputs[1], cerr_ab)
+
+
+class ToTensor(object):
+    """Convert every item of a sample to a tensor with ``F.to_mytensor``.
+
+    The conversion is applied through ``CustomFunc``, so the sample keeps its
+    layout (six items followed by five pairs).
+    """
+
+    def __call__(self, inputs):
+        """
+        Args:
+            inputs (sequence): Sample whose items are accepted by ``F.to_mytensor``.
+
+        Returns:
+            list: The converted sample.
+        """
+        converted = CustomFunc(inputs, F.to_mytensor)
+        return converted
+
+
+class Normalize(object):
+    """Shift and scale every tensor of a sample with fixed, hard-coded statistics.
+
+    The class takes no constructor arguments; each tensor is passed to
+    ``F.normalize(tensor, mean, std)`` with the constants written below.
+    """
+
+    def __call__(self, inputs):
+        """
+        Args:
+            inputs (list): Sample made of six tensors (indices 0..5) followed by
+                five ``[err_ba, err_ab]`` pairs (indices 6..10).
+
+        Returns:
+            list: The normalized tensors in the same layout.
+        """
+
+        im_l = F.normalize(inputs[0], 50, 1)  # [0, 100]
+        im_ab = F.normalize(inputs[1], (0, 0), (1, 1))  # [-100, 100]
+
+        # inputs[2] and inputs[3] are handled per slice: channel 0 uses the same
+        # constants as im_l, channels 1:3 the same constants as im_ab.
+        # NOTE(review): presumably these are 3-channel Lab tensors -- confirm.
+        inputs[2][0:1, :, :] = F.normalize(inputs[2][0:1, :, :], 50, 1)
+        inputs[2][1:3, :, :] = F.normalize(inputs[2][1:3, :, :], (0, 0), (1, 1))
+        warp_ba = inputs[2]
+
+        inputs[3][0:1, :, :] = F.normalize(inputs[3][0:1, :, :], 50, 1)
+        inputs[3][1:3, :, :] = F.normalize(inputs[3][1:3, :, :], (0, 0), (1, 1))
+        warp_aba = inputs[3]
+
+        im_gbl_ab = F.normalize(inputs[4], (0, 0), (1, 1))  # [-100, 100]
+
+        # Per-channel mean subtraction with unit std for the three-channel image.
+        bgr_mc_im = F.normalize(inputs[5], (123.68, 116.78, 103.938), (1, 1, 1))
+
+        layer_data = [im_l, im_ab, warp_ba, warp_aba, im_gbl_ab, bgr_mc_im]
+
+        # The five trailing entries are pairs; both members use (x - 127) / 2.
+        for l in range(5):
+            layer = inputs[6 + l]
+            err_ba = F.normalize(layer[0], 127, 2)  # [0, 255]
+            err_ab = F.normalize(layer[1], 127, 2)  # [0, 255]
+            layer_data.append([err_ba, err_ab])
+
+        return layer_data
+
+
+class Resize(object):
+    """Resize every PIL image of a sample to the given size.
+
+    Args:
+        size (sequence or int): Desired output size. If size is a sequence like
+            (h, w), output size will be matched to this. If size is an int,
+            smaller edge of the image will be matched to this number.
+            i.e, if height > width, then image will be rescaled to
+            (size * height / width, size)
+        interpolation (int, optional): Desired interpolation. Default is
+            ``PIL.Image.BILINEAR``
+    """
+
+    def __init__(self, size, interpolation=Image.BILINEAR):
+        # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
+        # ``collections.abc``. Imported locally so the block is self-contained.
+        from collections.abc import Iterable
+
+        assert isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)
+        self.size = size
+        self.interpolation = interpolation
+
+    def __call__(self, inputs):
+        """
+        Args:
+            inputs (sequence): Sample whose items are PIL images, laid out as
+                expected by ``CustomFunc``.
+
+        Returns:
+            list: The sample with every image rescaled by ``F.resize``.
+        """
+        return CustomFunc(inputs, F.resize, self.size, self.interpolation)
+
+
+class RandomCrop(object):
+    """Crop every PIL image of a sample at one shared random location.
+
+    Args:
+        size (sequence or int): Desired output size of the crop. If size is an
+            int instead of sequence like (h, w), a square crop (size, size) is
+            made.
+        padding (int or sequence, optional): Optional padding on each border
+            of the image. Default is 0, i.e no padding. If a sequence of length
+            4 is provided, it is used to pad left, top, right, bottom borders
+            respectively.
+    """
+
+    def __init__(self, size, padding=0):
+        if isinstance(size, numbers.Number):
+            self.size = (int(size), int(size))
+        else:
+            self.size = size
+        self.padding = padding
+
+    @staticmethod
+    def get_params(img, output_size):
+        """Get parameters for ``crop`` for a random crop.
+
+        Args:
+            img (PIL Image): Image to be cropped; only ``img.size`` is read.
+            output_size (tuple): Expected output size ``(h, w)`` of the crop.
+
+        Returns:
+            tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
+        """
+        w, h = img.size
+        th, tw = output_size
+        if w == tw and h == th:
+            return 0, 0, h, w
+
+        i = random.randint(0, h - th)
+        j = random.randint(0, w - tw)
+        return i, j, th, tw
+
+    def _needs_padding(self):
+        """Return True when ``self.padding`` requests a non-zero border."""
+        if isinstance(self.padding, numbers.Number):
+            return self.padding > 0
+        # A sequence cannot be ordered against an int on Python 3, so inspect
+        # its elements instead of evaluating ``self.padding > 0``.
+        return any(border > 0 for border in self.padding)
+
+    def __call__(self, inputs):
+        """
+        Args:
+            inputs (sequence): Sample whose items are PIL images, laid out as
+                expected by ``CustomFunc``.
+
+        Returns:
+            list: The sample with every image cropped to the same window.
+        """
+        if self._needs_padding():
+            inputs = CustomFunc(inputs, F.pad, self.padding)
+
+        # The crop window is drawn once, from the first image, and reused.
+        i, j, h, w = self.get_params(inputs[0], self.size)
+        return CustomFunc(inputs, F.crop, i, j, h, w)
+
+
+class CenterCrop(object):
+    """Crop every PIL image of a sample around its center.
+
+    Args:
+        size (sequence or int): Desired output size of the crop. If size is an
+            int instead of sequence like (h, w), a square crop (size, size) is
+            made.
+        padding (int or sequence, optional): Optional padding on each border
+            of the image. Default is 0, i.e no padding. If a sequence of length
+            4 is provided, it is used to pad left, top, right, bottom borders
+            respectively.
+    """
+
+    def __init__(self, size, padding=0):
+        if isinstance(size, numbers.Number):
+            self.size = (int(size), int(size))
+        else:
+            self.size = size
+        self.padding = padding
+
+    @staticmethod
+    def get_params(img, output_size):
+        """Get parameters for ``crop`` for a center crop.
+
+        Args:
+            img (PIL Image): Image to be cropped; only ``img.size`` is read.
+            output_size (tuple): Expected output size ``(h, w)`` of the crop.
+
+        Returns:
+            tuple: params (i, j, h, w) to be passed to ``crop`` for center crop.
+        """
+        w, h = img.size
+        th, tw = output_size
+        if w == tw and h == th:
+            return 0, 0, h, w
+
+        i = (h - th) // 2
+        j = (w - tw) // 2
+        return i, j, th, tw
+
+    def _needs_padding(self):
+        """Return True when ``self.padding`` requests a non-zero border."""
+        if isinstance(self.padding, numbers.Number):
+            return self.padding > 0
+        # A sequence cannot be ordered against an int on Python 3, so inspect
+        # its elements instead of evaluating ``self.padding > 0``.
+        return any(border > 0 for border in self.padding)
+
+    def __call__(self, inputs):
+        """
+        Args:
+            inputs (sequence): Sample whose items are PIL images, laid out as
+                expected by ``CustomFunc``.
+
+        Returns:
+            list: The sample with every image cropped to the same window.
+        """
+        if self._needs_padding():
+            inputs = CustomFunc(inputs, F.pad, self.padding)
+
+        # The crop window is computed once, from the first image, and reused.
+        i, j, h, w = self.get_params(inputs[0], self.size)
+        return CustomFunc(inputs, F.crop, i, j, h, w)
+
+
+class RandomHorizontalFlip(object):
+    """Horizontally flip every PIL image of a sample with a probability of 0.5."""
+
+    @staticmethod
+    def _hflip(img):
+        """Return ``img`` mirrored left-to-right.
+
+        Defined here because ``src.data.functional`` provides no ``hflip``.
+
+        Raises:
+            TypeError: If ``img`` is not a PIL image.
+        """
+        if not isinstance(img, Image.Image):
+            raise TypeError("img should be PIL Image. Got {}".format(type(img)))
+        return img.transpose(Image.FLIP_LEFT_RIGHT)
+
+    def __call__(self, inputs):
+        """
+        Args:
+            inputs (sequence): Sample whose items are PIL images, laid out as
+                expected by ``CustomFunc``.
+
+        Returns:
+            The sample with every image flipped, or ``inputs`` unchanged.
+        """
+
+        # One draw decides for the whole sample so all images stay aligned.
+        if random.random() < 0.5:
+            return CustomFunc(inputs, self._hflip)
+        return inputs
+
+
+class RGB2Lab(object):
+    """Convert the RGB entries of a sample to Lab with ``skimage.color.rgb2lab``."""
+
+    def __call__(self, inputs):
+        """
+        Args:
+            inputs (list): Sample whose items 0, 2, 3 and 4 are RGB images
+                accepted by ``color.rgb2lab``. The list is modified in place.
+
+        Returns:
+            list: The same ``inputs`` object, where
+                inputs[0] holds the first Lab channel of the original item 0,
+                inputs[1] holds the remaining two channels of that image,
+                inputs[2] and inputs[3] hold full Lab conversions, and
+                inputs[4] holds the last two Lab channels of the original item 4.
+        """
+        # The original class defined ``__call__`` twice; only this second
+        # definition was ever bound, so the shadowed stub was dropped.
+        image_lab = color.rgb2lab(inputs[0])
+        warp_ba_lab = color.rgb2lab(inputs[2])
+        warp_aba_lab = color.rgb2lab(inputs[3])
+        im_gbl_lab = color.rgb2lab(inputs[4])
+
+        inputs[0] = image_lab[:, :, :1]  # l channel
+        inputs[1] = image_lab[:, :, 1:]  # ab channel
+        inputs[2] = warp_ba_lab  # lab channel
+        inputs[3] = warp_aba_lab  # lab channel
+        inputs[4] = im_gbl_lab[:, :, 1:]  # ab channel
+
+        return inputs
diff --git a/src/losses.py b/src/losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd78f9226bdee39354fa8fb31a05e4aefeb9e55d
--- /dev/null
+++ b/src/losses.py
@@ -0,0 +1,277 @@
+import torch
+import torch.nn as nn
+from src.utils import feature_normalize
+
+
+### START### CONTEXTUAL LOSS ####
+class ContextualLoss(nn.Module):
+    """Contextual loss between two feature maps, based on cosine distances.
+
+    The final reduction takes the maximum affinity over dimension 1 and then
+    averages over the last dimension.
+    """
+
+    def __init__(self):
+        super(ContextualLoss, self).__init__()
+
+    def forward(self, X_features, Y_features, h=0.1, feature_centering=True):
+        """
+        X_features and Y_features are feature maps whose first two dimensions
+        are (batch, depth); the remaining dimensions are flattened.
+        h: bandwidth of the exponential kernel
+        feature_centering: subtract the per-channel mean of Y_features from
+            both inputs before normalization
+        return the per-sample loss
+        """
+        n_batch, n_depth = X_features.shape[0], X_features.shape[1]
+
+        # Both inputs are centered with the mean of Y_features.
+        if feature_centering:
+            y_mean = Y_features.view(n_batch, n_depth, -1).mean(dim=-1)
+            y_mean = y_mean.unsqueeze(dim=-1).unsqueeze(dim=-1)
+            X_features = X_features - y_mean
+            Y_features = Y_features - y_mean
+
+        # batch_size * feature_depth * feature_size^2
+        x_flat = feature_normalize(X_features).view(n_batch, n_depth, -1)
+        y_flat = feature_normalize(Y_features).view(n_batch, n_depth, -1)
+
+        # cosine distance = 1 - similarity; batch_size * feature_size^2 * feature_size^2
+        distance = 1 - torch.matmul(x_flat.permute(0, 2, 1), y_flat)
+
+        # normalized distance: every row is divided by its own minimum
+        row_minimum = torch.min(distance, dim=-1, keepdim=True)[0]
+        relative_distance = distance / (row_minimum + 1e-5)
+
+        # pairwise affinity, normalized over the last dimension
+        kernel = torch.exp((1 - relative_distance) / h)
+        affinity = kernel / torch.sum(kernel, dim=-1, keepdim=True)
+
+        # contextual loss per sample
+        best_match = torch.max(affinity, dim=1)[0]
+        return -torch.log(torch.mean(best_match, dim=-1))
+
+
+class ContextualLoss_forward(nn.Module):
+    """Contextual loss between two feature maps, based on cosine distances.
+
+    The final reduction takes the maximum affinity over the last dimension and
+    then averages over dimension 1.
+    """
+
+    def __init__(self):
+        super(ContextualLoss_forward, self).__init__()
+
+    def forward(self, X_features, Y_features, h=0.1, feature_centering=True):
+        """
+        X_features and Y_features are feature maps whose first two dimensions
+        are (batch, depth); the remaining dimensions are flattened.
+        h: bandwidth of the exponential kernel
+        feature_centering: subtract the per-channel mean of Y_features from
+            both inputs before normalization
+        return the per-sample loss
+        """
+        n_batch, n_depth = X_features.shape[0], X_features.shape[1]
+
+        # Both inputs are centered with the mean of Y_features.
+        if feature_centering:
+            y_mean = Y_features.view(n_batch, n_depth, -1).mean(dim=-1)
+            y_mean = y_mean.unsqueeze(dim=-1).unsqueeze(dim=-1)
+            X_features = X_features - y_mean
+            Y_features = Y_features - y_mean
+
+        # batch_size * feature_depth * feature_size^2
+        x_flat = feature_normalize(X_features).view(n_batch, n_depth, -1)
+        y_flat = feature_normalize(Y_features).view(n_batch, n_depth, -1)
+
+        # cosine distance = 1 - similarity; batch_size * feature_size^2 * feature_size^2
+        distance = 1 - torch.matmul(x_flat.permute(0, 2, 1), y_flat)
+
+        # normalized distance: every row is divided by its own minimum
+        row_minimum = torch.min(distance, dim=-1, keepdim=True)[0]
+        relative_distance = distance / (row_minimum + 1e-5)
+
+        # pairwise affinity, normalized over the last dimension
+        kernel = torch.exp((1 - relative_distance) / h)
+        affinity = kernel / torch.sum(kernel, dim=-1, keepdim=True)
+
+        # contextual loss per sample
+        best_match = torch.max(affinity, dim=-1)[0]
+        return -torch.log(torch.mean(best_match, dim=1))
+
+
+### END### CONTEXTUAL LOSS ####
+
+
+##########################
+
+
+def mse_loss_fn(input, target=0):
+    """Return the mean of the squared element-wise difference ``input - target``."""
+    residual = input - target
+    return torch.mean(residual ** 2)
+
+
+### START### PERCEPTUAL LOSS ###
+def Perceptual_loss(domain_invariant, weight_perceptual):
+    """Build a feature-matching loss function.
+
+    Args:
+        domain_invariant: When truthy, both feature maps go through a
+            non-affine ``InstanceNorm2d(512)`` before the MSE, and the result
+            is additionally scaled by ``1e5 * 0.2``.
+        weight_perceptual: Multiplier applied to the MSE.
+
+    Returns:
+        callable: ``f(A_relu5_1, predict_relu5_1)`` returning the weighted
+        loss; ``A_relu5_1`` is detached before use.
+    """
+    instancenorm = nn.InstanceNorm2d(512, affine=False)
+
+    def __call__(A_relu5_1, predict_relu5_1):
+        if not domain_invariant:
+            return mse_loss_fn(predict_relu5_1, A_relu5_1.detach()) * weight_perceptual
+
+        normalized_error = mse_loss_fn(instancenorm(predict_relu5_1), instancenorm(A_relu5_1.detach()))
+        return normalized_error * weight_perceptual * 1e5 * 0.2
+
+    return __call__
+
+
+### END### PERCEPTUAL LOSS ###
+
+
+def l1_loss_fn(input, target=0):
+    """Return the mean of the absolute element-wise difference ``input - target``."""
+    residual = input - target
+    return torch.mean(torch.abs(residual))
+
+
+### END#################
+
+
+### START### ADVERSARIAL LOSS ###
+def generator_loss_fn(real_data_lab, fake_data_lab, discriminator, weight_gan, device):
+    """Compute the generator side of a relativistic least-squares GAN loss.
+
+    Args:
+        real_data_lab: Real samples passed to ``discriminator``.
+        fake_data_lab: Generated samples passed to ``discriminator``.
+        discriminator: Callable returning a pair whose first item is the
+            prediction; the second item is ignored.
+        weight_gan: Loss multiplier; when it is not greater than zero the
+            discriminator is not evaluated.
+        device: Device for the zero tensor returned when the loss is disabled.
+
+    Returns:
+        Tensor: The weighted loss, or a one-element zero tensor on ``device``.
+    """
+    if not weight_gan > 0:
+        return torch.Tensor([0]).to(device)
+
+    # The fake batch is scored before the real batch, as in the original code.
+    y_pred_fake, _ = discriminator(fake_data_lab)
+    y_pred_real, _ = discriminator(real_data_lab)
+
+    y = torch.ones_like(y_pred_real)
+    real_term = torch.mean((y_pred_real - torch.mean(y_pred_fake) + y) ** 2)
+    fake_term = torch.mean((y_pred_fake - torch.mean(y_pred_real) - y) ** 2)
+    return (real_term + fake_term) / 2 * weight_gan
+
+
+def discriminator_loss_fn(real_data_lab, fake_data_lab, discriminator):
+    """Compute the discriminator side of a relativistic least-squares GAN loss.
+
+    Both batches are detached before being scored. ``discriminator`` must
+    return a pair whose first item is the prediction; the second item is
+    ignored.
+    """
+    # The fake batch is scored before the real batch, as in the original code.
+    fake_scores, _ = discriminator(fake_data_lab.detach())
+    real_scores, _ = discriminator(real_data_lab.detach())
+
+    ones = torch.ones_like(real_scores)
+    real_term = torch.mean((real_scores - torch.mean(fake_scores) - ones) ** 2)
+    fake_term = torch.mean((fake_scores - torch.mean(real_scores) + ones) ** 2)
+    return (real_term + fake_term) / 2
+
+
+### END### ADVERSARIAL LOSS #####
+
+
+def consistent_loss_fn(
+    I_current_lab_predict,
+    I_last_ab_predict,
+    I_current_nonlocal_lab_predict,
+    I_last_nonlocal_lab_predict,
+    flow_forward,
+    mask,
+    warping_layer,
+    weight_consistent=0.02,
+    weight_nonlocal_consistent=0.0,
+    device="cuda",
+):
+    """Masked MSE between warped current predictions and previous predictions.
+
+    Each current prediction is warped with ``warping_layer(x, flow_forward)``
+    and channels 1:3 of the result are compared with the previous prediction.
+    The squared error is multiplied by ``mask`` (expanded to its shape) and
+    averaged. A term whose weight is falsy is replaced by a one-element zero
+    tensor on ``device`` and its inputs are not touched.
+
+    Returns:
+        Tensor: Sum of the weighted consistency term and the weighted
+        non-local consistency term.
+    """
+
+    def masked_mse(prediction, reference):
+        squared_error = (prediction - reference) ** 2
+        return (squared_error * mask.expand_as(squared_error)).mean()
+
+    if weight_consistent:
+        warped_lab = warping_layer(I_current_lab_predict, flow_forward)
+        warped_ab = warped_lab[:, 1:3, :, :]
+        consistent_loss = masked_mse(warped_ab, I_last_ab_predict) * weight_consistent
+    else:
+        consistent_loss = torch.Tensor([0]).to(device)
+
+    if weight_nonlocal_consistent:
+        warped_nonlocal_lab = warping_layer(I_current_nonlocal_lab_predict, flow_forward)
+        nonlocal_consistent_loss = (
+            masked_mse(warped_nonlocal_lab[:, 1:3, :, :], I_last_nonlocal_lab_predict[:, 1:3, :, :])
+            * weight_nonlocal_consistent
+        )
+    else:
+        nonlocal_consistent_loss = torch.Tensor([0]).to(device)
+
+    return consistent_loss + nonlocal_consistent_loss
+
+
+### END### CONSISTENCY LOSS #####
+
+
+### START### SMOOTHNESS LOSS ###
+def smoothness_loss_fn(
+    I_current_l,
+    I_current_lab,
+    I_current_ab_predict,
+    A_relu2_1,
+    weighted_layer_color,
+    nonlocal_weighted_layer,
+    weight_smoothness=5.0,
+    weight_nonlocal_smoothness=0.0,
+    device="cuda",
+):
+    """Weighted sum of a local and a non-local smoothness term.
+
+    Each term is the MSE between the (rescaled) predicted ab channels and a
+    weighted version produced by ``weighted_layer_color`` (scale factor 1.0)
+    or ``nonlocal_weighted_layer`` (scale factor 0.25, alpha 0.5). A term
+    whose weight is falsy is replaced by a one-element zero tensor on
+    ``device`` and its layer is not called.
+
+    Returns:
+        Tensor: ``smoothness_loss + nonlocal_smoothness_loss``.
+    """
+    if weight_smoothness:
+        local_scale = 1.0
+        lab_predict = torch.cat((I_current_l, I_current_ab_predict), dim=1)
+        ab_weighted = weighted_layer_color(
+            I_current_lab,
+            lab_predict,
+            patch_size=3,
+            alpha=10,
+            scale_factor=local_scale,
+        )
+        ab_rescaled = nn.functional.interpolate(I_current_ab_predict, scale_factor=local_scale)
+        smoothness_loss = mse_loss_fn(ab_rescaled, ab_weighted) * weight_smoothness
+    else:
+        smoothness_loss = torch.Tensor([0]).to(device)
+
+    if weight_nonlocal_smoothness:
+        nonlocal_scale = 0.25
+        guidance = feature_normalize(A_relu2_1)
+        lab_predict = torch.cat((I_current_l, I_current_ab_predict), dim=1)
+        # The guidance features are detached before being used as weights.
+        ab_weighted_nonlocal = nonlocal_weighted_layer(
+            lab_predict,
+            guidance.detach(),
+            patch_size=3,
+            alpha=0.5,
+            scale_factor=nonlocal_scale,
+        )
+        ab_rescaled = nn.functional.interpolate(I_current_ab_predict, scale_factor=nonlocal_scale)
+        nonlocal_smoothness_loss = mse_loss_fn(ab_rescaled, ab_weighted_nonlocal) * weight_nonlocal_smoothness
+    else:
+        nonlocal_smoothness_loss = torch.Tensor([0]).to(device)
+
+    return smoothness_loss + nonlocal_smoothness_loss
+
+
+### END### SMOOTHNESS LOSS #####
diff --git a/src/metrics.py b/src/metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b16301da4ed3f7accc0946c6fafd61b82200cb5
--- /dev/null
+++ b/src/metrics.py
@@ -0,0 +1,95 @@
+# import os
+# import cv2
+# import glob
+# import numpy as np
+# from PIL import Image
+# from scipy.linalg import sqrtm
+
+# import torch
+# from torch import nn
+# import torchvision.transforms as transforms
+
+
+# def PSNR(gt_imgs, pred_imgs):
+# """
+# Calculate PSNR for a batch of images
+# Args:
+# gt_imgs (list): list of ground truth images
+# pred_imgs (list): list of predicted images
+# Returns:
+# float: average PSNR score
+# """
+# total_psnr = 0
+# for idx, (gt, pred) in enumerate(zip(gt_imgs, pred_imgs)):
+# assert gt.shape == pred.shape, f"Shape mismatch at {idx}: GT and prediction"
+# total_psnr += cv2.PSNR(gt, pred)
+# return total_psnr / len(pred_imgs)
+
+
+# class FrechetDistance:
+# def __init__(self, model_name="inception_v3", device="cpu"):
+# self.device = torch.device(device)
+# self.model = torch.hub.load("pytorch/vision:v0.10.0", model_name, pretrained=True) # .to(self.device)
+# self.model.fc = nn.Identity()
+# print(self.model)
+# self.model.eval()
+
+# self.transform = transforms.Compose(
+# [
+# transforms.ToTensor(),
+# transforms.Resize(299),
+# transforms.CenterCrop(299),
+# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+# ]
+# )
+
+# # Return parts to calculate in FID and FVD
+# def _calculate_act(self, images1, images2):
+# images1 = [self.transform(img) for img in images1]
+# images2 = [self.transform(img) for img in images2]
+
+# images1 = torch.stack(images1).to(self.device)
+# images2 = torch.stack(images2).to(self.device)
+
+# # Get activations
+# act1 = self.model(images1).detach().numpy()
+# act2 = self.model(images2).detach().numpy()
+
+# return act1, act2
+
+# def calculate_fid(self, images1, images2):
+# act1, act2 = self._calculate_act(images1, images2)
+
+# # calculate mean and covariance statistics
+# mu1, sigma1 = act1.mean(axis=0), np.cov(act1, rowvar=False)
+# mu2, sigma2 = act2.mean(axis=0), np.cov(act2, rowvar=False)
+
+# fid = (np.sum((mu1 - mu2) ** 2.0)) + np.trace(sigma1 + sigma2 - 2.0 * sqrtm(sigma1.dot(sigma2)))
+# return fid
+
+# def calculate_fvd(self, frames_list_folder1, frames_list_folder2, batch_size=2):
+# frames_list1 = glob.glob(os.path.join(frames_list_folder1, "*.png"))
+# frames_list2 = glob.glob(os.path.join(frames_list_folder2, "*.png"))
+
+# assert len(frames_list1) == len(frames_list2), "Number of frames in 2 folders must be equal"
+
+# all_act1, all_act2 = [], []
+# for i in range(0, len(frames_list1), batch_size):
+# batch1 = frames_list1[i : min(i + batch_size, len(frames_list1))]
+# batch2 = frames_list2[i : min(i + batch_size, len(frames_list1))]
+
+# img1 = [Image.open(img) for img in batch1]
+# img2 = [Image.open(img) for img in batch2]
+
+# act1, act2 = self._calculate_act(img1, img2)
+
+# all_act1.append(act1)
+# all_act2.append(act2)
+
+# all_act1 = np.concatenate(all_act1, axis=0)
+# all_act2 = np.concatenate(all_act2, axis=0)
+# print(all_act1.shape)
+# print(all_act1.shape)
+# fid = self.calculate_fid(all_act1, all_act2)
+
+# return np.sqrt(fid)
diff --git a/src/models/CNN/ColorVidNet.py b/src/models/CNN/ColorVidNet.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d14f395ae2bb89d0b7432c859179e3891e313b3
--- /dev/null
+++ b/src/models/CNN/ColorVidNet.py
@@ -0,0 +1,281 @@
+import torch
+import torch.nn as nn
+import torch.nn.parallel
+
+
+class ColorVidNet(nn.Module):
+    """Encoder-decoder CNN that maps an ``ic``-channel input to a 2-channel output.
+
+    The output is ``tanh(...) * 128``. Three stride-2 stages reduce the
+    resolution and three 2x upsampling stages restore it; each upsampled
+    tensor is summed with a convolved skip connection from the encoder.
+
+    Note: several attributes are assigned twice in ``__init__`` (the
+    ``*norm`` layers and ``conv8_1``/``conv9_1``/``conv10_1``). The later
+    assignment replaces the earlier one, so ``forward`` uses InstanceNorm2d
+    and Upsample+Conv2d, not BatchNorm2d and ConvTranspose2d.
+    """
+
+    def __init__(self, ic):
+        """
+        Args:
+            ic (int): Number of input channels of the first convolution.
+        """
+        super(ColorVidNet, self).__init__()
+        self.conv1_1 = nn.Sequential(nn.Conv2d(ic, 32, 3, 1, 1), nn.ReLU(), nn.Conv2d(32, 64, 3, 1, 1))
+        self.conv1_2 = nn.Conv2d(64, 64, 3, 1, 1)
+        self.conv1_2norm = nn.BatchNorm2d(64, affine=False)
+        # 1x1 depthwise convolution with stride 2: halves the resolution.
+        self.conv1_2norm_ss = nn.Conv2d(64, 64, 1, 2, bias=False, groups=64)
+        self.conv2_1 = nn.Conv2d(64, 128, 3, 1, 1)
+        self.conv2_2 = nn.Conv2d(128, 128, 3, 1, 1)
+        self.conv2_2norm = nn.BatchNorm2d(128, affine=False)
+        self.conv2_2norm_ss = nn.Conv2d(128, 128, 1, 2, bias=False, groups=128)
+        self.conv3_1 = nn.Conv2d(128, 256, 3, 1, 1)
+        self.conv3_2 = nn.Conv2d(256, 256, 3, 1, 1)
+        self.conv3_3 = nn.Conv2d(256, 256, 3, 1, 1)
+        self.conv3_3norm = nn.BatchNorm2d(256, affine=False)
+        self.conv3_3norm_ss = nn.Conv2d(256, 256, 1, 2, bias=False, groups=256)
+        self.conv4_1 = nn.Conv2d(256, 512, 3, 1, 1)
+        self.conv4_2 = nn.Conv2d(512, 512, 3, 1, 1)
+        self.conv4_3 = nn.Conv2d(512, 512, 3, 1, 1)
+        self.conv4_3norm = nn.BatchNorm2d(512, affine=False)
+        # conv5_* and conv6_* use padding 2 and dilation 2 (5th and 6th positional args).
+        self.conv5_1 = nn.Conv2d(512, 512, 3, 1, 2, 2)
+        self.conv5_2 = nn.Conv2d(512, 512, 3, 1, 2, 2)
+        self.conv5_3 = nn.Conv2d(512, 512, 3, 1, 2, 2)
+        self.conv5_3norm = nn.BatchNorm2d(512, affine=False)
+        self.conv6_1 = nn.Conv2d(512, 512, 3, 1, 2, 2)
+        self.conv6_2 = nn.Conv2d(512, 512, 3, 1, 2, 2)
+        self.conv6_3 = nn.Conv2d(512, 512, 3, 1, 2, 2)
+        self.conv6_3norm = nn.BatchNorm2d(512, affine=False)
+        self.conv7_1 = nn.Conv2d(512, 512, 3, 1, 1)
+        self.conv7_2 = nn.Conv2d(512, 512, 3, 1, 1)
+        self.conv7_3 = nn.Conv2d(512, 512, 3, 1, 1)
+        self.conv7_3norm = nn.BatchNorm2d(512, affine=False)
+        # Replaced further down by an Upsample + Conv2d pair.
+        self.conv8_1 = nn.ConvTranspose2d(512, 256, 4, 2, 1)
+        self.conv3_3_short = nn.Conv2d(256, 256, 3, 1, 1)
+        self.conv8_2 = nn.Conv2d(256, 256, 3, 1, 1)
+        self.conv8_3 = nn.Conv2d(256, 256, 3, 1, 1)
+        self.conv8_3norm = nn.BatchNorm2d(256, affine=False)
+        # Replaced further down by an Upsample + Conv2d pair.
+        self.conv9_1 = nn.ConvTranspose2d(256, 128, 4, 2, 1)
+        self.conv2_2_short = nn.Conv2d(128, 128, 3, 1, 1)
+        self.conv9_2 = nn.Conv2d(128, 128, 3, 1, 1)
+        self.conv9_2norm = nn.BatchNorm2d(128, affine=False)
+        # Replaced further down by an Upsample + Conv2d pair.
+        self.conv10_1 = nn.ConvTranspose2d(128, 128, 4, 2, 1)
+        self.conv1_2_short = nn.Conv2d(64, 128, 3, 1, 1)
+        self.conv10_2 = nn.Conv2d(128, 128, 3, 1, 1)
+        # Final 1x1 convolution producing the two output channels.
+        self.conv10_ab = nn.Conv2d(128, 2, 1, 1)
+
+        # add self.relux_x
+        self.relu1_1 = nn.ReLU()
+        self.relu1_2 = nn.ReLU()
+        self.relu2_1 = nn.ReLU()
+        self.relu2_2 = nn.ReLU()
+        self.relu3_1 = nn.ReLU()
+        self.relu3_2 = nn.ReLU()
+        self.relu3_3 = nn.ReLU()
+        self.relu4_1 = nn.ReLU()
+        self.relu4_2 = nn.ReLU()
+        self.relu4_3 = nn.ReLU()
+        self.relu5_1 = nn.ReLU()
+        self.relu5_2 = nn.ReLU()
+        self.relu5_3 = nn.ReLU()
+        self.relu6_1 = nn.ReLU()
+        self.relu6_2 = nn.ReLU()
+        self.relu6_3 = nn.ReLU()
+        self.relu7_1 = nn.ReLU()
+        self.relu7_2 = nn.ReLU()
+        self.relu7_3 = nn.ReLU()
+        self.relu8_1_comb = nn.ReLU()
+        self.relu8_2 = nn.ReLU()
+        self.relu8_3 = nn.ReLU()
+        self.relu9_1_comb = nn.ReLU()
+        self.relu9_2 = nn.ReLU()
+        self.relu10_1_comb = nn.ReLU()
+        self.relu10_2 = nn.LeakyReLU(0.2, True)
+
+        # Overrides the ConvTranspose2d layers assigned above: nearest-neighbour
+        # 2x upsampling followed by a 3x3 convolution.
+        self.conv8_1 = nn.Sequential(nn.Upsample(scale_factor=2, mode="nearest"), nn.Conv2d(512, 256, 3, 1, 1))
+        self.conv9_1 = nn.Sequential(nn.Upsample(scale_factor=2, mode="nearest"), nn.Conv2d(256, 128, 3, 1, 1))
+        self.conv10_1 = nn.Sequential(nn.Upsample(scale_factor=2, mode="nearest"), nn.Conv2d(128, 128, 3, 1, 1))
+
+        # Overrides the BatchNorm2d layers assigned above with InstanceNorm2d.
+        self.conv1_2norm = nn.InstanceNorm2d(64)
+        self.conv2_2norm = nn.InstanceNorm2d(128)
+        self.conv3_3norm = nn.InstanceNorm2d(256)
+        self.conv4_3norm = nn.InstanceNorm2d(512)
+        self.conv5_3norm = nn.InstanceNorm2d(512)
+        self.conv6_3norm = nn.InstanceNorm2d(512)
+        self.conv7_3norm = nn.InstanceNorm2d(512)
+        self.conv8_3norm = nn.InstanceNorm2d(256)
+        self.conv9_2norm = nn.InstanceNorm2d(128)
+
+    def forward(self, x):
+        """Run the network on ``x`` and return ``tanh(conv10_ab) * 128``.
+
+        x: tensor with ``ic`` channels. The original note describes it as
+        gray image (1 channel), ab (2 channel), ab_err, ba_err -- presumably
+        concatenated along the channel axis; confirm against the caller.
+        NOTE(review): the skip additions presumably require H and W to be
+        divisible by 8 -- confirm.
+        """
+        # Encoder, full resolution.
+        conv1_1 = self.relu1_1(self.conv1_1(x))
+        conv1_2 = self.relu1_2(self.conv1_2(conv1_1))
+        conv1_2norm = self.conv1_2norm(conv1_2)
+        conv1_2norm_ss = self.conv1_2norm_ss(conv1_2norm)
+        # Encoder, after the first stride-2 subsampling.
+        conv2_1 = self.relu2_1(self.conv2_1(conv1_2norm_ss))
+        conv2_2 = self.relu2_2(self.conv2_2(conv2_1))
+        conv2_2norm = self.conv2_2norm(conv2_2)
+        conv2_2norm_ss = self.conv2_2norm_ss(conv2_2norm)
+        # Encoder, after the second stride-2 subsampling.
+        conv3_1 = self.relu3_1(self.conv3_1(conv2_2norm_ss))
+        conv3_2 = self.relu3_2(self.conv3_2(conv3_1))
+        conv3_3 = self.relu3_3(self.conv3_3(conv3_2))
+        conv3_3norm = self.conv3_3norm(conv3_3)
+        conv3_3norm_ss = self.conv3_3norm_ss(conv3_3norm)
+        # Bottleneck stages (conv4 to conv7) after the third subsampling.
+        conv4_1 = self.relu4_1(self.conv4_1(conv3_3norm_ss))
+        conv4_2 = self.relu4_2(self.conv4_2(conv4_1))
+        conv4_3 = self.relu4_3(self.conv4_3(conv4_2))
+        conv4_3norm = self.conv4_3norm(conv4_3)
+        conv5_1 = self.relu5_1(self.conv5_1(conv4_3norm))
+        conv5_2 = self.relu5_2(self.conv5_2(conv5_1))
+        conv5_3 = self.relu5_3(self.conv5_3(conv5_2))
+        conv5_3norm = self.conv5_3norm(conv5_3)
+        conv6_1 = self.relu6_1(self.conv6_1(conv5_3norm))
+        conv6_2 = self.relu6_2(self.conv6_2(conv6_1))
+        conv6_3 = self.relu6_3(self.conv6_3(conv6_2))
+        conv6_3norm = self.conv6_3norm(conv6_3)
+        conv7_1 = self.relu7_1(self.conv7_1(conv6_3norm))
+        conv7_2 = self.relu7_2(self.conv7_2(conv7_1))
+        conv7_3 = self.relu7_3(self.conv7_3(conv7_2))
+        conv7_3norm = self.conv7_3norm(conv7_3)
+        # Decoder: upsample, add the skip branch taken from conv3_3norm.
+        conv8_1 = self.conv8_1(conv7_3norm)
+        conv3_3_short = self.conv3_3_short(conv3_3norm)
+        conv8_1_comb = self.relu8_1_comb(conv8_1 + conv3_3_short)
+        conv8_2 = self.relu8_2(self.conv8_2(conv8_1_comb))
+        conv8_3 = self.relu8_3(self.conv8_3(conv8_2))
+        conv8_3norm = self.conv8_3norm(conv8_3)
+        # Decoder: upsample, add the skip branch taken from conv2_2norm.
+        conv9_1 = self.conv9_1(conv8_3norm)
+        conv2_2_short = self.conv2_2_short(conv2_2norm)
+        conv9_1_comb = self.relu9_1_comb(conv9_1 + conv2_2_short)
+        conv9_2 = self.relu9_2(self.conv9_2(conv9_1_comb))
+        conv9_2norm = self.conv9_2norm(conv9_2)
+        # Decoder: upsample, add the skip branch taken from conv1_2norm.
+        conv10_1 = self.conv10_1(conv9_2norm)
+        conv1_2_short = self.conv1_2_short(conv1_2norm)
+        conv10_1_comb = self.relu10_1_comb(conv10_1 + conv1_2_short)
+        conv10_2 = self.relu10_2(self.conv10_2(conv10_1_comb))
+        conv10_ab = self.conv10_ab(conv10_2)
+
+        # tanh bounds the output to (-1, 1); the factor rescales it to (-128, 128).
+        return torch.tanh(conv10_ab) * 128
+
+
+class GeneralColorVidNet(nn.Module):
+    """Variant of the encoder-decoder CNN that uses PReLU activations.
+
+    It maps an ``ic``-channel input to a 2-channel output equal to
+    ``tanh(...) * 128``. All activations are ``nn.PReLU`` except the last
+    one, which is ``nn.LeakyReLU(0.2, True)``.
+
+    Note: several attributes are assigned twice in ``__init__`` (the
+    ``*norm`` layers and ``conv8_1``/``conv9_1``/``conv10_1``). The later
+    assignment replaces the earlier one, so ``forward`` uses InstanceNorm2d
+    and Upsample+Conv2d, not BatchNorm2d and ConvTranspose2d.
+    """
+
+    def __init__(self, ic):
+        """
+        Args:
+            ic (int): Number of input channels of the first convolution.
+        """
+        super(GeneralColorVidNet, self).__init__()
+        self.conv1_1 = nn.Sequential(nn.Conv2d(ic, 32, 3, 1, 1), nn.ReLU(), nn.Conv2d(32, 64, 3, 1, 1))
+        self.conv1_2 = nn.Conv2d(64, 64, 3, 1, 1)
+        self.conv1_2norm = nn.BatchNorm2d(64, affine=False)
+        # 1x1 depthwise convolution with stride 2: halves the resolution.
+        self.conv1_2norm_ss = nn.Conv2d(64, 64, 1, 2, bias=False, groups=64)
+        self.conv2_1 = nn.Conv2d(64, 128, 3, 1, 1)
+        self.conv2_2 = nn.Conv2d(128, 128, 3, 1, 1)
+        self.conv2_2norm = nn.BatchNorm2d(128, affine=False)
+        self.conv2_2norm_ss = nn.Conv2d(128, 128, 1, 2, bias=False, groups=128)
+        self.conv3_1 = nn.Conv2d(128, 256, 3, 1, 1)
+        self.conv3_2 = nn.Conv2d(256, 256, 3, 1, 1)
+        self.conv3_3 = nn.Conv2d(256, 256, 3, 1, 1)
+        self.conv3_3norm = nn.BatchNorm2d(256, affine=False)
+        self.conv3_3norm_ss = nn.Conv2d(256, 256, 1, 2, bias=False, groups=256)
+        self.conv4_1 = nn.Conv2d(256, 512, 3, 1, 1)
+        self.conv4_2 = nn.Conv2d(512, 512, 3, 1, 1)
+        self.conv4_3 = nn.Conv2d(512, 512, 3, 1, 1)
+        self.conv4_3norm = nn.BatchNorm2d(512, affine=False)
+        # conv5_* and conv6_* use padding 2 and dilation 2 (5th and 6th positional args).
+        self.conv5_1 = nn.Conv2d(512, 512, 3, 1, 2, 2)
+        self.conv5_2 = nn.Conv2d(512, 512, 3, 1, 2, 2)
+        self.conv5_3 = nn.Conv2d(512, 512, 3, 1, 2, 2)
+        self.conv5_3norm = nn.BatchNorm2d(512, affine=False)
+        self.conv6_1 = nn.Conv2d(512, 512, 3, 1, 2, 2)
+        self.conv6_2 = nn.Conv2d(512, 512, 3, 1, 2, 2)
+        self.conv6_3 = nn.Conv2d(512, 512, 3, 1, 2, 2)
+        self.conv6_3norm = nn.BatchNorm2d(512, affine=False)
+        self.conv7_1 = nn.Conv2d(512, 512, 3, 1, 1)
+        self.conv7_2 = nn.Conv2d(512, 512, 3, 1, 1)
+        self.conv7_3 = nn.Conv2d(512, 512, 3, 1, 1)
+        self.conv7_3norm = nn.BatchNorm2d(512, affine=False)
+        # Replaced further down by an Upsample + Conv2d pair.
+        self.conv8_1 = nn.ConvTranspose2d(512, 256, 4, 2, 1)
+        self.conv3_3_short = nn.Conv2d(256, 256, 3, 1, 1)
+        self.conv8_2 = nn.Conv2d(256, 256, 3, 1, 1)
+        self.conv8_3 = nn.Conv2d(256, 256, 3, 1, 1)
+        self.conv8_3norm = nn.BatchNorm2d(256, affine=False)
+        # Replaced further down by an Upsample + Conv2d pair.
+        self.conv9_1 = nn.ConvTranspose2d(256, 128, 4, 2, 1)
+        self.conv2_2_short = nn.Conv2d(128, 128, 3, 1, 1)
+        self.conv9_2 = nn.Conv2d(128, 128, 3, 1, 1)
+        self.conv9_2norm = nn.BatchNorm2d(128, affine=False)
+        # Replaced further down by an Upsample + Conv2d pair.
+        self.conv10_1 = nn.ConvTranspose2d(128, 128, 4, 2, 1)
+        self.conv1_2_short = nn.Conv2d(64, 128, 3, 1, 1)
+        self.conv10_2 = nn.Conv2d(128, 128, 3, 1, 1)
+        # Final 1x1 convolution producing the two output channels.
+        self.conv10_ab = nn.Conv2d(128, 2, 1, 1)
+
+        # add self.relux_x
+        self.relu1_1 = nn.PReLU()
+        self.relu1_2 = nn.PReLU()
+        self.relu2_1 = nn.PReLU()
+        self.relu2_2 = nn.PReLU()
+        self.relu3_1 = nn.PReLU()
+        self.relu3_2 = nn.PReLU()
+        self.relu3_3 = nn.PReLU()
+        self.relu4_1 = nn.PReLU()
+        self.relu4_2 = nn.PReLU()
+        self.relu4_3 = nn.PReLU()
+        self.relu5_1 = nn.PReLU()
+        self.relu5_2 = nn.PReLU()
+        self.relu5_3 = nn.PReLU()
+        self.relu6_1 = nn.PReLU()
+        self.relu6_2 = nn.PReLU()
+        self.relu6_3 = nn.PReLU()
+        self.relu7_1 = nn.PReLU()
+        self.relu7_2 = nn.PReLU()
+        self.relu7_3 = nn.PReLU()
+        self.relu8_1_comb = nn.PReLU()
+        self.relu8_2 = nn.PReLU()
+        self.relu8_3 = nn.PReLU()
+        self.relu9_1_comb = nn.PReLU()
+        self.relu9_2 = nn.PReLU()
+        self.relu10_1_comb = nn.PReLU()
+        self.relu10_2 = nn.LeakyReLU(0.2, True)
+
+        # Overrides the ConvTranspose2d layers assigned above: nearest-neighbour
+        # 2x upsampling followed by a 3x3 convolution.
+        self.conv8_1 = nn.Sequential(nn.Upsample(scale_factor=2, mode="nearest"), nn.Conv2d(512, 256, 3, 1, 1))
+        self.conv9_1 = nn.Sequential(nn.Upsample(scale_factor=2, mode="nearest"), nn.Conv2d(256, 128, 3, 1, 1))
+        self.conv10_1 = nn.Sequential(nn.Upsample(scale_factor=2, mode="nearest"), nn.Conv2d(128, 128, 3, 1, 1))
+
+        # Overrides the BatchNorm2d layers assigned above with InstanceNorm2d.
+        self.conv1_2norm = nn.InstanceNorm2d(64)
+        self.conv2_2norm = nn.InstanceNorm2d(128)
+        self.conv3_3norm = nn.InstanceNorm2d(256)
+        self.conv4_3norm = nn.InstanceNorm2d(512)
+        self.conv5_3norm = nn.InstanceNorm2d(512)
+        self.conv6_3norm = nn.InstanceNorm2d(512)
+        self.conv7_3norm = nn.InstanceNorm2d(512)
+        self.conv8_3norm = nn.InstanceNorm2d(256)
+        self.conv9_2norm = nn.InstanceNorm2d(128)
+
+    def forward(self, x):
+        """Run the network on ``x`` and return ``tanh(conv10_ab) * 128``.
+
+        x: tensor with ``ic`` channels. The original note describes it as
+        gray image (1 channel), ab (2 channel), ab_err, ba_err -- presumably
+        concatenated along the channel axis; confirm against the caller.
+        NOTE(review): the skip additions presumably require H and W to be
+        divisible by 8 -- confirm.
+        """
+        # Encoder, full resolution.
+        conv1_1 = self.relu1_1(self.conv1_1(x))
+        conv1_2 = self.relu1_2(self.conv1_2(conv1_1))
+        conv1_2norm = self.conv1_2norm(conv1_2)
+        conv1_2norm_ss = self.conv1_2norm_ss(conv1_2norm)
+        # Encoder, after the first stride-2 subsampling.
+        conv2_1 = self.relu2_1(self.conv2_1(conv1_2norm_ss))
+        conv2_2 = self.relu2_2(self.conv2_2(conv2_1))
+        conv2_2norm = self.conv2_2norm(conv2_2)
+        conv2_2norm_ss = self.conv2_2norm_ss(conv2_2norm)
+        # Encoder, after the second stride-2 subsampling.
+        conv3_1 = self.relu3_1(self.conv3_1(conv2_2norm_ss))
+        conv3_2 = self.relu3_2(self.conv3_2(conv3_1))
+        conv3_3 = self.relu3_3(self.conv3_3(conv3_2))
+        conv3_3norm = self.conv3_3norm(conv3_3)
+        conv3_3norm_ss = self.conv3_3norm_ss(conv3_3norm)
+        # Bottleneck stages (conv4 to conv7) after the third subsampling.
+        conv4_1 = self.relu4_1(self.conv4_1(conv3_3norm_ss))
+        conv4_2 = self.relu4_2(self.conv4_2(conv4_1))
+        conv4_3 = self.relu4_3(self.conv4_3(conv4_2))
+        conv4_3norm = self.conv4_3norm(conv4_3)
+        conv5_1 = self.relu5_1(self.conv5_1(conv4_3norm))
+        conv5_2 = self.relu5_2(self.conv5_2(conv5_1))
+        conv5_3 = self.relu5_3(self.conv5_3(conv5_2))
+        conv5_3norm = self.conv5_3norm(conv5_3)
+        conv6_1 = self.relu6_1(self.conv6_1(conv5_3norm))
+        conv6_2 = self.relu6_2(self.conv6_2(conv6_1))
+        conv6_3 = self.relu6_3(self.conv6_3(conv6_2))
+        conv6_3norm = self.conv6_3norm(conv6_3)
+        conv7_1 = self.relu7_1(self.conv7_1(conv6_3norm))
+        conv7_2 = self.relu7_2(self.conv7_2(conv7_1))
+        conv7_3 = self.relu7_3(self.conv7_3(conv7_2))
+        conv7_3norm = self.conv7_3norm(conv7_3)
+        # Decoder: upsample, add the skip branch taken from conv3_3norm.
+        conv8_1 = self.conv8_1(conv7_3norm)
+        conv3_3_short = self.conv3_3_short(conv3_3norm)
+        conv8_1_comb = self.relu8_1_comb(conv8_1 + conv3_3_short)
+        conv8_2 = self.relu8_2(self.conv8_2(conv8_1_comb))
+        conv8_3 = self.relu8_3(self.conv8_3(conv8_2))
+        conv8_3norm = self.conv8_3norm(conv8_3)
+        # Decoder: upsample, add the skip branch taken from conv2_2norm.
+        conv9_1 = self.conv9_1(conv8_3norm)
+        conv2_2_short = self.conv2_2_short(conv2_2norm)
+        conv9_1_comb = self.relu9_1_comb(conv9_1 + conv2_2_short)
+        conv9_2 = self.relu9_2(self.conv9_2(conv9_1_comb))
+        conv9_2norm = self.conv9_2norm(conv9_2)
+        # Decoder: upsample, add the skip branch taken from conv1_2norm.
+        conv10_1 = self.conv10_1(conv9_2norm)
+        conv1_2_short = self.conv1_2_short(conv1_2norm)
+        conv10_1_comb = self.relu10_1_comb(conv10_1 + conv1_2_short)
+        conv10_2 = self.relu10_2(self.conv10_2(conv10_1_comb))
+        conv10_ab = self.conv10_ab(conv10_2)
+
+        # tanh bounds the output to (-1, 1); the factor rescales it to (-128, 128).
+        return torch.tanh(conv10_ab) * 128
diff --git a/src/models/CNN/FrameColor.py b/src/models/CNN/FrameColor.py
new file mode 100644
index 0000000000000000000000000000000000000000..68fe374641d92bc95d0f2877d1a854a39c21c654
--- /dev/null
+++ b/src/models/CNN/FrameColor.py
@@ -0,0 +1,76 @@
+import torch
+from src.utils import *
+from src.models.vit.vit import FeatureTransform
+
+
+def warp_color(
+    IA_l,
+    IB_lab,
+    features_B,
+    embed_net,
+    nonlocal_net,
+    temperature=0.01,
+):
+    """Warp the reference colours onto the target frame via feature matching.
+
+    The target input is expanded with ``gray2rgb_batch``, embedded with
+    ``embed_net`` (no gradient tracking), and both feature pyramids are run
+    through ``feature_normalize`` before being handed to ``nonlocal_net``.
+
+    Args:
+        IA_l: target-frame input passed to ``gray2rgb_batch`` (presumably the
+            luminance channel -- confirm against the caller).
+        IB_lab: reference image, forwarded unchanged to ``nonlocal_net``.
+        features_B: iterable of exactly four reference feature maps.
+        embed_net: callable returning exactly four feature maps.
+        nonlocal_net: correspondence network that performs the warp.
+        temperature: forwarded to ``nonlocal_net`` as a keyword argument.
+
+    Returns:
+        Whatever ``nonlocal_net`` returns.
+    """
+    target_rgb = gray2rgb_batch(IA_l)
+
+    # Tuple unpacking enforces that both pyramids have exactly four levels.
+    with torch.no_grad():
+        a0, a1, a2, a3 = embed_net(target_rgb)
+        b0, b1, b2, b3 = features_B
+
+    target_feats = [feature_normalize(level) for level in (a0, a1, a2, a3)]
+    reference_feats = [feature_normalize(level) for level in (b0, b1, b2, b3)]
+
+    return nonlocal_net(IB_lab, *target_feats, *reference_feats, temperature=temperature)
+
+
+def frame_colorization(
+    IA_l,
+    IB_lab,
+    IA_last_lab,
+    features_B,
+    embed_net,
+    nonlocal_net,
+    colornet,
+    joint_training=True,
+    luminance_noise=0,
+    temperature=0.01,
+):
+    """Colourise one frame from a reference image and the previous result.
+
+    Steps: optionally perturb ``IA_l`` with Gaussian noise, warp the reference
+    colours with ``warp_color``, then feed the channel-wise concatenation of
+    ``IA_l``, channels 1-2 of the warped result, the similarity map and
+    ``IA_last_lab`` to ``colornet``.
+
+    Args:
+        IA_l: target-frame input (see ``warp_color``).
+        IB_lab: reference image forwarded to ``warp_color``.
+        IA_last_lab: previous-frame tensor appended to the ``colornet`` input.
+        features_B: pre-computed reference features.
+        embed_net: feature extractor used by ``warp_color``.
+        nonlocal_net: correspondence network used by ``warp_color``.
+        colornet: network that maps the concatenated input to the prediction.
+        joint_training: when falsy, gradient tracking is disabled for the
+            guarded section below.
+        luminance_noise: standard deviation of the noise added to ``IA_l``;
+            a falsy value skips the perturbation.
+        temperature: forwarded to ``warp_color``.
+
+    Returns:
+        Tuple ``(IA_ab_predict, nonlocal_BA_lab)``.
+    """
+    if luminance_noise:
+        noise = torch.randn_like(IA_l, requires_grad=False)
+        IA_l = IA_l + noise * luminance_noise
+
+    with torch.autograd.set_grad_enabled(joint_training):
+        warped = warp_color(
+            IA_l,
+            IB_lab,
+            features_B,
+            embed_net,
+            nonlocal_net,
+            temperature=temperature,
+        )
+        nonlocal_BA_lab, similarity_map = warped
+        warped_ab = nonlocal_BA_lab[:, 1:3, :, :]
+        colornet_input = torch.cat((IA_l, warped_ab, similarity_map, IA_last_lab), dim=1)
+        IA_ab_predict = colornet(colornet_input)
+
+    return IA_ab_predict, nonlocal_BA_lab
diff --git a/src/models/CNN/GAN_models.py b/src/models/CNN/GAN_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..137111bb8035c8d0dbd26b6b958c4036260b8821
--- /dev/null
+++ b/src/models/CNN/GAN_models.py
@@ -0,0 +1,268 @@
+# DCGAN-like generator and discriminator
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.nn import Parameter
+
+
+def l2normalize(v, eps=1e-12):
+    """Scale ``v`` to unit L2 norm; ``eps`` guards against division by zero."""
+    magnitude = v.norm()
+    return v / (magnitude + eps)
+
+
+class SpectralNorm(nn.Module):
+    """Wrap a module so one of its weights is spectrally normalised each call.
+
+    The wrapped parameter ``<name>`` is replaced by three parameters on the
+    inner module: ``<name>_bar`` (the raw trainable weight) and the power
+    iteration vectors ``<name>_u`` / ``<name>_v`` (not trainable).  On every
+    forward pass the vectors are refined and ``<name>`` is reassigned to
+    ``<name>_bar / sigma``.
+
+    Args:
+        module: module owning the parameter to normalise.
+        name: attribute name of that parameter.
+        power_iterations: power-iteration steps performed per forward call.
+    """
+
+    def __init__(self, module, name="weight", power_iterations=1):
+        super().__init__()
+        self.module = module
+        self.name = name
+        self.power_iterations = power_iterations
+        if not self._made_params():
+            self._make_params()
+
+    def _update_u_v(self):
+        """Run the power iteration and refresh the normalised weight."""
+        u = getattr(self.module, self.name + "_u")
+        v = getattr(self.module, self.name + "_v")
+        w = getattr(self.module, self.name + "_bar")
+
+        rows = w.data.shape[0]
+        for _ in range(self.power_iterations):
+            # ``.data`` keeps the iteration itself out of the autograd graph.
+            w_matrix = w.view(rows, -1).data
+            v.data = l2normalize(torch.mv(torch.t(w_matrix), u.data))
+            u.data = l2normalize(torch.mv(w_matrix, v.data))
+
+        # sigma is computed through ``w`` itself so gradients reach ``w_bar``.
+        sigma = u.dot(w.view(rows, -1).mv(v))
+        setattr(self.module, self.name, w / sigma.expand_as(w))
+
+    def _made_params(self):
+        """Return True when the three helper parameters already exist."""
+        return all(hasattr(self.module, self.name + suffix) for suffix in ("_u", "_v", "_bar"))
+
+    def _make_params(self):
+        """Replace the original parameter by ``_u``, ``_v`` and ``_bar``."""
+        w = getattr(self.module, self.name)
+
+        rows = w.data.shape[0]
+        cols = w.view(rows, -1).data.shape[1]
+
+        u = Parameter(w.data.new(rows).normal_(0, 1), requires_grad=False)
+        v = Parameter(w.data.new(cols).normal_(0, 1), requires_grad=False)
+        u.data = l2normalize(u.data)
+        v.data = l2normalize(v.data)
+        w_bar = Parameter(w.data)
+
+        # Drop the original entry so ``<name>`` can later be set as a plain
+        # (non-parameter) tensor attribute in ``_update_u_v``.
+        del self.module._parameters[self.name]
+
+        for suffix, param in (("_u", u), ("_v", v), ("_bar", w_bar)):
+            self.module.register_parameter(self.name + suffix, param)
+
+    def forward(self, *args):
+        """Refresh the normalised weight, then run the wrapped module."""
+        self._update_u_v()
+        return self.module.forward(*args)
+
+
+class Generator(nn.Module):
+    """DCGAN-style generator built from transposed convolutions.
+
+    A latent vector is reshaped to ``(B, z_dim, 1, 1)``, expanded to 4x4 by the
+    first transposed convolution and doubled three times (8, 16, 32) before a
+    final 3x3 layer maps to ``channels`` outputs squashed by ``tanh``.
+
+    Args:
+        z_dim: size of the latent vector.
+    """
+
+    def __init__(self, z_dim):
+        super().__init__()
+        self.z_dim = z_dim
+
+        stages = [
+            nn.ConvTranspose2d(z_dim, 512, 4, stride=1),
+            nn.InstanceNorm2d(512),
+            nn.ReLU(),
+        ]
+        # Three identical "double the resolution, halve the width" stages.
+        for fan_in, fan_out in ((512, 256), (256, 128), (128, 64)):
+            stages += [
+                nn.ConvTranspose2d(fan_in, fan_out, 4, stride=2, padding=(1, 1)),
+                nn.InstanceNorm2d(fan_out),
+                nn.ReLU(),
+            ]
+        stages += [
+            nn.ConvTranspose2d(64, channels, 3, stride=1, padding=(1, 1)),
+            nn.Tanh(),
+        ]
+        self.model = nn.Sequential(*stages)
+
+    def forward(self, z):
+        """Generate a batch of images from latent input ``z``."""
+        latent = z.view(-1, self.z_dim, 1, 1)
+        return self.model(latent)
+
+
+# Module-level settings shared by Generator and Discriminator.
+channels = 3  # channel count of Generator's output and Discriminator's input
+leak = 0.1  # negative slope passed to every LeakyReLU in Discriminator
+w_g = 4  # side length of the final feature map assumed by Discriminator's flatten
+
+
+class Discriminator(nn.Module):
+    """DCGAN-style critic made of spectrally normalised convolutions.
+
+    Four of the eight convolutions use stride 2, so the input is downsampled
+    by 16.  The flatten before the linear head assumes the final map is
+    ``w_g`` x ``w_g`` with 512 channels.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+        self.conv1 = SpectralNorm(nn.Conv2d(channels, 64, 3, stride=1, padding=(1, 1)))
+        self.conv2 = SpectralNorm(nn.Conv2d(64, 64, 4, stride=2, padding=(1, 1)))
+        self.conv3 = SpectralNorm(nn.Conv2d(64, 128, 3, stride=1, padding=(1, 1)))
+        self.conv4 = SpectralNorm(nn.Conv2d(128, 128, 4, stride=2, padding=(1, 1)))
+        self.conv5 = SpectralNorm(nn.Conv2d(128, 256, 3, stride=1, padding=(1, 1)))
+        self.conv6 = SpectralNorm(nn.Conv2d(256, 256, 4, stride=2, padding=(1, 1)))
+        self.conv7 = SpectralNorm(nn.Conv2d(256, 256, 3, stride=1, padding=(1, 1)))
+        self.conv8 = SpectralNorm(nn.Conv2d(256, 512, 4, stride=2, padding=(1, 1)))
+        self.fc = SpectralNorm(nn.Linear(w_g * w_g * 512, 1))
+
+    def forward(self, x):
+        """Return one unbounded score per flattened ``w_g*w_g*512`` feature row."""
+        activate = nn.LeakyReLU(leak)
+
+        # First and last convolutions are not followed by instance norm.
+        hidden = activate(self.conv1(x))
+        normalised_stages = (
+            (self.conv2, 64),
+            (self.conv3, 128),
+            (self.conv4, 128),
+            (self.conv5, 256),
+            (self.conv6, 256),
+            (self.conv7, 256),
+        )
+        for conv, width in normalised_stages:
+            hidden = activate(nn.InstanceNorm2d(width)(conv(hidden)))
+        hidden = activate(self.conv8(hidden))
+
+        flat = hidden.view(-1, w_g * w_g * 512)
+        return self.fc(flat)
+
+
+class Self_Attention(nn.Module):
+    """Self-attention over all spatial positions with a learned residual gate.
+
+    Query, key and value are 1x1 spectrally normalised convolutions that keep
+    the channel count.  ``gamma`` starts at zero, so the layer is initially
+    the identity.
+
+    Args:
+        in_dim: number of input (and output) channels.
+    """
+
+    def __init__(self, in_dim):
+        super().__init__()
+        self.chanel_in = in_dim
+
+        self.query_conv = SpectralNorm(nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 1, kernel_size=1))
+        self.key_conv = SpectralNorm(nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 1, kernel_size=1))
+        self.value_conv = SpectralNorm(nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1))
+        self.gamma = nn.Parameter(torch.zeros(1))
+
+        self.softmax = nn.Softmax(dim=-1)
+
+    def forward(self, x):
+        """Apply attention to ``x`` of shape (B, C, W, H).
+
+        Returns:
+            ``gamma * attended + x`` with the same shape as ``x``.  The
+            attention matrix itself is not returned.
+        """
+        batch, C, width, height = x.size()
+        positions = width * height
+
+        queries = self.query_conv(x).view(batch, -1, positions).permute(0, 2, 1)  # B x N x C
+        keys = self.key_conv(x).view(batch, -1, positions)  # B x C x N
+        attention = self.softmax(torch.bmm(queries, keys))  # B x N x N, rows sum to 1
+        values = self.value_conv(x).view(batch, -1, positions)  # B x C x N
+
+        attended = torch.bmm(values, attention.permute(0, 2, 1))
+        attended = attended.view(batch, C, width, height)
+
+        return self.gamma * attended + x
+
+
+class Discriminator_x64(nn.Module):
+    """Spectrally normalised discriminator with a 64x spatial reduction.
+
+    Six stride-2 convolutions are followed by an unpadded 3x6 convolution, so
+    the sixth feature map must be at least 3x6.  Self-attention is applied
+    after the second stage.
+
+    Args:
+        in_size: number of input channels.
+        ndf: base channel width.
+    """
+
+    def __init__(self, in_size=6, ndf=64):
+        super().__init__()
+        self.in_size = in_size
+        self.ndf = ndf
+
+        def down_stage(fan_in, fan_out):
+            # stride-2 conv -> instance norm -> leaky ReLU
+            return nn.Sequential(
+                SpectralNorm(nn.Conv2d(fan_in, fan_out, 4, 2, 1)),
+                nn.InstanceNorm2d(fan_out),
+                nn.LeakyReLU(0.2, inplace=True),
+            )
+
+        # The first stage has no normalisation layer.
+        self.layer1 = nn.Sequential(
+            SpectralNorm(nn.Conv2d(self.in_size, self.ndf, 4, 2, 1)),
+            nn.LeakyReLU(0.2, inplace=True),
+        )
+        self.layer2 = down_stage(self.ndf, self.ndf)
+        self.attention = Self_Attention(self.ndf)
+        self.layer3 = down_stage(self.ndf, self.ndf * 2)
+        self.layer4 = down_stage(self.ndf * 2, self.ndf * 4)
+        self.layer5 = down_stage(self.ndf * 4, self.ndf * 8)
+        self.layer6 = down_stage(self.ndf * 8, self.ndf * 16)
+
+        self.last = SpectralNorm(nn.Conv2d(self.ndf * 16, 1, [3, 6], 1, 0))
+
+    def forward(self, input):
+        """Score ``input`` and expose an intermediate feature map.
+
+        Returns:
+            Tuple ``(output, feature4)``: the score map averaged over its
+            spatial extent and flattened per sample, and the output of the
+            fourth stage.
+        """
+        hidden = self.layer2(self.layer1(input))
+        hidden = self.attention(hidden)
+        feature4 = self.layer4(self.layer3(hidden))
+        feature6 = self.layer6(self.layer5(feature4))
+
+        score_map = self.last(feature6)
+        pooled = F.avg_pool2d(score_map, score_map.size()[2:])
+        output = pooled.view(score_map.size()[0], -1)
+
+        return output, feature4
+
+
+class Discriminator_x64_224(nn.Module):
+    """Spectrally normalised discriminator ending in an unpadded 3x3 conv.
+
+    Six stride-2 convolutions reduce a 224x224 input to 3x3
+    (224, 112, 56, 28, 14, 7, 3), which the final 3x3 convolution collapses
+    to a single score.  Self-attention is applied after the second stage.
+
+    Args:
+        in_size: number of input channels.
+        ndf: base channel width.
+    """
+
+    def __init__(self, in_size=6, ndf=64):
+        super().__init__()
+        self.in_size = in_size
+        self.ndf = ndf
+
+        def down_stage(fan_in, fan_out):
+            # stride-2 conv -> instance norm -> leaky ReLU
+            return nn.Sequential(
+                SpectralNorm(nn.Conv2d(fan_in, fan_out, 4, 2, 1)),
+                nn.InstanceNorm2d(fan_out),
+                nn.LeakyReLU(0.2, inplace=True),
+            )
+
+        # The first stage has no normalisation layer.
+        self.layer1 = nn.Sequential(
+            SpectralNorm(nn.Conv2d(self.in_size, self.ndf, 4, 2, 1)),
+            nn.LeakyReLU(0.2, inplace=True),
+        )
+        self.layer2 = down_stage(self.ndf, self.ndf)
+        self.attention = Self_Attention(self.ndf)
+        self.layer3 = down_stage(self.ndf, self.ndf * 2)
+        self.layer4 = down_stage(self.ndf * 2, self.ndf * 4)
+        self.layer5 = down_stage(self.ndf * 4, self.ndf * 8)
+        self.layer6 = down_stage(self.ndf * 8, self.ndf * 16)
+
+        self.last = SpectralNorm(nn.Conv2d(self.ndf * 16, 1, [3, 3], 1, 0))
+
+    def forward(self, input):
+        """Score ``input`` and expose an intermediate feature map.
+
+        Returns:
+            Tuple ``(output, feature4)``: the score map averaged over its
+            spatial extent and flattened per sample, and the output of the
+            fourth stage.
+        """
+        hidden = self.layer2(self.layer1(input))
+        hidden = self.attention(hidden)
+        feature4 = self.layer4(self.layer3(hidden))
+        feature6 = self.layer6(self.layer5(feature4))
+
+        score_map = self.last(feature6)
+        pooled = F.avg_pool2d(score_map, score_map.size()[2:])
+        output = pooled.view(score_map.size()[0], -1)
+
+        return output, feature4
diff --git a/src/models/CNN/NonlocalNet.py b/src/models/CNN/NonlocalNet.py
new file mode 100644
index 0000000000000000000000000000000000000000..69477c9442abe2cdcc2a697ceb9fffa37cc55dcf
--- /dev/null
+++ b/src/models/CNN/NonlocalNet.py
@@ -0,0 +1,741 @@
+import sys
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from src.utils import uncenter_l
+
+
+def find_local_patch(x, patch_size):
+    """Gather the ``patch_size`` x ``patch_size`` neighbourhood of every pixel.
+
+    Args:
+        x: tensor of shape (N, C, H, W).
+        patch_size: side length of the square window; the final reshape only
+            works when the padded unfold yields exactly H*W windows, which is
+            the case for odd sizes.
+
+    Returns:
+        Tensor of shape (N, C * patch_size**2, H, W).  Neighbours falling
+        outside the image are zero (``F.unfold`` zero-pads).
+    """
+    batch, _, rows, cols = x.shape
+    half = patch_size // 2
+    patches = F.unfold(
+        x,
+        kernel_size=(patch_size, patch_size),
+        padding=(half, half),
+        stride=(1, 1),
+    )
+
+    return patches.view(batch, patches.shape[1], rows, cols)
+
+
+class WeightedAverage(nn.Module):
+    """Smooth the a/b channels with weights derived from local L differences."""
+
+    def __init__(
+        self,
+    ):
+        super().__init__()
+
+    def forward(self, x_lab, patch_size=3, alpha=1, scale_factor=1):
+        """Return locally averaged a/b channels of ``x_lab``.
+
+        Each neighbour in the ``patch_size`` window is weighted by a softmax
+        over ``-(L_neighbour - L_centre)**2 / alpha``.
+
+        Args:
+            x_lab: tensor whose channels 0, 1, 2 are read as L, a, b.
+            patch_size: side length of the neighbourhood window.
+            alpha: softmax temperature; larger values give flatter weights
+                and therefore a smoother result.
+            scale_factor: resize factor applied to ``x_lab`` first.
+
+        Returns:
+            Tensor of shape (batch, 2, height, width) at the resized size.
+        """
+        x_lab = F.interpolate(x_lab, scale_factor=scale_factor)
+        lum, chan_a, chan_b = (x_lab[:, k : k + 1, :, :] for k in range(3))
+
+        neigh_lum = find_local_patch(lum, patch_size)
+        neigh_a = find_local_patch(chan_a, patch_size)
+        neigh_b = find_local_patch(chan_b, patch_size)
+
+        squared_gap = (neigh_lum - lum) ** 2
+        weights = nn.functional.softmax(-1 * squared_gap / alpha, dim=1)
+
+        smooth_a = torch.sum(weights * neigh_a, dim=1, keepdim=True)
+        smooth_b = torch.sum(weights * neigh_b, dim=1, keepdim=True)
+        return torch.cat((smooth_a, smooth_b), 1)
+
+
+class WeightedAverage_color(nn.Module):
+    """Smooth predicted a/b channels using colour distances of a guide image."""
+
+    def __init__(
+        self,
+    ):
+        super().__init__()
+
+    def forward(self, x_lab, x_lab_predict, patch_size=3, alpha=1, scale_factor=1):
+        """Average the predicted a/b channels over a local window.
+
+        The weight of each neighbour is a softmax over the negative squared
+        Lab distance between that neighbour and the centre pixel, both
+        measured in ``x_lab`` (with L passed through ``uncenter_l``).
+
+        Args:
+            x_lab: guide image; channels 0, 1, 2 are read as L, a, b.
+            x_lab_predict: prediction whose channels 1 and 2 are averaged.
+                It is not rescaled, so it must already match the resized
+                ``x_lab`` spatially.
+            patch_size: side length of the neighbourhood window.
+            alpha: softmax temperature; larger values give a smoother result.
+            scale_factor: resize factor applied to ``x_lab`` only.
+
+        Returns:
+            Tensor holding the two smoothed predicted channels (a, b).
+        """
+        x_lab = F.interpolate(x_lab, scale_factor=scale_factor)
+        lum = uncenter_l(x_lab[:, 0:1, :, :])
+        guide_a = x_lab[:, 1:2, :, :]
+        guide_b = x_lab[:, 2:3, :, :]
+        pred_a = x_lab_predict[:, 1:2, :, :]
+        pred_b = x_lab_predict[:, 2:3, :, :]
+
+        neigh_lum = find_local_patch(lum, patch_size)
+        neigh_a = find_local_patch(guide_a, patch_size)
+        neigh_b = find_local_patch(guide_b, patch_size)
+        neigh_pred_a = find_local_patch(pred_a, patch_size)
+        neigh_pred_b = find_local_patch(pred_b, patch_size)
+
+        colour_gap = (neigh_lum - lum) ** 2 + (neigh_a - guide_a) ** 2 + (neigh_b - guide_b) ** 2
+        # Softmax over the window so the weights of each pixel sum to 1.
+        weights = nn.functional.softmax(-1 * colour_gap / alpha, dim=1)
+
+        smooth_a = torch.sum(weights * neigh_pred_a, dim=1, keepdim=True)
+        smooth_b = torch.sum(weights * neigh_pred_b, dim=1, keepdim=True)
+        return torch.cat((smooth_a, smooth_b), 1)
+
+
+class NonlocalWeightedAverage(nn.Module):
+    """Smooth a/b channels with weights from global feature correlations."""
+
+    def __init__(
+        self,
+    ):
+        super().__init__()
+
+    def forward(self, x_lab, feature, patch_size=3, alpha=0.1, scale_factor=1):
+        """Average the a/b channels over all positions with similar features.
+
+        Every position is described by its ``patch_size`` window of
+        ``feature``; the softmax of the pairwise dot products (divided by
+        ``alpha``) weights the a/b values of all positions.
+
+        Args:
+            x_lab: image whose channels 1 and 2 are averaged.
+            feature: feature map, resized to the resized ``x_lab`` (the
+                original comment states it is expected to be normalised).
+            patch_size: side length of the descriptor window.
+            alpha: softmax temperature; larger values give a smoother result.
+            scale_factor: resize factor applied to ``x_lab`` first.
+
+        Returns:
+            Tensor of shape (batch, 2, height, width) at the resized size.
+        """
+        x_lab = F.interpolate(x_lab, scale_factor=scale_factor)
+        batch_size, _, height, width = x_lab.shape
+        feature = F.interpolate(feature, size=(height, width))
+
+        # (batch, positions, 2)
+        colours = x_lab[:, 1:3, :, :].view(batch_size, 2, -1).permute(0, 2, 1)
+
+        # (batch, descriptor_dim, positions)
+        descriptors = find_local_patch(feature, patch_size)
+        descriptors = descriptors.view(batch_size, descriptors.shape[1], -1)
+
+        affinity = torch.matmul(descriptors.permute(0, 2, 1), descriptors)
+        affinity = nn.functional.softmax(affinity / alpha, dim=-1)
+
+        blended = torch.matmul(affinity, colours).permute(0, 2, 1).contiguous()
+        return blended.view(batch_size, 2, height, width)
+
+
+class CorrelationLayer(nn.Module):
+    """Local cost volume between two feature maps.
+
+    For every displacement ``(i, j)`` with ``|i|, |j| <= search_range`` the
+    layer compares ``x1[h, w]`` with ``x2[h - i, w - j]`` and stores the result
+    in one output channel.  Positions whose partner falls outside ``x2`` stay
+    zero.
+
+    Args:
+        search_range: maximum displacement in pixels along each axis.
+    """
+
+    def __init__(self, search_range):
+        super(CorrelationLayer, self).__init__()
+        self.search_range = search_range
+
+    @staticmethod
+    def _shift_slices(offset):
+        """Return (slice into x1/output, slice into x2) for one axis offset."""
+        if offset < 0:
+            return slice(None, offset), slice(-offset, None)
+        if offset > 0:
+            return slice(offset, None), slice(None, -offset)
+        return slice(None), slice(None)
+
+    def forward(self, x1, x2, alpha=1, raw_output=False, metric="similarity"):
+        """Compute the cost volume of ``x1`` against ``x2``.
+
+        Args:
+            x1: first feature map, shape (N, C, H, W).
+            x2: second feature map, same shape as ``x1``.
+            alpha: temperature dividing the costs before the softmax.
+            raw_output: if True, return the raw costs instead of the softmax.
+            metric: ``"similarity"`` for channel-wise dot products; any other
+                value selects the negative squared difference.
+
+        Returns:
+            Tensor of shape (N, (2 * search_range + 1) ** 2, H, W): the raw
+            cost volume, or its softmax over the displacement channels.
+        """
+        window = self.search_range * 2 + 1
+        shape = list(x1.size())
+        shape[1] = window**2
+        # Allocate next to the inputs instead of on a hard-coded CUDA device,
+        # so the layer also works on CPU and on non-default GPUs.
+        cv = torch.zeros(shape, device=x1.device)
+
+        for i in range(-self.search_range, self.search_range + 1):
+            slice_h, slice_h_r = self._shift_slices(i)
+            for j in range(-self.search_range, self.search_range + 1):
+                slice_w, slice_w_r = self._shift_slices(j)
+
+                patch_1 = x1[:, :, slice_h, slice_w]
+                patch_2 = x2[:, :, slice_h_r, slice_w_r]
+                if metric == "similarity":
+                    cost = (patch_1 * patch_2).sum(1)
+                else:  # patchwise subtraction
+                    cost = -((patch_1 - patch_2) ** 2).sum(1)
+
+                # NOTE: the channel index is negative for negative offsets and
+                # relies on wrap-around indexing; every displacement still
+                # gets its own channel, with zero displacement at channel 0.
+                cv[:, window * i + j, slice_h, slice_w] = cost
+
+        # TODO sigmoid?
+        if raw_output:
+            return cv
+        return nn.functional.softmax(cv / alpha, dim=1)
+
+
+class WTA_scale(torch.autograd.Function):
+    """Winner-takes-all scaling along the last dimension.
+
+    Entries equal to the maximum of their row (last dimension) pass through
+    unchanged; every other entry is multiplied by ``scale``.  The backward
+    pass applies the same factors to the incoming gradient.
+    """
+
+    @staticmethod
+    def forward(ctx, input, scale=1e-4):
+        """Scale all non-maximal entries of ``input`` by ``scale``.
+
+        Args:
+            ctx: autograd context used to stash state for ``backward``.
+            input: tensor whose last dimension is searched for the maximum.
+            scale: factor applied to non-maximal entries (default 1e-4).
+
+        Returns:
+            Tensor with the same shape as ``input``.
+        """
+        activation_max, _ = torch.max(input, -1, keepdim=True)
+        is_max = input == activation_max
+
+        ctx.save_for_backward(is_max)
+        # Remember the factor so the gradient matches the forward scaling
+        # even when a non-default ``scale`` is used.
+        ctx.scale = scale
+        return torch.where(is_max, input, input * scale)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        """Propagate gradients: 1 for maximal entries, ``scale`` otherwise.
+
+        Returns:
+            Tuple ``(grad_input, None)``; ``scale`` receives no gradient.
+        """
+        (is_max,) = ctx.saved_tensors
+        grad_input = torch.where(is_max, grad_output, grad_output * ctx.scale)
+        return grad_input, None
+
+
+class ResidualBlock(nn.Module):
+    """Two reflection-padded conv + instance-norm stages with a skip connection.
+
+    A single ``PReLU`` instance (one learnable slope) is shared by both
+    activations.  The skip connection adds the unmodified input, so the block
+    only works when input and output shapes match (``in_channels ==
+    out_channels`` and a size-preserving kernel/padding/stride).
+
+    Args:
+        in_channels: channels expected by both convolutions.
+        out_channels: channels produced by both convolutions.
+        kernel_size: convolution kernel size.
+        padding: reflection padding applied before each convolution.
+        stride: convolution stride.
+    """
+
+    def __init__(self, in_channels, out_channels, kernel_size=3, padding=1, stride=1):
+        super().__init__()
+        conv_kwargs = dict(kernel_size=kernel_size, padding=0, stride=stride)
+
+        self.padding1 = nn.ReflectionPad2d(padding)
+        self.conv1 = nn.Conv2d(in_channels, out_channels, **conv_kwargs)
+        self.bn1 = nn.InstanceNorm2d(out_channels)
+        self.prelu = nn.PReLU()
+        self.padding2 = nn.ReflectionPad2d(padding)
+        self.conv2 = nn.Conv2d(in_channels, out_channels, **conv_kwargs)
+        self.bn2 = nn.InstanceNorm2d(out_channels)
+
+    def forward(self, x):
+        """Return ``prelu(stage2(prelu(stage1(x))) + x)``."""
+        hidden = self.prelu(self.bn1(self.conv1(self.padding1(x))))
+        hidden = self.bn2(self.conv2(self.padding2(hidden)))
+        hidden += x
+        return self.prelu(hidden)
+
+
+class WarpNet(nn.Module):
+    """Warp reference colours onto a target via dense feature correspondence.
+
+    Four feature levels of the target (A) and the reference (B) are each
+    projected to ``feature_channel`` channels and brought to a common spatial
+    size, concatenated, refined by three residual blocks and compared with a
+    normalised dot product.  The softmax of that correlation is used to
+    average the 4x-downsampled reference ``B_lab_map``.
+
+    NOTE(review): the original one-line docstring read "input is Al, Bl,
+    channel = 1, range~[0,255]", which does not match the arguments that
+    ``forward`` actually takes -- confirm and drop if stale.
+    """
+
+    def __init__(self):
+        super(WarpNet, self).__init__()
+        self.feature_channel = 64
+        self.in_channels = self.feature_channel * 4
+        self.inter_channels = 256
+        # The size comments below (44*44, 22*22, 11*11) presumably refer to a
+        # 176x176 input image -- TODO confirm.
+        # 44*44
+        # 128 input channels; the second conv has stride 2 (halves resolution).
+        self.layer2_1 = nn.Sequential(
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(128, 128, kernel_size=3, padding=0, stride=1),
+            nn.InstanceNorm2d(128),
+            nn.PReLU(),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(128, self.feature_channel, kernel_size=3, padding=0, stride=2),
+            nn.InstanceNorm2d(self.feature_channel),
+            nn.PReLU(),
+            nn.Dropout(0.2),
+        )
+        # 256 input channels; resolution is kept.
+        self.layer3_1 = nn.Sequential(
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(256, 128, kernel_size=3, padding=0, stride=1),
+            nn.InstanceNorm2d(128),
+            nn.PReLU(),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(128, self.feature_channel, kernel_size=3, padding=0, stride=1),
+            nn.InstanceNorm2d(self.feature_channel),
+            nn.PReLU(),
+            nn.Dropout(0.2),
+        )
+
+        # 22*22->44*44
+        # 512 input channels; one 2x upsampling at the end.
+        self.layer4_1 = nn.Sequential(
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(512, 256, kernel_size=3, padding=0, stride=1),
+            nn.InstanceNorm2d(256),
+            nn.PReLU(),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(256, self.feature_channel, kernel_size=3, padding=0, stride=1),
+            nn.InstanceNorm2d(self.feature_channel),
+            nn.PReLU(),
+            nn.Upsample(scale_factor=2),
+            nn.Dropout(0.2),
+        )
+
+        # 11*11->44*44
+        # 512 input channels; two 2x upsamplings (4x in total).
+        self.layer5_1 = nn.Sequential(
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(512, 256, kernel_size=3, padding=0, stride=1),
+            nn.InstanceNorm2d(256),
+            nn.PReLU(),
+            nn.Upsample(scale_factor=2),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(256, self.feature_channel, kernel_size=3, padding=0, stride=1),
+            nn.InstanceNorm2d(self.feature_channel),
+            nn.PReLU(),
+            nn.Upsample(scale_factor=2),
+            nn.Dropout(0.2),
+        )
+
+        # Refinement of the concatenated four-level feature stack.
+        self.layer = nn.Sequential(
+            ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1),
+            ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1),
+            ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1),
+        )
+
+        # 1x1 projections: theta embeds the target (A), phi the reference (B).
+        self.theta = nn.Conv2d(
+            in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1, stride=1, padding=0
+        )
+        self.phi = nn.Conv2d(in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1, stride=1, padding=0)
+
+        # Brings the quarter-resolution outputs back to image resolution.
+        self.upsampling = nn.Upsample(scale_factor=4)
+
+    def forward(
+        self,
+        B_lab_map,
+        A_relu2_1,
+        A_relu3_1,
+        A_relu4_1,
+        A_relu5_1,
+        B_relu2_1,
+        B_relu3_1,
+        B_relu4_1,
+        B_relu5_1,
+        temperature=0.001 * 5,
+        detach_flag=False,
+        WTA_scale_weight=1,
+    ):
+        """Warp ``B_lab_map`` towards the target described by the A features.
+
+        Args:
+            B_lab_map: reference image, shape (batch, channel, H, W).  The
+                correspondence grid is assumed to be (H/4, W/4).
+            A_relu2_1, A_relu3_1, A_relu4_1, A_relu5_1: target feature levels
+                fed to ``layer2_1`` ... ``layer5_1`` respectively.
+            B_relu2_1, B_relu3_1, B_relu4_1, B_relu5_1: reference feature
+                levels, processed by the same layers.
+            temperature: divisor applied to the correlation before softmax.
+            detach_flag: if True, the correlation matrix is detached from the
+                autograd graph.
+            WTA_scale_weight: when not equal to 1, ``WTA_scale`` is applied to
+                the correlation with this factor.
+
+        Returns:
+            Tuple ``(y, similarity_map)``: the warped reference and the
+            per-position maximum correlation, both upsampled by 4.
+        """
+        batch_size = B_lab_map.shape[0]
+        channel = B_lab_map.shape[1]
+        image_height = B_lab_map.shape[2]
+        image_width = B_lab_map.shape[3]
+        feature_height = int(image_height / 4)
+        feature_width = int(image_width / 4)
+
+        # scale feature size to 44*44
+        A_feature2_1 = self.layer2_1(A_relu2_1)
+        B_feature2_1 = self.layer2_1(B_relu2_1)
+        A_feature3_1 = self.layer3_1(A_relu3_1)
+        B_feature3_1 = self.layer3_1(B_relu3_1)
+        A_feature4_1 = self.layer4_1(A_relu4_1)
+        B_feature4_1 = self.layer4_1(B_relu4_1)
+        A_feature5_1 = self.layer5_1(A_relu5_1)
+        B_feature5_1 = self.layer5_1(B_relu5_1)
+
+        # concatenate features
+        # If the deepest level came out with a different size, add one
+        # replicated row at the top and at the bottom (height only).
+        if A_feature5_1.shape[2] != A_feature2_1.shape[2] or A_feature5_1.shape[3] != A_feature2_1.shape[3]:
+            A_feature5_1 = F.pad(A_feature5_1, (0, 0, 1, 1), "replicate")
+            B_feature5_1 = F.pad(B_feature5_1, (0, 0, 1, 1), "replicate")
+
+        A_features = self.layer(torch.cat((A_feature2_1, A_feature3_1, A_feature4_1, A_feature5_1), 1))
+        B_features = self.layer(torch.cat((B_feature2_1, B_feature3_1, B_feature4_1, B_feature5_1), 1))
+
+        # pairwise cosine similarity
+        # Features are centred over spatial positions (dim=-1) and then
+        # L2-normalised over channels (dim=1).
+        theta = self.theta(A_features).view(batch_size, self.inter_channels, -1)  # 2*256*(feature_height*feature_width)
+        theta = theta - theta.mean(dim=-1, keepdim=True)  # center the feature
+        theta_norm = torch.norm(theta, 2, 1, keepdim=True) + sys.float_info.epsilon
+        theta = torch.div(theta, theta_norm)
+        theta_permute = theta.permute(0, 2, 1)  # 2*(feature_height*feature_width)*256
+        phi = self.phi(B_features).view(batch_size, self.inter_channels, -1)  # 2*256*(feature_height*feature_width)
+        phi = phi - phi.mean(dim=-1, keepdim=True)  # center the feature
+        phi_norm = torch.norm(phi, 2, 1, keepdim=True) + sys.float_info.epsilon
+        phi = torch.div(phi, phi_norm)
+        f = torch.matmul(theta_permute, phi)  # 2*(feature_height*feature_width)*(feature_height*feature_width)
+        if detach_flag:
+            f = f.detach()
+
+        # unsqueeze_ is in-place: from here on ``f`` itself is 4-D.
+        f_similarity = f.unsqueeze_(dim=1)
+        # Best match score of every target position over all reference positions.
+        similarity_map = torch.max(f_similarity, -1, keepdim=True)[0]
+        similarity_map = similarity_map.view(batch_size, 1, feature_height, feature_width)
+
+        # f can be negative
+        f_WTA = f if WTA_scale_weight == 1 else WTA_scale.apply(f, WTA_scale_weight)
+        f_WTA = f_WTA / temperature
+        # squeeze_() removes every size-1 dimension in place, which includes
+        # the batch dimension when batch_size == 1.
+        f_div_C = F.softmax(f_WTA.squeeze_(), dim=-1)  # 2*1936*1936;
+
+        # downsample the reference color
+        B_lab = F.avg_pool2d(B_lab_map, 4)
+        B_lab = B_lab.view(batch_size, channel, -1)
+        B_lab = B_lab.permute(0, 2, 1)  # 2*1936*channel
+
+        # multiply the corr map with color
+        y = torch.matmul(f_div_C, B_lab)  # 2*1936*channel
+        y = y.permute(0, 2, 1).contiguous()
+        y = y.view(batch_size, channel, feature_height, feature_width)  # 2*3*44*44
+        y = self.upsampling(y)
+        similarity_map = self.upsampling(similarity_map)
+
+        return y, similarity_map
+
+
+class WarpNet_new(nn.Module):
+    """Variant of the warp network for four feature levels of equal width.
+
+    All four branches share one architecture: an 8x upsampling, a conv from
+    ``d_model`` to ``d_model / 2`` channels and a stride-2 conv to
+    ``feature_channel`` channels, i.e. a net 4x spatial upsampling of each
+    input level.  The correlation and warping stages are the same as in the
+    forward pass below.
+
+    NOTE(review): the original one-line docstring read "input is Al, Bl,
+    channel = 1, range~[0,255]", which does not match the arguments that
+    ``forward`` actually takes -- confirm and drop if stale.
+
+    Args:
+        d_model: channel count of every input feature level.
+    """
+
+    def __init__(self, d_model=768):
+        super(WarpNet_new, self).__init__()
+        self.feature_channel = 64
+        self.in_channels = self.feature_channel * 4
+        self.inter_channels = 256
+        self.d_model = d_model
+        # Branch for the first feature level.
+        self.layer2_1 = nn.Sequential(
+            nn.Upsample(scale_factor=8),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(d_model, int(d_model / 2), kernel_size=3, padding=0, stride=1),
+            nn.InstanceNorm2d(int(d_model / 2)),
+            nn.PReLU(),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(int(d_model / 2), self.feature_channel, kernel_size=3, padding=0, stride=2),
+            nn.InstanceNorm2d(self.feature_channel),
+            nn.PReLU(),
+            nn.Dropout(0.2),
+        )
+        # Branch for the second feature level (same architecture).
+        self.layer3_1 = nn.Sequential(
+            nn.Upsample(scale_factor=8),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(d_model, int(d_model / 2), kernel_size=3, padding=0, stride=1),
+            nn.InstanceNorm2d(int(d_model / 2)),
+            nn.PReLU(),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(int(d_model / 2), self.feature_channel, kernel_size=3, padding=0, stride=2),
+            nn.InstanceNorm2d(self.feature_channel),
+            nn.PReLU(),
+            nn.Dropout(0.2),
+        )
+
+        # Branch for the third feature level (same architecture; the former
+        # "22*22->44*44" size note did not match this layer stack).
+        self.layer4_1 = nn.Sequential(
+            nn.Upsample(scale_factor=8),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(d_model, int(d_model / 2), kernel_size=3, padding=0, stride=1),
+            nn.InstanceNorm2d(int(d_model / 2)),
+            nn.PReLU(),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(int(d_model / 2), self.feature_channel, kernel_size=3, padding=0, stride=2),
+            nn.InstanceNorm2d(self.feature_channel),
+            nn.PReLU(),
+            nn.Dropout(0.2),
+        )
+
+        # Branch for the fourth feature level (same architecture; the former
+        # "11*11->44*44" size note did not match this layer stack).
+        self.layer5_1 = nn.Sequential(
+            nn.Upsample(scale_factor=8),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(d_model, int(d_model / 2), kernel_size=3, padding=0, stride=1),
+            nn.InstanceNorm2d(int(d_model / 2)),
+            nn.PReLU(),
+            nn.ReflectionPad2d(1),
+            nn.Conv2d(int(d_model / 2), self.feature_channel, kernel_size=3, padding=0, stride=2),
+            nn.InstanceNorm2d(self.feature_channel),
+            nn.PReLU(),
+            nn.Dropout(0.2),
+        )
+
+        # Refinement of the concatenated four-level feature stack.
+        self.layer = nn.Sequential(
+            ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1),
+            ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1),
+            ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1),
+        )
+
+        # 1x1 projections: theta embeds the target (A), phi the reference (B).
+        self.theta = nn.Conv2d(
+            in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1, stride=1, padding=0
+        )
+        self.phi = nn.Conv2d(in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1, stride=1, padding=0)
+
+        # Brings the quarter-resolution outputs back to image resolution.
+        self.upsampling = nn.Upsample(scale_factor=4)
+
+    def forward(
+        self,
+        B_lab_map,
+        A_relu2_1,
+        A_relu3_1,
+        A_relu4_1,
+        A_relu5_1,
+        B_relu2_1,
+        B_relu3_1,
+        B_relu4_1,
+        B_relu5_1,
+        temperature=0.001 * 5,
+        detach_flag=False,
+        WTA_scale_weight=1,
+    ):
+        """Warp ``B_lab_map`` towards the target described by the A features.
+
+        Args:
+            B_lab_map: reference image, shape (batch, channel, H, W).  The
+                correspondence grid is assumed to be (H/4, W/4).
+            A_relu2_1, A_relu3_1, A_relu4_1, A_relu5_1: target feature levels
+                fed to ``layer2_1`` ... ``layer5_1`` respectively.
+            B_relu2_1, B_relu3_1, B_relu4_1, B_relu5_1: reference feature
+                levels, processed by the same layers.
+            temperature: divisor applied to the correlation before softmax.
+            detach_flag: if True, the correlation matrix is detached from the
+                autograd graph.
+            WTA_scale_weight: when not equal to 1, ``WTA_scale`` is applied to
+                the correlation with this factor.
+
+        Returns:
+            Tuple ``(y, similarity_map)``: the warped reference and the
+            per-position maximum correlation, both upsampled by 4.
+        """
+        batch_size = B_lab_map.shape[0]
+        channel = B_lab_map.shape[1]
+        image_height = B_lab_map.shape[2]
+        image_width = B_lab_map.shape[3]
+        feature_height = int(image_height / 4)
+        feature_width = int(image_width / 4)
+
+        A_feature2_1 = self.layer2_1(A_relu2_1)
+        B_feature2_1 = self.layer2_1(B_relu2_1)
+        A_feature3_1 = self.layer3_1(A_relu3_1)
+        B_feature3_1 = self.layer3_1(B_relu3_1)
+        A_feature4_1 = self.layer4_1(A_relu4_1)
+        B_feature4_1 = self.layer4_1(B_relu4_1)
+        A_feature5_1 = self.layer5_1(A_relu5_1)
+        B_feature5_1 = self.layer5_1(B_relu5_1)
+
+        # If the last level came out with a different size, add one
+        # replicated row at the top and at the bottom (height only).
+        if A_feature5_1.shape[2] != A_feature2_1.shape[2] or A_feature5_1.shape[3] != A_feature2_1.shape[3]:
+            A_feature5_1 = F.pad(A_feature5_1, (0, 0, 1, 1), "replicate")
+            B_feature5_1 = F.pad(B_feature5_1, (0, 0, 1, 1), "replicate")
+
+        A_features = self.layer(torch.cat((A_feature2_1, A_feature3_1, A_feature4_1, A_feature5_1), 1))
+        B_features = self.layer(torch.cat((B_feature2_1, B_feature3_1, B_feature4_1, B_feature5_1), 1))
+
+        # pairwise cosine similarity
+        # Features are centred over spatial positions (dim=-1) and then
+        # L2-normalised over channels (dim=1).
+        theta = self.theta(A_features).view(batch_size, self.inter_channels, -1)  # 2*256*(feature_height*feature_width)
+        theta = theta - theta.mean(dim=-1, keepdim=True)  # center the feature
+        theta_norm = torch.norm(theta, 2, 1, keepdim=True) + sys.float_info.epsilon
+        theta = torch.div(theta, theta_norm)
+        theta_permute = theta.permute(0, 2, 1)  # 2*(feature_height*feature_width)*256
+        phi = self.phi(B_features).view(batch_size, self.inter_channels, -1)  # 2*256*(feature_height*feature_width)
+        phi = phi - phi.mean(dim=-1, keepdim=True)  # center the feature
+        phi_norm = torch.norm(phi, 2, 1, keepdim=True) + sys.float_info.epsilon
+        phi = torch.div(phi, phi_norm)
+        f = torch.matmul(theta_permute, phi)  # 2*(feature_height*feature_width)*(feature_height*feature_width)
+        if detach_flag:
+            f = f.detach()
+
+        # unsqueeze_ is in-place: from here on ``f`` itself is 4-D.
+        f_similarity = f.unsqueeze_(dim=1)
+        # Best match score of every target position over all reference positions.
+        similarity_map = torch.max(f_similarity, -1, keepdim=True)[0]
+        similarity_map = similarity_map.view(batch_size, 1, feature_height, feature_width)
+
+        # f can be negative
+        f_WTA = f if WTA_scale_weight == 1 else WTA_scale.apply(f, WTA_scale_weight)
+        f_WTA = f_WTA / temperature
+        # squeeze_() removes every size-1 dimension in place, which includes
+        # the batch dimension when batch_size == 1.
+        f_div_C = F.softmax(f_WTA.squeeze_(), dim=-1)  # 2*1936*1936;
+
+        # downsample the reference color
+        B_lab = F.avg_pool2d(B_lab_map, 4)
+        B_lab = B_lab.view(batch_size, channel, -1)
+        B_lab = B_lab.permute(0, 2, 1)  # 2*1936*channel
+
+        # multiply the corr map with color
+        y = torch.matmul(f_div_C, B_lab)  # 2*1936*channel
+        y = y.permute(0, 2, 1).contiguous()
+        y = y.view(batch_size, channel, feature_height, feature_width)  # 2*3*44*44
+        y = self.upsampling(y)
+        similarity_map = self.upsampling(similarity_map)
+
+        return y, similarity_map
+
+
+class GeneralWarpNet(nn.Module):
+ """input is Al, Bl, channel = 1, range~[0,255]"""
+
+ def __init__(self, feature_channel=128):
+ super(GeneralWarpNet, self).__init__()
+ self.feature_channel = feature_channel
+ self.in_channels = self.feature_channel * 4
+ self.inter_channels = 256
+ # 44*44
+ self.layer2_1 = nn.Sequential(
+ nn.ReflectionPad2d(1),
+ # nn.Conv2d(128, 128, kernel_size=3, padding=0, stride=1),
+ # nn.Conv2d(96, 128, kernel_size=3, padding=20, stride=1),
+ nn.Conv2d(96, 128, kernel_size=3, padding=0, stride=1),
+ nn.InstanceNorm2d(128),
+ nn.PReLU(),
+ nn.ReflectionPad2d(1),
+ nn.Conv2d(128, self.feature_channel, kernel_size=3, padding=0, stride=2),
+ nn.InstanceNorm2d(self.feature_channel),
+ nn.PReLU(),
+ nn.Dropout(0.2),
+ )
+ self.layer3_1 = nn.Sequential(
+ nn.ReflectionPad2d(1),
+ # nn.Conv2d(256, 128, kernel_size=3, padding=0, stride=1),
+ # nn.Conv2d(192, 128, kernel_size=3, padding=10, stride=1),
+ nn.Conv2d(192, 128, kernel_size=3, padding=0, stride=1),
+ nn.InstanceNorm2d(128),
+ nn.PReLU(),
+ nn.ReflectionPad2d(1),
+ nn.Conv2d(128, self.feature_channel, kernel_size=3, padding=0, stride=1),
+ nn.InstanceNorm2d(self.feature_channel),
+ nn.PReLU(),
+ nn.Dropout(0.2),
+ )
+
+ # 22*22->44*44
+ self.layer4_1 = nn.Sequential(
+ nn.ReflectionPad2d(1),
+ # nn.Conv2d(512, 256, kernel_size=3, padding=0, stride=1),
+ # nn.Conv2d(384, 256, kernel_size=3, padding=5, stride=1),
+ nn.Conv2d(384, 256, kernel_size=3, padding=0, stride=1),
+ nn.InstanceNorm2d(256),
+ nn.PReLU(),
+ nn.ReflectionPad2d(1),
+ nn.Conv2d(256, self.feature_channel, kernel_size=3, padding=0, stride=1),
+ nn.InstanceNorm2d(self.feature_channel),
+ nn.PReLU(),
+ nn.Upsample(scale_factor=2),
+ nn.Dropout(0.2),
+ )
+
+ # 11*11->44*44
+ self.layer5_1 = nn.Sequential(
+ nn.ReflectionPad2d(1),
+ # nn.Conv2d(1024, 256, kernel_size=3, padding=0, stride=1),
+ # nn.Conv2d(768, 256, kernel_size=2, padding=2, stride=1),
+ nn.Conv2d(768, 256, kernel_size=3, padding=0, stride=1),
+ nn.InstanceNorm2d(256),
+ nn.PReLU(),
+ nn.Upsample(scale_factor=2),
+ nn.ReflectionPad2d(1),
+ nn.Conv2d(256, self.feature_channel, kernel_size=3, padding=0, stride=1),
+ nn.InstanceNorm2d(self.feature_channel),
+ nn.PReLU(),
+ nn.Upsample(scale_factor=2),
+ nn.Dropout(0.2),
+ )
+
+ self.layer = nn.Sequential(
+ ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1),
+ ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1),
+ ResidualBlock(self.feature_channel * 4, self.feature_channel * 4, kernel_size=3, padding=1, stride=1),
+ )
+
+ self.theta = nn.Conv2d(
+ in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1, stride=1, padding=0
+ )
+ self.phi = nn.Conv2d(in_channels=self.in_channels, out_channels=self.inter_channels, kernel_size=1, stride=1, padding=0)
+
+ self.upsampling = nn.Upsample(scale_factor=4)
+
+ def forward(
+ self,
+ B_lab_map,
+ A_relu2_1,
+ A_relu3_1,
+ A_relu4_1,
+ A_relu5_1,
+ B_relu2_1,
+ B_relu3_1,
+ B_relu4_1,
+ B_relu5_1,
+ temperature=0.001 * 5,
+ detach_flag=False,
+ WTA_scale_weight=1,
+ ):
+ batch_size = B_lab_map.shape[0]
+ channel = B_lab_map.shape[1]
+ image_height = B_lab_map.shape[2]
+ image_width = B_lab_map.shape[3]
+ feature_height = int(image_height / 4)
+ feature_width = int(image_width / 4)
+
+ # scale feature size to 44*44
+ A_feature2_1 = self.layer2_1(A_relu2_1)
+ B_feature2_1 = self.layer2_1(B_relu2_1)
+ A_feature3_1 = self.layer3_1(A_relu3_1)
+ B_feature3_1 = self.layer3_1(B_relu3_1)
+ A_feature4_1 = self.layer4_1(A_relu4_1)
+ B_feature4_1 = self.layer4_1(B_relu4_1)
+ A_feature5_1 = self.layer5_1(A_relu5_1)
+ B_feature5_1 = self.layer5_1(B_relu5_1)
+
+ # concatenate features
+ if A_feature5_1.shape[2] != A_feature2_1.shape[2] or A_feature5_1.shape[3] != A_feature2_1.shape[3]:
+ A_feature5_1 = F.pad(A_feature5_1, (0, 0, 1, 1), "replicate")
+ B_feature5_1 = F.pad(B_feature5_1, (0, 0, 1, 1), "replicate")
+
+ A_features = self.layer(torch.cat((A_feature2_1, A_feature3_1, A_feature4_1, A_feature5_1), 1))
+ B_features = self.layer(torch.cat((B_feature2_1, B_feature3_1, B_feature4_1, B_feature5_1), 1))
+
+ # pairwise cosine similarity
+ theta = self.theta(A_features).view(batch_size, self.inter_channels, -1) # 2*256*(feature_height*feature_width)
+ theta = theta - theta.mean(dim=-1, keepdim=True) # center the feature
+ theta_norm = torch.norm(theta, 2, 1, keepdim=True) + sys.float_info.epsilon
+ theta = torch.div(theta, theta_norm)
+ theta_permute = theta.permute(0, 2, 1) # 2*(feature_height*feature_width)*256
+ phi = self.phi(B_features).view(batch_size, self.inter_channels, -1) # 2*256*(feature_height*feature_width)
+ phi = phi - phi.mean(dim=-1, keepdim=True) # center the feature
+ phi_norm = torch.norm(phi, 2, 1, keepdim=True) + sys.float_info.epsilon
+ phi = torch.div(phi, phi_norm)
+ f = torch.matmul(theta_permute, phi) # 2*(feature_height*feature_width)*(feature_height*feature_width)
+ if detach_flag:
+ f = f.detach()
+
+ f_similarity = f.unsqueeze_(dim=1)
+ similarity_map = torch.max(f_similarity, -1, keepdim=True)[0]
+ similarity_map = similarity_map.view(batch_size, 1, feature_height, feature_width)
+
+ # f can be negative
+ f_WTA = f if WTA_scale_weight == 1 else WTA_scale.apply(f, WTA_scale_weight)
+ f_WTA = f_WTA / temperature
+ f_div_C = F.softmax(f_WTA.squeeze_(), dim=-1) # 2*1936*1936;
+
+ # downsample the reference color
+ B_lab = F.avg_pool2d(B_lab_map, 4)
+ B_lab = B_lab.view(batch_size, channel, -1)
+ B_lab = B_lab.permute(0, 2, 1) # 2*1936*channel
+
+ # multiply the corr map with color
+ y = torch.matmul(f_div_C, B_lab) # 2*1936*channel
+ y = y.permute(0, 2, 1).contiguous()
+ y = y.view(batch_size, channel, feature_height, feature_width) # 2*3*44*44
+ y = self.upsampling(y)
+ similarity_map = self.upsampling(similarity_map)
+
+ return y, similarity_map
diff --git a/src/models/CNN/__init__.py b/src/models/CNN/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/models/CNN/__pycache__/ColorVidNet.cpython-310.pyc b/src/models/CNN/__pycache__/ColorVidNet.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..12d9ce50eeeebb42ba0366cc16febca38d30cf55
Binary files /dev/null and b/src/models/CNN/__pycache__/ColorVidNet.cpython-310.pyc differ
diff --git a/src/models/CNN/__pycache__/FrameColor.cpython-310.pyc b/src/models/CNN/__pycache__/FrameColor.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8241f80ae48ee657266126e44cc66ae4f599528b
Binary files /dev/null and b/src/models/CNN/__pycache__/FrameColor.cpython-310.pyc differ
diff --git a/src/models/CNN/__pycache__/NonlocalNet.cpython-310.pyc b/src/models/CNN/__pycache__/NonlocalNet.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..aa5b36cbcac4559c8a4ddeb9e9934472a01cc702
Binary files /dev/null and b/src/models/CNN/__pycache__/NonlocalNet.cpython-310.pyc differ
diff --git a/src/models/CNN/__pycache__/__init__.cpython-310.pyc b/src/models/CNN/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..451c134a22f844fbb25e3eeee2ef919f7ce010b5
Binary files /dev/null and b/src/models/CNN/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/__init__.py b/src/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/models/__pycache__/__init__.cpython-310.pyc b/src/models/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3428795b1984f04bb552685d67c59ceeaf8b077f
Binary files /dev/null and b/src/models/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/vit/__init__.py b/src/models/vit/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/src/models/vit/__pycache__/__init__.cpython-310.pyc b/src/models/vit/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..82b70e0d92e32dfcd56a027b8b7d42c667c2205b
Binary files /dev/null and b/src/models/vit/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/vit/__pycache__/blocks.cpython-310.pyc b/src/models/vit/__pycache__/blocks.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f00084f6c09b78f4c0f9a9d4dc762d8ab447633f
Binary files /dev/null and b/src/models/vit/__pycache__/blocks.cpython-310.pyc differ
diff --git a/src/models/vit/__pycache__/config.cpython-310.pyc b/src/models/vit/__pycache__/config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..45d58642b7dfbc4407df93b397c546e6eb2370d9
Binary files /dev/null and b/src/models/vit/__pycache__/config.cpython-310.pyc differ
diff --git a/src/models/vit/__pycache__/decoder.cpython-310.pyc b/src/models/vit/__pycache__/decoder.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..85da7a210bff670c757f1f4185fdade2a80afcd6
Binary files /dev/null and b/src/models/vit/__pycache__/decoder.cpython-310.pyc differ
diff --git a/src/models/vit/__pycache__/embed.cpython-310.pyc b/src/models/vit/__pycache__/embed.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..12b9fe151e2afa0135f555caba4596222623a70f
Binary files /dev/null and b/src/models/vit/__pycache__/embed.cpython-310.pyc differ
diff --git a/src/models/vit/__pycache__/factory.cpython-310.pyc b/src/models/vit/__pycache__/factory.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2d7f5e297a19c13c3f05f226227984e538f8873a
Binary files /dev/null and b/src/models/vit/__pycache__/factory.cpython-310.pyc differ
diff --git a/src/models/vit/__pycache__/utils.cpython-310.pyc b/src/models/vit/__pycache__/utils.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f441581eda885fc62fc1faa2156792bb94021069
Binary files /dev/null and b/src/models/vit/__pycache__/utils.cpython-310.pyc differ
diff --git a/src/models/vit/__pycache__/vit.cpython-310.pyc b/src/models/vit/__pycache__/vit.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..99ddfe09f7e968b42b615a5d2f1ce047149264c9
Binary files /dev/null and b/src/models/vit/__pycache__/vit.cpython-310.pyc differ
diff --git a/src/models/vit/blocks.py b/src/models/vit/blocks.py
new file mode 100644
index 0000000000000000000000000000000000000000..305114e2274b360dd4fce3eaa08f13922563fbff
--- /dev/null
+++ b/src/models/vit/blocks.py
@@ -0,0 +1,80 @@
+import torch.nn as nn
+from timm.models.layers import DropPath
+
+
+class FeedForward(nn.Module):
+ def __init__(self, dim, hidden_dim, dropout, out_dim=None):
+ super().__init__()
+ self.fc1 = nn.Linear(dim, hidden_dim)
+ self.act = nn.GELU()
+ if out_dim is None:
+ out_dim = dim
+ self.fc2 = nn.Linear(hidden_dim, out_dim)
+ self.drop = nn.Dropout(dropout)
+
+ @property
+ def unwrapped(self):
+ return self
+
+ def forward(self, x):
+ x = self.fc1(x)
+ x = self.act(x)
+ x = self.drop(x)
+ x = self.fc2(x)
+ x = self.drop(x)
+ return x
+
+
+class Attention(nn.Module):
+ def __init__(self, dim, heads, dropout):
+ super().__init__()
+ self.heads = heads
+ head_dim = dim // heads
+ self.scale = head_dim**-0.5
+ self.attn = None
+
+ self.qkv = nn.Linear(dim, dim * 3)
+ self.attn_drop = nn.Dropout(dropout)
+ self.proj = nn.Linear(dim, dim)
+ self.proj_drop = nn.Dropout(dropout)
+
+ @property
+ def unwrapped(self):
+ return self
+
+ def forward(self, x, mask=None):
+ B, N, C = x.shape
+ qkv = self.qkv(x).reshape(B, N, 3, self.heads, C // self.heads).permute(2, 0, 3, 1, 4)
+ q, k, v = (
+ qkv[0],
+ qkv[1],
+ qkv[2],
+ )
+
+ attn = (q @ k.transpose(-2, -1)) * self.scale
+ attn = attn.softmax(dim=-1)
+ attn = self.attn_drop(attn)
+
+ x = (attn @ v).transpose(1, 2).reshape(B, N, C)
+ x = self.proj(x)
+ x = self.proj_drop(x)
+
+ return x, attn
+
+
+class Block(nn.Module):
+ def __init__(self, dim, heads, mlp_dim, dropout, drop_path):
+ super().__init__()
+ self.norm1 = nn.LayerNorm(dim)
+ self.norm2 = nn.LayerNorm(dim)
+ self.attn = Attention(dim, heads, dropout)
+ self.mlp = FeedForward(dim, mlp_dim, dropout)
+ self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
+
+ def forward(self, x, mask=None, return_attention=False):
+ y, attn = self.attn(self.norm1(x), mask)
+ if return_attention:
+ return attn
+ x = x + self.drop_path(y)
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
+ return x
diff --git a/src/models/vit/config.py b/src/models/vit/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..9728920e7962562cca44223633fdaaef4c682389
--- /dev/null
+++ b/src/models/vit/config.py
@@ -0,0 +1,22 @@
+import yaml
+from pathlib import Path
+
+import os
+
+
+def load_config():
+ return yaml.load(
+ open(Path(__file__).parent / "config.yml", "r"), Loader=yaml.FullLoader
+ )
+
+
+def check_os_environ(key, use):
+ if key not in os.environ:
+ raise ValueError(
+ f"{key} is not defined in the os variables, it is required for {use}."
+ )
+
+
+def dataset_dir():
+ check_os_environ("DATASET", "data loading")
+ return os.environ["DATASET"]
diff --git a/src/models/vit/config.yml b/src/models/vit/config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..acdc7f899b3cdd5cb46a62b33cb537e3e6ddb843
--- /dev/null
+++ b/src/models/vit/config.yml
@@ -0,0 +1,132 @@
+model:
+ # deit
+ deit_tiny_distilled_patch16_224:
+ image_size: 224
+ patch_size: 16
+ d_model: 192
+ n_heads: 3
+ n_layers: 12
+ normalization: deit
+ distilled: true
+ deit_small_distilled_patch16_224:
+ image_size: 224
+ patch_size: 16
+ d_model: 384
+ n_heads: 6
+ n_layers: 12
+ normalization: deit
+ distilled: true
+ deit_base_distilled_patch16_224:
+ image_size: 224
+ patch_size: 16
+ d_model: 768
+ n_heads: 12
+ n_layers: 12
+ normalization: deit
+ distilled: true
+ deit_base_distilled_patch16_384:
+ image_size: 384
+ patch_size: 16
+ d_model: 768
+ n_heads: 12
+ n_layers: 12
+ normalization: deit
+ distilled: true
+ # vit
+ vit_base_patch8_384:
+ image_size: 384
+ patch_size: 8
+ d_model: 768
+ n_heads: 12
+ n_layers: 12
+ normalization: vit
+ distilled: false
+ vit_tiny_patch16_384:
+ image_size: 384
+ patch_size: 16
+ d_model: 192
+ n_heads: 3
+ n_layers: 12
+ normalization: vit
+ distilled: false
+ vit_small_patch16_384:
+ image_size: 384
+ patch_size: 16
+ d_model: 384
+ n_heads: 6
+ n_layers: 12
+ normalization: vit
+ distilled: false
+ vit_base_patch16_384:
+ image_size: 384
+ patch_size: 16
+ d_model: 768
+ n_heads: 12
+ n_layers: 12
+ normalization: vit
+ distilled: false
+ vit_large_patch16_384:
+ image_size: 384
+ patch_size: 16
+ d_model: 1024
+ n_heads: 16
+ n_layers: 24
+ normalization: vit
+ vit_small_patch32_384:
+ image_size: 384
+ patch_size: 32
+ d_model: 384
+ n_heads: 6
+ n_layers: 12
+ normalization: vit
+ distilled: false
+ vit_base_patch32_384:
+ image_size: 384
+ patch_size: 32
+ d_model: 768
+ n_heads: 12
+ n_layers: 12
+ normalization: vit
+ vit_large_patch32_384:
+ image_size: 384
+ patch_size: 32
+ d_model: 1024
+ n_heads: 16
+ n_layers: 24
+ normalization: vit
+decoder:
+ linear: {}
+ deeplab_dec:
+ encoder_layer: -1
+ mask_transformer:
+ drop_path_rate: 0.0
+ dropout: 0.1
+ n_layers: 2
+dataset:
+ ade20k:
+ epochs: 64
+ eval_freq: 2
+ batch_size: 8
+ learning_rate: 0.001
+ im_size: 512
+ crop_size: 512
+ window_size: 512
+ window_stride: 512
+ pascal_context:
+ epochs: 256
+ eval_freq: 8
+ batch_size: 16
+ learning_rate: 0.001
+ im_size: 520
+ crop_size: 480
+ window_size: 480
+ window_stride: 320
+ cityscapes:
+ epochs: 216
+ eval_freq: 4
+ batch_size: 8
+ learning_rate: 0.01
+ im_size: 1024
+ crop_size: 768
+ window_size: 768
+ window_stride: 512
diff --git a/src/models/vit/decoder.py b/src/models/vit/decoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..acdb2f83660904423b97f9163bd81a4016dc8723
--- /dev/null
+++ b/src/models/vit/decoder.py
@@ -0,0 +1,34 @@
+import torch.nn as nn
+from einops import rearrange
+from src.models.vit.utils import init_weights
+
+
+class DecoderLinear(nn.Module):
+ def __init__(
+ self,
+ n_cls,
+ d_encoder,
+ scale_factor,
+ dropout_rate=0.3,
+ ):
+ super().__init__()
+ self.scale_factor = scale_factor
+ self.head = nn.Linear(d_encoder, n_cls)
+ self.upsampling = nn.Upsample(scale_factor=scale_factor**2, mode="linear")
+ self.norm = nn.LayerNorm((n_cls, 24 * scale_factor, 24 * scale_factor))
+ self.dropout = nn.Dropout(dropout_rate)
+ self.gelu = nn.GELU()
+ self.apply(init_weights)
+
+ def forward(self, x, img_size):
+ H, _ = img_size
+ x = self.head(x) ####### (2, 577, 64)
+ x = x.transpose(2, 1) ## (2, 64, 576)
+ x = self.upsampling(x) # (2, 64, 576*scale_factor*scale_factor)
+ x = x.transpose(2, 1) ## (2, 576*scale_factor*scale_factor, 64)
+ x = rearrange(x, "b (h w) c -> b c h w", h=H // (16 // self.scale_factor)) # (2, 64, 24*scale_factor, 24*scale_factor)
+ x = self.norm(x)
+ x = self.dropout(x)
+ x = self.gelu(x)
+
+ return x # (2, 64, a, a)
diff --git a/src/models/vit/embed.py b/src/models/vit/embed.py
new file mode 100644
index 0000000000000000000000000000000000000000..d04b4dce8c8406dc9e575ce88a431b5c6863ee4f
--- /dev/null
+++ b/src/models/vit/embed.py
@@ -0,0 +1,72 @@
+from torch import nn
+from typing import List
+from src.models.vit.factory import create_vit
+from src.models.vit.vit import FeatureTransform
+from ...utils import print_num_params
+from timm import create_model
+from einops import rearrange
+
+
+class EmbedModel(nn.Module):
+ def __init__(self, config, head_out_idx: List[int], n_dim_output=3, device="cuda") -> None:
+ super().__init__()
+ self.head_out_idx = head_out_idx
+ self.n_dim_output = n_dim_output
+ self.device = device
+ self.vit = create_vit(config).to(self.device)
+ self.vit.eval()
+ for params in self.vit.parameters():
+ params.requires_grad = False
+ print_num_params(self.vit)
+ print_num_params(self.vit, is_trainable=True)
+
+ if self.n_dim_output == 3:
+ self.feature_transformer = FeatureTransform(config["image_size"], config["d_model"]).to(self.device)
+ print_num_params(self.feature_transformer)
+ print_num_params(self.feature_transformer, is_trainable=True)
+
+ def forward(self, x):
+ vit_outputs = self.vit(x, self.head_out_idx, n_dim_output=self.n_dim_output, return_features=True)
+ feat0, feat1, feat2, feat3 = vit_outputs[0], vit_outputs[1], vit_outputs[2], vit_outputs[3]
+ if self.n_dim_output == 3:
+ feat0, feat1, feat2, feat3 = self.feature_transformer(vit_outputs)
+ return feat0, feat1, feat2, feat3
+
+
+class GeneralEmbedModel(nn.Module):
+ def __init__(self, pretrained_model="swin-tiny", device="cuda") -> None:
+ """
+ vit_tiny_patch16_224.augreg_in21k_ft_in1k
+ swinv2_cr_tiny_ns_224.sw_in1k
+ """
+ super().__init__()
+ self.device = device
+ self.pretrained_model = pretrained_model
+ if pretrained_model == "swin-tiny":
+ self.pretrained = create_model(
+ "swinv2_cr_tiny_ns_224.sw_in1k",
+ pretrained=True,
+ features_only=True,
+ out_indices=[-4, -3, -2, -1],
+ ).to(device)
+ elif pretrained_model == "swin-small":
+ self.pretrained = create_model(
+ "swinv2_cr_small_ns_224.sw_in1k",
+ pretrained=True,
+ features_only=True,
+ out_indices=[-4, -3, -2, -1],
+ ).to(device)
+ else:
+ raise NotImplementedError
+
+ self.pretrained.eval()
+ self.upsample = nn.Upsample(scale_factor=2)
+
+ for params in self.pretrained.parameters():
+ params.requires_grad = False
+
+ def forward(self, x):
+ outputs = self.pretrained(x)
+ outputs = [self.upsample(feat) for feat in outputs]
+
+ return outputs
diff --git a/src/models/vit/factory.py b/src/models/vit/factory.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab2cad05744bf6ed60ee6278a5b79b92321ac4c5
--- /dev/null
+++ b/src/models/vit/factory.py
@@ -0,0 +1,45 @@
+import os
+import torch
+from timm.models.vision_transformer import default_cfgs
+from timm.models.helpers import load_pretrained, load_custom_pretrained
+from src.models.vit.utils import checkpoint_filter_fn
+from src.models.vit.vit import VisionTransformer
+
+
+def create_vit(model_cfg):
+ model_cfg = model_cfg.copy()
+ backbone = model_cfg.pop("backbone")
+
+ model_cfg.pop("normalization")
+ model_cfg["n_cls"] = 1000
+ mlp_expansion_ratio = 4
+ model_cfg["d_ff"] = mlp_expansion_ratio * model_cfg["d_model"]
+
+ if backbone in default_cfgs:
+ default_cfg = default_cfgs[backbone]
+ else:
+ default_cfg = dict(
+ pretrained=False,
+ num_classes=1000,
+ drop_rate=0.0,
+ drop_path_rate=0.0,
+ drop_block_rate=None,
+ )
+
+ default_cfg["input_size"] = (
+ 3,
+ model_cfg["image_size"][0],
+ model_cfg["image_size"][1],
+ )
+ model = VisionTransformer(**model_cfg)
+ if backbone == "vit_base_patch8_384":
+ path = os.path.expandvars("$TORCH_HOME/hub/checkpoints/vit_base_patch8_384.pth")
+ state_dict = torch.load(path, map_location="cpu")
+ filtered_dict = checkpoint_filter_fn(state_dict, model)
+ model.load_state_dict(filtered_dict, strict=True)
+ elif "deit" in backbone:
+ load_pretrained(model, default_cfg, filter_fn=checkpoint_filter_fn)
+ else:
+ load_custom_pretrained(model, default_cfg)
+
+ return model
diff --git a/src/models/vit/utils.py b/src/models/vit/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..eabc9de77e12d9ed2d1907ca3cedb6424fb16c03
--- /dev/null
+++ b/src/models/vit/utils.py
@@ -0,0 +1,58 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from timm.models.layers import trunc_normal_
+
+
+def resize_pos_embed(posemb, grid_old_shape, grid_new_shape, num_extra_tokens):
+ # Rescale the grid of position embeddings when loading from state_dict. Adapted from
+ # https://github.com/google-research/vision_transformer/blob/00883dd691c63a6830751563748663526e811cee/vit_jax/checkpoint.py#L224
+ posemb_tok, posemb_grid = (
+ posemb[:, :num_extra_tokens],
+ posemb[0, num_extra_tokens:],
+ )
+ if grid_old_shape is None:
+ gs_old_h = int(math.sqrt(len(posemb_grid)))
+ gs_old_w = gs_old_h
+ else:
+ gs_old_h, gs_old_w = grid_old_shape
+
+ gs_h, gs_w = grid_new_shape
+ posemb_grid = posemb_grid.reshape(1, gs_old_h, gs_old_w, -1).permute(0, 3, 1, 2)
+ posemb_grid = F.interpolate(posemb_grid, size=(gs_h, gs_w), mode="bilinear")
+ posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, gs_h * gs_w, -1)
+ posemb = torch.cat([posemb_tok, posemb_grid], dim=1)
+ return posemb
+
+
+def init_weights(m):
+ if isinstance(m, nn.Linear):
+ trunc_normal_(m.weight, std=0.02)
+ if isinstance(m, nn.Linear) and m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.LayerNorm):
+ nn.init.constant_(m.bias, 0)
+ nn.init.constant_(m.weight, 1.0)
+
+
+def checkpoint_filter_fn(state_dict, model):
+ """convert patch embedding weight from manual patchify + linear proj to conv"""
+ out_dict = {}
+ if "model" in state_dict:
+ # For deit models
+ state_dict = state_dict["model"]
+ num_extra_tokens = 1 + ("dist_token" in state_dict.keys())
+ patch_size = model.patch_size
+ image_size = model.patch_embed.image_size
+ for k, v in state_dict.items():
+ if k == "pos_embed" and v.shape != model.pos_embed.shape:
+ # To resize pos embedding when using model at different size from pretrained weights
+ v = resize_pos_embed(
+ v,
+ None,
+ (image_size[0] // patch_size, image_size[1] // patch_size),
+ num_extra_tokens,
+ )
+ out_dict[k] = v
+ return out_dict
diff --git a/src/models/vit/vit.py b/src/models/vit/vit.py
new file mode 100644
index 0000000000000000000000000000000000000000..72f8a24daf623c8389e359ac5c0e3257817e4b9c
--- /dev/null
+++ b/src/models/vit/vit.py
@@ -0,0 +1,202 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from timm.models.vision_transformer import _load_weights
+from timm.models.layers import trunc_normal_
+from typing import List
+
+# from utils import init_weights, resize_pos_embed
+# from blocks import Block
+
+from src.models.vit.utils import init_weights, resize_pos_embed
+from src.models.vit.blocks import Block
+from src.models.vit.decoder import DecoderLinear
+
+
+class PatchEmbedding(nn.Module):
+ def __init__(self, image_size, patch_size, embed_dim, channels):
+ super().__init__()
+
+ self.image_size = image_size
+ if image_size[0] % patch_size != 0 or image_size[1] % patch_size != 0:
+ raise ValueError("image dimensions must be divisible by the patch size")
+ self.grid_size = image_size[0] // patch_size, image_size[1] // patch_size
+ self.num_patches = self.grid_size[0] * self.grid_size[1]
+ self.patch_size = patch_size
+
+ self.proj = nn.Conv2d(channels, embed_dim, kernel_size=patch_size, stride=patch_size)
+
+ def forward(self, im):
+ B, C, H, W = im.shape
+ x = self.proj(im).flatten(2).transpose(1, 2)
+ return x
+
+
+class VisionTransformer(nn.Module):
+ def __init__(
+ self,
+ image_size,
+ patch_size,
+ n_layers,
+ d_model,
+ d_ff,
+ n_heads,
+ n_cls,
+ dropout=0.1,
+ drop_path_rate=0.0,
+ distilled=False,
+ channels=3,
+ ):
+ super().__init__()
+ self.patch_embed = PatchEmbedding(
+ image_size,
+ patch_size,
+ d_model,
+ channels,
+ )
+ self.patch_size = patch_size
+ self.n_layers = n_layers
+ self.d_model = d_model
+ self.d_ff = d_ff
+ self.n_heads = n_heads
+ self.dropout = nn.Dropout(dropout)
+ self.n_cls = n_cls
+
+ # cls and pos tokens
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, d_model))
+ self.distilled = distilled
+ if self.distilled:
+ self.dist_token = nn.Parameter(torch.zeros(1, 1, d_model))
+ self.pos_embed = nn.Parameter(torch.randn(1, self.patch_embed.num_patches + 2, d_model))
+ self.head_dist = nn.Linear(d_model, n_cls)
+ else:
+ self.pos_embed = nn.Parameter(torch.randn(1, self.patch_embed.num_patches + 1, d_model))
+
+ # transformer blocks
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, n_layers)]
+ self.blocks = nn.ModuleList([Block(d_model, n_heads, d_ff, dropout, dpr[i]) for i in range(n_layers)])
+
+ # output head
+ self.norm = nn.LayerNorm(d_model)
+ self.head = nn.Linear(d_model, n_cls)
+
+ trunc_normal_(self.pos_embed, std=0.02)
+ trunc_normal_(self.cls_token, std=0.02)
+ if self.distilled:
+ trunc_normal_(self.dist_token, std=0.02)
+ self.pre_logits = nn.Identity()
+
+ self.apply(init_weights)
+
+ @torch.jit.ignore
+ def no_weight_decay(self):
+ return {"pos_embed", "cls_token", "dist_token"}
+
+ @torch.jit.ignore()
+ def load_pretrained(self, checkpoint_path, prefix=""):
+ _load_weights(self, checkpoint_path, prefix)
+
+ def forward(self, im, head_out_idx: List[int], n_dim_output=3, return_features=False):
+ B, _, H, W = im.shape
+ PS = self.patch_size
+ assert n_dim_output == 3 or n_dim_output == 4, "n_dim_output must be 3 or 4"
+ x = self.patch_embed(im)
+ cls_tokens = self.cls_token.expand(B, -1, -1)
+ if self.distilled:
+ dist_tokens = self.dist_token.expand(B, -1, -1)
+ x = torch.cat((cls_tokens, dist_tokens, x), dim=1)
+ else:
+ x = torch.cat((cls_tokens, x), dim=1)
+
+ pos_embed = self.pos_embed
+ num_extra_tokens = 1 + self.distilled
+ if x.shape[1] != pos_embed.shape[1]:
+ pos_embed = resize_pos_embed(
+ pos_embed,
+ self.patch_embed.grid_size,
+ (H // PS, W // PS),
+ num_extra_tokens,
+ )
+ x = x + pos_embed
+ x = self.dropout(x)
+ device = x.device
+
+ if n_dim_output == 3:
+ heads_out = torch.zeros(size=(len(head_out_idx), B, (H // PS) ** 2 + 1, self.d_model)).to(device)
+ else:
+ heads_out = torch.zeros(size=(len(head_out_idx), B, self.d_model, H // PS, H // PS)).to(device)
+ self.register_buffer("heads_out", heads_out)
+
+ head_idx = 0
+ for idx_layer, blk in enumerate(self.blocks):
+ x = blk(x)
+ if idx_layer in head_out_idx:
+ if n_dim_output == 3:
+ heads_out[head_idx] = x
+ else:
+ heads_out[head_idx] = x[:, 1:, :].reshape((-1, 24, 24, self.d_model)).permute(0, 3, 1, 2)
+ head_idx += 1
+
+ x = self.norm(x)
+
+ if return_features:
+ return heads_out
+
+ if self.distilled:
+ x, x_dist = x[:, 0], x[:, 1]
+ x = self.head(x)
+ x_dist = self.head_dist(x_dist)
+ x = (x + x_dist) / 2
+ else:
+ x = x[:, 0]
+ x = self.head(x)
+ return x
+
+ def get_attention_map(self, im, layer_id):
+ if layer_id >= self.n_layers or layer_id < 0:
+ raise ValueError(f"Provided layer_id: {layer_id} is not valid. 0 <= {layer_id} < {self.n_layers}.")
+ B, _, H, W = im.shape
+ PS = self.patch_size
+
+ x = self.patch_embed(im)
+ cls_tokens = self.cls_token.expand(B, -1, -1)
+ if self.distilled:
+ dist_tokens = self.dist_token.expand(B, -1, -1)
+ x = torch.cat((cls_tokens, dist_tokens, x), dim=1)
+ else:
+ x = torch.cat((cls_tokens, x), dim=1)
+
+ pos_embed = self.pos_embed
+ num_extra_tokens = 1 + self.distilled
+ if x.shape[1] != pos_embed.shape[1]:
+ pos_embed = resize_pos_embed(
+ pos_embed,
+ self.patch_embed.grid_size,
+ (H // PS, W // PS),
+ num_extra_tokens,
+ )
+ x = x + pos_embed
+
+ for i, blk in enumerate(self.blocks):
+ if i < layer_id:
+ x = blk(x)
+ else:
+ return blk(x, return_attention=True)
+
+
+class FeatureTransform(nn.Module):
+ def __init__(self, img_size, d_encoder, nls_list=[128, 256, 512, 512], scale_factor_list=[8, 4, 2, 1]):
+ super(FeatureTransform, self).__init__()
+ self.img_size = img_size
+
+ self.decoder_0 = DecoderLinear(n_cls=nls_list[0], d_encoder=d_encoder, scale_factor=scale_factor_list[0])
+ self.decoder_1 = DecoderLinear(n_cls=nls_list[1], d_encoder=d_encoder, scale_factor=scale_factor_list[1])
+ self.decoder_2 = DecoderLinear(n_cls=nls_list[2], d_encoder=d_encoder, scale_factor=scale_factor_list[2])
+ self.decoder_3 = DecoderLinear(n_cls=nls_list[3], d_encoder=d_encoder, scale_factor=scale_factor_list[3])
+
+ def forward(self, x_list):
+ feat_3 = self.decoder_3(x_list[3][:, 1:, :], self.img_size) # (2, 512, 24, 24)
+ feat_2 = self.decoder_2(x_list[2][:, 1:, :], self.img_size) # (2, 512, 48, 48)
+ feat_1 = self.decoder_1(x_list[1][:, 1:, :], self.img_size) # (2, 256, 96, 96)
+ feat_0 = self.decoder_0(x_list[0][:, 1:, :], self.img_size) # (2, 128, 192, 192)
+ return feat_0, feat_1, feat_2, feat_3
\ No newline at end of file
diff --git a/src/scheduler.py b/src/scheduler.py
new file mode 100644
index 0000000000000000000000000000000000000000..87a9a1c5fcbf2df2a9263d49a5d3f5ba87ccb48d
--- /dev/null
+++ b/src/scheduler.py
@@ -0,0 +1,40 @@
+from torch.optim.lr_scheduler import _LRScheduler
+
+class PolynomialLR(_LRScheduler):
+ def __init__(
+ self,
+ optimizer,
+ step_size,
+ iter_warmup,
+ iter_max,
+ power,
+ min_lr=0,
+ last_epoch=-1,
+ ):
+ self.step_size = step_size
+ self.iter_warmup = int(iter_warmup)
+ self.iter_max = int(iter_max)
+ self.power = power
+ self.min_lr = min_lr
+ super(PolynomialLR, self).__init__(optimizer, last_epoch)
+
+ def polynomial_decay(self, lr):
+ iter_cur = float(self.last_epoch)
+ if iter_cur < self.iter_warmup:
+ coef = iter_cur / self.iter_warmup
+ coef *= (1 - self.iter_warmup / self.iter_max) ** self.power
+ else:
+ coef = (1 - iter_cur / self.iter_max) ** self.power
+ return (lr - self.min_lr) * coef + self.min_lr
+
+ def get_lr(self):
+ if (
+ (self.last_epoch == 0)
+ or (self.last_epoch % self.step_size != 0)
+ or (self.last_epoch > self.iter_max)
+ ):
+ return [group["lr"] for group in self.optimizer.param_groups]
+ return [self.polynomial_decay(lr) for lr in self.base_lrs]
+
+ def step_update(self, num_updates):
+ self.step()
\ No newline at end of file
diff --git a/src/utils.py b/src/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fbbd8d04d09ac623584005374238bd0edcb40d9
--- /dev/null
+++ b/src/utils.py
@@ -0,0 +1,806 @@
+import sys
+import time
+import numpy as np
+from PIL import Image
+from skimage import color
+from skimage.transform import resize
+import src.data.functional as F
+import torch
+from torch import nn
+import torch.nn.functional as F_torch
+import torchvision.transforms.functional as F_torchvision
+from numba import cuda, jit
+import math
+
# 3x3 matrix that tensor_lab2rgb right-multiplies onto rows of XYZ values
# (``xyz_rows @ rgb_from_xyz``) to obtain linear RGB.
rgb_from_xyz = np.array(
    (
        (3.24048134, -0.96925495, 0.05564664),
        (-1.53715152, 1.87599, -0.20404134),
        (-0.49853633, 0.04155593, 1.05731107),
    )
)

# Scale / offset constants for the L and ab channels; uncenter_l computes
# ``l * l_norm + l_mean``.
l_norm = 1.0
ab_norm = 1.0
l_mean = 50.0
ab_mean = 0
+
+
class SquaredPadding(object):
    """Resize an image so its longer side is ``target_size``, then pad it square.

    The shorter side is padded symmetrically with ``fill_value``; when the
    missing amount is odd the extra pixel goes to the right / bottom.
    """

    def __init__(self, target_size=384, fill_value=0):
        self.target_size = target_size
        self.fill_value = fill_value

    def __call__(self, img, return_pil=True, return_paddings=False):
        """Pad ``img`` to a square.

        Args:
            img: a ``torch.Tensor`` indexed as (C, H, W), or anything
                ``torchvision.transforms.functional.to_tensor`` accepts.
            return_pil: convert the result to a PIL image when true.
            return_paddings: also return the ``(left, right, top, bottom)``
                tuple that was handed to ``torch.nn.functional.pad``.
        """
        if type(img) is not torch.Tensor:
            img = F_torchvision.to_tensor(img)

        src_h, src_w = img.size(1), img.size(2)
        is_portrait = src_h > src_w
        if is_portrait:
            new_h, new_w = self.target_size, int(src_w / src_h * self.target_size)
        else:
            new_h, new_w = int(src_h / src_w * self.target_size), self.target_size

        img = F_torchvision.resize(img, (new_h, new_w))

        # Amount missing on the shorter side, split as evenly as possible.
        missing = new_h - new_w if is_portrait else new_w - new_h
        before = missing // 2
        after = before + missing % 2
        # F.pad orders the pairs from the last dimension backwards:
        # (left, right, top, bottom).
        paddings = (before, after, 0, 0) if is_portrait else (0, 0, before, after)
        padded_img = F_torch.pad(img, paddings, value=self.fill_value)

        if return_pil:
            padded_img = F_torchvision.to_pil_image(padded_img)

        return (padded_img, paddings) if return_paddings else padded_img
+
class UnpaddingSquare(object):
    """Crop away padding described by a ``(left, right, top, bottom)`` tuple."""

    def __init__(self):
        """Stateless; nothing to configure."""

    def __call__(self, img, paddings):
        """Return ``img`` (indexed as (C, H, W)) with ``paddings`` removed."""
        left, right, top, bottom = paddings
        padded_h, padded_w = img.size(1), img.size(2)
        return F_torchvision.crop(
            img,
            top=top,
            left=left,
            height=padded_h - top - bottom,
            width=padded_w - left - right,
        )
+
class ResizeFlow(object):
    """Bilinearly resample a flow field to ``target_size``.

    Only the sampling grid changes; the flow values themselves are not
    rescaled by this transform.
    """

    def __init__(self, target_size=(384, 384)):
        self.target_size = target_size

    def __call__(self, flow):
        """Resize an unbatched ``flow`` and return it unbatched again."""
        batched = flow.unsqueeze(0)  # interpolate expects a batch dimension
        resized = F_torch.interpolate(batched, self.target_size, mode="bilinear", align_corners=True)
        return resized.squeeze(0)
+
class SquaredPaddingFlow(object):
    """Pad the shorter spatial side of a flow field so it becomes square."""

    def __init__(self, fill_value=0):
        self.fill_value = fill_value

    def __call__(self, flow):
        """Return ``flow`` (indexed as (C, H, W)) padded with ``fill_value``."""
        rows, cols = flow.size(1), flow.size(2)
        gap = abs(rows - cols)
        lead = gap // 2
        trail = lead + gap % 2  # odd remainder goes to the right / bottom

        # F.pad order: (left, right, top, bottom).
        if rows > cols:
            paddings = (lead, trail, 0, 0)
        else:
            paddings = (0, 0, lead, trail)
        return F_torch.pad(flow, paddings, value=self.fill_value)
+
+
def gray2rgb_batch(l):
    """Turn a centered L-channel batch into a 3-channel gray image batch.

    The L channel is un-centered with ``uncenter_l``, divided by
    ``2 * l_mean`` and repeated three times along dim 1.
    """
    gray = uncenter_l(l) / (2 * l_mean)
    return torch.cat([gray, gray, gray], dim=1)
+
+
def vgg_preprocess(tensor):
    """Convert an RGB batch in [0, 1] to mean-subtracted BGR scaled by 255.

    Args:
        tensor: (N, 3, H, W) RGB tensor with values in [0, 1].

    Returns:
        (N, 3, H, W) BGR tensor: ``(bgr - mean) * 255``.
    """
    # Reverse the first three channels: RGB -> BGR.
    bgr = tensor[:, 0:3, :, :].flip(1)
    channel_mean = torch.Tensor([0.40760392, 0.45795686, 0.48501961]).type_as(bgr).view(1, 3, 1, 1)
    return (bgr - channel_mean) * 255
+
+
def tensor_lab2rgb(input):
    """Convert a batch of Lab images to RGB clamped to [0, 1].

    Args:
        input: tensor of shape n * 3 * h * w holding (L, a, b) channels.

    Returns:
        Tensor of shape n * 3 * h * w with RGB values in [0, 1].
    """
    lab = input.permute(0, 2, 3, 1)  # n * h * w * 3
    L, a, b = lab[..., 0:1], lab[..., 1:2], lab[..., 2:]

    # Lab -> intermediate f(X), f(Y), f(Z)
    y = (L + 16.0) / 116.0
    x = (a / 500.0) + y
    z = y - (b / 200.0)
    z[z.detach() < 0] = 0
    xyz = torch.cat((x, y, z), dim=3)

    # Invert f(): cubic above the threshold, linear below it.
    cubic = xyz.detach() > 0.2068966
    xyz_lin = xyz.clone()
    xyz_lin[cubic] = torch.pow(xyz[cubic], 3.0)
    xyz_lin[~cubic] = (xyz[~cubic] - 16.0 / 116.0) / 7.787
    # Scale X and Z by the white-point constants used by the original.
    xyz_lin[..., 0] = xyz_lin[..., 0] * 0.95047
    xyz_lin[..., 2] = xyz_lin[..., 2] * 1.08883

    # XYZ -> linear RGB, one row per pixel.
    to_rgb = torch.from_numpy(rgb_from_xyz).type_as(xyz)
    rgb_rows = torch.mm(xyz_lin.view(-1, 3), to_rgb)
    rgb = rgb_rows.view(input.size(0), input.size(2), input.size(3), 3).permute(0, 3, 1, 2)

    # Gamma-encode; masked writes keep the power away from non-positive values.
    encode = rgb > 0.0031308
    out = rgb.clone()
    out[encode] = 1.055 * torch.pow(rgb[encode], 1 / 2.4) - 0.055
    out[~encode] = rgb[~encode] * 12.92

    # Clamp to [0, 1].
    below = out.detach() < 0
    above = out.detach() > 1
    out[below] = 0
    out[above] = 1
    return out
+
+
+###### loss functions ######
def feature_normalize(feature_in):
    """L2-normalize ``feature_in`` along dim 1.

    ``sys.float_info.epsilon`` is added to the norm so an all-zero vector
    yields zeros instead of a division by zero.
    """
    norm = torch.norm(feature_in, 2, 1, keepdim=True) + sys.float_info.epsilon
    return feature_in / norm
+
+
+# denormalization for l
def uncenter_l(l):
    """Undo the L-channel normalization: ``l * l_norm + l_mean``."""
    rescaled = l * l_norm
    return rescaled + l_mean
+
+
def get_grid(x):
    """Build the identity sampling grid for a batch shaped like ``x``.

    Returns a tensor of shape (N, 2, H, W) taken from ``x.size(0)``,
    ``x.size(2)`` and ``x.size(3)``. Channel 0 runs from -1 to 1 along the
    width, channel 1 from -1 to 1 along the height. The grid is created by
    ``torch.linspace`` without device or dtype arguments.
    """
    batch, rows, cols = x.size(0), x.size(2), x.size(3)
    full_shape = (batch, 1, rows, cols)
    horizontal = torch.linspace(-1.0, 1.0, cols).view(1, 1, 1, cols).expand(*full_shape)
    vertical = torch.linspace(-1.0, 1.0, rows).view(1, 1, rows, 1).expand(*full_shape)
    return torch.cat([horizontal, vertical], 1)
+
+
class WarpingLayer(nn.Module):
    """Warp an image batch with a pixel-unit flow field via ``grid_sample``."""

    def __init__(self, device):
        super().__init__()
        self.device = device

    def forward(self, x, flow):
        """Sample ``x`` at the identity grid displaced by ``flow``.

        Args:
            x: the input image batch.
            flow: 4D tensor of shape (batch_size, 2, height, width); channel 0
                is the horizontal and channel 1 the vertical displacement, in
                pixels.

        Returns:
            The warped image.
        """
        # grid_sample works in normalized [-1, 1] coordinates, while the flow
        # is kept in pixels elsewhere. Dividing by (size - 1) / 2 converts
        # pixels to that normalized range (align_corners=True convention).
        half_w = (flow.size(3) - 1.0) / 2.0
        half_h = (flow.size(2) - 1.0) / 2.0

        normalized_flow = torch.zeros_like(flow).to(self.device)
        normalized_flow[:, 0, :, :] = flow[:, 0, :, :] / half_w
        normalized_flow[:, 1, :, :] = flow[:, 1, :, :] / half_h

        sampling_grid = get_grid(x).to(self.device) + normalized_flow
        sampling_grid = sampling_grid.permute(0, 2, 3, 1)  # (N, H, W, 2)
        return F_torch.grid_sample(x, sampling_grid, align_corners=True)
+
+
class CenterPad_threshold(object):
    """Resize / center-crop an image to ``image_size`` = (height, width).

    Images whose height/width ratio exceeds ``threshold`` are first cropped
    vertically around the center to exactly that ratio.
    """

    def __init__(self, image_size, threshold=3 / 4):
        self.height = image_size[0]
        self.width = image_size[1]
        self.threshold = threshold

    @staticmethod
    def _rescale(array, size):
        # Single place for the resize settings shared by every branch.
        return resize(array, size, mode="reflect", preserve_range=True, clip=False, anti_aliasing=True)

    @staticmethod
    def _as_pil(array):
        return Image.fromarray(array.astype(np.uint8))

    def __call__(self, image):
        """Return a PIL image of the target size built from ``image``."""
        pixels = np.array(image)
        src_h = np.size(pixels, 0)
        src_w = np.size(pixels, 1)
        dst_h, dst_w = self.height, self.width
        canvas = np.zeros((dst_h, dst_w, np.size(pixels, 2)))

        dst_ratio = dst_h / dst_w
        src_ratio = src_h / src_w

        # Same aspect ratio: return as-is or after a plain rescale.
        if src_ratio == dst_ratio:
            if src_h == dst_h:
                return self._as_pil(pixels)
            scaled_size = [int(side * dst_h / src_h) for side in (src_h, src_w)]
            return self._as_pil(self._rescale(pixels, scaled_size))

        # Too tall: keep a centered band of ratio `threshold`, then stretch
        # that band to the target size.
        if src_ratio > self.threshold:
            band_h = int(src_w * self.threshold)
            band_top = (src_h - band_h) // 2
            band = pixels[band_top : (band_top + band_h), :, :]
            return self._as_pil(self._rescale(band, [dst_h, dst_w]))

        if src_ratio > dst_ratio:
            # Fit the width, then crop the surplus rows around the center.
            scaled_size = [int(side * dst_w / src_w) for side in (src_h, src_w)]
            scaled = self._rescale(pixels, scaled_size)
            top = (np.size(scaled, 0) - dst_h) // 2
            canvas[:, :, :] = scaled[top : (top + dst_h), :, :]
        else:
            # Fit the height, then crop the surplus columns around the center.
            scaled_size = [int(side * dst_h / src_h) for side in (src_h, src_w)]
            scaled = self._rescale(pixels, scaled_size)
            left = (np.size(scaled, 1) - dst_w) // 2
            canvas[:, :, :] = scaled[:, left : (left + dst_w), :]

        return self._as_pil(canvas)
+
+
class Normalize(object):
    """Normalize a 3-channel Lab tensor in place and return it.

    Channel 0 is passed to ``F.normalize`` with mean 50 and std 1, channels
    1-2 with means (0, 0) and stds (1, 1). ``F`` is the project's
    ``src.data.functional`` module.
    """

    def __call__(self, inputs):
        luminance = inputs[0:1, :, :]
        inputs[0:1, :, :] = F.normalize(luminance, 50, 1)
        chroma = inputs[1:3, :, :]
        inputs[1:3, :, :] = F.normalize(chroma, (0, 0), (1, 1))
        return inputs
+
+
class RGB2Lab(object):
    """Callable transform wrapping ``skimage.color.rgb2lab``."""

    def __call__(self, inputs):
        lab = color.rgb2lab(inputs)
        return lab
+
+
class ToTensor(object):
    """Callable transform wrapping the project's ``F.to_mytensor`` helper."""

    def __call__(self, inputs):
        as_tensor = F.to_mytensor(inputs)
        return as_tensor
+
+
class CenterPad(object):
    """Resize / center-crop an image to ``image_size`` = (height, width).

    The image is scaled so that it covers the target size, and the surplus
    rows or columns are cropped symmetrically around the center.
    """

    def __init__(self, image_size):
        self.height = image_size[0]
        self.width = image_size[1]

    @staticmethod
    def _rescale(array, size):
        # Single place for the resize settings shared by every branch.
        return resize(array, size, mode="reflect", preserve_range=True, clip=False, anti_aliasing=True)

    @staticmethod
    def _as_pil(array):
        return Image.fromarray(array.astype(np.uint8))

    def __call__(self, image):
        """Return a PIL image of the target size built from ``image``."""
        pixels = np.array(image)
        src_h = np.size(pixels, 0)
        src_w = np.size(pixels, 1)
        dst_h, dst_w = self.height, self.width
        canvas = np.zeros((dst_h, dst_w, np.size(pixels, 2)))

        dst_ratio = dst_h / dst_w
        src_ratio = src_h / src_w

        # Same aspect ratio: return as-is or after a plain rescale.
        if src_ratio == dst_ratio:
            if src_h == dst_h:
                return self._as_pil(pixels)
            scaled_size = [int(side * dst_h / src_h) for side in (src_h, src_w)]
            return self._as_pil(self._rescale(pixels, scaled_size))

        if src_ratio > dst_ratio:
            # Fit the width, then crop the surplus rows around the center.
            scaled_size = [int(side * dst_w / src_w) for side in (src_h, src_w)]
            scaled = self._rescale(pixels, scaled_size)
            top = (np.size(scaled, 0) - dst_h) // 2
            canvas[:, :, :] = scaled[top : (top + dst_h), :, :]
        else:
            # Fit the height, then crop the surplus columns around the center.
            scaled_size = [int(side * dst_h / src_h) for side in (src_h, src_w)]
            scaled = self._rescale(pixels, scaled_size)
            left = (np.size(scaled, 1) - dst_w) // 2
            canvas[:, :, :] = scaled[:, left : (left + dst_w), :]

        return self._as_pil(canvas)
+
+
class CenterPadCrop_numpy(object):
    """Array version of the center resize/crop to (height, width).

    Works on 2-D (gray) and 3-D (multi-channel) numpy arrays and returns a
    numpy array rather than a PIL image. The intermediate canvas is a square
    of side ``width``.
    """

    def __init__(self, image_size):
        self.height = image_size[0]
        self.width = image_size[1]

    def __call__(self, image, threshold=3 / 4):
        """Resize and crop ``image``; ``threshold`` is accepted but unused."""
        pixels = np.array(image)
        src_h = np.size(pixels, 0)
        src_w = np.size(pixels, 1)
        dst_h, dst_w = self.height, self.width
        side = dst_w  # the working canvas is side x side

        if image.ndim == 2:
            canvas = np.zeros((side, side))
        else:
            canvas = np.zeros((side, side, pixels.shape[2]))

        dst_ratio = dst_h / dst_w
        src_ratio = src_h / src_w
        if src_ratio == dst_ratio:
            # Matching aspect ratio: the copy is returned without resizing.
            return pixels

        # Trailing axes are left implicit in the slices below, so the same
        # statements serve both gray and multi-channel inputs.
        if src_ratio > dst_ratio:
            # Fit the width, keep the central dst_h rows.
            scaled_size = [int(side_len * dst_w / src_w) for side_len in (src_h, src_w)]
            scaled = resize(pixels, scaled_size, mode="reflect", preserve_range=True, clip=False, anti_aliasing=True)
            src_top = (np.size(scaled, 0) - dst_h) // 2
            dst_top = (side - dst_h) // 2
            canvas[dst_top : (dst_top + dst_h), :] = scaled[src_top : (src_top + dst_h), :]
        else:
            # Fit the height, keep the central dst_w columns.
            scaled_size = [int(side_len * dst_h / src_h) for side_len in (src_h, src_w)]
            scaled = resize(pixels, scaled_size, mode="reflect", preserve_range=True, clip=False, anti_aliasing=True)
            src_left = (np.size(scaled, 1) - dst_w) // 2
            dst_left = (side - dst_w) // 2
            canvas[:, dst_left : (dst_left + dst_w)] = scaled[:, src_left : (src_left + dst_w)]

        # Cut the (height, width) window out of the square canvas.
        top = (canvas.shape[0] - dst_h) // 2
        left = (canvas.shape[1] - dst_w) // 2
        return canvas[top : (top + dst_h), left : (left + dst_w)]
+
+
@jit(nopython=True, nogil=True)
def biInterpolation_cpu(distorted, i, j):
    """Bilinearly sample the 2-D array ``distorted`` at column ``i``, row ``j``.

    The previous version truncated ``i`` and ``j`` to integers *before*
    computing the weights, so every weight collapsed to 0 or 1 and the
    function always returned the top-left neighbour. It also cast the result
    to ``np.int8``, which wraps values above 127. The fractional parts are
    now kept and the result is returned as ``np.uint8``.

    Coordinates are clamped so the 2x2 neighbourhood always lies inside
    ``distorted`` (which must be at least 2x2).

    Args:
        distorted: 2-D array, indexed ``[row, column]``.
        i: column coordinate (may be fractional).
        j: row coordinate (may be fractional).

    Returns:
        The interpolated value, rounded to the nearest integer, as uint8.
    """
    last_col = distorted.shape[1] - 1
    last_row = distorted.shape[0] - 1

    # Clamp into the valid range; nopython mode does no bounds checking.
    fi = min(max(float(i), 0.0), float(last_col))
    fj = min(max(float(j), 0.0), float(last_row))

    # Top-left corner of the cell; capped so that +1 stays in range.
    i0 = min(int(fi), last_col - 1)
    j0 = min(int(fj), last_row - 1)
    wx = fi - i0
    wy = fj - j0

    Q11 = distorted[j0, i0]
    Q12 = distorted[j0, i0 + 1]
    Q21 = distorted[j0 + 1, i0]
    Q22 = distorted[j0 + 1, i0 + 1]

    value = (
        Q11 * (1.0 - wx) * (1.0 - wy)
        + Q12 * wx * (1.0 - wy)
        + Q21 * (1.0 - wx) * wy
        + Q22 * wx * wy
    )
    # +0.5 turns the truncating cast into round-to-nearest.
    return np.uint8(value + 0.5)
+
@jit(nopython=True, nogil=True)
def iterSearchShader_cpu(padu, padv, xr, yr, W, H, maxIter, precision):
    """Find the source location that the flow maps onto pixel ``(xr, yr)``.

    Solves ``i = xr - u(i, j)``, ``j = yr - v(i, j)`` by fixed-point
    iteration, sampling the flow bilinearly at the current estimate.

    Fixes over the previous version:
      * the integer cell ``(i_int, j_int)`` is recomputed in every iteration;
        before, it was derived once from the initial guess, so later
        iterations extrapolated from a stale cell;
      * the value returned after the loop is always defined, also when
        ``maxIter`` is 0.

    Args:
        padu, padv: horizontal / vertical flow, padded by one row and one
            column (shape (H + 1, W + 1)), indexed ``[row, column]``.
        xr, yr: integer pixel coordinates in the result image.
        W, H: width and height of the unpadded image.
        maxIter: maximum number of iterations.
        precision: convergence threshold, also used to detect zero flow.

    Returns:
        ``(i, j)``: the location in the distorted image. It can lie outside
        the image; the caller handles that case.
    """
    # Negligible flow: the pixel maps onto itself.
    if abs(padu[yr, xr]) < precision and abs(padv[yr, xr]) < precision:
        return xr, yr

    # Initial guess: flow divided by (1 + local flow difference), using a
    # forward difference where possible and a backward one on the last
    # column / row.
    if (xr + 1) <= (W - 1):
        dif = padu[yr, xr + 1] - padu[yr, xr]
    else:
        dif = padu[yr, xr] - padu[yr, xr - 1]
    u_next = padu[yr, xr] / (1 + dif)
    if (yr + 1) <= (H - 1):
        dif = padv[yr + 1, xr] - padv[yr, xr]
    else:
        dif = padv[yr, xr] - padv[yr - 1, xr]
    v_next = padv[yr, xr] / (1 + dif)
    i = xr - u_next
    j = yr - v_next

    for _ in range(maxIter):
        # Stop as soon as the estimate leaves the image.
        if not 0 <= i <= (W - 1) or not 0 <= j <= (H - 1):
            return i, j

        # Cell containing the *current* estimate.
        i_int = int(i)
        j_int = int(j)
        wx = i - i_int
        wy = j - j_int

        u = (
            padu[j_int, i_int] * (1 - wx) * (1 - wy)
            + padu[j_int, i_int + 1] * wx * (1 - wy)
            + padu[j_int + 1, i_int] * (1 - wx) * wy
            + padu[j_int + 1, i_int + 1] * wx * wy
        )
        v = (
            padv[j_int, i_int] * (1 - wx) * (1 - wy)
            + padv[j_int, i_int + 1] * wx * (1 - wy)
            + padv[j_int + 1, i_int] * (1 - wx) * wy
            + padv[j_int + 1, i_int + 1] * wx * wy
        )

        i_next = xr - u
        j_next = yr - v

        if abs(i - i_next) < precision and abs(j - j_next) < precision:
            return i, j

        i = i_next
        j = j_next

    # Not converged within maxIter: return the last estimate.
    return i, j
+
@jit(nopython=True, nogil=True)
def iterSearch_cpu(distortImg, resultImg, padu, padv, W, H, maxIter=5, precision=1e-2):
    """Fill ``resultImg`` by looking up every pixel's source in ``distortImg``.

    For each result pixel ``(xr, yr)`` the source location ``(i, j)`` is found
    with ``iterSearchShader_cpu``, mirrored back if it falls outside the
    image, and sampled per channel with ``biInterpolation_cpu``. The three
    channels of ``resultImg`` are written in place; nothing is returned.
    """
    last_col = W - 1
    last_row = H - 1
    for xr in range(W):
        for yr in range(H):
            # (xr, yr) lives in the result image, (i, j) in the distorted one.
            i, j = iterSearchShader_cpu(padu, padv, xr, yr, W, H, maxIter, precision)

            # Mirror locations that fall outside the border.
            if i > last_col:
                i = 2 * W - 1 - i
            if i < 0:
                i = -i
            if j > last_row:
                j = 2 * H - 1 - j
            if j < 0:
                j = -j

            for channel in range(3):
                resultImg[yr, xr, channel] = biInterpolation_cpu(distortImg[:, :, channel], i, j)
+
+
def forward_mapping_cpu(source_image, u, v, maxIter=5, precision=1e-2):
    """Warp ``source_image`` according to the forward flow (CPU version).

    Args:
        source_image: (H, W, 3) image array.
        u: horizontal flow, indexed ``[row, column]``.
        v: vertical flow, indexed ``[row, column]``.
        maxIter: iteration cap passed to the search.
        precision: convergence threshold passed to the search.

    Returns:
        uint8 array of shape (H, W, 3).
    """
    H, W = source_image.shape[0], source_image.shape[1]

    def replicate_edge(src, shape, dtype):
        # Copy `src` into a buffer one row and one column larger, repeating
        # the last row / column so that index + 1 is always readable.
        out = np.zeros(shape, dtype=dtype)
        out[0:H, 0:W] = src[0:H, 0:W]
        out[H, 0:W] = src[H - 1, 0:W]
        out[0:H, W] = src[0:H, W - 1]
        out[H, W] = src[H - 1, W - 1]
        return out

    distortImg = replicate_edge(source_image, (H + 1, W + 1, 3), np.uint8)
    padu = replicate_edge(u, (H + 1, W + 1), np.float32)
    padv = replicate_edge(v, (H + 1, W + 1), np.float32)

    resultImg = np.zeros((H, W, 3), dtype=np.uint8)
    iterSearch_cpu(distortImg, resultImg, padu, padv, W, H, maxIter, precision)
    return resultImg
+
class Distortion_with_flow_cpu(object):
    """Elastic distortion of an image driven by a flow field (CPU)."""

    def __init__(self, maxIter=3, precision=1e-3):
        self.maxIter = maxIter
        self.precision = precision

    def __call__(self, inputs, dx, dy):
        """Warp ``inputs`` and return the result as a PIL image.

        NOTE(review): ``dy`` is forwarded as the horizontal component ``u``
        and ``dx`` as the vertical component ``v`` of ``forward_mapping_cpu``
        -- confirm this matches the callers' naming convention.
        """
        pixels = np.array(inputs)
        warped = forward_mapping_cpu(
            pixels,
            dy,
            dx,
            maxIter=self.maxIter,
            precision=self.precision,
        )
        return Image.fromarray(warped)
+
@cuda.jit(device=True)
def biInterpolation_gpu(distorted, i, j):
    """Bilinearly sample the 2-D array ``distorted`` at column ``i``, row ``j``.

    The previous version truncated ``i`` and ``j`` to integers *before*
    computing the weights, so every weight collapsed to 0 or 1 and the
    function always returned the top-left neighbour. It also cast the result
    to ``np.int8``, which wraps values above 127. The fractional parts are
    now kept and the result is returned as ``np.uint8``.

    Coordinates are clamped so the 2x2 neighbourhood always lies inside
    ``distorted`` (which must be at least 2x2).

    Args:
        distorted: 2-D device array, indexed ``[row, column]``.
        i: column coordinate (may be fractional).
        j: row coordinate (may be fractional).

    Returns:
        The interpolated value, rounded to the nearest integer, as uint8.
    """
    last_col = distorted.shape[1] - 1
    last_row = distorted.shape[0] - 1

    # Clamp into the valid range; device code does no bounds checking.
    fi = min(max(float(i), 0.0), float(last_col))
    fj = min(max(float(j), 0.0), float(last_row))

    # Top-left corner of the cell; capped so that +1 stays in range.
    i0 = min(int(fi), last_col - 1)
    j0 = min(int(fj), last_row - 1)
    wx = fi - i0
    wy = fj - j0

    Q11 = distorted[j0, i0]
    Q12 = distorted[j0, i0 + 1]
    Q21 = distorted[j0 + 1, i0]
    Q22 = distorted[j0 + 1, i0 + 1]

    value = (
        Q11 * (1.0 - wx) * (1.0 - wy)
        + Q12 * wx * (1.0 - wy)
        + Q21 * (1.0 - wx) * wy
        + Q22 * wx * wy
    )
    # +0.5 turns the truncating cast into round-to-nearest.
    return np.uint8(value + 0.5)
+
@cuda.jit(device=True)
def iterSearchShader_gpu(padu, padv, xr, yr, W, H, maxIter, precision):
    """Find the source location that the flow maps onto pixel ``(xr, yr)``.

    Device-side twin of the CPU search: solves ``i = xr - u(i, j)``,
    ``j = yr - v(i, j)`` by fixed-point iteration, sampling the flow
    bilinearly at the current estimate.

    Fixes over the previous version:
      * the integer cell ``(i_int, j_int)`` is recomputed in every iteration;
        before, it was derived once from the initial guess, so later
        iterations extrapolated from a stale cell;
      * the value returned after the loop is always defined, also when
        ``maxIter`` is 0.

    Args:
        padu, padv: horizontal / vertical flow, padded by one row and one
            column (shape (H + 1, W + 1)), indexed ``[row, column]``.
        xr, yr: integer pixel coordinates in the result image.
        W, H: width and height of the unpadded image.
        maxIter: maximum number of iterations.
        precision: convergence threshold, also used to detect zero flow.

    Returns:
        ``(i, j)``: the location in the distorted image. It can lie outside
        the image; the caller handles that case.
    """
    # Negligible flow: the pixel maps onto itself.
    if abs(padu[yr, xr]) < precision and abs(padv[yr, xr]) < precision:
        return xr, yr

    # Initial guess: flow divided by (1 + local flow difference), using a
    # forward difference where possible and a backward one on the last
    # column / row.
    if (xr + 1) <= (W - 1):
        dif = padu[yr, xr + 1] - padu[yr, xr]
    else:
        dif = padu[yr, xr] - padu[yr, xr - 1]
    u_next = padu[yr, xr] / (1 + dif)
    if (yr + 1) <= (H - 1):
        dif = padv[yr + 1, xr] - padv[yr, xr]
    else:
        dif = padv[yr, xr] - padv[yr - 1, xr]
    v_next = padv[yr, xr] / (1 + dif)
    i = xr - u_next
    j = yr - v_next

    for _ in range(maxIter):
        # Stop as soon as the estimate leaves the image.
        if not 0 <= i <= (W - 1) or not 0 <= j <= (H - 1):
            return i, j

        # Cell containing the *current* estimate.
        i_int = int(i)
        j_int = int(j)
        wx = i - i_int
        wy = j - j_int

        u = (
            padu[j_int, i_int] * (1 - wx) * (1 - wy)
            + padu[j_int, i_int + 1] * wx * (1 - wy)
            + padu[j_int + 1, i_int] * (1 - wx) * wy
            + padu[j_int + 1, i_int + 1] * wx * wy
        )
        v = (
            padv[j_int, i_int] * (1 - wx) * (1 - wy)
            + padv[j_int, i_int + 1] * wx * (1 - wy)
            + padv[j_int + 1, i_int] * (1 - wx) * wy
            + padv[j_int + 1, i_int + 1] * wx * wy
        )

        i_next = xr - u
        j_next = yr - v

        if abs(i - i_next) < precision and abs(j - j_next) < precision:
            return i, j

        i = i_next
        j = j_next

    # Not converged within maxIter: return the last estimate.
    return i, j
+
@cuda.jit
def iterSearch_gpu(distortImg, resultImg, padu, padv, W, H, maxIter=5, precision=1e-2):
    """CUDA kernel: fill ``resultImg`` from ``distortImg`` using the flow.

    Each thread starts at its ``cuda.grid(2)`` position and advances by
    ``cuda.gridsize(2)``, so any launch configuration covers the whole
    W x H image. ``resultImg`` is written in place.
    """
    first_x, first_y = cuda.grid(2)
    step_x, step_y = cuda.gridsize(2)
    last_col = W - 1
    last_row = H - 1

    for xr in range(first_x, W, step_x):
        for yr in range(first_y, H, step_y):
            i, j = iterSearchShader_gpu(padu, padv, xr, yr, W, H, maxIter, precision)

            # Mirror locations that fall outside the border.
            if i > last_col:
                i = 2 * W - 1 - i
            if i < 0:
                i = -i
            if j > last_row:
                j = 2 * H - 1 - j
            if j < 0:
                j = -j

            for channel in range(3):
                resultImg[yr, xr, channel] = biInterpolation_gpu(distortImg[:, :, channel], i, j)
+
def forward_mapping_gpu(source_image, u, v, maxIter=5, precision=1e-2):
    """Warp ``source_image`` according to the forward flow (CUDA version).

    Args:
        source_image: (H, W, 3) image array.
        u: horizontal flow, indexed ``[row, column]``.
        v: vertical flow, indexed ``[row, column]``.
        maxIter: iteration cap passed to the kernel.
        precision: convergence threshold passed to the kernel.

    Returns:
        uint8 host array of shape (H, W, 3).
    """
    H, W = source_image.shape[0], source_image.shape[1]

    def replicate_edge(src, shape, dtype):
        # Copy `src` into a buffer one row and one column larger, repeating
        # the last row / column so that index + 1 is always readable.
        out = np.zeros(shape, dtype=dtype)
        out[0:H, 0:W] = src[0:H, 0:W]
        out[H, 0:W] = src[H - 1, 0:W]
        out[0:H, W] = src[0:H, W - 1]
        out[H, W] = src[H - 1, W - 1]
        return out

    host_result = np.zeros((H, W, 3), dtype=np.uint8)
    host_image = replicate_edge(source_image, (H + 1, W + 1, 3), np.uint8)
    host_u = replicate_edge(u, (H + 1, W + 1), np.float32)
    host_v = replicate_edge(v, (H + 1, W + 1), np.float32)

    # Upload everything the kernel touches.
    padu = cuda.to_device(host_u)
    padv = cuda.to_device(host_v)
    distortImg = cuda.to_device(host_image)
    resultImg = cuda.to_device(host_result)

    # One thread per pixel, in 16x16 blocks, rounded up to cover the image.
    threadsperblock = (16, 16)
    blockspergrid = (
        math.ceil(W / threadsperblock[0]),
        math.ceil(H / threadsperblock[1]),
    )

    iterSearch_gpu[blockspergrid, threadsperblock](distortImg, resultImg, padu, padv, W, H, maxIter, precision)
    return resultImg.copy_to_host()
+
class Distortion_with_flow_gpu(object):
    """Elastic distortion of an image driven by a flow field (CUDA)."""

    def __init__(self, maxIter=3, precision=1e-3):
        self.maxIter = maxIter
        self.precision = precision

    def __call__(self, inputs, dx, dy):
        """Warp ``inputs`` and return the result as a PIL image.

        NOTE(review): ``dy`` is forwarded as the horizontal component ``u``
        and ``dx`` as the vertical component ``v`` of ``forward_mapping_gpu``
        -- confirm this matches the callers' naming convention.
        """
        pixels = np.array(inputs)
        warped = forward_mapping_gpu(
            pixels,
            dy,
            dx,
            maxIter=self.maxIter,
            precision=self.precision,
        )
        return Image.fromarray(warped)
+
def read_flow(filename):
    """Read an optical-flow file and return it as a float32 array.

    Two variants are recognised by their leading float32 magic number:
      * ``202021.25`` -- payload stored as float32,
      * ``123.25``    -- payload stored as float16.
    The header continues with int32 width and int32 height, followed by
    ``2 * width * height`` values that are interpreted as ``(2, h, w)``.

    Changes over the previous version: the file is always closed, an invalid
    magic number or a truncated payload raises ``ValueError`` (instead of an
    ``AttributeError`` on ``None`` or silently repeated data from
    ``np.resize``), and the bare ``except`` is gone.

    :param filename: name of the flow file
    :return: optical flow data as a float32 array of shape (2, h, w)
    :raises ValueError: if the header or the payload is invalid
    """
    with open(filename, "rb") as f:
        header = np.fromfile(f, np.float32, count=1)
        magic = header[0] if header.size else None

        if magic == 202021.25:
            payload_dtype = np.float32
        elif magic == 123.25:
            payload_dtype = np.float16
        else:
            print("Magic number incorrect. Invalid .flo file")
            raise ValueError(f"Invalid .flo file (bad magic number): {filename}")

        size = np.fromfile(f, np.int32, count=2)
        if size.size != 2:
            raise ValueError(f"Invalid .flo file (missing width/height): {filename}")
        # Plain ints so that 2 * w * h cannot overflow int32.
        w, h = int(size[0]), int(size[1])

        data = np.fromfile(f, payload_dtype, count=2 * w * h)

    if data.size != 2 * w * h:
        raise ValueError(
            f"Invalid .flo file (expected {2 * w * h} values, found {data.size}): {filename}"
        )
    return data.reshape(2, h, w).astype(np.float32)
+
class LossHandler:
    """Accumulate named loss totals and report their per-sample average."""

    def __init__(self):
        self.loss_dict = {}
        self.count_sample = 0

    def add_loss(self, key, loss):
        """Add ``loss`` to the running total stored under ``key``."""
        self.loss_dict.setdefault(key, 0)
        self.loss_dict[key] += loss

    def get_loss(self, key):
        """Return the total for ``key`` divided by the number of samples."""
        total = self.loss_dict[key]
        return total / self.count_sample

    def count_one_sample(self):
        """Register one more sample."""
        self.count_sample += 1

    def reset(self):
        """Forget all totals and the sample count."""
        self.loss_dict = {}
        self.count_sample = 0
+
+
class TimeHandler:
    """Stopwatch keyed by name: first call starts, second call stops."""

    def __init__(self):
        self.time_handler = {}

    def compute_time(self, key):
        """Start or stop the timer named ``key``.

        Returns ``None`` when the timer is started, and the elapsed seconds
        (removing the timer) when it is stopped.
        """
        if key in self.time_handler:
            now = time.time()
            return now - self.time_handler.pop(key)
        self.time_handler[key] = time.time()
        return None
+
+
def print_num_params(model, is_trainable=False):
    """Print and return the number of parameters of ``model``.

    Args:
        model: object exposing ``parameters()``.
        is_trainable: count only parameters with ``requires_grad`` when true.

    Returns:
        The parameter count that was printed.
    """
    model_name = model.__class__.__name__.ljust(30)

    if is_trainable:
        tag = "TRAINABLE"
        counted = (p for p in model.parameters() if p.requires_grad)
    else:
        tag = "GENERAL"
        counted = model.parameters()

    num_params = sum(p.numel() for p in counted)
    count_text = "{:,}".format(num_params).rjust(10)
    print(f"| {tag} | {model_name} | {count_text} |")
    return num_params
diff --git a/test.py b/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8a4a2053d6592ceed9232ebf07bcb1a8fd7770d
--- /dev/null
+++ b/test.py
@@ -0,0 +1,147 @@
+import numpy as np
+import shutil
+import os
+import argparse
+import torch
+import glob
+from tqdm import tqdm
+from PIL import Image
+from collections import OrderedDict
+from src.models.vit.config import load_config
+import torchvision.transforms as transforms
+import cv2
+from skimage import io
+
+from src.models.CNN.ColorVidNet import GeneralColorVidNet
+from src.models.vit.embed import GeneralEmbedModel
+from src.models.CNN.NonlocalNet import GeneralWarpNet
+from src.models.CNN.FrameColor import frame_colorization
+from src.utils import (
+ RGB2Lab,
+ ToTensor,
+ Normalize,
+ uncenter_l,
+ tensor_lab2rgb,
+ SquaredPadding,
+ UnpaddingSquare
+)
+
def load_params(ckpt_file):
    """Load a checkpoint file and return its entries as an ``OrderedDict``.

    The checkpoint is mapped onto the CPU while loading, so weights saved
    from a CUDA process can be read on a machine without a GPU (the script's
    default ``--device`` is ``cpu``). ``load_state_dict`` copies the values
    onto whatever device the target module lives on.

    Args:
        ckpt_file: path to a file written by ``torch.save`` that holds a
            mapping of parameter names to values.

    Returns:
        OrderedDict with the same keys, in the same order.
    """
    params = torch.load(ckpt_file, map_location="cpu")
    return OrderedDict(params.items())
+
def custom_transform(transforms, img):
    """Apply ``transforms`` in order and report the square padding used.

    ``SquaredPadding`` steps are called with ``return_paddings=True`` so the
    padding can later be removed again; every other step is called with the
    image only.

    Args:
        transforms: iterable of callables.
        img: the image handed to the first transform.

    Returns:
        ``(tensor, padding)``: the transformed image moved to the global
        ``device``, and the padding tuple of the last ``SquaredPadding``
        step, or ``None`` when the pipeline contains none (previously this
        case raised ``UnboundLocalError``).
    """
    padding = None
    for transform in transforms:
        if isinstance(transform, SquaredPadding):
            img, padding = transform(img, return_paddings=True)
        else:
            img = transform(img)
    return img.to(device), padding
+
def save_frames(predicted_rgb, video_name, frame_name):
    """Write one frame to ``<output_video_path>/<video_name>/<frame_name>``.

    The per-video directory is created on demand; the script only creates
    the top-level output directory, so writing used to fail here.

    Args:
        predicted_rgb: image array with values on a 0-255 scale, or ``None``
            to skip saving. Values are clipped to [0, 255] and cast to uint8.
        video_name: sub-directory of the output path.
        frame_name: file name of the frame, including its extension.
    """
    if predicted_rgb is None:
        return

    frame = np.clip(predicted_rgb, 0, 255).astype(np.uint8)
    target_dir = os.path.join(args.output_video_path, video_name)
    os.makedirs(target_dir, exist_ok=True)
    io.imsave(os.path.join(target_dir, frame_name), frame)
+
+
def colorize_video(video_name):
    """Colorize every frame of one video with each of its reference images.

    Frames are read from ``<input_videos_path>/<video_name>`` and references
    from ``<reference_images_path>/<video_name>``, both in sorted order.
    Frames are processed sequentially because each prediction is fed back as
    the "previous frame" of the next one.

    Fix over the previous version: the network output is a (1, 3, H, W)
    tensor with values in [0, 1], but ``save_frames`` clips to [0, 255] and
    casts to uint8. The prediction is now un-padded with the paddings
    recorded by the transform pipeline, scaled to 0-255 and converted to an
    (H, W, 3) numpy array before saving.

    Note that output paths do not depend on the reference image, so with
    several references the results of the last one overwrite the others.

    Relies on module-level globals set up by the script: ``args``,
    ``device``, ``transforms``, ``embed_net``, ``nonlocal_net``, ``colornet``.
    """
    frames_dir = os.path.join(args.input_videos_path, video_name)
    refs_dir = os.path.join(args.reference_images_path, video_name)
    frames_list = sorted(os.listdir(frames_dir))
    refs_list = sorted(os.listdir(refs_dir))
    unpad = UnpaddingSquare()

    for ref_path in refs_list:
        frame_ref = Image.open(os.path.join(refs_dir, ref_path)).convert("RGB")
        I_last_lab_predict = None

        IB_lab, _ = custom_transform(transforms, frame_ref)
        I_reference_lab = IB_lab.unsqueeze(0).to(device)

        # Reference features are computed once and reused for every frame.
        with torch.no_grad():
            I_reference_l = I_reference_lab[:, 0:1, :, :]
            I_reference_ab = I_reference_lab[:, 1:3, :, :]
            I_reference_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_reference_l), I_reference_ab), dim=1)).to(device)
            features_B = embed_net(I_reference_rgb)

        for frame_name in frames_list:
            curr_frame = Image.open(os.path.join(frames_dir, frame_name)).convert("RGB")
            IA_lab, IA_paddings = custom_transform(transforms, curr_frame)
            IA_lab = IA_lab.unsqueeze(0).to(device)
            IA_l = IA_lab[:, 0:1, :, :]

            # The first frame has no predecessor; start from zeros.
            if I_last_lab_predict is None:
                I_last_lab_predict = torch.zeros_like(IA_lab).to(device)

            with torch.no_grad():
                I_current_ab_predict, _, _ = frame_colorization(
                    IA_lab,
                    I_reference_lab,
                    I_last_lab_predict,
                    features_B,
                    embed_net,
                    nonlocal_net,
                    colornet,
                    luminance_noise=0,
                    temperature=1e-10,
                    joint_training=False,
                )
                I_last_lab_predict = torch.cat((IA_l, I_current_ab_predict), dim=1)
                IA_predict_rgb = tensor_lab2rgb(torch.cat((uncenter_l(IA_l), I_current_ab_predict), dim=1))

            # (1, 3, H, W) in [0, 1]  ->  un-padded (H', W', 3) on a 0-255 scale.
            frame_chw = unpad(IA_predict_rgb.squeeze(0).detach().cpu(), IA_paddings)
            frame_hwc = (frame_chw * 255.0).round().permute(1, 2, 0).numpy()
            save_frames(frame_hwc, video_name, frame_name)
+
+
+
if __name__ == "__main__":
    # ---- command line -----------------------------------------------------
    parser = argparse.ArgumentParser(description="Video Colorization")
    for flag, help_text in (
        ("--input_videos_path", "path to input video"),
        ("--reference_images_path", "path to reference image"),
        ("--output_video_path", "path to output video"),
    ):
        parser.add_argument(flag, type=str, help=help_text)
    parser.add_argument("--weight_path", type=str, default="checkpoints/epoch_5/", help="path to weight")
    parser.add_argument("--device", type=str, default="cpu", help="device to run the model")
    for flag, help_text in (
        ("--high_resolution", "use high resolution"),
        ("--wls_filter_on", "use wls filter"),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)
    args = parser.parse_args()

    device = torch.device(args.device)

    # ---- output directory: always start from an empty one ----------------
    if os.path.exists(args.output_video_path):
        shutil.rmtree(args.output_video_path)
    os.makedirs(args.output_video_path, exist_ok=True)

    videos_list = os.listdir(args.input_videos_path)

    # ---- models (read as globals by colorize_video) -----------------------
    embed_net = GeneralEmbedModel(pretrained_model="swin-tiny", device=device).to(device)
    nonlocal_net = GeneralWarpNet(feature_channel=128).to(device)
    colornet = GeneralColorVidNet(7).to(device)

    for net in (embed_net, nonlocal_net, colornet):
        net.eval()

    # ---- weights: one file per network inside weight_path ----------------
    for net, weight_file in (
        (embed_net, "embed_net.pth"),
        (nonlocal_net, "nonlocal_net.pth"),
        (colornet, "colornet.pth"),
    ):
        state = load_params(os.path.join(args.weight_path, weight_file))
        net.load_state_dict(state, strict=True)

    # NOTE: this rebinds the module-level name `transforms` (imported above
    # as torchvision.transforms) to the preprocessing pipeline; the functions
    # in this file read it under that name.
    transforms = [
        SquaredPadding(target_size=224),
        RGB2Lab(),
        ToTensor(),
        Normalize(),
    ]

    with torch.no_grad():
        for video_name in tqdm(videos_list):
            colorize_video(video_name)
\ No newline at end of file
diff --git a/train.py b/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..8181702e26b66117a7e2d54b4d07e22308874caf
--- /dev/null
+++ b/train.py
@@ -0,0 +1,609 @@
+import os
+import sys
+import wandb
+import argparse
+import numpy as np
+from tqdm import tqdm
+from PIL import Image
+from datetime import datetime
+from zoneinfo import ZoneInfo
+from time import gmtime, strftime
+from collections import OrderedDict
+import random
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.backends.cudnn as cudnn
+from torchvision.transforms import CenterCrop
+from torch.utils.data import ConcatDataset, DataLoader, WeightedRandomSampler
+import torchvision.transforms as torch_transforms
+from torchvision.utils import make_grid
+
+from src.losses import (
+ ContextualLoss,
+ ContextualLoss_forward,
+ Perceptual_loss,
+ consistent_loss_fn,
+ discriminator_loss_fn,
+ generator_loss_fn,
+ l1_loss_fn,
+ smoothness_loss_fn,
+)
+from src.models.CNN.GAN_models import Discriminator_x64
+from src.models.CNN.ColorVidNet import ColorVidNet
+from src.models.CNN.FrameColor import frame_colorization
+from src.models.CNN.NonlocalNet import WeightedAverage_color, NonlocalWeightedAverage, WarpNet, WarpNet_new
+from src.models.vit.embed import EmbedModel
+from src.models.vit.config import load_config
+from src.data import transforms
+from src.data.dataloader import VideosDataset, VideosDataset_ImageNet
+from src.utils import CenterPad_threshold
+from src.utils import (
+ TimeHandler,
+ RGB2Lab,
+ ToTensor,
+ Normalize,
+ LossHandler,
+ WarpingLayer,
+ uncenter_l,
+ tensor_lab2rgb,
+ print_num_params,
+)
+from src.scheduler import PolynomialLR
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--video_data_root_list", type=str, default="dataset")
+parser.add_argument("--flow_data_root_list", type=str, default="flow")
+parser.add_argument("--mask_data_root_list", type=str, default="mask")
+parser.add_argument("--data_root_imagenet", default="imagenet", type=str)
+parser.add_argument("--annotation_file_path", default="dataset/annotation.csv", type=str)
+parser.add_argument("--imagenet_pairs_file", default="imagenet_pairs.txt", type=str)
+parser.add_argument("--gpu_ids", type=str, default="0,1,2,3", help="separate by comma")
+parser.add_argument("--workers", type=int, default=0)
+parser.add_argument("--batch_size", type=int, default=2)
+parser.add_argument("--image_size", type=int, default=[384, 384])
+parser.add_argument("--ic", type=int, default=7)
+parser.add_argument("--epoch", type=int, default=40)
+parser.add_argument("--resume_epoch", type=int, default=0)
+parser.add_argument("--resume", type=bool, default=False)
+parser.add_argument("--load_pretrained_model", type=bool, default=False)
+parser.add_argument("--lr", type=float, default=1e-4)
+parser.add_argument("--beta1", type=float, default=0.5)
+parser.add_argument("--lr_step", type=int, default=1)
+parser.add_argument("--lr_gamma", type=float, default=0.9)
+parser.add_argument("--checkpoint_dir", type=str, default="checkpoints")
+parser.add_argument("--checkpoint_step", type=int, default=500)
+parser.add_argument("--real_reference_probability", type=float, default=0.7)
+parser.add_argument("--nonzero_placeholder_probability", type=float, default=0.0)
+parser.add_argument("--domain_invariant", type=bool, default=False)
+parser.add_argument("--weigth_l1", type=float, default=2.0)
+parser.add_argument("--weight_contextual", type=float, default="0.5")
+parser.add_argument("--weight_perceptual", type=float, default="0.02")
+parser.add_argument("--weight_smoothness", type=float, default="5.0")
+parser.add_argument("--weight_gan", type=float, default="0.5")
+parser.add_argument("--weight_nonlocal_smoothness", type=float, default="0.0")
+parser.add_argument("--weight_nonlocal_consistent", type=float, default="0.0")
+parser.add_argument("--weight_consistent", type=float, default="0.05")
+parser.add_argument("--luminance_noise", type=float, default="2.0")
+parser.add_argument("--permute_data", type=bool, default=True)
+parser.add_argument("--contextual_loss_direction", type=str, default="forward", help="forward or backward matching")
+parser.add_argument("--batch_accum_size", type=int, default=10)
+parser.add_argument("--epoch_train_discriminator", type=int, default=3)
+parser.add_argument("--vit_version", type=str, default="vit_tiny_patch16_384")
+parser.add_argument("--use_dummy", type=bool, default=False)
+parser.add_argument("--use_wandb", type=bool, default=False)
+parser.add_argument("--use_feature_transform", type=bool, default=False)
+parser.add_argument("--head_out_idx", type=str, default="8,9,10,11")
+parser.add_argument("--wandb_token", type=str, default="")
+parser.add_argument("--wandb_name", type=str, default="")
+
+
+def load_data():
+ transforms_video = [
+ CenterCrop(opt.image_size),
+ RGB2Lab(),
+ ToTensor(),
+ Normalize(),
+ ]
+
+ train_dataset_videos = [
+ VideosDataset(
+ video_data_root=video_data_root,
+ flow_data_root=flow_data_root,
+ mask_data_root=mask_data_root,
+ imagenet_folder=opt.data_root_imagenet,
+ annotation_file_path=opt.annotation_file_path,
+ image_size=opt.image_size,
+ image_transform=transforms.Compose(transforms_video),
+ real_reference_probability=opt.real_reference_probability,
+ nonzero_placeholder_probability=opt.nonzero_placeholder_probability,
+ )
+ for video_data_root, flow_data_root, mask_data_root in zip(
+ opt.video_data_root_list, opt.flow_data_root_list, opt.mask_data_root_list
+ )
+ ]
+
+ transforms_imagenet = [CenterPad_threshold(opt.image_size), RGB2Lab(), ToTensor(), Normalize()]
+ extra_reference_transform = [
+ torch_transforms.RandomHorizontalFlip(0.5),
+ torch_transforms.RandomResizedCrop(480, (0.98, 1.0), ratio=(0.8, 1.2)),
+ ]
+
+ train_dataset_imagenet = VideosDataset_ImageNet(
+ imagenet_data_root=opt.data_root_imagenet,
+ pairs_file=opt.imagenet_pairs_file,
+ image_size=opt.image_size,
+ transforms_imagenet=transforms_imagenet,
+ distortion_level=4,
+ brightnessjitter=5,
+ nonzero_placeholder_probability=opt.nonzero_placeholder_probability,
+ extra_reference_transform=extra_reference_transform,
+ real_reference_probability=opt.real_reference_probability,
+ )
+
+ # video_training_length = sum([len(dataset) for dataset in train_dataset_videos])
+ # imagenet_training_length = len(train_dataset_imagenet)
+ # dataset_training_length = sum([dataset.real_len for dataset in train_dataset_videos]) + +train_dataset_imagenet.real_len
+ dataset_combined = ConcatDataset(train_dataset_videos + [train_dataset_imagenet])
+ # sampler=[]
+ # seed_sampler=int.from_bytes(os.urandom(4),"big")
+ # random.seed(seed_sampler)
+ # for idx in range(opt.epoch):
+ # sampler = sampler + random.sample(range(dataset_training_length),dataset_training_length)
+ # wandb.log({"Sampler_Seed":seed_sampler})
+ # sampler = sampler+WeightedRandomSampler([1] * video_training_length + [1] * imagenet_training_length, dataset_training_length*opt.epoch)
+
+ # video_training_length = sum([len(dataset) for dataset in train_dataset_videos])
+ # dataset_training_length = sum([dataset.real_len for dataset in train_dataset_videos])
+ # dataset_combined = ConcatDataset(train_dataset_videos)
+ # sampler = WeightedRandomSampler([1] * video_training_length, dataset_training_length * opt.epoch)
+
+ data_loader = DataLoader(dataset_combined, batch_size=opt.batch_size, shuffle=True, num_workers=opt.workers)
+ return data_loader
+
+
+def training_logger():
+    """Periodically report averaged losses, log example images and save iteration checkpoints.
+
+    Works entirely on module-level state set by the training loop (``total_iter``,
+    ``loss_handler``, the schedulers, the latest loss tensors and RGB batches, the
+    networks). It does nothing unless ``total_iter`` is a multiple of
+    ``opt.checkpoint_step`` or equals ``len(data_loader)``.
+    """
+    # NOTE(review): nesting below was reconstructed from a whitespace-collapsed source - confirm.
+    if (total_iter % opt.checkpoint_step == 0) or (total_iter == len(data_loader)):
+        # Average every accumulated loss over the samples counted since the last reset.
+        train_loss_dict = {"train/" + str(k): v / loss_handler.count_sample for k, v in loss_handler.loss_dict.items()}
+        # optimizer_g has several parameter groups; the first two learning rates are reported.
+        train_loss_dict["train/opt_g_lr_1"] = step_optim_scheduler_g.get_last_lr()[0]
+        train_loss_dict["train/opt_g_lr_2"] = step_optim_scheduler_g.get_last_lr()[1]
+        train_loss_dict["train/opt_d_lr"] = step_optim_scheduler_d.get_last_lr()[0]
+
+        # Snapshot of the most recent iteration's losses (not averaged).
+        alert_text = f"l1_loss: {l1_loss.item()}\npercep_loss: {perceptual_loss.item()}\nctx_loss: {contextual_loss_total.item()}\ncst_loss: {consistent_loss.item()}\nsm_loss: {smoothness_loss.item()}\ntotal: {total_loss.item()}"
+
+        if opt.use_wandb:
+            wandb.log(train_loss_dict)
+            wandb.alert(title=f"Progress training #{total_iter}", text=alert_text)
+
+            # One image row per batch element: prediction, reference, ground truth.
+            for idx in range(I_predict_rgb.shape[0]):
+                concated_I = make_grid(
+                    [(I_predict_rgb[idx] * 255), (I_reference_rgb[idx] * 255), (I_current_rgb[idx] * 255)], nrow=3
+                )
+                wandb_concated_I = wandb.Image(
+                    concated_I,
+                    caption="[LEFT] Predict, [CENTER] Reference, [RIGHT] Ground truth\n[REF] {}, [FRAME] {}".format(
+                        ref_path[idx], curr_frame_path[idx]
+                    ),
+                )
+                wandb.log({f"example_{idx}": wandb_concated_I})
+
+        # The "*_iter.pth" files are overwritten on every call.
+        torch.save(
+            nonlocal_net.state_dict(),
+            os.path.join(opt.checkpoint_dir, "nonlocal_net_iter.pth"),
+        )
+        torch.save(
+            colornet.state_dict(),
+            os.path.join(opt.checkpoint_dir, "colornet_iter.pth"),
+        )
+        torch.save(
+            discriminator.state_dict(),
+            os.path.join(opt.checkpoint_dir, "discriminator_iter.pth"),
+        )
+        torch.save(embed_net.state_dict(), os.path.join(opt.checkpoint_dir, "embed_net_iter.pth"))
+
+        # Start a fresh averaging window for the next report.
+        loss_handler.reset()
+
+
+def load_params(ckpt_file):
+ params = torch.load(ckpt_file)
+ new_params = []
+ for key, value in params.items():
+ new_params.append((key, value))
+ return OrderedDict(new_params)
+
+
+def parse(parser, save=True):
+ opt = parser.parse_args()
+ args = vars(opt)
+
+ print("------------------------------ Options -------------------------------")
+ for k, v in sorted(args.items()):
+ print("%s: %s" % (str(k), str(v)))
+ print("-------------------------------- End ---------------------------------")
+
+ if save:
+ file_name = os.path.join("opt.txt")
+ with open(file_name, "wt") as opt_file:
+ opt_file.write(os.path.basename(sys.argv[0]) + " " + strftime("%Y-%m-%d %H:%M:%S", gmtime()) + "\n")
+ opt_file.write("------------------------------ Options -------------------------------\n")
+ for k, v in sorted(args.items()):
+ opt_file.write("%s: %s\n" % (str(k), str(v)))
+ opt_file.write("-------------------------------- End ---------------------------------\n")
+ return opt
+
+
+def gpu_setup():
+ os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+ cudnn.benchmark = True
+ torch.cuda.set_device(opt.gpu_ids[0])
+ device = torch.device("cuda")
+ print("running on GPU", opt.gpu_ids)
+ return device
+
+
+if __name__ == "__main__":
+ ############################################## SETUP ###############################################
+ torch.multiprocessing.set_start_method("spawn", force=True)
+ # =============== GET PARSER OPTION ================
+ opt = parse(parser)
+ opt.video_data_root_list = opt.video_data_root_list.split(",")
+ opt.flow_data_root_list = opt.flow_data_root_list.split(",")
+ opt.mask_data_root_list = opt.mask_data_root_list.split(",")
+ opt.gpu_ids = list(map(int, opt.gpu_ids.split(",")))
+ opt.head_out_idx = list(map(int, opt.head_out_idx.split(",")))
+ n_dim_output = 3 if opt.use_feature_transform else 4
+ assert len(opt.head_out_idx) == 4, "Size of head_out_idx must be 4"
+
+ os.makedirs(opt.checkpoint_dir, exist_ok=True)
+
+ # =================== INIT WANDB ===================
+ if opt.use_wandb:
+ print("Save images to Wandb")
+ if opt.wandb_token != "":
+ try:
+ wandb.login(key=opt.wandb_token)
+ except:
+ pass
+ wandb.init(
+ project="video-colorization",
+ name=f"{opt.wandb_name} {datetime.now(tz=ZoneInfo('Asia/Ho_Chi_Minh')).strftime('%Y/%m/%d_%H-%M-%S')}",
+ )
+
+ # ================== SETUP DEVICE ==================
+ # torch.multiprocessing.set_start_method("spawn", force=True)
+ # device = gpu_setup()
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # =================== VIT CONFIG ===================
+ cfg = load_config()
+ model_cfg = cfg["model"][opt.vit_version]
+ model_cfg["image_size"] = (384, 384)
+ model_cfg["backbone"] = opt.vit_version
+ model_cfg["dropout"] = 0.0
+ model_cfg["drop_path_rate"] = 0.1
+ model_cfg["n_cls"] = 10
+
+ ############################################ LOAD DATA #############################################
+ if opt.use_dummy:
+ H, W = 384, 384
+ I_last_lab = torch.rand(opt.batch_size, 3, H, W)
+ I_current_lab = torch.rand(opt.batch_size, 3, H, W)
+ I_reference_lab = torch.rand(opt.batch_size, 3, H, W)
+ flow_forward = torch.rand(opt.batch_size, 2, H, W)
+ mask = torch.rand(opt.batch_size, 1, H, W)
+ placeholder_lab = torch.rand(opt.batch_size, 3, H, W)
+ self_ref_flag = torch.rand(opt.batch_size, 3, H, W)
+ data_loader = [
+ [I_last_lab, I_current_lab, I_reference_lab, flow_forward, mask, placeholder_lab, self_ref_flag, None, None, None]
+ for _ in range(10)
+ ]
+ else:
+ data_loader = load_data()
+
+ ########################################## DEFINE NETWORK ##########################################
+ print("-" * 59)
+ print("| TYPE | Model name | Num params |")
+ print("-" * 59)
+ colornet = ColorVidNet(opt.ic).to(device)
+ colornet_params = print_num_params(colornet)
+
+ if opt.use_feature_transform:
+ nonlocal_net = WarpNet().to(device)
+ else:
+ nonlocal_net = WarpNet_new(model_cfg["d_model"]).to(device)
+ nonlocal_net_params = print_num_params(nonlocal_net)
+
+ discriminator = Discriminator_x64(ndf=64).to(device)
+ discriminator_params = print_num_params(discriminator)
+
+ weighted_layer_color = WeightedAverage_color().to(device)
+ weighted_layer_color_params = print_num_params(weighted_layer_color)
+
+ nonlocal_weighted_layer = NonlocalWeightedAverage().to(device)
+ nonlocal_weighted_layer_params = print_num_params(nonlocal_weighted_layer)
+
+ warping_layer = WarpingLayer(device=device).to(device)
+ warping_layer_params = print_num_params(warping_layer)
+
+ embed_net = EmbedModel(model_cfg, head_out_idx=opt.head_out_idx, n_dim_output=n_dim_output, device=device)
+ embed_net_params = print_num_params(embed_net)
+ print("-" * 59)
+ print(
+ f"| TOTAL | | {('{:,}'.format(colornet_params+nonlocal_net_params+discriminator_params+weighted_layer_color_params+nonlocal_weighted_layer_params+warping_layer_params+embed_net_params)).rjust(10)} |"
+ )
+ print("-" * 59)
+ if opt.use_wandb:
+ wandb.watch(discriminator, log="all", log_freq=opt.checkpoint_step, idx=0)
+ wandb.watch(embed_net, log="all", log_freq=opt.checkpoint_step, idx=1)
+ wandb.watch(colornet, log="all", log_freq=opt.checkpoint_step, idx=2)
+ wandb.watch(nonlocal_net, log="all", log_freq=opt.checkpoint_step, idx=3)
+
+ # ============= USE PRETRAINED OR NOT ==============
+ if opt.load_pretrained_model:
+ # pretrained_path = "/workspace/video_colorization/ckpt_folder_ver_1_vit_small_patch16_384"
+ nonlocal_net.load_state_dict(load_params(os.path.join(opt.checkpoint_dir, "nonlocal_net_iter.pth")))
+ colornet.load_state_dict(load_params(os.path.join(opt.checkpoint_dir, "colornet_iter.pth")))
+ discriminator.load_state_dict(load_params(os.path.join(opt.checkpoint_dir, "discriminator_iter.pth")))
+ embed_net_params = load_params(os.path.join(opt.checkpoint_dir, "embed_net_iter.pth"))
+ embed_net_params.pop("vit.heads_out")
+ embed_net.load_state_dict(embed_net_params)
+
+ ###################################### DEFINE LOSS FUNCTIONS #######################################
+ perceptual_loss_fn = Perceptual_loss(opt.domain_invariant, opt.weight_perceptual)
+ contextual_loss = ContextualLoss().to(device)
+ contextual_forward_loss = ContextualLoss_forward().to(device)
+
+ ######################################## DEFINE OPTIMIZERS #########################################
+ optimizer_g = optim.AdamW(
+ [
+ {"params": nonlocal_net.parameters(), "lr": opt.lr},
+ {"params": colornet.parameters(), "lr": 2 * opt.lr},
+ {"params": embed_net.parameters(), "lr": opt.lr},
+ ],
+ betas=(0.5, 0.999),
+ eps=1e-5,
+ amsgrad=True,
+ )
+
+ optimizer_d = optim.AdamW(
+ filter(lambda p: p.requires_grad, discriminator.parameters()),
+ lr=opt.lr,
+ betas=(0.5, 0.999),
+ amsgrad=True,
+ )
+
+ step_optim_scheduler_g = PolynomialLR(
+ optimizer_g,
+ step_size=opt.lr_step,
+ iter_warmup=0,
+ iter_max=len(data_loader) * opt.epoch,
+ power=0.9,
+ min_lr=1e-8,
+ )
+ step_optim_scheduler_d = PolynomialLR(
+ optimizer_d,
+ step_size=opt.lr_step,
+ iter_warmup=0,
+ iter_max=len(data_loader) * opt.epoch,
+ power=0.9,
+ min_lr=1e-8,
+ )
+ ########################################## DEFINE OTHERS ###########################################
+ downsampling_by2 = nn.AvgPool2d(kernel_size=2).to(device)
+ timer_handler = TimeHandler()
+ loss_handler = LossHandler() # Handle loss value
+ ############################################## TRAIN ###############################################
+
+ total_iter = 0
+ for epoch_num in range(1, opt.epoch + 1):
+ # if opt.use_wandb:
+ # wandb.log({"Current_trainning_epoch": epoch_num})
+ with tqdm(total=len(data_loader), position=0, leave=True) as pbar:
+ for iter, sample in enumerate(data_loader):
+ timer_handler.compute_time("load_sample")
+ total_iter += 1
+
+ # =============== LOAD DATA SAMPLE ================
+ (
+ I_last_lab, ######## (3, H, W)
+ I_current_lab, ##### (3, H, W)
+ I_reference_lab, ### (3, H, W)
+ flow_forward, ###### (2, H, W)
+ mask, ############## (1, H, W)
+ placeholder_lab, ### (3, H, W)
+ self_ref_flag, ##### (3, H, W)
+ prev_frame_path,
+ curr_frame_path,
+ ref_path,
+ ) = sample
+
+ I_last_lab = I_last_lab.to(device)
+ I_current_lab = I_current_lab.to(device)
+ I_reference_lab = I_reference_lab.to(device)
+ flow_forward = flow_forward.to(device)
+ mask = mask.to(device)
+ placeholder_lab = placeholder_lab.to(device)
+ self_ref_flag = self_ref_flag.to(device)
+
+ I_last_l = I_last_lab[:, 0:1, :, :]
+ I_last_ab = I_last_lab[:, 1:3, :, :]
+ I_current_l = I_current_lab[:, 0:1, :, :]
+ I_current_ab = I_current_lab[:, 1:3, :, :]
+ I_reference_l = I_reference_lab[:, 0:1, :, :]
+ I_reference_ab = I_reference_lab[:, 1:3, :, :]
+ I_reference_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_reference_l), I_reference_ab), dim=1))
+
+ _load_sample_time = timer_handler.compute_time("load_sample")
+ timer_handler.compute_time("forward_model")
+
+ features_B = embed_net(I_reference_rgb)
+ _, B_feat_1, B_feat_2, B_feat_3 = features_B
+
+ # ================== COLORIZATION ==================
+ # The last frame
+ I_last_ab_predict, I_last_nonlocal_lab_predict = frame_colorization(
+ IA_l=I_last_l,
+ IB_lab=I_reference_lab,
+ IA_last_lab=placeholder_lab,
+ features_B=features_B,
+ embed_net=embed_net,
+ colornet=colornet,
+ nonlocal_net=nonlocal_net,
+ luminance_noise=opt.luminance_noise,
+ )
+ I_last_lab_predict = torch.cat((I_last_l, I_last_ab_predict), dim=1)
+
+ # The current frame
+ I_current_ab_predict, I_current_nonlocal_lab_predict = frame_colorization(
+ IA_l=I_current_l,
+ IB_lab=I_reference_lab,
+ IA_last_lab=I_last_lab_predict,
+ features_B=features_B,
+ embed_net=embed_net,
+ colornet=colornet,
+ nonlocal_net=nonlocal_net,
+ luminance_noise=opt.luminance_noise,
+ )
+ I_current_lab_predict = torch.cat((I_last_l, I_current_ab_predict), dim=1)
+
+ # ================ UPDATE GENERATOR ================
+ if opt.weight_gan > 0:
+ optimizer_g.zero_grad()
+ optimizer_d.zero_grad()
+ fake_data_lab = torch.cat(
+ (
+ uncenter_l(I_current_l),
+ I_current_ab_predict,
+ uncenter_l(I_last_l),
+ I_last_ab_predict,
+ ),
+ dim=1,
+ )
+ real_data_lab = torch.cat(
+ (
+ uncenter_l(I_current_l),
+ I_current_ab,
+ uncenter_l(I_last_l),
+ I_last_ab,
+ ),
+ dim=1,
+ )
+
+ if opt.permute_data:
+ batch_index = torch.arange(-1, opt.batch_size - 1, dtype=torch.long)
+ real_data_lab = real_data_lab[batch_index, ...]
+
+ discriminator_loss = discriminator_loss_fn(real_data_lab, fake_data_lab, discriminator)
+ discriminator_loss.backward()
+ optimizer_d.step()
+
+ optimizer_g.zero_grad()
+ optimizer_d.zero_grad()
+
+ # ================== COMPUTE LOSS ==================
+ # L1 loss
+ l1_loss = l1_loss_fn(I_current_ab, I_current_ab_predict) * opt.weigth_l1
+
+ # Generator_loss. TODO: freeze this to train some first epoch
+ if epoch_num > opt.epoch_train_discriminator:
+ generator_loss = generator_loss_fn(real_data_lab, fake_data_lab, discriminator, opt.weight_gan, device)
+
+ # Perceptual Loss
+ I_predict_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_current_l), I_current_ab_predict), dim=1))
+ _, pred_feat_1, pred_feat_2, pred_feat_3 = embed_net(I_predict_rgb)
+
+ I_current_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_current_l), I_current_ab), dim=1))
+ A_feat_0, _, _, A_feat_3 = embed_net(I_current_rgb)
+
+ perceptual_loss = perceptual_loss_fn(A_feat_3, pred_feat_3)
+
+ # Contextual Loss
+ contextual_style5_1 = torch.mean(contextual_forward_loss(pred_feat_3, B_feat_3.detach())) * 8
+ contextual_style4_1 = torch.mean(contextual_forward_loss(pred_feat_2, B_feat_2.detach())) * 4
+ contextual_style3_1 = torch.mean(contextual_forward_loss(pred_feat_1, B_feat_1.detach())) * 2
+ # if opt.use_feature_transform:
+ # contextual_style3_1 = (
+ # torch.mean(
+ # contextual_forward_loss(
+ # downsampling_by2(pred_feat_1),
+ # downsampling_by2(),
+ # )
+ # )
+ # * 2
+ # )
+ # else:
+ # contextual_style3_1 = (
+ # torch.mean(
+ # contextual_forward_loss(
+ # pred_feat_1,
+ # B_feat_1.detach(),
+ # )
+ # )
+ # * 2
+ # )
+
+ contextual_loss_total = (
+ contextual_style5_1 + contextual_style4_1 + contextual_style3_1
+ ) * opt.weight_contextual
+
+ # Consistent Loss
+ consistent_loss = consistent_loss_fn(
+ I_current_lab_predict,
+ I_last_ab_predict,
+ I_current_nonlocal_lab_predict,
+ I_last_nonlocal_lab_predict,
+ flow_forward,
+ mask,
+ warping_layer,
+ weight_consistent=opt.weight_consistent,
+ weight_nonlocal_consistent=opt.weight_nonlocal_consistent,
+ device=device,
+ )
+
+ # Smoothness loss
+ smoothness_loss = smoothness_loss_fn(
+ I_current_l,
+ I_current_lab,
+ I_current_ab_predict,
+ A_feat_0,
+ weighted_layer_color,
+ nonlocal_weighted_layer,
+ weight_smoothness=opt.weight_smoothness,
+ weight_nonlocal_smoothness=opt.weight_nonlocal_smoothness,
+ device=device,
+ )
+
+ # Total loss
+ total_loss = l1_loss + perceptual_loss + contextual_loss_total + consistent_loss + smoothness_loss
+ if epoch_num > opt.epoch_train_discriminator:
+ total_loss += generator_loss
+
+ # Add loss to loss handler
+ loss_handler.add_loss(key="total_loss", loss=total_loss.item())
+ loss_handler.add_loss(key="l1_loss", loss=l1_loss.item())
+ loss_handler.add_loss(key="perceptual_loss", loss=perceptual_loss.item())
+ loss_handler.add_loss(key="contextual_loss", loss=contextual_loss_total.item())
+ loss_handler.add_loss(key="consistent_loss", loss=consistent_loss.item())
+ loss_handler.add_loss(key="smoothness_loss", loss=smoothness_loss.item())
+ loss_handler.add_loss(key="discriminator_loss", loss=discriminator_loss.item())
+ if epoch_num > opt.epoch_train_discriminator:
+ loss_handler.add_loss(key="generator_loss", loss=generator_loss.item())
+ loss_handler.count_one_sample()
+
+ total_loss.backward()
+
+ optimizer_g.step()
+ step_optim_scheduler_g.step()
+ step_optim_scheduler_d.step()
+
+ _forward_model_time = timer_handler.compute_time("forward_model")
+
+ timer_handler.compute_time("training_logger")
+ training_logger()
+ _training_logger_time = timer_handler.compute_time("training_logger")
+
+ pbar.set_description(
+ f"Epochs: {epoch_num}, Load_sample: {_load_sample_time:.3f}s, Forward: {_forward_model_time:.3f}s, log: {_training_logger_time:.3f}s"
+ )
+ pbar.update(1)
diff --git a/train_ddp.py b/train_ddp.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fc255c50f7f7aad1667940e131e351a3e3d1ed7
--- /dev/null
+++ b/train_ddp.py
@@ -0,0 +1,637 @@
+import os
+import sys
+import wandb
+import argparse
+import numpy as np
+from tqdm import tqdm
+from PIL import Image
+from datetime import datetime
+from zoneinfo import ZoneInfo
+from time import gmtime, strftime
+from collections import OrderedDict
+import random
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.backends.cudnn as cudnn
+from torchvision.transforms import CenterCrop
+from torch.utils.data import ConcatDataset, DataLoader, WeightedRandomSampler
+import torchvision.transforms as torch_transforms
+from torchvision.utils import make_grid
+
+from src.losses import (
+ ContextualLoss,
+ ContextualLoss_forward,
+ Perceptual_loss,
+ consistent_loss_fn,
+ discriminator_loss_fn,
+ generator_loss_fn,
+ l1_loss_fn,
+ smoothness_loss_fn,
+)
+from src.models.CNN.GAN_models import Discriminator_x64
+from src.models.CNN.ColorVidNet import ColorVidNet
+from src.models.CNN.FrameColor import frame_colorization
+from src.models.CNN.NonlocalNet import WeightedAverage_color, NonlocalWeightedAverage, WarpNet, WarpNet_new
+from src.models.vit.embed import EmbedModel
+from src.models.vit.config import load_config
+from src.data import transforms
+from src.data.dataloader import VideosDataset, VideosDataset_ImageNet
+from src.utils import CenterPad_threshold
+from src.utils import (
+ TimeHandler,
+ RGB2Lab,
+ ToTensor,
+ Normalize,
+ LossHandler,
+ WarpingLayer,
+ uncenter_l,
+ tensor_lab2rgb,
+ print_num_params,
+ SquaredPadding
+)
+from src.scheduler import PolynomialLR
+
+from torch.nn.parallel import DistributedDataParallel as DDP
+import torch.distributed as dist
+from torch.utils.data.distributed import DistributedSampler
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--video_data_root_list", type=str, default="dataset")
+parser.add_argument("--flow_data_root_list", type=str, default='flow')
+parser.add_argument("--mask_data_root_list", type=str, default='mask')
+parser.add_argument("--data_root_imagenet", default="imagenet", type=str)
+parser.add_argument("--annotation_file_path", default="dataset/annotation.csv", type=str)
+parser.add_argument("--imagenet_pairs_file", default="imagenet_pairs.txt", type=str)
+parser.add_argument("--gpu_ids", type=str, default="0,1,2,3", help="separate by comma")
+parser.add_argument("--workers", type=int, default=0)
+parser.add_argument("--batch_size", type=int, default=2)
+parser.add_argument("--image_size", type=int, default=[384, 384])
+parser.add_argument("--ic", type=int, default=7)
+parser.add_argument("--epoch", type=int, default=40)
+parser.add_argument("--resume_epoch", type=int, default=0)
+parser.add_argument("--resume", action='store_true')
+parser.add_argument("--load_pretrained_model", action='store_true')
+parser.add_argument("--pretrained_model_dir", type=str, default='ckpt')
+parser.add_argument("--lr", type=float, default=1e-4)
+parser.add_argument("--beta1", type=float, default=0.5)
+parser.add_argument("--lr_step", type=int, default=1)
+parser.add_argument("--lr_gamma", type=float, default=0.9)
+parser.add_argument("--checkpoint_dir", type=str, default="checkpoints")
+parser.add_argument("--checkpoint_step", type=int, default=500)
+parser.add_argument("--real_reference_probability", type=float, default=0.7)
+parser.add_argument("--nonzero_placeholder_probability", type=float, default=0.0)
+parser.add_argument("--domain_invariant", action='store_true')
+parser.add_argument("--weigth_l1", type=float, default=2.0)
+parser.add_argument("--weight_contextual", type=float, default="0.5")
+parser.add_argument("--weight_perceptual", type=float, default="0.02")
+parser.add_argument("--weight_smoothness", type=float, default="5.0")
+parser.add_argument("--weight_gan", type=float, default="0.5")
+parser.add_argument("--weight_nonlocal_smoothness", type=float, default="0.0")
+parser.add_argument("--weight_nonlocal_consistent", type=float, default="0.0")
+parser.add_argument("--weight_consistent", type=float, default="0.05")
+parser.add_argument("--luminance_noise", type=float, default="2.0")
+parser.add_argument("--permute_data", action='store_true')
+parser.add_argument("--contextual_loss_direction", type=str, default="forward", help="forward or backward matching")
+parser.add_argument("--batch_accum_size", type=int, default=10)
+parser.add_argument("--epoch_train_discriminator", type=int, default=3)
+parser.add_argument("--vit_version", type=str, default="vit_tiny_patch16_384")
+parser.add_argument("--use_dummy", action='store_true')
+parser.add_argument("--use_wandb", action='store_true')
+parser.add_argument("--use_feature_transform", action='store_true')
+parser.add_argument("--head_out_idx", type=str, default="8,9,10,11")
+parser.add_argument("--wandb_token", type=str, default="")
+parser.add_argument("--wandb_name", type=str, default="")
+
+
+def ddp_setup():
+ dist.init_process_group(backend="nccl")
+ local_rank = int(os.environ['LOCAL_RANK'])
+ return local_rank
+
+def ddp_cleanup():
+    """Tear down the distributed process group created by ``dist.init_process_group``."""
+    dist.destroy_process_group()
+
+def prepare_dataloader_ddp(dataset, batch_size=4, pin_memory=False, num_workers=0):
+ sampler = DistributedSampler(dataset, shuffle=True)
+ dataloader = DataLoader(dataset,
+ batch_size=batch_size,
+ pin_memory=pin_memory,
+ num_workers=num_workers,
+ sampler=sampler)
+ return dataloader
+
+def is_master_process():
+ ddp_rank = int(os.environ['RANK'])
+ return ddp_rank == 0
+
+def load_data():
+ transforms_video = [
+ SquaredPadding(target_size=opt.image_size[0]),
+ RGB2Lab(),
+ ToTensor(),
+ Normalize(),
+ ]
+
+ train_dataset_videos = [
+ VideosDataset(
+ video_data_root=video_data_root,
+ flow_data_root=flow_data_root,
+ mask_data_root=mask_data_root,
+ imagenet_folder=opt.data_root_imagenet,
+ annotation_file_path=opt.annotation_file_path,
+ image_size=opt.image_size,
+ image_transform=torch_transforms.Compose(transforms_video),
+ real_reference_probability=opt.real_reference_probability,
+ nonzero_placeholder_probability=opt.nonzero_placeholder_probability,
+ )
+ for video_data_root, flow_data_root, mask_data_root in zip(opt.video_data_root_list, opt.flow_data_root_list, opt.mask_data_root_list)
+ ]
+
+ transforms_imagenet = [SquaredPadding(target_size=opt.image_size[0]), RGB2Lab(), ToTensor(), Normalize()]
+ extra_reference_transform = [
+ torch_transforms.RandomHorizontalFlip(0.5),
+ torch_transforms.RandomResizedCrop(480, (0.98, 1.0), ratio=(0.8, 1.2)),
+ ]
+
+ train_dataset_imagenet = VideosDataset_ImageNet(
+ imagenet_data_root=opt.data_root_imagenet,
+ pairs_file=opt.imagenet_pairs_file,
+ image_size=opt.image_size,
+ transforms_imagenet=transforms_imagenet,
+ distortion_level=4,
+ brightnessjitter=5,
+ nonzero_placeholder_probability=opt.nonzero_placeholder_probability,
+ extra_reference_transform=extra_reference_transform,
+ real_reference_probability=opt.real_reference_probability,
+ )
+ dataset_combined = ConcatDataset(train_dataset_videos + [train_dataset_imagenet])
+ data_loader = prepare_dataloader_ddp(dataset_combined,
+ batch_size=opt.batch_size,
+ pin_memory=False,
+ num_workers=opt.workers)
+ return data_loader
+
+def save_checkpoints(saved_path):
+ # Make directory if the folder doesn't exists
+ os.makedirs(saved_path, exist_ok=True)
+
+ # Save model
+ torch.save(
+ nonlocal_net.module.state_dict(),
+ os.path.join(saved_path, "nonlocal_net.pth"),
+ )
+ torch.save(
+ colornet.module.state_dict(),
+ os.path.join(saved_path, "colornet.pth"),
+ )
+ torch.save(
+ discriminator.module.state_dict(),
+ os.path.join(saved_path, "discriminator.pth"),
+ )
+ torch.save(
+ embed_net.state_dict(),
+ os.path.join(saved_path, "embed_net.pth")
+ )
+
+ # Save learning state for restoring train
+ learning_state = {
+ "epoch": epoch_num,
+ "total_iter": total_iter,
+ "optimizer_g": optimizer_g.state_dict(),
+ "optimizer_d": optimizer_d.state_dict(),
+ "optimizer_schedule_g": step_optim_scheduler_g.state_dict(),
+ "optimizer_schedule_d": step_optim_scheduler_d.state_dict(),
+ }
+
+ torch.save(learning_state, os.path.join(saved_path, "learning_state.pth"))
+
+def training_logger():
+    """Periodically report averaged losses and log example images to wandb.
+
+    Works entirely on module-level state set by the training loop (``total_iter``,
+    ``loss_handler``, the schedulers, the latest loss tensors and RGB batches).
+    It does nothing unless ``total_iter`` is a multiple of ``opt.checkpoint_step``
+    or equals ``len(data_loader)``.
+    """
+    # NOTE(review): nesting below was reconstructed from a whitespace-collapsed source - confirm.
+    if (total_iter % opt.checkpoint_step == 0) or (total_iter == len(data_loader)):
+        # Average every accumulated loss over the samples counted since the last reset.
+        train_loss_dict = {"train/" + str(k): v / loss_handler.count_sample for k, v in loss_handler.loss_dict.items()}
+        # The first two learning rates of the generator scheduler are reported.
+        train_loss_dict["train/opt_g_lr_1"] = step_optim_scheduler_g.get_last_lr()[0]
+        train_loss_dict["train/opt_g_lr_2"] = step_optim_scheduler_g.get_last_lr()[1]
+        train_loss_dict["train/opt_d_lr"] = step_optim_scheduler_d.get_last_lr()[0]
+
+        # Snapshot of the most recent iteration's losses (not averaged).
+        alert_text = f"l1_loss: {l1_loss.item()}\npercep_loss: {perceptual_loss.item()}\nctx_loss: {contextual_loss_total.item()}\ncst_loss: {consistent_loss.item()}\nsm_loss: {smoothness_loss.item()}\ntotal: {total_loss.item()}"
+
+        if opt.use_wandb:
+            wandb.log(train_loss_dict)
+            wandb.alert(title=f"Progress training #{total_iter}", text=alert_text)
+
+            # One image row per batch element: prediction, reference, ground truth.
+            for idx in range(I_predict_rgb.shape[0]):
+                concated_I = make_grid(
+                    [(I_predict_rgb[idx] * 255), (I_reference_rgb[idx] * 255), (I_current_rgb[idx] * 255)], nrow=3
+                )
+                wandb_concated_I = wandb.Image(
+                    concated_I,
+                    caption="[LEFT] Predict, [CENTER] Reference, [RIGHT] Ground truth\n[REF] {}, [FRAME] {}".format(
+                        ref_path[idx], curr_frame_path[idx]
+                    ),
+                )
+                wandb.log({f"example_{idx}": wandb_concated_I})
+
+        # Save learning state checkpoint
+        # save_checkpoints(os.path.join(opt.checkpoint_dir, 'runs'))
+        # Start a fresh averaging window for the next report.
+        loss_handler.reset()
+
+
+def load_params(ckpt_file, local_rank, has_module=False):
+    """Load a checkpoint mapping from ``ckpt_file`` onto device ``cuda:<local_rank>``.
+
+    Args:
+        ckpt_file: Path of a file written with ``torch.save``.
+        local_rank: Index used to build the ``cuda:<local_rank>`` map location.
+        has_module: When true, every key is prefixed with ``"module."``.
+
+    Returns:
+        An ``OrderedDict`` with the (optionally prefixed) keys in file order.
+    """
+    key_prefix = "module." if has_module else ""
+    state = torch.load(ckpt_file, map_location=f'cuda:{local_rank}')
+    return OrderedDict((key_prefix + name, tensor) for name, tensor in state.items())
+
+
+def parse(parser, save=True):
+    """Parse the command line, echo every option and optionally record them on disk.
+
+    Args:
+        parser: The ``argparse.ArgumentParser`` to run.
+        save: When true, the options are also written to ``opt.txt`` in the
+            current working directory, preceded by the script name and a
+            GMT timestamp.
+
+    Returns:
+        The namespace produced by ``parser.parse_args()``.
+    """
+    opt = parser.parse_args()
+
+    header = "------------------------------ Options -------------------------------"
+    footer = "-------------------------------- End ---------------------------------"
+    # Format each option once; the same lines go to stdout and to the file.
+    option_lines = [f"{name}: {value}" for name, value in sorted(vars(opt).items())]
+
+    print(header)
+    for line in option_lines:
+        print(line)
+    print(footer)
+
+    if not save:
+        return opt
+
+    timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
+    with open("opt.txt", "wt") as opt_file:
+        opt_file.write(os.path.basename(sys.argv[0]) + " " + timestamp + "\n")
+        opt_file.write(header + "\n")
+        for line in option_lines:
+            opt_file.write(line + "\n")
+        opt_file.write(footer + "\n")
+    return opt
+
+
+def gpu_setup():
+    """Select the first GPU listed in ``opt.gpu_ids`` and return a CUDA device handle.
+
+    Also sets ``CUDA_DEVICE_ORDER`` to ``PCI_BUS_ID`` and turns on
+    ``cudnn.benchmark``. Reads the module-level ``opt``.
+    """
+    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+    cudnn.benchmark = True
+
+    primary_gpu = opt.gpu_ids[0]
+    torch.cuda.set_device(primary_gpu)
+
+    print("running on GPU", opt.gpu_ids)
+    return torch.device("cuda")
+
+
+if __name__ == "__main__":
+ # Script entry point: parse options, initialise wandb and DDP, build the
+ # networks, losses, optimizers and schedulers, optionally restore a previous
+ # run, then train and let the master process save a checkpoint folder.
+ # NOTE(review): leading indentation of this block is not preserved in this
+ # listing, so the nesting described in the comments below is inferred from
+ # syntax and should be confirmed against the real file.
+ ############################################## SETUP ###############################################
+ torch.multiprocessing.set_start_method("spawn", force=True)
+ # =============== GET PARSER OPTION ================
+ opt = parse(parser)
+ # Comma-separated command-line strings are turned into Python lists.
+ opt.video_data_root_list = opt.video_data_root_list.split(",")
+ opt.flow_data_root_list = opt.flow_data_root_list.split(",")
+ opt.mask_data_root_list = opt.mask_data_root_list.split(",")
+ opt.gpu_ids = list(map(int, opt.gpu_ids.split(",")))
+ opt.head_out_idx = list(map(int, opt.head_out_idx.split(",")))
+ n_dim_output = 3 if opt.use_feature_transform else 4
+ assert len(opt.head_out_idx) == 4, "Size of head_out_idx must be 4"
+
+ # =================== INIT WANDB ===================
+ # The master-process guard below is commented out, so the wandb setup is not
+ # restricted to the master process.
+# if is_master_process():
+ if opt.use_wandb:
+ print("Save images to Wandb")
+ if opt.wandb_token != "":
+ try:
+ wandb.login(key=opt.wandb_token)
+ # NOTE(review): bare except silently ignores every login failure
+ # (and also KeyboardInterrupt/SystemExit); consider narrowing and logging.
+ except:
+ pass
+ # NOTE(review): same condition as the check a few lines above; the two could be merged.
+ if opt.use_wandb:
+ wandb.init(
+ project="video-colorization",
+ group=f"{opt.wandb_name} {datetime.now(tz=ZoneInfo('Asia/Ho_Chi_Minh')).strftime('%Y/%m/%d_%H-%M-%S')}",
+ #group="DDP"
+ )
+
+ # ================== SETUP DEVICE ==================
+ # local_rank is used below as the device for every .to(...) call and as the DDP device id.
+ local_rank = ddp_setup()
+ # =================== VIT CONFIG ===================
+ cfg = load_config()
+ model_cfg = cfg["model"][opt.vit_version]
+ # Fixed overrides applied on top of the loaded backbone configuration.
+ model_cfg["image_size"] = (384, 384)
+ model_cfg["backbone"] = opt.vit_version
+ model_cfg["dropout"] = 0.0
+ model_cfg["drop_path_rate"] = 0.1
+ model_cfg["n_cls"] = 10
+ ############################################ LOAD DATA #############################################
+ data_loader = load_data()
+ ########################################## DEFINE NETWORK ##########################################
+
+
+ # colornet, nonlocal_net and discriminator are wrapped in DDP; the weighting
+ # layers, warping layer and embed_net are used unwrapped.
+ colornet = DDP(ColorVidNet(opt.ic).to(local_rank), device_ids=[local_rank], output_device=local_rank)
+ if opt.use_feature_transform:
+ nonlocal_net = DDP(WarpNet().to(local_rank), device_ids=[local_rank], output_device=local_rank)
+ else:
+ nonlocal_net = DDP(WarpNet_new(model_cfg["d_model"]).to(local_rank), device_ids=[local_rank], output_device=local_rank)
+ discriminator = DDP(Discriminator_x64(ndf=64).to(local_rank), device_ids=[local_rank], output_device=local_rank)
+ weighted_layer_color = WeightedAverage_color().to(local_rank)
+ nonlocal_weighted_layer = NonlocalWeightedAverage().to(local_rank)
+ warping_layer = WarpingLayer(device=local_rank).to(local_rank)
+ embed_net = EmbedModel(model_cfg, head_out_idx=opt.head_out_idx, n_dim_output=n_dim_output, device=local_rank)
+
+ if is_master_process():
+ # Print number of parameters
+ print("-" * 59)
+ print("| TYPE | Model name | Num params |")
+ print("-" * 59)
+
+ colornet_params = print_num_params(colornet)
+ nonlocal_net_params = print_num_params(nonlocal_net)
+ discriminator_params = print_num_params(discriminator)
+ weighted_layer_color_params = print_num_params(weighted_layer_color)
+ nonlocal_weighted_layer_params = print_num_params(nonlocal_weighted_layer)
+ warping_layer_params = print_num_params(warping_layer)
+ embed_net_params = print_num_params(embed_net)
+ print("-" * 59)
+ print(
+ f"| TOTAL | | {('{:,}'.format(colornet_params+nonlocal_net_params+discriminator_params+weighted_layer_color_params+nonlocal_weighted_layer_params+warping_layer_params+embed_net_params)).rjust(10)} |"
+ )
+ print("-" * 59)
+ # NOTE(review): cannot tell from this listing whether the wandb.watch calls
+ # are inside the master-process branch above — confirm.
+ if opt.use_wandb:
+ wandb.watch(discriminator, log="all", log_freq=opt.checkpoint_step, idx=0)
+ wandb.watch(embed_net, log="all", log_freq=opt.checkpoint_step, idx=1)
+ wandb.watch(colornet, log="all", log_freq=opt.checkpoint_step, idx=2)
+ wandb.watch(nonlocal_net, log="all", log_freq=opt.checkpoint_step, idx=3)
+
+
+
+ ###################################### DEFINE LOSS FUNCTIONS #######################################
+ perceptual_loss_fn = Perceptual_loss(opt.domain_invariant, opt.weight_perceptual)
+ # NOTE(review): contextual_loss is created here but not referenced again in
+ # this block; only contextual_forward_loss is used.
+ contextual_loss = ContextualLoss().to(local_rank)
+ contextual_forward_loss = ContextualLoss_forward().to(local_rank)
+ ######################################## DEFINE OPTIMIZERS #########################################
+ # Generator optimizer with three parameter groups; colornet uses twice the base learning rate.
+ optimizer_g = optim.AdamW(
+ [
+ {"params": nonlocal_net.parameters(), "lr": opt.lr},
+ {"params": colornet.parameters(), "lr": 2 * opt.lr},
+ {"params": embed_net.parameters(), "lr": opt.lr},
+ ],
+ betas=(0.5, 0.999),
+ eps=1e-5,
+ amsgrad=True,
+ )
+
+ optimizer_d = optim.AdamW(
+ filter(lambda p: p.requires_grad, discriminator.parameters()),
+ lr=opt.lr,
+ betas=(0.5, 0.999),
+ amsgrad=True,
+ )
+
+ # Both schedulers are configured for len(data_loader) * opt.epoch iterations.
+ step_optim_scheduler_g = PolynomialLR(
+ optimizer_g,
+ step_size=opt.lr_step,
+ iter_warmup=0,
+ iter_max=len(data_loader) * opt.epoch,
+ power=0.9,
+ min_lr=1e-8
+ )
+ step_optim_scheduler_d = PolynomialLR(
+ optimizer_d,
+ step_size=opt.lr_step,
+ iter_warmup=0,
+ iter_max=len(data_loader) * opt.epoch,
+ power=0.9,
+ min_lr=1e-8
+ )
+ ########################################## DEFINE OTHERS ###########################################
+ # NOTE(review): downsampling_by2 is not referenced again in this block.
+ downsampling_by2 = nn.AvgPool2d(kernel_size=2).to(local_rank)
+ # timer_handler = TimeHandler()
+ loss_handler = LossHandler()
+ ############################################## TRAIN ###############################################
+
+ # ============= USE PRETRAINED OR NOT ==============
+ if opt.load_pretrained_model:
+ # has_module=True makes load_params prefix every key with "module." for
+ # the three DDP-wrapped networks; embed_net is loaded without the prefix.
+ nonlocal_net.load_state_dict(load_params(os.path.join(opt.pretrained_model_dir, "nonlocal_net.pth"),
+ local_rank,
+ has_module=True))
+ colornet.load_state_dict(load_params(os.path.join(opt.pretrained_model_dir, "colornet.pth"),
+ local_rank,
+ has_module=True))
+ discriminator.load_state_dict(load_params(os.path.join(opt.pretrained_model_dir, "discriminator.pth"),
+ local_rank,
+ has_module=True))
+ embed_net_params = load_params(os.path.join(opt.pretrained_model_dir, "embed_net.pth"),
+ local_rank,
+ has_module=False)
+ # Drop the "vit.heads_out" entry (with or without the "module." prefix) before loading.
+ if "module.vit.heads_out" in embed_net_params:
+ embed_net_params.pop("module.vit.heads_out")
+ elif "vit.heads_out" in embed_net_params:
+ embed_net_params.pop("vit.heads_out")
+ embed_net.load_state_dict(embed_net_params)
+
+ # NOTE(review): unlike load_params, this torch.load passes no map_location — confirm
+ # this is intended when several processes resume at once.
+ learning_checkpoint = torch.load(os.path.join(opt.pretrained_model_dir, "learning_state.pth"))
+ optimizer_g.load_state_dict(learning_checkpoint["optimizer_g"])
+ optimizer_d.load_state_dict(learning_checkpoint["optimizer_d"])
+ step_optim_scheduler_g.load_state_dict(learning_checkpoint["optimizer_schedule_g"])
+ step_optim_scheduler_d.load_state_dict(learning_checkpoint["optimizer_schedule_d"])
+ # Resume counting from the stored iteration and continue with the epoch after the saved one.
+ total_iter = learning_checkpoint['total_iter']
+ start_epoch = learning_checkpoint['epoch']+1
+ else:
+ total_iter = 0
+ start_epoch = 1
+
+
+
+ for epoch_num in range(start_epoch, opt.epoch+1):
+ # The sampler receives a zero-based epoch index.
+ data_loader.sampler.set_epoch(epoch_num-1)
+
+ # Only the master process wraps the loader in a tqdm progress bar.
+ if is_master_process():
+ train_progress_bar = tqdm(
+ data_loader,
+ desc =f'Epoch {epoch_num}[Training]',
+ position = 0,
+ leave = False
+ )
+ else:
+ train_progress_bar = data_loader
+ # NOTE(review): the loop variable `iter` shadows the builtin and is not used below.
+ for iter, sample in enumerate(train_progress_bar):
+ # timer_handler.compute_time("load_sample")
+ total_iter += 1
+ # =============== LOAD DATA SAMPLE ================
+ (
+ I_last_lab, ######## (3, H, W)
+ I_current_lab, ##### (3, H, W)
+ I_reference_lab, ### (3, H, W)
+ flow_forward, ###### (2, H, W)
+ mask, ############## (1, H, W)
+ placeholder_lab, ### (3, H, W)
+ self_ref_flag, ##### (3, H, W)
+ prev_frame_path,
+ curr_frame_path,
+ ref_path,
+ ) = sample
+
+ I_last_lab = I_last_lab.to(local_rank)
+ I_current_lab = I_current_lab.to(local_rank)
+ I_reference_lab = I_reference_lab.to(local_rank)
+ flow_forward = flow_forward.to(local_rank)
+ mask = mask.to(local_rank)
+ placeholder_lab = placeholder_lab.to(local_rank)
+ self_ref_flag = self_ref_flag.to(local_rank)
+
+ # Channel 0 is taken as L and channels 1-2 as ab for each Lab tensor.
+ I_last_l = I_last_lab[:, 0:1, :, :]
+ I_last_ab = I_last_lab[:, 1:3, :, :]
+ I_current_l = I_current_lab[:, 0:1, :, :]
+ I_current_ab = I_current_lab[:, 1:3, :, :]
+ I_reference_l = I_reference_lab[:, 0:1, :, :]
+ I_reference_ab = I_reference_lab[:, 1:3, :, :]
+ I_reference_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_reference_l), I_reference_ab), dim=1))
+
+ # _load_sample_time = timer_handler.compute_time("load_sample")
+ # timer_handler.compute_time("forward_model")
+
+ # Reference features are computed once and reused for both frames; the first one is discarded here.
+ features_B = embed_net(I_reference_rgb)
+ _, B_feat_1, B_feat_2, B_feat_3 = features_B
+
+ # ================== COLORIZATION ==================
+ # The last frame
+ I_last_ab_predict, I_last_nonlocal_lab_predict = frame_colorization(
+ IA_l=I_last_l,
+ IB_lab=I_reference_lab,
+ IA_last_lab=placeholder_lab,
+ features_B=features_B,
+ embed_net=embed_net,
+ colornet=colornet,
+ nonlocal_net=nonlocal_net,
+ luminance_noise=opt.luminance_noise,
+ )
+ I_last_lab_predict = torch.cat((I_last_l, I_last_ab_predict), dim=1)
+
+ # The current frame
+ I_current_ab_predict, I_current_nonlocal_lab_predict = frame_colorization(
+ IA_l=I_current_l,
+ IB_lab=I_reference_lab,
+ IA_last_lab=I_last_lab_predict,
+ features_B=features_B,
+ embed_net=embed_net,
+ colornet=colornet,
+ nonlocal_net=nonlocal_net,
+ luminance_noise=opt.luminance_noise,
+ )
+ # NOTE(review): the current-frame ab prediction is concatenated with I_last_l,
+ # not I_current_l — confirm this is intended.
+ I_current_lab_predict = torch.cat((I_last_l, I_current_ab_predict), dim=1)
+
+ # ================ UPDATE GENERATOR ================
+ # NOTE(review): despite the header above, the optimizer stepped in this section is optimizer_d.
+ # NOTE(review): fake_data_lab, real_data_lab and discriminator_loss are only
+ # assigned when opt.weight_gan > 0, yet they are read further down
+ # (generator loss and loss_handler) without that guard.
+ if opt.weight_gan > 0:
+ optimizer_g.zero_grad()
+ optimizer_d.zero_grad()
+ fake_data_lab = torch.cat(
+ (
+ uncenter_l(I_current_l),
+ I_current_ab_predict,
+ uncenter_l(I_last_l),
+ I_last_ab_predict,
+ ),
+ dim=1,
+ )
+ real_data_lab = torch.cat(
+ (
+ uncenter_l(I_current_l),
+ I_current_ab,
+ uncenter_l(I_last_l),
+ I_last_ab,
+ ),
+ dim=1,
+ )
+
+ # Indices -1, 0, ..., batch_size-2 rotate the real batch by one position.
+ # NOTE(review): the index length comes from opt.batch_size rather than the
+ # actual batch dimension; a smaller final batch would be indexed out of
+ # range unless the loader drops it — confirm.
+ if opt.permute_data:
+ batch_index = torch.arange(-1, opt.batch_size - 1, dtype=torch.long)
+ real_data_lab = real_data_lab[batch_index, ...]
+
+ discriminator_loss = discriminator_loss_fn(real_data_lab, fake_data_lab, discriminator)
+ discriminator_loss.backward()
+ optimizer_d.step()
+
+ # Clear all gradients again before the generator-side losses are back-propagated.
+ optimizer_g.zero_grad()
+ optimizer_d.zero_grad()
+
+ # ================== COMPUTE LOSS ==================
+ # L1 loss
+ # (the option name is spelled "weigth_l1" in the parser, so the attribute is too)
+ l1_loss = l1_loss_fn(I_current_ab, I_current_ab_predict) * opt.weigth_l1
+
+ # Generator_loss. TODO: freeze this to train some first epoch
+ # The adversarial term is only computed once epoch_num exceeds opt.epoch_train_discriminator.
+ if epoch_num > opt.epoch_train_discriminator:
+ generator_loss = generator_loss_fn(real_data_lab, fake_data_lab, discriminator, opt.weight_gan, local_rank)
+
+ # Perceptual Loss
+ I_predict_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_current_l), I_current_ab_predict), dim=1))
+ _, pred_feat_1, pred_feat_2, pred_feat_3 = embed_net(I_predict_rgb)
+
+ I_current_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_current_l), I_current_ab), dim=1))
+ A_feat_0, _, _, A_feat_3 = embed_net(I_current_rgb)
+
+ perceptual_loss = perceptual_loss_fn(A_feat_3, pred_feat_3)
+
+ # Contextual Loss
+ # Reference features are detached; the three terms are weighted 8, 4 and 2.
+ contextual_style5_1 = torch.mean(contextual_forward_loss(pred_feat_3, B_feat_3.detach())) * 8
+ contextual_style4_1 = torch.mean(contextual_forward_loss(pred_feat_2, B_feat_2.detach())) * 4
+ contextual_style3_1 = torch.mean(contextual_forward_loss(pred_feat_1, B_feat_1.detach())) * 2
+
+ contextual_loss_total = (
+ contextual_style5_1 + contextual_style4_1 + contextual_style3_1
+ ) * opt.weight_contextual
+
+ # Consistent Loss
+ # NOTE(review): the second argument is the 2-channel I_last_ab_predict while the
+ # first is a 3-channel lab tensor — verify against consistent_loss_fn's signature.
+ consistent_loss = consistent_loss_fn(
+ I_current_lab_predict,
+ I_last_ab_predict,
+ I_current_nonlocal_lab_predict,
+ I_last_nonlocal_lab_predict,
+ flow_forward,
+ mask,
+ warping_layer,
+ weight_consistent=opt.weight_consistent,
+ weight_nonlocal_consistent=opt.weight_nonlocal_consistent,
+ device=local_rank,
+ )
+
+ # Smoothness loss
+ smoothness_loss = smoothness_loss_fn(
+ I_current_l,
+ I_current_lab,
+ I_current_ab_predict,
+ A_feat_0,
+ weighted_layer_color,
+ nonlocal_weighted_layer,
+ weight_smoothness=opt.weight_smoothness,
+ weight_nonlocal_smoothness=opt.weight_nonlocal_smoothness,
+ device=local_rank
+ )
+
+ # Total loss
+ total_loss = l1_loss + perceptual_loss + contextual_loss_total + consistent_loss + smoothness_loss
+ if epoch_num > opt.epoch_train_discriminator:
+ total_loss += generator_loss
+
+ # Add loss to loss handler
+ loss_handler.add_loss(key="total_loss", loss=total_loss.item())
+ loss_handler.add_loss(key="l1_loss", loss=l1_loss.item())
+ loss_handler.add_loss(key="perceptual_loss", loss=perceptual_loss.item())
+ loss_handler.add_loss(key="contextual_loss", loss=contextual_loss_total.item())
+ loss_handler.add_loss(key="consistent_loss", loss=consistent_loss.item())
+ loss_handler.add_loss(key="smoothness_loss", loss=smoothness_loss.item())
+ loss_handler.add_loss(key="discriminator_loss", loss=discriminator_loss.item())
+ if epoch_num > opt.epoch_train_discriminator:
+ loss_handler.add_loss(key="generator_loss", loss=generator_loss.item())
+ loss_handler.count_one_sample()
+
+ total_loss.backward()
+
+ # Generator step, then both learning-rate schedulers advance once per iteration.
+ optimizer_g.step()
+ step_optim_scheduler_g.step()
+ step_optim_scheduler_d.step()
+
+ # _forward_model_time = timer_handler.compute_time("forward_model")
+
+ # timer_handler.compute_time("training_logger")
+ # training_logger takes no arguments; it reads the variables assigned above as module globals.
+ training_logger()
+ # _training_logger_time = timer_handler.compute_time("training_logger")
+
+ ####
+ # Only the master process writes the checkpoint folder, named after the epoch number
+ # (presumably once per epoch — confirm the nesting level).
+ if is_master_process():
+ save_checkpoints(os.path.join(opt.checkpoint_dir, f"epoch_{epoch_num}"))
+ ####
+ if opt.use_wandb:
+ wandb.finish()
+ ddp_cleanup()
\ No newline at end of file
diff --git a/train_swin_224.py b/train_swin_224.py
new file mode 100644
index 0000000000000000000000000000000000000000..31e6069068771f8f3184f58bfea1dc4d0f11bdc0
--- /dev/null
+++ b/train_swin_224.py
@@ -0,0 +1,593 @@
+import os
+import sys
+import wandb
+import argparse
+import numpy as np
+from tqdm import tqdm
+from PIL import Image
+from datetime import datetime
+from zoneinfo import ZoneInfo
+from time import gmtime, strftime
+from collections import OrderedDict
+import random
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.backends.cudnn as cudnn
+from torchvision.transforms import CenterCrop
+from torch.utils.data import ConcatDataset, DataLoader
+import torchvision.transforms as torch_transforms
+from torchvision.utils import make_grid
+
+from src.losses import (
+ ContextualLoss,
+ ContextualLoss_forward,
+ Perceptual_loss,
+ consistent_loss_fn,
+ discriminator_loss_fn,
+ generator_loss_fn,
+ l1_loss_fn,
+ smoothness_loss_fn,
+)
+from src.models.CNN.GAN_models import Discriminator_x64_224
+from src.models.CNN.ColorVidNet import GeneralColorVidNet
+from src.models.CNN.FrameColor import frame_colorization
+from src.models.CNN.NonlocalNet import WeightedAverage_color, NonlocalWeightedAverage, GeneralWarpNet
+from src.models.vit.embed import GeneralEmbedModel
+from src.data import transforms
+from src.data.dataloader import VideosDataset, VideosDataset_ImageNet
+from src.utils import CenterPad_threshold
+from src.utils import (
+ TimeHandler,
+ RGB2Lab,
+ ToTensor,
+ Normalize,
+ LossHandler,
+ WarpingLayer,
+ uncenter_l,
+ tensor_lab2rgb,
+ print_num_params,
+)
+from src.scheduler import PolynomialLR
+
+def _str2bool(value):
+    """Convert a command-line token into a real boolean.
+
+    ``type=bool`` is unsuitable for argparse: ``bool("False")`` is ``True``
+    because every non-empty string is truthy, so ``--flag False`` would
+    silently enable the flag. This converter accepts the usual spellings
+    (case-insensitive) and rejects anything else with a clear error.
+
+    Raises:
+        argparse.ArgumentTypeError: If ``value`` is not a recognised boolean token.
+    """
+    if isinstance(value, bool):
+        return value
+    token = value.strip().lower()
+    if token in ("true", "t", "yes", "y", "1"):
+        return True
+    # The empty string stays falsy, as it was with ``type=bool``.
+    if token in ("false", "f", "no", "n", "0", ""):
+        return False
+    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")
+
+
+# Command-line options of the training script.
+parser = argparse.ArgumentParser()
+
+# Data locations (the three *_root_list options are comma-separated lists).
+parser.add_argument("--video_data_root_list", type=str, default="dataset")
+parser.add_argument("--flow_data_root_list", type=str, default="flow")
+parser.add_argument("--mask_data_root_list", type=str, default="mask")
+parser.add_argument("--data_root_imagenet", default="imagenet", type=str)
+parser.add_argument("--annotation_file_path", default="dataset/annotation.csv", type=str)
+parser.add_argument("--imagenet_pairs_file", default="imagenet_pairs.txt", type=str)
+
+# Hardware and loader settings.
+parser.add_argument("--gpu_ids", type=str, default="0,1,2,3", help="separate by comma")
+parser.add_argument("--workers", type=int, default=0)
+parser.add_argument("--batch_size", type=int, default=2)
+# NOTE(review): the default is a two-element list, but a value given on the
+# command line is parsed as a single int.
+parser.add_argument("--image_size", type=int, default=[384, 384])
+parser.add_argument("--ic", type=int, default=7)
+
+# Schedule, resume and optimizer settings.
+parser.add_argument("--epoch", type=int, default=40)
+parser.add_argument("--resume_epoch", type=int, default=0)
+parser.add_argument("--resume", type=_str2bool, default=False)
+parser.add_argument("--load_pretrained_model", type=_str2bool, default=False)
+parser.add_argument("--lr", type=float, default=1e-4)
+parser.add_argument("--beta1", type=float, default=0.5)
+parser.add_argument("--lr_step", type=int, default=1)
+parser.add_argument("--lr_gamma", type=float, default=0.9)
+parser.add_argument("--checkpoint_dir", type=str, default="checkpoints")
+parser.add_argument("--checkpoint_step", type=int, default=500)
+
+# Sampling probabilities.
+parser.add_argument("--real_reference_probability", type=float, default=0.7)
+parser.add_argument("--nonzero_placeholder_probability", type=float, default=0.0)
+
+# Loss configuration. "--weigth_l1" keeps its historical spelling because the
+# training code reads ``opt.weigth_l1``.
+parser.add_argument("--domain_invariant", type=_str2bool, default=False)
+parser.add_argument("--weigth_l1", type=float, default=2.0)
+parser.add_argument("--weight_contextual", type=float, default=0.5)
+parser.add_argument("--weight_perceptual", type=float, default=0.02)
+parser.add_argument("--weight_smoothness", type=float, default=5.0)
+parser.add_argument("--weight_gan", type=float, default=0.5)
+parser.add_argument("--weight_nonlocal_smoothness", type=float, default=0.0)
+parser.add_argument("--weight_nonlocal_consistent", type=float, default=0.0)
+parser.add_argument("--weight_consistent", type=float, default=0.05)
+parser.add_argument("--luminance_noise", type=float, default=2.0)
+parser.add_argument("--permute_data", type=_str2bool, default=True)
+parser.add_argument("--contextual_loss_direction", type=str, default="forward", help="forward or backward matching")
+parser.add_argument("--batch_accum_size", type=int, default=10)
+parser.add_argument("--epoch_train_discriminator", type=int, default=3)
+
+# Backbone, debugging and experiment tracking.
+parser.add_argument("--vit_version", type=str, default="vit_tiny_patch16_384")
+parser.add_argument("--use_dummy", type=_str2bool, default=False)
+parser.add_argument("--use_wandb", type=_str2bool, default=False)
+parser.add_argument("--use_feature_transform", type=_str2bool, default=False)
+parser.add_argument("--head_out_idx", type=str, default="8,9,10,11")
+parser.add_argument("--wandb_token", type=str, default="")
+parser.add_argument("--wandb_name", type=str, default="")
+
+
+def load_data():
+    """Build the shuffled training ``DataLoader`` over the video and ImageNet datasets.
+
+    One ``VideosDataset`` is created per (video, flow, mask) root triple taken
+    from the module-level ``opt``, a single ``VideosDataset_ImageNet`` is added,
+    and everything is concatenated into one dataset.
+
+    Returns:
+        A ``DataLoader`` using ``opt.batch_size`` and ``opt.workers`` with
+        ``shuffle=True``.
+    """
+    video_steps = [
+        CenterCrop(opt.image_size),
+        RGB2Lab(),
+        ToTensor(),
+        Normalize(),
+    ]
+
+    video_datasets = []
+    root_triples = zip(opt.video_data_root_list, opt.flow_data_root_list, opt.mask_data_root_list)
+    for video_root, flow_root, mask_root in root_triples:
+        video_datasets.append(
+            VideosDataset(
+                video_data_root=video_root,
+                flow_data_root=flow_root,
+                mask_data_root=mask_root,
+                imagenet_folder=opt.data_root_imagenet,
+                annotation_file_path=opt.annotation_file_path,
+                image_size=opt.image_size,
+                image_transform=transforms.Compose(video_steps),
+                real_reference_probability=opt.real_reference_probability,
+                nonzero_placeholder_probability=opt.nonzero_placeholder_probability,
+            )
+        )
+
+    # ImageNet pairs are padded instead of cropped, and the reference image
+    # receives an extra random flip and resized crop.
+    imagenet_steps = [CenterPad_threshold(opt.image_size), RGB2Lab(), ToTensor(), Normalize()]
+    reference_augmentations = [
+        torch_transforms.RandomHorizontalFlip(0.5),
+        torch_transforms.RandomResizedCrop(480, (0.98, 1.0), ratio=(0.8, 1.2)),
+    ]
+    imagenet_dataset = VideosDataset_ImageNet(
+        imagenet_data_root=opt.data_root_imagenet,
+        pairs_file=opt.imagenet_pairs_file,
+        image_size=opt.image_size,
+        transforms_imagenet=imagenet_steps,
+        distortion_level=4,
+        brightnessjitter=5,
+        nonzero_placeholder_probability=opt.nonzero_placeholder_probability,
+        extra_reference_transform=reference_augmentations,
+        real_reference_probability=opt.real_reference_probability,
+    )
+
+    # Plain shuffling over the concatenated dataset; no custom sampler is used.
+    combined = ConcatDataset([*video_datasets, imagenet_dataset])
+    return DataLoader(combined, batch_size=opt.batch_size, shuffle=True, num_workers=opt.workers)
+
+
+def training_logger():
+    """Log training progress and refresh the rolling ``*_iter.pth`` checkpoints.
+
+    Works on the training-loop globals. Runs only when ``total_iter`` is a
+    multiple of ``opt.checkpoint_step`` or equals ``len(data_loader)``. It then
+    logs averaged losses, learning rates and example images to wandb (when
+    enabled), overwrites the four checkpoint files in ``opt.checkpoint_dir``
+    and resets the loss accumulator.
+    """
+    if total_iter % opt.checkpoint_step != 0 and total_iter != len(data_loader):
+        return
+
+    # Mean of every accumulated loss since the previous reset.
+    train_loss_dict = {}
+    for loss_name, loss_sum in loss_handler.loss_dict.items():
+        train_loss_dict["train/" + str(loss_name)] = loss_sum / loss_handler.count_sample
+
+    generator_lrs = step_optim_scheduler_g.get_last_lr()
+    train_loss_dict["train/opt_g_lr_1"] = generator_lrs[0]
+    train_loss_dict["train/opt_g_lr_2"] = generator_lrs[1]
+    train_loss_dict["train/opt_d_lr"] = step_optim_scheduler_d.get_last_lr()[0]
+
+    # Losses of the latest iteration, one per line.
+    alert_text = "\n".join(
+        (
+            f"l1_loss: {l1_loss.item()}",
+            f"percep_loss: {perceptual_loss.item()}",
+            f"ctx_loss: {contextual_loss_total.item()}",
+            f"cst_loss: {consistent_loss.item()}",
+            f"sm_loss: {smoothness_loss.item()}",
+            f"total: {total_loss.item()}",
+        )
+    )
+
+    if opt.use_wandb:
+        wandb.log(train_loss_dict)
+        wandb.alert(title=f"Progress training #{total_iter}", text=alert_text)
+
+        # One image strip per batch item: prediction, reference, ground truth.
+        for sample_idx in range(I_predict_rgb.shape[0]):
+            panels = [
+                I_predict_rgb[sample_idx] * 255,
+                I_reference_rgb[sample_idx] * 255,
+                I_current_rgb[sample_idx] * 255,
+            ]
+            caption = (
+                "[LEFT] Predict, [CENTER] Reference, [RIGHT] Ground truth\n"
+                f"[REF] {ref_path[sample_idx]}, [FRAME] {curr_frame_path[sample_idx]}"
+            )
+            logged_image = wandb.Image(make_grid(panels, nrow=3), caption=caption)
+            wandb.log({f"example_{sample_idx}": logged_image})
+
+    # Fixed file names: each call overwrites the previous rolling checkpoint.
+    rolling_checkpoints = (
+        (nonlocal_net, "nonlocal_net_iter.pth"),
+        (colornet, "colornet_iter.pth"),
+        (discriminator, "discriminator_iter.pth"),
+        (embed_net, "embed_net_iter.pth"),
+    )
+    for network, file_name in rolling_checkpoints:
+        torch.save(network.state_dict(), os.path.join(opt.checkpoint_dir, file_name))
+
+    loss_handler.reset()
+
+
+def load_params(ckpt_file):
+    """Load the mapping stored in ``ckpt_file`` and return it as an ``OrderedDict``.
+
+    Keys and values are passed through unchanged, in the order the loaded
+    mapping yields them.
+    """
+    loaded = torch.load(ckpt_file)
+    return OrderedDict(loaded.items())
+
+
+def parse(parser, save=True):
+    """Run ``parser`` on the command line, print the options and optionally save them.
+
+    Args:
+        parser: The ``argparse.ArgumentParser`` to run.
+        save: When true, ``opt.txt`` is (re)written in the current working
+            directory with the script name, a GMT timestamp and the same
+            option listing that is printed.
+
+    Returns:
+        The parsed options namespace.
+    """
+    opt = parser.parse_args()
+
+    # Header, one "name: value" line per option (sorted by name), footer.
+    report = ["------------------------------ Options -------------------------------"]
+    report.extend(f"{name}: {value}" for name, value in sorted(vars(opt).items()))
+    report.append("-------------------------------- End ---------------------------------")
+
+    for line in report:
+        print(line)
+
+    if save:
+        script_name = os.path.basename(sys.argv[0])
+        timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
+        with open("opt.txt", "wt") as opt_file:
+            opt_file.write(script_name + " " + timestamp + "\n")
+            opt_file.writelines(line + "\n" for line in report)
+    return opt
+
+
+def gpu_setup():
+    """Make the first entry of ``opt.gpu_ids`` the current CUDA device.
+
+    Sets ``CUDA_DEVICE_ORDER`` to ``PCI_BUS_ID`` and enables
+    ``cudnn.benchmark`` before selecting the device. Reads the module-level
+    ``opt``.
+
+    Returns:
+        ``torch.device("cuda")``.
+    """
+    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+    cudnn.benchmark = True
+
+    first_gpu = opt.gpu_ids[0]
+    torch.cuda.set_device(first_gpu)
+
+    print("running on GPU", opt.gpu_ids)
+    return torch.device("cuda")
+
+
+if __name__ == "__main__":
+ ############################################## SETUP ###############################################
+ torch.multiprocessing.set_start_method("spawn", force=True)
+ # =============== GET PARSER OPTION ================
+ opt = parse(parser)
+ opt.video_data_root_list = opt.video_data_root_list.split(",")
+ opt.flow_data_root_list = opt.flow_data_root_list.split(",")
+ opt.mask_data_root_list = opt.mask_data_root_list.split(",")
+ opt.gpu_ids = list(map(int, opt.gpu_ids.split(",")))
+ opt.head_out_idx = list(map(int, opt.head_out_idx.split(",")))
+ n_dim_output = 3 if opt.use_feature_transform else 4
+ assert len(opt.head_out_idx) == 4, "Size of head_out_idx must be 4"
+
+ os.makedirs(opt.checkpoint_dir, exist_ok=True)
+
+ # =================== INIT WANDB ===================
+ if opt.use_wandb:
+ print("Save images to Wandb")
+ if opt.wandb_token != "":
+ try:
+ wandb.login(key=opt.wandb_token)
+ except:
+ pass
+ wandb.init(
+ project="video-colorization",
+ name=f"{opt.wandb_name} {datetime.now(tz=ZoneInfo('Asia/Ho_Chi_Minh')).strftime('%Y/%m/%d_%H-%M-%S')}",
+ )
+
+ # ================== SETUP DEVICE ==================
+ # torch.multiprocessing.set_start_method("spawn", force=True)
+ # device = gpu_setup()
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ ############################################ LOAD DATA #############################################
+ if opt.use_dummy:
+ H, W = 224, 224
+ I_last_lab = torch.rand(opt.batch_size, 3, H, W)
+ I_current_lab = torch.rand(opt.batch_size, 3, H, W)
+ I_reference_lab = torch.rand(opt.batch_size, 3, H, W)
+ flow_forward = torch.rand(opt.batch_size, 2, H, W)
+ mask = torch.rand(opt.batch_size, 1, H, W)
+ placeholder_lab = torch.rand(opt.batch_size, 3, H, W)
+ self_ref_flag = torch.rand(opt.batch_size, 3, H, W)
+ data_loader = [
+ [I_last_lab, I_current_lab, I_reference_lab, flow_forward, mask, placeholder_lab, self_ref_flag, None, None, None]
+ for _ in range(1)
+ ]
+ else:
+ data_loader = load_data()
+
+ ########################################## DEFINE NETWORK ##########################################
+ colornet = GeneralColorVidNet(opt.ic).to(device)
+ nonlocal_net = GeneralWarpNet(feature_channel=256).to(device) # change to 128 in swin tiny
+ discriminator = Discriminator_x64_224(ndf=64).to(device)
+ weighted_layer_color = WeightedAverage_color().to(device)
+ nonlocal_weighted_layer = NonlocalWeightedAverage().to(device)
+ warping_layer = WarpingLayer(device=device).to(device)
+ embed_net = GeneralEmbedModel(pretrained_model="swin-small", device=device).to(device)
+
+ print("-" * 59)
+ print("| TYPE | Model name | Num params |")
+ print("-" * 59)
+ colornet_params = print_num_params(colornet)
+ nonlocal_net_params = print_num_params(nonlocal_net)
+ discriminator_params = print_num_params(discriminator)
+ weighted_layer_color_params = print_num_params(weighted_layer_color)
+ nonlocal_weighted_layer_params = print_num_params(nonlocal_weighted_layer)
+ warping_layer_params = print_num_params(warping_layer)
+ embed_net_params = print_num_params(embed_net)
+
+ print("-" * 59)
+ print(
+ f"| TOTAL | | {('{:,}'.format(colornet_params+nonlocal_net_params+discriminator_params+weighted_layer_color_params+nonlocal_weighted_layer_params+warping_layer_params+embed_net_params)).rjust(10)} |"
+ )
+ print("-" * 59)
+
+ if opt.use_wandb:
+ wandb.watch(discriminator, log="all", log_freq=opt.checkpoint_step, idx=0)
+ wandb.watch(embed_net, log="all", log_freq=opt.checkpoint_step, idx=1)
+ wandb.watch(colornet, log="all", log_freq=opt.checkpoint_step, idx=2)
+ wandb.watch(nonlocal_net, log="all", log_freq=opt.checkpoint_step, idx=3)
+
+ # ============= USE PRETRAINED OR NOT ==============
+ if opt.load_pretrained_model:
+ # pretrained_path = "/workspace/video_colorization/ckpt_folder_ver_1_vit_small_patch16_384"
+ nonlocal_net.load_state_dict(load_params(os.path.join(opt.checkpoint_dir, "nonlocal_net_iter.pth")))
+ colornet.load_state_dict(load_params(os.path.join(opt.checkpoint_dir, "colornet_iter.pth")))
+ discriminator.load_state_dict(load_params(os.path.join(opt.checkpoint_dir, "discriminator_iter.pth")))
+ embed_net_params = load_params(os.path.join(opt.checkpoint_dir, "embed_net_iter.pth"))
+ embed_net.load_state_dict(embed_net_params)
+
+ ###################################### DEFINE LOSS FUNCTIONS #######################################
+ perceptual_loss_fn = Perceptual_loss(opt.domain_invariant, opt.weight_perceptual)
+ contextual_loss = ContextualLoss().to(device)
+ contextual_forward_loss = ContextualLoss_forward().to(device)
+
+ ######################################## DEFINE OPTIMIZERS #########################################
+ optimizer_g = optim.AdamW(
+ [
+ {"params": nonlocal_net.parameters(), "lr": opt.lr},
+ {"params": colornet.parameters(), "lr": 2 * opt.lr},
+ {"params": embed_net.parameters(), "lr": opt.lr},
+ ],
+ betas=(0.5, 0.999),
+ eps=1e-5,
+ amsgrad=True,
+ )
+
+ optimizer_d = optim.AdamW(
+ filter(lambda p: p.requires_grad, discriminator.parameters()),
+ lr=opt.lr,
+ betas=(0.5, 0.999),
+ amsgrad=True,
+ )
+
+ step_optim_scheduler_g = PolynomialLR(
+ optimizer_g,
+ step_size=opt.lr_step,
+ iter_warmup=0,
+ iter_max=len(data_loader) * opt.epoch,
+ power=0.9,
+ min_lr=1e-8,
+ )
+ step_optim_scheduler_d = PolynomialLR(
+ optimizer_d,
+ step_size=opt.lr_step,
+ iter_warmup=0,
+ iter_max=len(data_loader) * opt.epoch,
+ power=0.9,
+ min_lr=1e-8,
+ )
+ ########################################## DEFINE OTHERS ###########################################
+ downsampling_by2 = nn.AvgPool2d(kernel_size=2).to(device)
+ timer_handler = TimeHandler()
+ loss_handler = LossHandler() # Handle loss value
+ ############################################## TRAIN ###############################################
+
+ total_iter = 0
+ for epoch_num in range(1, opt.epoch + 1):
+ # if opt.use_wandb:
+ # wandb.log({"Current_trainning_epoch": epoch_num})
+ with tqdm(total=len(data_loader), position=0, leave=True) as pbar:
+ for iter, sample in enumerate(data_loader):
+ timer_handler.compute_time("load_sample")
+ total_iter += 1
+
+ # =============== LOAD DATA SAMPLE ================
+ (
+ I_last_lab, ######## (3, H, W)
+ I_current_lab, ##### (3, H, W)
+ I_reference_lab, ### (3, H, W)
+ flow_forward, ###### (2, H, W)
+ mask, ############## (1, H, W)
+ placeholder_lab, ### (3, H, W)
+ self_ref_flag, ##### (3, H, W)
+ prev_frame_path,
+ curr_frame_path,
+ ref_path,
+ ) = sample
+
+ I_last_lab = I_last_lab.to(device)
+ I_current_lab = I_current_lab.to(device)
+ I_reference_lab = I_reference_lab.to(device)
+ flow_forward = flow_forward.to(device)
+ mask = mask.to(device)
+ placeholder_lab = placeholder_lab.to(device)
+ self_ref_flag = self_ref_flag.to(device)
+
+ I_last_l = I_last_lab[:, 0:1, :, :]
+ I_last_ab = I_last_lab[:, 1:3, :, :]
+ I_current_l = I_current_lab[:, 0:1, :, :]
+ I_current_ab = I_current_lab[:, 1:3, :, :]
+ I_reference_l = I_reference_lab[:, 0:1, :, :]
+ I_reference_ab = I_reference_lab[:, 1:3, :, :]
+ I_reference_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_reference_l), I_reference_ab), dim=1))
+
+ _load_sample_time = timer_handler.compute_time("load_sample")
+ timer_handler.compute_time("forward_model")
+
+ features_B = embed_net(I_reference_rgb)
+ B_feat_0, B_feat_1, B_feat_2, B_feat_3 = features_B
+
+ # ================== COLORIZATION ==================
+ # The last frame
+ I_last_ab_predict, I_last_nonlocal_lab_predict = frame_colorization(
+ IA_l=I_last_l,
+ IB_lab=I_reference_lab,
+ IA_last_lab=placeholder_lab,
+ features_B=features_B,
+ embed_net=embed_net,
+ colornet=colornet,
+ nonlocal_net=nonlocal_net,
+ luminance_noise=opt.luminance_noise,
+ )
+ I_last_lab_predict = torch.cat((I_last_l, I_last_ab_predict), dim=1)
+
+ # The current frame
+ I_current_ab_predict, I_current_nonlocal_lab_predict = frame_colorization(
+ IA_l=I_current_l,
+ IB_lab=I_reference_lab,
+ IA_last_lab=I_last_lab_predict,
+ features_B=features_B,
+ embed_net=embed_net,
+ colornet=colornet,
+ nonlocal_net=nonlocal_net,
+ luminance_noise=opt.luminance_noise,
+ )
+ I_current_lab_predict = torch.cat((I_last_l, I_current_ab_predict), dim=1)
+
+ # ================ UPDATE GENERATOR ================
+ if opt.weight_gan > 0:
+ optimizer_g.zero_grad()
+ optimizer_d.zero_grad()
+ fake_data_lab = torch.cat(
+ (
+ uncenter_l(I_current_l),
+ I_current_ab_predict,
+ uncenter_l(I_last_l),
+ I_last_ab_predict,
+ ),
+ dim=1,
+ )
+ real_data_lab = torch.cat(
+ (
+ uncenter_l(I_current_l),
+ I_current_ab,
+ uncenter_l(I_last_l),
+ I_last_ab,
+ ),
+ dim=1,
+ )
+
+ if opt.permute_data:
+ batch_index = torch.arange(-1, opt.batch_size - 1, dtype=torch.long)
+ real_data_lab = real_data_lab[batch_index, ...]
+
+ discriminator_loss = discriminator_loss_fn(real_data_lab, fake_data_lab, discriminator)
+ discriminator_loss.backward()
+ optimizer_d.step()
+
+ optimizer_g.zero_grad()
+ optimizer_d.zero_grad()
+
+ # ================== COMPUTE LOSS ==================
+ # L1 loss
+ l1_loss = l1_loss_fn(I_current_ab, I_current_ab_predict) * opt.weigth_l1
+
+ # Generator_loss. TODO: freeze this to train some first epoch
+ if epoch_num > opt.epoch_train_discriminator:
+ generator_loss = generator_loss_fn(real_data_lab, fake_data_lab, discriminator, opt.weight_gan, device)
+
+ # Perceptual Loss
+ I_predict_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_current_l), I_current_ab_predict), dim=1))
+ pred_feat_0, pred_feat_1, pred_feat_2, pred_feat_3 = embed_net(I_predict_rgb)
+
+ I_current_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_current_l), I_current_ab), dim=1))
+ A_feat_0, _, _, A_feat_3 = embed_net(I_current_rgb)
+
+ perceptual_loss = perceptual_loss_fn(A_feat_3, pred_feat_3)
+
+ # Contextual Loss
+ contextual_style5_1 = torch.mean(contextual_forward_loss(pred_feat_3, B_feat_3.detach())) * 8
+ contextual_style4_1 = torch.mean(contextual_forward_loss(pred_feat_2, B_feat_2.detach())) * 4
+ contextual_style3_1 = torch.mean(contextual_forward_loss(pred_feat_1, B_feat_1.detach())) * 2
+ contextual_style2_1 = torch.mean(contextual_forward_loss(pred_feat_0, B_feat_0.detach()))
+ # if opt.use_feature_transform:
+ # contextual_style3_1 = (
+ # torch.mean(
+ # contextual_forward_loss(
+ # downsampling_by2(pred_feat_1),
+ # downsampling_by2(),
+ # )
+ # )
+ # * 2
+ # )
+ # else:
+ # contextual_style3_1 = (
+ # torch.mean(
+ # contextual_forward_loss(
+ # pred_feat_1,
+ # B_feat_1.detach(),
+ # )
+ # )
+ # * 2
+ # )
+
+ contextual_loss_total = (
+ contextual_style5_1 + contextual_style4_1 + contextual_style3_1 + contextual_style2_1
+ ) * opt.weight_contextual
+
+ # Consistent Loss
+ consistent_loss = consistent_loss_fn(
+ I_current_lab_predict,
+ I_last_ab_predict,
+ I_current_nonlocal_lab_predict,
+ I_last_nonlocal_lab_predict,
+ flow_forward,
+ mask,
+ warping_layer,
+ weight_consistent=opt.weight_consistent,
+ weight_nonlocal_consistent=opt.weight_nonlocal_consistent,
+ device=device,
+ )
+
+ # Smoothness loss
+ smoothness_loss = smoothness_loss_fn(
+ I_current_l,
+ I_current_lab,
+ I_current_ab_predict,
+ A_feat_0,
+ weighted_layer_color,
+ nonlocal_weighted_layer,
+ weight_smoothness=opt.weight_smoothness,
+ weight_nonlocal_smoothness=opt.weight_nonlocal_smoothness,
+ device=device,
+ )
+
+ # Total loss
+ total_loss = l1_loss + perceptual_loss + contextual_loss_total + consistent_loss + smoothness_loss
+ if epoch_num > opt.epoch_train_discriminator:
+ total_loss += generator_loss
+
+ # Add loss to loss handler
+ loss_handler.add_loss(key="total_loss", loss=total_loss.item())
+ loss_handler.add_loss(key="l1_loss", loss=l1_loss.item())
+ loss_handler.add_loss(key="perceptual_loss", loss=perceptual_loss.item())
+ loss_handler.add_loss(key="contextual_loss", loss=contextual_loss_total.item())
+ loss_handler.add_loss(key="consistent_loss", loss=consistent_loss.item())
+ loss_handler.add_loss(key="smoothness_loss", loss=smoothness_loss.item())
+ loss_handler.add_loss(key="discriminator_loss", loss=discriminator_loss.item())
+ if epoch_num > opt.epoch_train_discriminator:
+ loss_handler.add_loss(key="generator_loss", loss=generator_loss.item())
+ loss_handler.count_one_sample()
+
+ total_loss.backward()
+
+ optimizer_g.step()
+ step_optim_scheduler_g.step()
+ step_optim_scheduler_d.step()
+
+ _forward_model_time = timer_handler.compute_time("forward_model")
+
+ timer_handler.compute_time("training_logger")
+ training_logger()
+ _training_logger_time = timer_handler.compute_time("training_logger")
+
+ pbar.set_description(
+ f"Epochs: {epoch_num}, Load_sample: {_load_sample_time:.3f}s, Forward: {_forward_model_time:.3f}s, log: {_training_logger_time:.3f}s"
+ )
+ pbar.update(1)
diff --git a/train_swin_224_ddp.py b/train_swin_224_ddp.py
new file mode 100644
index 0000000000000000000000000000000000000000..b12218dca1bf6a54657613fe51e7116ea04c07f7
--- /dev/null
+++ b/train_swin_224_ddp.py
@@ -0,0 +1,634 @@
+import os
+import sys
+import wandb
+import argparse
+import numpy as np
+from tqdm import tqdm
+from PIL import Image
+from datetime import datetime
+from zoneinfo import ZoneInfo
+from time import gmtime, strftime
+from collections import OrderedDict
+import random
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.backends.cudnn as cudnn
+from torchvision.transforms import CenterCrop
+from torch.utils.data import ConcatDataset, DataLoader, WeightedRandomSampler
+import torchvision.transforms as torch_transforms
+from torchvision.utils import make_grid
+
+from src.losses import (
+ ContextualLoss,
+ ContextualLoss_forward,
+ Perceptual_loss,
+ consistent_loss_fn,
+ discriminator_loss_fn,
+ generator_loss_fn,
+ l1_loss_fn,
+ smoothness_loss_fn,
+)
+from src.models.CNN.GAN_models import Discriminator_x64_224
+from src.models.CNN.ColorVidNet import GeneralColorVidNet
+from src.models.CNN.FrameColor import frame_colorization
+from src.models.CNN.NonlocalNet import WeightedAverage_color, NonlocalWeightedAverage, GeneralWarpNet
+from src.models.vit.embed import GeneralEmbedModel
+from src.models.vit.config import load_config
+from src.data import transforms
+from src.data.dataloader import VideosDataset, VideosDataset_ImageNet
+from src.utils import CenterPad_threshold
+from src.utils import (
+ TimeHandler,
+ RGB2Lab,
+ ToTensor,
+ Normalize,
+ LossHandler,
+ WarpingLayer,
+ uncenter_l,
+ tensor_lab2rgb,
+ print_num_params,
+ SquaredPadding
+)
+from src.scheduler import PolynomialLR
+
+from torch.nn.parallel import DistributedDataParallel as DDP
+import torch.distributed as dist
+from torch.utils.data.distributed import DistributedSampler
+
+
+# Command-line options for the DDP training script. The parsed namespace is
+# stored in the module-level `opt` by parse(parser) in the __main__ block.
+parser = argparse.ArgumentParser()
+# Data locations. The three *_list options are comma-separated strings that the
+# __main__ block splits into parallel lists.
+parser.add_argument("--video_data_root_list", type=str, default="dataset")
+parser.add_argument("--flow_data_root_list", type=str, default='flow')
+parser.add_argument("--mask_data_root_list", type=str, default='mask')
+parser.add_argument("--data_root_imagenet", default="imagenet", type=str)
+parser.add_argument("--annotation_file_path", default="dataset/annotation.csv", type=str)
+parser.add_argument("--imagenet_pairs_file", default="imagenet_pairs.txt", type=str)
+# Runtime / loader settings.
+parser.add_argument("--gpu_ids", type=str, default="0,1,2,3", help="separate by comma")
+parser.add_argument("--workers", type=int, default=0)
+parser.add_argument("--batch_size", type=int, default=2)
+# Two integers; load_data() reads opt.image_size[0], so a single scalar would
+# break it. nargs=2 keeps a command-line value in the same list shape as the default.
+parser.add_argument("--image_size", type=int, nargs=2, default=[224, 224])
+parser.add_argument("--ic", type=int, default=7)
+# Schedule, resume and checkpointing.
+parser.add_argument("--epoch", type=int, default=40)
+parser.add_argument("--resume_epoch", type=int, default=0)
+parser.add_argument("--resume", action='store_true')
+parser.add_argument("--load_pretrained_model", action='store_true')
+parser.add_argument("--pretrained_model_dir", type=str, default='ckpt')
+parser.add_argument("--lr", type=float, default=1e-4)
+parser.add_argument("--beta1", type=float, default=0.5)
+parser.add_argument("--lr_step", type=int, default=1)
+parser.add_argument("--lr_gamma", type=float, default=0.9)
+parser.add_argument("--checkpoint_dir", type=str, default="checkpoints")
+parser.add_argument("--checkpoint_step", type=int, default=500)
+# Dataset sampling behaviour (forwarded to the dataset constructors).
+parser.add_argument("--real_reference_probability", type=float, default=0.7)
+parser.add_argument("--nonzero_placeholder_probability", type=float, default=0.0)
+parser.add_argument("--domain_invariant", action='store_true')
+# Loss weights. The misspelled "--weigth_l1" (and opt.weigth_l1) is kept because
+# the training loop reads it; "--weight_l1" is accepted as an alias.
+parser.add_argument("--weigth_l1", "--weight_l1", dest="weigth_l1", type=float, default=2.0)
+parser.add_argument("--weight_contextual", type=float, default=0.5)
+parser.add_argument("--weight_perceptual", type=float, default=0.02)
+parser.add_argument("--weight_smoothness", type=float, default=5.0)
+parser.add_argument("--weight_gan", type=float, default=0.5)
+parser.add_argument("--weight_nonlocal_smoothness", type=float, default=0.0)
+parser.add_argument("--weight_nonlocal_consistent", type=float, default=0.0)
+parser.add_argument("--weight_consistent", type=float, default=0.05)
+parser.add_argument("--luminance_noise", type=float, default=2.0)
+parser.add_argument("--permute_data", action='store_true')
+parser.add_argument("--contextual_loss_direction", type=str, default="forward", help="forward or backward matching")
+parser.add_argument("--batch_accum_size", type=int, default=10)
+parser.add_argument("--epoch_train_discriminator", type=int, default=3)
+# Backbone selection and logging.
+parser.add_argument("--vit_version", type=str, default="vit_tiny_patch16_384")
+parser.add_argument("--use_dummy", action='store_true')
+parser.add_argument("--use_wandb", action='store_true')
+parser.add_argument("--use_feature_transform", action='store_true')
+parser.add_argument("--head_out_idx", type=str, default="8,9,10,11")
+parser.add_argument("--wandb_token", type=str, default="")
+parser.add_argument("--wandb_name", type=str, default="")
+
+
+def ddp_setup():
+    """Bind this process to its GPU and join the NCCL process group.
+
+    Reads the ``LOCAL_RANK`` environment variable, which must be set by the
+    launcher.
+
+    Returns:
+        int: the local rank; the __main__ block uses it as the device index
+        for every ``.to(...)`` call and for the DDP wrappers.
+
+    Raises:
+        KeyError: if ``LOCAL_RANK`` is not set.
+    """
+    local_rank = int(os.environ['LOCAL_RANK'])
+    # Select the device before creating the group so that default CUDA
+    # allocations and NCCL collectives of this process use its own GPU.
+    torch.cuda.set_device(local_rank)
+    dist.init_process_group(backend="nccl")
+    return local_rank
+
+def ddp_cleanup():
+    """Destroy the default process group if one is currently initialised.
+
+    Safe to call when setup never ran or the group was already destroyed.
+    """
+    if dist.is_available() and dist.is_initialized():
+        dist.destroy_process_group()
+
+def prepare_dataloader_ddp(dataset, batch_size=4, pin_memory=False, num_workers=0):
+    """Create a DataLoader whose batches are drawn through a shuffling DistributedSampler.
+
+    Args:
+        dataset: dataset to iterate over.
+        batch_size: passed to DataLoader.
+        pin_memory: passed to DataLoader.
+        num_workers: passed to DataLoader.
+
+    Returns:
+        DataLoader: loader whose ``sampler`` is the DistributedSampler, so the
+        caller can invoke ``loader.sampler.set_epoch(...)``.
+    """
+    loader_kwargs = dict(
+        batch_size=batch_size,
+        pin_memory=pin_memory,
+        num_workers=num_workers,
+        sampler=DistributedSampler(dataset, shuffle=True),
+    )
+    return DataLoader(dataset, **loader_kwargs)
+
+def is_master_process():
+    """Return True when the ``RANK`` environment variable is 0.
+
+    Raises:
+        KeyError: if ``RANK`` is not set.
+    """
+    return int(os.environ['RANK']) == 0
+
+def load_data():
+    """Build the distributed training loader over the video datasets plus the ImageNet pairs dataset.
+
+    All settings come from the module-level ``opt`` namespace. One
+    VideosDataset is created per (video, flow, mask) root triple; the three
+    root lists are zipped, so the shortest list decides how many are built.
+
+    Returns:
+        DataLoader: result of prepare_dataloader_ddp over the concatenated datasets.
+    """
+    target_size = opt.image_size[0]
+
+    def _lab_pipeline():
+        # New transform instances on each call, so the video and ImageNet
+        # pipelines do not share objects.
+        return [
+            SquaredPadding(target_size=target_size),
+            RGB2Lab(),
+            ToTensor(),
+            Normalize(),
+        ]
+
+    video_pipeline = _lab_pipeline()
+    root_triples = zip(opt.video_data_root_list, opt.flow_data_root_list, opt.mask_data_root_list)
+    video_datasets = []
+    for video_root, flow_root, mask_root in root_triples:
+        video_datasets.append(
+            VideosDataset(
+                video_data_root=video_root,
+                flow_data_root=flow_root,
+                mask_data_root=mask_root,
+                imagenet_folder=opt.data_root_imagenet,
+                annotation_file_path=opt.annotation_file_path,
+                image_size=opt.image_size,
+                image_transform=torch_transforms.Compose(video_pipeline),
+                real_reference_probability=opt.real_reference_probability,
+                nonzero_placeholder_probability=opt.nonzero_placeholder_probability,
+            )
+        )
+
+    imagenet_pipeline = _lab_pipeline()
+    # Extra augmentation handed to the ImageNet dataset for its reference images.
+    reference_augmentations = [
+        torch_transforms.RandomHorizontalFlip(0.5),
+        torch_transforms.RandomResizedCrop(480, (0.98, 1.0), ratio=(0.8, 1.2)),
+    ]
+    imagenet_dataset = VideosDataset_ImageNet(
+        imagenet_data_root=opt.data_root_imagenet,
+        pairs_file=opt.imagenet_pairs_file,
+        image_size=opt.image_size,
+        transforms_imagenet=imagenet_pipeline,
+        distortion_level=4,
+        brightnessjitter=5,
+        nonzero_placeholder_probability=opt.nonzero_placeholder_probability,
+        extra_reference_transform=reference_augmentations,
+        real_reference_probability=opt.real_reference_probability,
+    )
+
+    all_datasets = ConcatDataset(video_datasets + [imagenet_dataset])
+    return prepare_dataloader_ddp(
+        all_datasets,
+        batch_size=opt.batch_size,
+        pin_memory=False,
+        num_workers=opt.workers,
+    )
+
+def save_checkpoints(saved_path):
+    """Write model weights and the optimiser/scheduler state into ``saved_path``.
+
+    Uses the module-level networks, optimisers, schedulers, ``epoch_num`` and
+    ``total_iter``. The DDP-wrapped networks are saved through ``.module`` so
+    their keys carry no ``module.`` prefix; ``embed_net`` is saved directly.
+
+    Args:
+        saved_path: target directory, created if it does not exist.
+    """
+    os.makedirs(saved_path, exist_ok=True)
+
+    # (file name, network) pairs, written in this order.
+    weight_files = (
+        ("nonlocal_net.pth", nonlocal_net.module),
+        ("colornet.pth", colornet.module),
+        ("discriminator.pth", discriminator.module),
+        ("embed_net.pth", embed_net),
+    )
+    for file_name, network in weight_files:
+        torch.save(network.state_dict(), os.path.join(saved_path, file_name))
+
+    # Everything needed to resume: progress counters plus optimiser and scheduler states.
+    learning_state = dict(
+        epoch=epoch_num,
+        total_iter=total_iter,
+        optimizer_g=optimizer_g.state_dict(),
+        optimizer_d=optimizer_d.state_dict(),
+        optimizer_schedule_g=step_optim_scheduler_g.state_dict(),
+        optimizer_schedule_d=step_optim_scheduler_d.state_dict(),
+    )
+    torch.save(learning_state, os.path.join(saved_path, "learning_state.pth"))
+
+def training_logger():
+    """Report averaged losses and example images, then reset the loss accumulator.
+
+    Acts only when ``total_iter`` is a multiple of ``opt.checkpoint_step`` or
+    equals ``len(data_loader)``; otherwise returns immediately. Reads the loss
+    tensors, images and paths of the current iteration from module-level
+    variables set by the training loop.
+    """
+    on_checkpoint_step = total_iter % opt.checkpoint_step == 0
+    on_loader_length = total_iter == len(data_loader)
+    if not (on_checkpoint_step or on_loader_length):
+        return
+
+    # Accumulated sums divided by the number of samples counted since the last reset.
+    sample_count = loss_handler.count_sample
+    train_loss_dict = {}
+    for loss_name, loss_sum in loss_handler.loss_dict.items():
+        train_loss_dict["train/" + str(loss_name)] = loss_sum / sample_count
+
+    generator_lrs = step_optim_scheduler_g.get_last_lr()
+    train_loss_dict["train/opt_g_lr_1"] = generator_lrs[0]
+    train_loss_dict["train/opt_g_lr_2"] = generator_lrs[1]
+    train_loss_dict["train/opt_d_lr"] = step_optim_scheduler_d.get_last_lr()[0]
+
+    alert_text = f"l1_loss: {l1_loss.item()}\npercep_loss: {perceptual_loss.item()}\nctx_loss: {contextual_loss_total.item()}\ncst_loss: {consistent_loss.item()}\nsm_loss: {smoothness_loss.item()}\ntotal: {total_loss.item()}"
+
+    if opt.use_wandb:
+        wandb.log(train_loss_dict)
+        wandb.alert(title=f"Progress training #{total_iter}", text=alert_text)
+
+        # One side-by-side grid per batch element: prediction, reference, ground truth.
+        for idx in range(I_predict_rgb.shape[0]):
+            panels = [
+                (I_predict_rgb[idx] * 255),
+                (I_reference_rgb[idx] * 255),
+                (I_current_rgb[idx] * 255),
+            ]
+            caption = "[LEFT] Predict, [CENTER] Reference, [RIGHT] Ground truth\n[REF] {}, [FRAME] {}".format(
+                ref_path[idx], curr_frame_path[idx]
+            )
+            wandb_image = wandb.Image(make_grid(panels, nrow=3), caption=caption)
+            wandb.log({f"example_{idx}": wandb_image})
+
+    # Save learning state checkpoint
+    # save_checkpoints(os.path.join(opt.checkpoint_dir, 'runs'))
+    loss_handler.reset()
+
+
+def load_params(ckpt_file, local_rank, has_module=False):
+    """Load a checkpoint mapping onto ``cuda:<local_rank>``, optionally prefixing keys with ``module.``.
+
+    Args:
+        ckpt_file: path handed to ``torch.load``.
+        local_rank: CUDA device index used for ``map_location``.
+        has_module: when True every key gets a ``module.`` prefix, which is
+            the key layout expected by the DDP-wrapped networks.
+
+    Returns:
+        OrderedDict: the loaded entries in their stored order.
+    """
+    state = torch.load(ckpt_file, map_location=f'cuda:{local_rank}')
+    if has_module:
+        return OrderedDict(("module." + name, value) for name, value in state.items())
+    return OrderedDict(state.items())
+
+
+def parse(parser, save=True):
+    """Parse the command line, print every option and optionally dump them to ``opt.txt``.
+
+    Args:
+        parser: a configured ``argparse.ArgumentParser``.
+        save: when True, write the script name, a GMT timestamp and the sorted
+            options to ``opt.txt`` in the current working directory.
+
+    Returns:
+        argparse.Namespace: the parsed options.
+    """
+    opt = parser.parse_args()
+
+    header = "------------------------------ Options -------------------------------"
+    footer = "-------------------------------- End ---------------------------------"
+    # One "name: value" line per option, sorted by option name.
+    option_lines = [f"{name!s}: {value!s}" for name, value in sorted(vars(opt).items())]
+
+    print(header)
+    for line in option_lines:
+        print(line)
+    print(footer)
+
+    if save:
+        with open("opt.txt", "wt") as opt_file:
+            title = os.path.basename(sys.argv[0]) + " " + strftime("%Y-%m-%d %H:%M:%S", gmtime())
+            report = [title, header, *option_lines, footer]
+            opt_file.write("".join(line + "\n" for line in report))
+    return opt
+
+
+def gpu_setup():
+    """Select the first GPU listed in ``opt.gpu_ids`` and return a CUDA device handle.
+
+    Also sets ``CUDA_DEVICE_ORDER`` to ``PCI_BUS_ID`` and enables cuDNN benchmark mode.
+
+    Returns:
+        torch.device: ``torch.device("cuda")``.
+    """
+    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+    cudnn.benchmark = True
+    primary_gpu = opt.gpu_ids[0]
+    torch.cuda.set_device(primary_gpu)
+    print("running on GPU", opt.gpu_ids)
+    return torch.device("cuda")
+
+
+if __name__ == "__main__":
+    # Entry point of one DDP worker process. RANK and LOCAL_RANK are read from
+    # the environment by is_master_process() and ddp_setup().
+    ############################################## SETUP ###############################################
+    torch.multiprocessing.set_start_method("spawn", force=True)
+    # =============== GET PARSER OPTION ================
+    opt = parse(parser)
+    # Comma-separated command-line strings become lists.
+    opt.video_data_root_list = opt.video_data_root_list.split(",")
+    opt.flow_data_root_list = opt.flow_data_root_list.split(",")
+    opt.mask_data_root_list = opt.mask_data_root_list.split(",")
+    opt.gpu_ids = list(map(int, opt.gpu_ids.split(",")))
+    opt.head_out_idx = list(map(int, opt.head_out_idx.split(",")))
+    n_dim_output = 3 if opt.use_feature_transform else 4
+    assert len(opt.head_out_idx) == 4, "Size of head_out_idx must be 4"
+
+    # The adversarial tensors (real/fake stacks, discriminator loss) only exist
+    # when weight_gan is positive, so every later use is gated on this flag.
+    use_gan = opt.weight_gan > 0
+
+    # =================== INIT WANDB ===================
+    # NOTE: the master-only guard is disabled, so every rank initialises wandb.
+    # if is_master_process():
+    if opt.use_wandb:
+        print("Save images to Wandb")
+        if opt.wandb_token != "":
+            try:
+                wandb.login(key=opt.wandb_token)
+            except Exception as err:
+                # Still best effort (training continues), but report why the
+                # login failed instead of hiding it.
+                print(f"wandb.login failed: {err}")
+        wandb.init(
+            project="video-colorization",
+            group=f"{opt.wandb_name} {datetime.now(tz=ZoneInfo('Asia/Ho_Chi_Minh')).strftime('%Y/%m/%d_%H-%M-%S')}",
+            # group="DDP"
+        )
+
+    # ================== SETUP DEVICE ==================
+    local_rank = ddp_setup()
+    # =================== VIT CONFIG ===================
+    cfg = load_config()
+    model_cfg = cfg["model"][opt.vit_version]
+    model_cfg["image_size"] = (384, 384)
+    model_cfg["backbone"] = opt.vit_version
+    model_cfg["dropout"] = 0.0
+    model_cfg["drop_path_rate"] = 0.1
+    model_cfg["n_cls"] = 10
+    ############################################ LOAD DATA #############################################
+    data_loader = load_data()
+    ########################################## DEFINE NETWORK ##########################################
+    # The three networks that receive optimiser updates are wrapped in DDP;
+    # the helper layers and embed_net are plain modules on this rank's device.
+    colornet = DDP(GeneralColorVidNet(opt.ic).to(local_rank), device_ids=[local_rank], output_device=local_rank)
+    nonlocal_net = DDP(GeneralWarpNet().to(local_rank), device_ids=[local_rank], output_device=local_rank)
+    discriminator = DDP(Discriminator_x64_224(ndf=64).to(local_rank), device_ids=[local_rank], output_device=local_rank)
+    weighted_layer_color = WeightedAverage_color().to(local_rank)
+    nonlocal_weighted_layer = NonlocalWeightedAverage().to(local_rank)
+    warping_layer = WarpingLayer(device=local_rank).to(local_rank)
+    embed_net = GeneralEmbedModel(device=local_rank).to(local_rank)
+
+    if is_master_process():
+        # Print number of parameters
+        print("-" * 59)
+        print("| TYPE | Model name | Num params |")
+        print("-" * 59)
+
+        colornet_params = print_num_params(colornet)
+        nonlocal_net_params = print_num_params(nonlocal_net)
+        discriminator_params = print_num_params(discriminator)
+        weighted_layer_color_params = print_num_params(weighted_layer_color)
+        nonlocal_weighted_layer_params = print_num_params(nonlocal_weighted_layer)
+        warping_layer_params = print_num_params(warping_layer)
+        embed_net_params = print_num_params(embed_net)
+        print("-" * 59)
+        print(
+            f"| TOTAL | | {('{:,}'.format(colornet_params+nonlocal_net_params+discriminator_params+weighted_layer_color_params+nonlocal_weighted_layer_params+warping_layer_params+embed_net_params)).rjust(10)} |"
+        )
+        print("-" * 59)
+    if opt.use_wandb:
+        wandb.watch(discriminator, log="all", log_freq=opt.checkpoint_step, idx=0)
+        wandb.watch(embed_net, log="all", log_freq=opt.checkpoint_step, idx=1)
+        wandb.watch(colornet, log="all", log_freq=opt.checkpoint_step, idx=2)
+        wandb.watch(nonlocal_net, log="all", log_freq=opt.checkpoint_step, idx=3)
+
+    ###################################### DEFINE LOSS FUNCTIONS #######################################
+    perceptual_loss_fn = Perceptual_loss(opt.domain_invariant, opt.weight_perceptual)
+    contextual_loss = ContextualLoss().to(local_rank)
+    contextual_forward_loss = ContextualLoss_forward().to(local_rank)
+    ######################################## DEFINE OPTIMIZERS #########################################
+    # Generator side: nonlocal_net at the base rate, colornet at twice the base
+    # rate. embed_net is not part of any optimiser in this script.
+    optimizer_g = optim.AdamW(
+        [
+            {"params": nonlocal_net.parameters(), "lr": opt.lr},
+            {"params": colornet.parameters(), "lr": 2 * opt.lr},
+        ],
+        betas=(0.5, 0.999),
+        eps=1e-5,
+        amsgrad=True,
+    )
+
+    optimizer_d = optim.AdamW(
+        filter(lambda p: p.requires_grad, discriminator.parameters()),
+        lr=opt.lr,
+        betas=(0.5, 0.999),
+        amsgrad=True,
+    )
+
+    # Both schedulers are stepped once per training iteration, hence iter_max
+    # is the number of batches per epoch times the number of epochs.
+    step_optim_scheduler_g = PolynomialLR(
+        optimizer_g,
+        step_size=opt.lr_step,
+        iter_warmup=0,
+        iter_max=len(data_loader) * opt.epoch,
+        power=0.9,
+        min_lr=1e-8,
+    )
+    step_optim_scheduler_d = PolynomialLR(
+        optimizer_d,
+        step_size=opt.lr_step,
+        iter_warmup=0,
+        iter_max=len(data_loader) * opt.epoch,
+        power=0.9,
+        min_lr=1e-8,
+    )
+    ########################################## DEFINE OTHERS ###########################################
+    downsampling_by2 = nn.AvgPool2d(kernel_size=2).to(local_rank)
+    # timer_handler = TimeHandler()
+    loss_handler = LossHandler()
+    ############################################## TRAIN ###############################################
+
+    # ============= USE PRETRAINED OR NOT ==============
+    if opt.load_pretrained_model:
+        # Weights are stored without the DDP "module." prefix (see
+        # save_checkpoints), so it is re-added for the wrapped networks.
+        nonlocal_net.load_state_dict(
+            load_params(os.path.join(opt.pretrained_model_dir, "nonlocal_net.pth"), local_rank, has_module=True)
+        )
+        colornet.load_state_dict(
+            load_params(os.path.join(opt.pretrained_model_dir, "colornet.pth"), local_rank, has_module=True)
+        )
+        discriminator.load_state_dict(
+            load_params(os.path.join(opt.pretrained_model_dir, "discriminator.pth"), local_rank, has_module=True)
+        )
+        embed_net_params = load_params(
+            os.path.join(opt.pretrained_model_dir, "embed_net.pth"), local_rank, has_module=False
+        )
+        # Drop the heads_out entry, under either key spelling, before loading.
+        if "module.vit.heads_out" in embed_net_params:
+            embed_net_params.pop("module.vit.heads_out")
+        elif "vit.heads_out" in embed_net_params:
+            embed_net_params.pop("vit.heads_out")
+        embed_net.load_state_dict(embed_net_params)
+
+        # Map onto this rank's GPU, as load_params does, so every worker does
+        # not deserialise the optimiser state onto the device it was saved from.
+        learning_checkpoint = torch.load(
+            os.path.join(opt.pretrained_model_dir, "learning_state.pth"),
+            map_location=f"cuda:{local_rank}",
+        )
+        optimizer_g.load_state_dict(learning_checkpoint["optimizer_g"])
+        optimizer_d.load_state_dict(learning_checkpoint["optimizer_d"])
+        step_optim_scheduler_g.load_state_dict(learning_checkpoint["optimizer_schedule_g"])
+        step_optim_scheduler_d.load_state_dict(learning_checkpoint["optimizer_schedule_d"])
+        total_iter = learning_checkpoint['total_iter']
+        start_epoch = learning_checkpoint['epoch'] + 1
+    else:
+        total_iter = 0
+        start_epoch = 1
+
+    for epoch_num in range(start_epoch, opt.epoch + 1):
+        # Reseed the distributed sampler so each epoch shuffles differently.
+        data_loader.sampler.set_epoch(epoch_num - 1)
+        # The generator's adversarial term is switched on only after the first
+        # epoch_train_discriminator epochs, and only when the GAN branch exists.
+        train_generator_gan = use_gan and epoch_num > opt.epoch_train_discriminator
+
+        # Only the master process shows a progress bar.
+        if is_master_process():
+            train_progress_bar = tqdm(
+                data_loader,
+                desc=f'Epoch {epoch_num}[Training]',
+                position=0,
+                leave=False,
+            )
+        else:
+            train_progress_bar = data_loader
+
+        for sample in train_progress_bar:
+            # timer_handler.compute_time("load_sample")
+            total_iter += 1
+            # =============== LOAD DATA SAMPLE ================
+            (
+                I_last_lab,  ######## (3, H, W)
+                I_current_lab,  ##### (3, H, W)
+                I_reference_lab,  ### (3, H, W)
+                flow_forward,  ###### (2, H, W)
+                mask,  ############## (1, H, W)
+                placeholder_lab,  ### (3, H, W)
+                self_ref_flag,  ##### (3, H, W)
+                prev_frame_path,
+                curr_frame_path,
+                ref_path,
+            ) = sample
+
+            I_last_lab = I_last_lab.to(local_rank)
+            I_current_lab = I_current_lab.to(local_rank)
+            I_reference_lab = I_reference_lab.to(local_rank)
+            flow_forward = flow_forward.to(local_rank)
+            mask = mask.to(local_rank)
+            placeholder_lab = placeholder_lab.to(local_rank)
+            self_ref_flag = self_ref_flag.to(local_rank)
+
+            # Channel 0 is L, channels 1-2 are ab.
+            I_last_l = I_last_lab[:, 0:1, :, :]
+            I_last_ab = I_last_lab[:, 1:3, :, :]
+            I_current_l = I_current_lab[:, 0:1, :, :]
+            I_current_ab = I_current_lab[:, 1:3, :, :]
+            I_reference_l = I_reference_lab[:, 0:1, :, :]
+            I_reference_ab = I_reference_lab[:, 1:3, :, :]
+            I_reference_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_reference_l), I_reference_ab), dim=1))
+
+            # _load_sample_time = timer_handler.compute_time("load_sample")
+            # timer_handler.compute_time("forward_model")
+
+            features_B = embed_net(I_reference_rgb)
+            B_feat_0, B_feat_1, B_feat_2, B_feat_3 = features_B
+
+            # ================== COLORIZATION ==================
+            # The last frame
+            I_last_ab_predict, I_last_nonlocal_lab_predict = frame_colorization(
+                IA_l=I_last_l,
+                IB_lab=I_reference_lab,
+                IA_last_lab=placeholder_lab,
+                features_B=features_B,
+                embed_net=embed_net,
+                colornet=colornet,
+                nonlocal_net=nonlocal_net,
+                luminance_noise=opt.luminance_noise,
+            )
+            I_last_lab_predict = torch.cat((I_last_l, I_last_ab_predict), dim=1)
+
+            # The current frame
+            I_current_ab_predict, I_current_nonlocal_lab_predict = frame_colorization(
+                IA_l=I_current_l,
+                IB_lab=I_reference_lab,
+                IA_last_lab=I_last_lab_predict,
+                features_B=features_B,
+                embed_net=embed_net,
+                colornet=colornet,
+                nonlocal_net=nonlocal_net,
+                luminance_noise=opt.luminance_noise,
+            )
+            # NOTE(review): this pairs the *last* frame's L channel with the
+            # current ab prediction; left unchanged because the result only feeds
+            # consistent_loss_fn, whose use of the L channel is not visible here.
+            # Confirm whether I_current_l was intended.
+            I_current_lab_predict = torch.cat((I_last_l, I_current_ab_predict), dim=1)
+
+            # ================ UPDATE GENERATOR ================
+            if use_gan:
+                optimizer_g.zero_grad()
+                optimizer_d.zero_grad()
+                fake_data_lab = torch.cat(
+                    (
+                        uncenter_l(I_current_l),
+                        I_current_ab_predict,
+                        uncenter_l(I_last_l),
+                        I_last_ab_predict,
+                    ),
+                    dim=1,
+                )
+                real_data_lab = torch.cat(
+                    (
+                        uncenter_l(I_current_l),
+                        I_current_ab,
+                        uncenter_l(I_last_l),
+                        I_last_ab,
+                    ),
+                    dim=1,
+                )
+
+                if opt.permute_data:
+                    # Rotate the real samples by one position along the batch
+                    # axis. Same result as indexing with arange(-1, batch_size - 1)
+                    # on a full batch, but also valid for a shorter final batch.
+                    real_data_lab = torch.roll(real_data_lab, shifts=1, dims=0)
+
+                discriminator_loss = discriminator_loss_fn(real_data_lab, fake_data_lab, discriminator)
+                discriminator_loss.backward()
+                optimizer_d.step()
+
+            optimizer_g.zero_grad()
+            optimizer_d.zero_grad()
+
+            # ================== COMPUTE LOSS ==================
+            # L1 loss
+            l1_loss = l1_loss_fn(I_current_ab, I_current_ab_predict) * opt.weigth_l1
+
+            # Generator_loss. TODO: freeze this to train some first epoch
+            if train_generator_gan:
+                generator_loss = generator_loss_fn(real_data_lab, fake_data_lab, discriminator, opt.weight_gan, local_rank)
+
+            # Perceptual Loss
+            I_predict_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_current_l), I_current_ab_predict), dim=1))
+            pred_feat_0, pred_feat_1, pred_feat_2, pred_feat_3 = embed_net(I_predict_rgb)
+
+            I_current_rgb = tensor_lab2rgb(torch.cat((uncenter_l(I_current_l), I_current_ab), dim=1))
+            A_feat_0, _, _, A_feat_3 = embed_net(I_current_rgb)
+
+            perceptual_loss = perceptual_loss_fn(A_feat_3, pred_feat_3)
+
+            # Contextual Loss: later feature levels get larger weights (8, 4, 2, 1);
+            # the reference features are detached so they act as fixed targets.
+            contextual_style5_1 = torch.mean(contextual_forward_loss(pred_feat_3, B_feat_3.detach())) * 8
+            contextual_style4_1 = torch.mean(contextual_forward_loss(pred_feat_2, B_feat_2.detach())) * 4
+            contextual_style3_1 = torch.mean(contextual_forward_loss(pred_feat_1, B_feat_1.detach())) * 2
+            contextual_style2_1 = torch.mean(contextual_forward_loss(pred_feat_0, B_feat_0.detach()))
+
+            contextual_loss_total = (
+                contextual_style5_1 + contextual_style4_1 + contextual_style3_1 + contextual_style2_1
+            ) * opt.weight_contextual
+
+            # Consistent Loss
+            consistent_loss = consistent_loss_fn(
+                I_current_lab_predict,
+                I_last_ab_predict,
+                I_current_nonlocal_lab_predict,
+                I_last_nonlocal_lab_predict,
+                flow_forward,
+                mask,
+                warping_layer,
+                weight_consistent=opt.weight_consistent,
+                weight_nonlocal_consistent=opt.weight_nonlocal_consistent,
+                device=local_rank,
+            )
+
+            # Smoothness loss
+            smoothness_loss = smoothness_loss_fn(
+                I_current_l,
+                I_current_lab,
+                I_current_ab_predict,
+                A_feat_0,
+                weighted_layer_color,
+                nonlocal_weighted_layer,
+                weight_smoothness=opt.weight_smoothness,
+                weight_nonlocal_smoothness=opt.weight_nonlocal_smoothness,
+                device=local_rank,
+            )
+
+            # Total loss
+            total_loss = l1_loss + perceptual_loss + contextual_loss_total + consistent_loss + smoothness_loss
+            if train_generator_gan:
+                total_loss += generator_loss
+
+            # Add loss to loss handler
+            loss_handler.add_loss(key="total_loss", loss=total_loss.item())
+            loss_handler.add_loss(key="l1_loss", loss=l1_loss.item())
+            loss_handler.add_loss(key="perceptual_loss", loss=perceptual_loss.item())
+            loss_handler.add_loss(key="contextual_loss", loss=contextual_loss_total.item())
+            loss_handler.add_loss(key="consistent_loss", loss=consistent_loss.item())
+            loss_handler.add_loss(key="smoothness_loss", loss=smoothness_loss.item())
+            if use_gan:
+                # Only defined when the discriminator was updated this iteration.
+                loss_handler.add_loss(key="discriminator_loss", loss=discriminator_loss.item())
+            if train_generator_gan:
+                loss_handler.add_loss(key="generator_loss", loss=generator_loss.item())
+            loss_handler.count_one_sample()
+
+            total_loss.backward()
+
+            optimizer_g.step()
+            step_optim_scheduler_g.step()
+            step_optim_scheduler_d.step()
+
+            # _forward_model_time = timer_handler.compute_time("forward_model")
+
+            # timer_handler.compute_time("training_logger")
+            training_logger()
+            # _training_logger_time = timer_handler.compute_time("training_logger")
+
+        ####
+        # One checkpoint directory per epoch, written by the master process only.
+        if is_master_process():
+            save_checkpoints(os.path.join(opt.checkpoint_dir, f"epoch_{epoch_num}"))
+        ####
+    if opt.use_wandb:
+        wandb.finish()
+    ddp_cleanup()
diff --git a/video_predictor.py b/video_predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f591774b11638b71a74577eaf0c3f014c053b2f
--- /dev/null
+++ b/video_predictor.py
@@ -0,0 +1,196 @@
+# from PIL import Image
+# from predictor import predictor
+# import torch
+# from src.utils import (
+# uncenter_l,
+# tensor_lab2rgb,
+# )
+
+import numpy as np
+import shutil
+import os
+import argparse
+import torch
+import glob
+from tqdm import tqdm
+from PIL import Image
+from collections import OrderedDict
+from src.models.vit.config import load_config
+import torchvision.transforms as transforms
+
+from src.models.CNN.ColorVidNet import GeneralColorVidNet
+from src.models.vit.embed import GeneralEmbedModel
+from src.models.CNN.NonlocalNet import GeneralWarpNet
+from src.utils import (
+ TimeHandler,
+ RGB2Lab,
+ ToTensor,
+ CenterPad,
+ Normalize,
+ LossHandler,
+ WarpingLayer,
+ uncenter_l,
+ tensor_lab2rgb,
+ print_num_params,
+ SquaredPadding,
+ UnpaddingSquare,
+)
+from src.models.CNN.FrameColor import frame_colorization
+# Script configuration: checkpoint directory, output directory, dataset root, device.
+weight_path = "./ckp/12/"
+out_path = "./output_video/"
+root_path = "./EvalDataset"
+device = "cuda"
+
+# Start from an empty output directory; the guard lets a first run succeed
+# when out_path does not exist yet (a bare rmtree raises FileNotFoundError).
+if os.path.isdir(out_path):
+    shutil.rmtree(out_path)
+os.makedirs(out_path)
+videos_list = os.listdir(root_path + "/clips/")
+def load_params(ckpt_file, map_location="cpu"):
+    """Load the state dict stored in ``ckpt_file`` and return it as an ``OrderedDict``."""
+    # Deserialize onto ``map_location`` (CPU by default) so a checkpoint saved from a
+    # GPU index absent on this machine still loads; load_state_dict copies the values.
+    params = torch.load(ckpt_file, map_location=map_location)
+    return OrderedDict(params.items())
+
+def _latest_checkpoint(prefix):
+    """Return the lexicographically last ``<prefix>*.pth`` file in ``weight_path``."""
+    # glob returns matches in arbitrary order, so sort before taking the last one.
+    matches = sorted(glob.glob(os.path.join(weight_path, prefix + "*.pth")))
+    if not matches:
+        raise FileNotFoundError(f"no '{prefix}*.pth' checkpoint in {weight_path}")
+    return matches[-1]
+
+
+# Build the three networks on the target device, in eval mode.
+embed_net = GeneralEmbedModel(pretrained_model="swin-small", device=device).to(device).eval()
+nonlocal_net = GeneralWarpNet(feature_channel=128).to(device).eval()
+colornet = GeneralColorVidNet(7).to(device).eval()
+
+# embed_net is loaded with strict=False; the other two use the default strict loading.
+embed_net.load_state_dict(load_params(_latest_checkpoint("embed_net")), strict=False)
+nonlocal_net.load_state_dict(load_params(_latest_checkpoint("nonlocal_net")))
+colornet.load_state_dict(load_params(_latest_checkpoint("colornet")))
+
+def custom_transform(listTrans, img):
+    """Run ``img`` through ``listTrans``; return ``(img.to(device), padding)``, padding None if unset."""
+    padding = None  # previously unbound when listTrans held no SquaredPadding
+    for trans in listTrans:
+        padded = isinstance(trans, SquaredPadding)
+        img, padding = trans(img, return_paddings=True) if padded else (trans(img), padding)
+    return img.to(device), padding
+# def save_numpy(path:str,ts,module):
+# np_ar=ts.numpy()
+# np.save(path.replace(".jpg","")+"_"+module,np_ar)
+# Preprocessing steps, applied in list order by custom_transform.
+transformer = [SquaredPadding(target_size=224), RGB2Lab(), ToTensor(), Normalize()]
+
+# When True, the main loop upsamples predicted ab and merges it with full-size L.
+high_resolution = True
+
+# Applied to the reference image only, before the transformer steps.
+center_padder = CenterPad((224, 224))
+# Colorize every clip under <root_path>/clips/ with its reference image from
+# <root_path>/ref/<video_name>/ and save the frames to <out_path>/<video_name>/.
+#
+# Per video:
+#   1. Load one reference image, pass it through center_padder and the
+#      `transformer` steps, and compute its embedding features once.
+#   2. For each frame, in sorted file-name order, predict the ab channels with
+#      frame_colorization, feeding back the previous frame's Lab prediction
+#      (all zeros for the first frame).
+#   3. When high_resolution is set, upsample the predicted ab to the size of the
+#      full-size L channel and combine the two before converting to RGB.
+#
+# NOTE(review): the result is saved as produced; no unpadding step is applied.
+_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tif", ".tiff")
+
+with torch.no_grad():
+    for video_name in tqdm(videos_list):
+        clip_dir = os.path.join(root_path, "clips", video_name)
+        # Keep image files only: other entries in the clip folder (e.g. rendered
+        # .mp4 videos) cannot be opened by Image.open.
+        frames_list = sorted(
+            name for name in os.listdir(clip_dir)
+            if name.lower().endswith(_IMAGE_EXTENSIONS)
+        )
+
+        # os.listdir order is arbitrary; sort so the chosen reference is reproducible.
+        ref_dir = os.path.join(root_path, "ref", video_name)
+        ref_path = os.path.join(ref_dir, sorted(os.listdir(ref_dir))[0])
+
+        video_out_path = os.path.join(out_path, video_name)
+        os.mkdir(video_out_path)
+
+        # Reference image -> Lab tensor with a leading batch dimension.
+        ref_frame_pil_rgb = Image.open(ref_path).convert("RGB")
+        I_reference_lab, _ = custom_transform(transformer, center_padder(ref_frame_pil_rgb))
+        I_reference_lab = torch.unsqueeze(I_reference_lab, 0)
+        I_reference_l = I_reference_lab[:, 0:1, :, :]
+        I_reference_ab = I_reference_lab[:, 1:3, :, :]
+        I_reference_rgb = tensor_lab2rgb(
+            torch.cat((uncenter_l(I_reference_l), I_reference_ab), dim=1)
+        ).to(device)
+        # The reference does not change within a video, so embed it once.
+        features_B = embed_net(I_reference_rgb)
+
+        # The first frame has no previous prediction: start from zeros.
+        I_last_lab_predict = torch.zeros((1, 3, 224, 224), device=device)
+
+        for frame_name in frames_list:
+            frame_path = os.path.join(clip_dir, frame_name)
+            current_frame_pil_rgb = Image.open(frame_path).convert("RGB")
+            im_w, im_h = current_frame_pil_rgb.size
+
+            I_current_lab, _ = custom_transform(transformer, current_frame_pil_rgb)
+            I_current_lab = torch.unsqueeze(I_current_lab, 0)
+            I_current_l = I_current_lab[:, 0:1, :, :]
+
+            # Already inside torch.no_grad(); no nested context is needed.
+            I_current_ab_predict, _ = frame_colorization(
+                IA_l=I_current_l,
+                IB_lab=I_reference_lab,
+                IA_last_lab=I_last_lab_predict,
+                features_B=features_B,
+                embed_net=embed_net,
+                colornet=colornet,
+                nonlocal_net=nonlocal_net,
+                luminance_noise=False,
+            )
+
+            if high_resolution:
+                # Re-run the preprocessing at the frame's own size to get a
+                # full-size L channel.
+                high_lab = transforms.Compose([
+                    SquaredPadding(target_size=max(im_h, im_w)),
+                    RGB2Lab(),
+                    ToTensor(),
+                    Normalize(),
+                ])
+                high_lab_current = high_lab(current_frame_pil_rgb)
+                high_lab_current = torch.unsqueeze(high_lab_current, dim=0).to(device)
+                high_l_current = high_lab_current[:, 0:1, :, :]
+                # Upsample to the exact spatial size of the full-size L channel: a
+                # float scale_factor can land one pixel off and make torch.cat fail.
+                upsampler = torch.nn.Upsample(
+                    size=tuple(high_l_current.shape[-2:]), mode="bilinear"
+                )
+                high_ab_predict = upsampler(I_current_ab_predict)
+                I_predict_rgb = tensor_lab2rgb(
+                    torch.cat((uncenter_l(high_l_current), high_ab_predict), dim=1)
+                )
+            else:
+                I_predict_rgb = tensor_lab2rgb(
+                    torch.cat((uncenter_l(I_current_l), I_current_ab_predict), dim=1)
+                )
+
+            rgb_uint8 = (I_predict_rgb[0] * 255).permute(1, 2, 0).detach().cpu().numpy().astype(np.uint8)
+            image_result2 = Image.fromarray(rgb_uint8)
+
+            # The low-resolution Lab prediction is fed back for the next frame.
+            I_last_lab_predict = torch.cat((I_current_l, I_current_ab_predict), dim=1)
+            image_result2.save(os.path.join(video_out_path, frame_name))
+
+# image_result2.show()
\ No newline at end of file