Spaces:

alexnasa
/

pixel3dmm

Running on Zero

App Files Files Community

alexnasa committed 10 days ago

Commit

901df39

verified ·

1 Parent(s): e02fa45

Delete src/network_inference.py

Browse files

Files changed (1) hide show

src/network_inference.py +0 -191

src/network_inference.py DELETED Viewed

@@ -1,191 +0,0 @@
-import traceback
-from tqdm import tqdm
-import os
-import torch
-import numpy as np
-from PIL import Image
-from omegaconf import OmegaConf
-from time import time
-from pixel3dmm.utils.uv import uv_pred_to_mesh
-from pixel3dmm.lightning.p3dmm_system import system as p3dmm_system
-#from pixel3dmm.lightning.system_flame_params_legacy import system as system_flame_params_legacy
-from pixel3dmm import env_paths
-def pad_to_3_channels(img):
-    """Zero-pad the last axis of ``img`` so that it has exactly three channels.
-
-    Arrays whose last axis already has size 3 are returned unchanged (the
-    very same object). Arrays with one or two channels get zero-filled
-    channels of the same dtype appended. Any other channel count raises
-    ``ValueError``.
-    """
-    n_channels = img.shape[-1]
-    if n_channels == 3:
-        return img
-    if n_channels not in (1, 2):
-        raise ValueError('too many dimensions in prediction type!')
-    # One zero channel template, repeated for however many channels are missing.
-    zero_channel = np.zeros_like(img[..., :1])
-    return np.concatenate([img] + [zero_channel] * (3 - n_channels), axis=-1)
-def gaussian_fn(M, std):
-    """Sample an unnormalised 1D Gaussian window with ``M`` points.
-
-    Sample positions are centred on the middle of the window, so the value
-    at the centre is 1 (hit exactly when ``M`` is odd). ``std`` is the
-    standard deviation measured in samples.
-    """
-    centred = torch.arange(0, M) - (M - 1.0) / 2.0
-    two_var = 2 * std * std
-    return torch.exp(centred.pow(2).neg() / two_var)
-def gkern(kernlen=256, std=128):
-    """Build a ``kernlen`` x ``kernlen`` anisotropic 2D Gaussian kernel.
-
-    The kernel is the outer product of two 1D Gaussian windows: values along
-    the first axis (rows) follow a window with standard deviation ``std``,
-    values along the second axis (columns) one with ``5 * std``. The result
-    is not normalised.
-    """
-    column_profile = gaussian_fn(kernlen, std=std * 5)
-    row_profile = gaussian_fn(kernlen, std=std)
-    return torch.outer(row_profile, column_profile)
-# Loaded once at import time from the path configured in
-# env_paths.VALID_VERTICES_WIDE_REGION (presumably an array of vertex
-# indices, judging by the name — confirm).
-# NOTE(review): `valid_verts` is not referenced by any code visible in this
-# file; verify whether other modules import it before removing.
-valid_verts = np.load(f'{env_paths.VALID_VERTICES_WIDE_REGION}')
-def _average_with_mirrored(output, output_mirrored):
-    """Average each dense prediction with the un-flipped prediction of the mirrored input (in place)."""
-    # (key, offset added to the sign-flipped first channel).
-    # NOTE(review): the 2*0.0075 shift for 'uv_map' presumably compensates an
-    # asymmetry of the UV layout — confirm against the training code.
-    for key, first_channel_offset in (('uv_map', 2 * 0.0075), ('normals', 0.0), ('disps', 0.0)):
-        if key not in output:
-            continue
-        # Flip the last axis back so the prediction lines up with the original image.
-        unflipped = torch.flip(output_mirrored[key], dims=[4])
-        # Mirroring negates the first channel of the prediction.
-        unflipped[:, :, 0, :, :] *= -1
-        if first_channel_offset:
-            unflipped[:, :, 0, :, :] += first_channel_offset
-        output[key] = (output[key] + unflipped) / 2
-    return output
-
-
-def _to_unit_range(prediction_type, pred):
-    """Map one raw prediction (channels on the first axis) to the [0, 1] range used for saving."""
-    if prediction_type == 'uv_map':
-        return torch.clamp((pred + 1) / 2, 0, 1)
-    if prediction_type == 'disps':
-        return torch.clamp((pred + 50) / 100, 0, 1)
-    if prediction_type in ('normals', 'normals_can'):
-        # Clamp the norm so that an all-zero normal does not produce NaNs.
-        norm = torch.norm(pred, dim=0, keepdim=True).clamp_min(1e-12)
-        unit = torch.clamp((pred / norm + 1) / 2, 0, 1)
-        # undo "weird" convention of normals that was used for preprocessing
-        return torch.stack([unit[0, ...], 1 - unit[2, ...], 1 - unit[1, ...]], dim=0)
-    raise ValueError(f'unsupported prediction type: {prediction_type!r}')
-
-
-def normals_n_uvs(cfg, model):
-    """Run the network on every cropped frame of the configured videos and save the predictions.
-
-    For each comma-separated name in ``cfg.video_name`` the frames in
-    ``<PREPROCESSED_DATA>/<name>/cropped`` are resized to 512x512, masked with
-    the segmentation in ``<PREPROCESSED_DATA>/<name>/seg_og`` and passed
-    through ``model.net`` twice (original and horizontally flipped); the two
-    predictions are averaged. For every type in the comma-separated
-    ``cfg.model.prediction_type`` the result is written to
-
-    * ``p3dmm/<type>/<frame>.png``        -- prediction only,
-    * ``p3dmm_wGT/<type>/<frame file>``   -- input and prediction side by side,
-    * ``p3dmm_extraViz/<type>/<frame file>`` -- mesh visualisation, only for
-      ``uv_map`` and only when ``cfg.viz_uv_mesh`` is set.
-
-    A video whose output folders already contain one file per input frame is
-    skipped. A failure on a single frame is printed and does not stop the
-    run. Requires a CUDA device. Returns ``None``.
-    """
-    if cfg.model.prediction_type == 'flame_params':
-        cfg.data.mirror_aug = False
-    batch_size = 1  # cfg.inference_batch_size
-    prediction_types = cfg.model.prediction_type.split(',')
-    video_names = str(cfg.video_name).split(',')
-    print(f"""
-            <<<<<<<< STARTING PIXEL3DMM INFERENCE for {cfg.video_name} in {prediction_types} MODE >>>>>>>>
-            """)
-    for video_name in video_names:
-        folder = f'{env_paths.PREPROCESSED_DATA}/{video_name}/'
-        image_dir = f'{folder}/cropped'
-        seg_dir = f'{folder}/seg_og/'
-        out_folders = {}
-        out_folders_wGT = {}
-        out_folders_viz = {}
-        for prediction_type in prediction_types:
-            out_folders[prediction_type] = f'{env_paths.PREPROCESSED_DATA}/{video_name}/p3dmm/{prediction_type}/'
-            out_folders_wGT[prediction_type] = f'{env_paths.PREPROCESSED_DATA}/{video_name}/p3dmm_wGT/{prediction_type}/'
-            out_folders_viz[prediction_type] = f'{env_paths.PREPROCESSED_DATA}/{video_name}/p3dmm_extraViz/{prediction_type}/'
-            os.makedirs(out_folders[prediction_type], exist_ok=True)
-            os.makedirs(out_folders_wGT[prediction_type], exist_ok=True)
-            os.makedirs(out_folders_viz[prediction_type], exist_ok=True)
-        image_names = sorted(os.listdir(image_dir))
-        # Skip this video (and only this video) when every prediction type
-        # already has one output per frame; the remaining videos must still run.
-        if all(len(os.listdir(out_folders[p])) == len(image_names) for p in prediction_types):
-            continue
-        for image_name in tqdm(image_names):
-            # splitext instead of [:-4] so that e.g. ".jpeg" frames work as well
-            stem = os.path.splitext(image_name)[0]
-            try:
-                # need 512,512 images as input; normalize to [0, 1] range.
-                # Forcing RGB keeps the 3-channel layout the side-by-side image below relies on.
-                with Image.open(f'{image_dir}/{image_name}') as frame:
-                    img = np.array(frame.convert('RGB').resize((512, 512))) / 255
-                img = torch.from_numpy(img)[None, None].float().cuda()  # 1,1,512,512,3
-                with Image.open(f'{seg_dir}/{stem}.png') as seg:
-                    img_seg = np.array(seg.resize((512, 512), Image.NEAREST))
-                if img_seg.ndim == 3:
-                    img_seg = img_seg[..., 0]
-                # Keep class 2 and classes 4..13 except 11.
-                # NOTE(review): the meaning of the class ids is defined by the segmentation step — confirm there.
-                mask = ((img_seg == 2) | ((img_seg > 3) & (img_seg < 14))) & ~(img_seg == 11)
-                mask = torch.from_numpy(mask).long().cuda()[None, None]  # 1, 1, 512, 512
-                batch = {
-                    'tar_msk': mask,
-                    'tar_rgb': img,
-                }
-                # Horizontally flipped copy of the input for test-time augmentation.
-                batch_mirrored = {
-                    'tar_rgb': torch.flip(img, dims=[3]),
-                    'tar_msk': torch.flip(mask, dims=[3]),
-                }
-                with torch.no_grad():
-                    output, _ = model.net(batch)
-                    output_mirrored, _ = model.net(batch_mirrored)
-                    _average_with_mirrored(output, output_mirrored)
-                gt_rgb = batch['tar_rgb']
-                for prediction_type in prediction_types:
-                    for i_batch in range(batch_size):
-                        i_view = 0
-                        # Raises for unsupported types instead of re-saving a stale
-                        # result from the previous prediction type.
-                        tmp_output = _to_unit_range(prediction_type, output[prediction_type][i_batch, i_view])
-                        pred_img = pad_to_3_channels(tmp_output.permute(1, 2, 0).detach().cpu().float().numpy())
-                        content = [
-                            gt_rgb[i_batch, i_view].detach().cpu().numpy(),
-                            pred_img,
-                        ]
-                        catted = (np.concatenate(content, axis=1) * 255).astype(np.uint8)
-                        Image.fromarray(catted).save(f'{out_folders_wGT[prediction_type]}/{image_name}')
-                        Image.fromarray((pred_img * 255).astype(np.uint8)).save(
-                            f'{out_folders[prediction_type]}/{stem}.png')
-                        # this visualization is quite slow, therefore disable it per default
-                        if prediction_type == 'uv_map' and cfg.viz_uv_mesh:
-                            to_show_non_mirr = uv_pred_to_mesh(
-                                output[prediction_type][i_batch:i_batch + 1, ...],
-                                batch['tar_msk'][i_batch:i_batch + 1, ...],
-                                batch['tar_rgb'][i_batch:i_batch + 1, ...],
-                                right_ear=[537, 1334, 857, 554, 941],
-                                left_ear=[541, 476, 237, 502, 286],
-                            )
-                            Image.fromarray(to_show_non_mirr).save(f'{out_folders_viz[prediction_type]}/{image_name}')
-            except Exception:
-                # Best effort: report the failing frame and carry on with the next one.
-                traceback.print_exc()
-    print(f"""
-                <<<<<<<< FINISHED PIXEL3DMM INFERENCE for {cfg.video_name} in {prediction_types} MODE >>>>>>>>
-                """)