fabio-deep committed on
Commit 146a6ea · 1 Parent(s): 8c4fe8b

added links

Files changed (7)
  1. .gitignore +1 -0
  2. app.py +343 -192
  3. app_utils.py +176 -114
  4. datasets.py +168 -123
  5. pgm/flow_pgm.py +310 -380
  6. pgm/layers.py +50 -42
  7. vae.py +136 -88
.gitignore CHANGED
@@ -1,2 +1,3 @@
+.vscode
 __pycache__
 *.pyc
app.py CHANGED
@@ -8,48 +8,55 @@ from vae import HVAE
 from datasets import morphomnist, ukbb, mimic, get_attr_max_min
 from pgm.flow_pgm import MorphoMNISTPGM, FlowPGM, ChestPGM
 from app_utils import (
-    mnist_graph, brain_graph, chest_graph, vae_preprocess, normalize, \
-    preprocess_brain, get_fig_arr, postprocess, MidpointNormalize
+    mnist_graph,
+    brain_graph,
+    chest_graph,
+    vae_preprocess,
+    normalize,
+    preprocess_brain,
+    get_fig_arr,
+    postprocess,
+    MidpointNormalize,
 )
 
 DATA, MODELS = {}, {}
-for k in ['Morpho-MNIST', 'Brain MRI', 'Chest X-ray']:
+for k in ["Morpho-MNIST", "Brain MRI", "Chest X-ray"]:
     DATA[k], MODELS[k] = {}, {}
 
 # mnist
 DIGITS = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
 # brain
-MRISEQ_CAT = ['T1', 'T2-FLAIR'] # 0,1
-SEX_CAT = ['female', 'male'] # 0,1
+MRISEQ_CAT = ["T1", "T2-FLAIR"]  # 0,1
+SEX_CAT = ["female", "male"]  # 0,1
 HEIGHT, WIDTH = 270, 270
 # chest
-SEX_CAT_CHEST = ['male', 'female'] # 0,1
-RACE_CAT = ['white', 'asian', 'black'] # 0,1,2
-FIND_CAT = ['no disease', 'pleural effusion']
-DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+SEX_CAT_CHEST = ["male", "female"]  # 0,1
+RACE_CAT = ["white", "asian", "black"]  # 0,1,2
+FIND_CAT = ["no disease", "pleural effusion"]
+DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 
 class Hparams:
     def update(self, dict):
         for k, v in dict.items():
-            setattr(self, k, v)
+            setattr(self, k, v)
 
 
 def get_paths(dataset_id):
-    if 'MNIST' in dataset_id:
-        data_path = './data/morphomnist'
-        pgm_path = './checkpoints/t_i_d/sup_pgm/checkpoint.pt'
-        vae_path = './checkpoints/t_i_d/dgauss_cond_big_beta1_dropexo/checkpoint.pt'
-    elif 'Brain' in dataset_id:
-        data_path = './data/ukbb_subset'
-        pgm_path = './checkpoints/m_b_v_s/sup_pgm/checkpoint.pt'
-        vae_path = './checkpoints/m_b_v_s/ukbb192_beta5_dgauss_b33/checkpoint.pt'
-    elif 'Chest' in dataset_id:
-        data_path = './data/mimic_subset'
-        pgm_path = './checkpoints/a_r_s_f/sup_pgm_mimic/checkpoint.pt'
+    if "MNIST" in dataset_id:
+        data_path = "./data/morphomnist"
+        pgm_path = "./checkpoints/t_i_d/sup_pgm/checkpoint.pt"
+        vae_path = "./checkpoints/t_i_d/dgauss_cond_big_beta1_dropexo/checkpoint.pt"
+    elif "Brain" in dataset_id:
+        data_path = "./data/ukbb_subset"
+        pgm_path = "./checkpoints/m_b_v_s/sup_pgm/checkpoint.pt"
+        vae_path = "./checkpoints/m_b_v_s/ukbb192_beta5_dgauss_b33/checkpoint.pt"
+    elif "Chest" in dataset_id:
+        data_path = "./data/mimic_subset"
+        pgm_path = "./checkpoints/a_r_s_f/sup_pgm_mimic/checkpoint.pt"
         vae_path = [
-            './checkpoints/a_r_s_f/mimic_beta9_gelu_dgauss_1_lr3/checkpoint.pt', # base vae
-            './checkpoints/a_r_s_f/mimic_dscm_lr_1e5_lagrange_lr_1_damping_10/6500_checkpoint.pt' # cf trained DSCM
+            "./checkpoints/a_r_s_f/mimic_beta9_gelu_dgauss_1_lr3/checkpoint.pt",  # base vae
+            "./checkpoints/a_r_s_f/mimic_dscm_lr_1e5_lagrange_lr_1_damping_10/6500_checkpoint.pt",  # cf trained DSCM
         ]
     return data_path, vae_path, pgm_path
 
@@ -57,64 +64,71 @@ def get_paths(dataset_id):
 def load_pgm(dataset_id, pgm_path):
     checkpoint = torch.load(pgm_path, map_location=DEVICE)
     args = Hparams()
-    args.update(checkpoint['hparams'])
+    args.update(checkpoint["hparams"])
     args.device = DEVICE
-    if 'MNIST' in dataset_id:
+    if "MNIST" in dataset_id:
         pgm = MorphoMNISTPGM(args).to(args.device)
-    elif 'Brain' in dataset_id:
+    elif "Brain" in dataset_id:
         pgm = FlowPGM(args).to(args.device)
-    elif 'Chest' in dataset_id:
+    elif "Chest" in dataset_id:
         pgm = ChestPGM(args).to(args.device)
-    pgm.load_state_dict(checkpoint['ema_model_state_dict'])
-    MODELS[dataset_id]['pgm'] = pgm
-    MODELS[dataset_id]['pgm_args'] = args
+    pgm.load_state_dict(checkpoint["ema_model_state_dict"])
+    MODELS[dataset_id]["pgm"] = pgm
+    MODELS[dataset_id]["pgm_args"] = args
 
 
 def load_vae(dataset_id, vae_path):
-    if 'Chest' in dataset_id:
+    if "Chest" in dataset_id:
         vae_path, dscm_path = vae_path[0], vae_path[1]
     checkpoint = torch.load(vae_path, map_location=DEVICE)
     args = Hparams()
-    args.update(checkpoint['hparams'])
+    args.update(checkpoint["hparams"])
     # backwards compatibility hack
-    if not hasattr(args, 'vae'):
-        args.vae = 'hierarchical'
-    if not hasattr(args, 'cond_prior'):
+    if not hasattr(args, "vae"):
+        args.vae = "hierarchical"
+    if not hasattr(args, "cond_prior"):
         args.cond_prior = False
-    if hasattr(args, 'free_bits'):
+    if hasattr(args, "free_bits"):
         args.kl_free_bits = args.free_bits
     args.device = DEVICE
     vae = HVAE(args).to(args.device)
 
-    if 'Chest' in dataset_id:
+    if "Chest" in dataset_id:
         dscm_ckpt = torch.load(dscm_path, map_location=DEVICE)
-        vae.load_state_dict({k[4:]: v for k, v in dscm_ckpt['ema_model_state_dict'].items() if 'vae.' in k})
+        vae.load_state_dict(
+            {
+                k[4:]: v
+                for k, v in dscm_ckpt["ema_model_state_dict"].items()
+                if "vae." in k
+            }
+        )
     else:
-        vae.load_state_dict(checkpoint['ema_model_state_dict'])
-    MODELS[dataset_id]['vae'] = vae
-    MODELS[dataset_id]['vae_args'] = args
+        vae.load_state_dict(checkpoint["ema_model_state_dict"])
+    MODELS[dataset_id]["vae"] = vae
+    MODELS[dataset_id]["vae_args"] = args
 
 
 def get_dataloader(dataset_id, data_path):
-    MODELS[dataset_id]['pgm_args'].data_dir = data_path
-    args = MODELS[dataset_id]['pgm_args']
-    if 'MNIST' in dataset_id:
+    MODELS[dataset_id]["pgm_args"].data_dir = data_path
+    args = MODELS[dataset_id]["pgm_args"]
+    if "MNIST" in dataset_id:
         datasets = morphomnist(args)
-    elif 'Brain' in dataset_id:
+    elif "Brain" in dataset_id:
         datasets = ukbb(args)
-    elif 'Chest' in dataset_id:
+    elif "Chest" in dataset_id:
         datasets = mimic(args)
-    DATA[dataset_id]['test'] = torch.utils.data.DataLoader(
-        datasets['test'], shuffle=False, batch_size=args.bs, num_workers=4)
+    DATA[dataset_id]["test"] = torch.utils.data.DataLoader(
+        datasets["test"], shuffle=False, batch_size=args.bs, num_workers=4
+    )
 
 
 def load_dataset(dataset_id):
     data_path, _, pgm_path = get_paths(dataset_id)
     checkpoint = torch.load(pgm_path, map_location=DEVICE)
     args = Hparams()
-    args.update(checkpoint['hparams'])
+    args.update(checkpoint["hparams"])
     args.device = DEVICE
-    MODELS[dataset_id]['pgm_args'] = args
+    MODELS[dataset_id]["pgm_args"] = args
     get_dataloader(dataset_id, data_path)
 
 
@@ -122,167 +136,179 @@ def load_model(dataset_id):
     _, vae_path, pgm_path = get_paths(dataset_id)
     load_pgm(dataset_id, pgm_path)
     load_vae(dataset_id, vae_path)
-
+
 
 @torch.no_grad()
 def counterfactual_inference(dataset_id, obs, do_pa):
-    pa = {k: v.clone() for k, v in obs.items() if k != 'x'}
-    cf_pa = MODELS[dataset_id]['pgm'].counterfactual(obs=pa, intervention=do_pa, num_particles=1)
-    args, vae = MODELS[dataset_id]['vae_args'], MODELS[dataset_id]['vae']
+    pa = {k: v.clone() for k, v in obs.items() if k != "x"}
+    cf_pa = MODELS[dataset_id]["pgm"].counterfactual(
+        obs=pa, intervention=do_pa, num_particles=1
+    )
+    args, vae = MODELS[dataset_id]["vae_args"], MODELS[dataset_id]["vae"]
     _pa = vae_preprocess(args, {k: v.clone() for k, v in pa.items()})
-    _cf_pa = vae_preprocess(args , {k: v.clone() for k, v in cf_pa.items()})
-    z_t = 0.1 if 'mnist' in args.hps else 1.0
-    z = vae.abduct(x=obs['x'], parents=_pa, t=z_t)
+    _cf_pa = vae_preprocess(args, {k: v.clone() for k, v in cf_pa.items()})
+    z_t = 0.1 if "mnist" in args.hps else 1.0
+    z = vae.abduct(x=obs["x"], parents=_pa, t=z_t)
     if vae.cond_prior:
-        z = [z[j]['z'] for j in range(len(z))]
+        z = [z[j]["z"] for j in range(len(z))]
     px_loc, px_scale = vae.forward_latents(latents=z, parents=_pa)
     cf_loc, cf_scale = vae.forward_latents(latents=z, parents=_cf_pa)
-    u = (obs['x'] - px_loc) / px_scale.clamp(min=1e-12)
-    u_t = 0.1 if 'mnist' in args.hps else 1.0 # cf sampling temp
+    u = (obs["x"] - px_loc) / px_scale.clamp(min=1e-12)
+    u_t = 0.1 if "mnist" in args.hps else 1.0  # cf sampling temp
     cf_scale = cf_scale * u_t
     cf_x = torch.clamp(cf_loc + cf_scale * u, min=-1, max=1)
-    return {'cf_x': cf_x, 'rec_x': px_loc, 'cf_pa': cf_pa}
+    return {"cf_x": cf_x, "rec_x": px_loc, "cf_pa": cf_pa}
 
 
 def get_obs_item(dataset_id, idx=None):
     if idx is None:
-        n_test = len(DATA[dataset_id]['test'].dataset)
+        n_test = len(DATA[dataset_id]["test"].dataset)
         idx = torch.randperm(n_test)[0]
     idx = int(idx)
-    return idx, DATA[dataset_id]['test'].dataset.__getitem__(idx)
+    return idx, DATA[dataset_id]["test"].dataset.__getitem__(idx)
 
 
 def get_mnist_obs(idx=None):
-    dataset_id = 'Morpho-MNIST'
+    dataset_id = "Morpho-MNIST"
     if not DATA[dataset_id]:
         load_dataset(dataset_id)
     idx, obs = get_obs_item(dataset_id, idx)
-    x = get_fig_arr(obs['x'].clone().squeeze().numpy())
-    t = (obs['thickness'].clone() + 1) / 2 * (6.255515 - 0.87598526) + 0.87598526
-    i = (obs['intensity'].clone() + 1) / 2 * (254.90317 - 66.601204) + 66.601204
-    y = DIGITS[obs['digit'].clone().argmax(-1)]
+    x = get_fig_arr(obs["x"].clone().squeeze().numpy())
+    t = (obs["thickness"].clone() + 1) / 2 * (6.255515 - 0.87598526) + 0.87598526
+    i = (obs["intensity"].clone() + 1) / 2 * (254.90317 - 66.601204) + 66.601204
+    y = DIGITS[obs["digit"].clone().argmax(-1)]
     return (idx, x, float(np.round(t, 2)), float(np.round(i, 2)), y)
 
 
 def get_brain_obs(idx=None):
-    dataset_id = 'Brain MRI'
+    dataset_id = "Brain MRI"
     if not DATA[dataset_id]:
         load_dataset(dataset_id)
     idx, obs = get_obs_item(dataset_id, idx)
-    x = get_fig_arr(obs['x'].clone().squeeze().numpy())
-    m = MRISEQ_CAT[int(obs['mri_seq'].clone().item())]
-    s = SEX_CAT[int(obs['sex'].clone().item())]
-    a = obs['age'].clone().item()
-    b = obs['brain_volume'].clone().item() / 1000 # in ml
-    v = obs['ventricle_volume'].clone().item() / 1000 # in ml
+    x = get_fig_arr(obs["x"].clone().squeeze().numpy())
+    m = MRISEQ_CAT[int(obs["mri_seq"].clone().item())]
+    s = SEX_CAT[int(obs["sex"].clone().item())]
+    a = obs["age"].clone().item()
+    b = obs["brain_volume"].clone().item() / 1000  # in ml
+    v = obs["ventricle_volume"].clone().item() / 1000  # in ml
     return (idx, x, m, s, a, float(np.round(b, 2)), float(np.round(v, 2)))
 
 
 def get_chest_obs(idx=None):
-    dataset_id = 'Chest X-ray'
+    dataset_id = "Chest X-ray"
    if not DATA[dataset_id]:
-        load_dataset(dataset_id)
+        load_dataset(dataset_id)
     idx, obs = get_obs_item(dataset_id, idx)
-    x = get_fig_arr(postprocess(obs['x'].clone()))
-    s = SEX_CAT_CHEST[int(obs['sex'].clone().squeeze().numpy())]
-    f = FIND_CAT[int(obs['finding'].clone().squeeze().numpy())]
-    r = RACE_CAT[obs['race'].clone().squeeze().numpy().argmax(-1)]
-    a = (obs['age'].clone().squeeze().numpy()+1)*50
+    x = get_fig_arr(postprocess(obs["x"].clone()))
+    s = SEX_CAT_CHEST[int(obs["sex"].clone().squeeze().numpy())]
+    f = FIND_CAT[int(obs["finding"].clone().squeeze().numpy())]
+    r = RACE_CAT[obs["race"].clone().squeeze().numpy().argmax(-1)]
+    a = (obs["age"].clone().squeeze().numpy() + 1) * 50
     return (idx, x, r, s, f, float(np.round(a, 1)))
 
 
 def infer_mnist_cf(*args):
-    dataset_id = 'Morpho-MNIST'
+    dataset_id = "Morpho-MNIST"
     idx, _, t, i, y, do_t, do_i, do_y = args
     n_particles = 32
     # preprocess
-    obs = DATA[dataset_id]['test'].dataset.__getitem__(int(idx))
-    obs['x'] = (obs['x'] - 127.5) / 127.5
+    obs = DATA[dataset_id]["test"].dataset.__getitem__(int(idx))
+    obs["x"] = (obs["x"] - 127.5) / 127.5
     for k, v in obs.items():
         obs[k] = v.view(1, 1) if len(v.shape) < 1 else v.unsqueeze(0)
-        obs[k] = obs[k].to(MODELS[dataset_id]['vae_args'].device).float()
+        obs[k] = obs[k].to(MODELS[dataset_id]["vae_args"].device).float()
         if n_particles > 1:
-            ndims = (1,)*3 if k == 'x' else (1,)
+            ndims = (1,) * 3 if k == "x" else (1,)
             obs[k] = obs[k].repeat(n_particles, *ndims)
     # intervention(s)
     do_pa = {}
     if do_t:
-        do_pa['thickness'] = torch.tensor(normalize(t, x_max=6.255515, x_min=0.87598526)).view(1, 1)
+        do_pa["thickness"] = torch.tensor(
+            normalize(t, x_max=6.255515, x_min=0.87598526)
+        ).view(1, 1)
     if do_i:
-        do_pa['intensity'] = torch.tensor(normalize(i, x_max=254.90317, x_min=66.601204)).view(1, 1)
+        do_pa["intensity"] = torch.tensor(
+            normalize(i, x_max=254.90317, x_min=66.601204)
+        ).view(1, 1)
     if do_y:
-        do_pa['digit'] = F.one_hot(torch.tensor(DIGITS.index(y)), num_classes=10).view(1, 10)
-
+        do_pa["digit"] = F.one_hot(torch.tensor(DIGITS.index(y)), num_classes=10).view(
+            1, 10
+        )
+
     for k, v in do_pa.items():
-        do_pa[k] = v.to(MODELS[dataset_id]['vae_args'].device).float().repeat(n_particles, 1)
+        do_pa[k] = (
+            v.to(MODELS[dataset_id]["vae_args"].device).float().repeat(n_particles, 1)
+        )
     # infer counterfactual
     out = counterfactual_inference(dataset_id, obs, do_pa)
     # avg cf particles
-    cf_x = out['cf_x'].mean(0)
-    cf_x_std = out['cf_x'].std(0)
-    rec_x = out['rec_x'].mean(0)
-    cf_t = out['cf_pa']['thickness'].mean(0)
-    cf_i = out['cf_pa']['intensity'].mean(0)
-    cf_y = out['cf_pa']['digit'].mean(0)
+    cf_x = out["cf_x"].mean(0)
+    cf_x_std = out["cf_x"].std(0)
+    rec_x = out["rec_x"].mean(0)
+    cf_t = out["cf_pa"]["thickness"].mean(0)
+    cf_i = out["cf_pa"]["intensity"].mean(0)
+    cf_y = out["cf_pa"]["digit"].mean(0)
     # post process
     cf_x = postprocess(cf_x)
     cf_x_std = cf_x_std.squeeze().detach().cpu().numpy()
     rec_x = postprocess(rec_x)
     cf_t = np.round((cf_t.item() + 1) / 2 * (6.255515 - 0.87598526) + 0.87598526, 2)
-    cf_i = np.round((cf_i.item() + 1) / 2 * (254.90317 - 66.601204) + 66.601204, 2)
+    cf_i = np.round((cf_i.item() + 1) / 2 * (254.90317 - 66.601204) + 66.601204, 2)
     cf_y = DIGITS[cf_y.argmax(-1)]
-    # plots
+    # plots
     # plt.close('all')
     effect = cf_x - rec_x
-    effect = get_fig_arr(effect, cmap='RdBu_r',
-                         norm=MidpointNormalize(vmin=-255, midpoint=0, vmax=255))
+    effect = get_fig_arr(
+        effect, cmap="RdBu_r", norm=MidpointNormalize(vmin=-255, midpoint=0, vmax=255)
+    )
     cf_x = get_fig_arr(cf_x)
-    cf_x_std = get_fig_arr(cf_x_std, cmap='jet')
+    cf_x_std = get_fig_arr(cf_x_std, cmap="jet")
     return (cf_x, cf_x_std, effect, cf_t, cf_i, cf_y)
 
 
 def infer_brain_cf(*args):
-    dataset_id = 'Brain MRI'
+    dataset_id = "Brain MRI"
     idx, _, m, s, a, b, v = args[:7]
     do_m, do_s, do_a, do_b, do_v = args[7:]
     n_particles = 16
     # preprocessing
-    obs = DATA[dataset_id]['test'].dataset.__getitem__(int(idx))
-    obs.pop('pa')
-    obs = preprocess_brain(MODELS[dataset_id]['vae_args'], obs)
+    obs = DATA[dataset_id]["test"].dataset.__getitem__(int(idx))
+    obs = preprocess_brain(MODELS[dataset_id]["vae_args"], obs)
     for k, _v in obs.items():
         if n_particles > 1:
-            ndims = (1,)*3 if k == 'x' else (1,)
+            ndims = (1,) * 3 if k == "x" else (1,)
             obs[k] = _v.repeat(n_particles, *ndims)
     # interventions(s)
     do_pa = {}
     if do_m:
-        do_pa['mri_seq'] = torch.tensor(MRISEQ_CAT.index(m)).view(1, 1)
+        do_pa["mri_seq"] = torch.tensor(MRISEQ_CAT.index(m)).view(1, 1)
     if do_s:
-        do_pa['sex'] = torch.tensor(SEX_CAT.index(s)).view(1, 1)
+        do_pa["sex"] = torch.tensor(SEX_CAT.index(s)).view(1, 1)
     if do_a:
-        do_pa['age'] = torch.tensor(a).view(1, 1)
+        do_pa["age"] = torch.tensor(a).view(1, 1)
     if do_b:
-        do_pa['brain_volume'] = torch.tensor(b * 1000).view(1, 1)
+        do_pa["brain_volume"] = torch.tensor(b * 1000).view(1, 1)
     if do_v:
-        do_pa['ventricle_volume'] = torch.tensor(v * 1000).view(1, 1)
+        do_pa["ventricle_volume"] = torch.tensor(v * 1000).view(1, 1)
     # normalize continuous attributes
-    for k in ['age', 'brain_volume', 'ventricle_volume']:
+    for k in ["age", "brain_volume", "ventricle_volume"]:
         if k in do_pa.keys():
             k_max, k_min = get_attr_max_min(k)
             do_pa[k] = (do_pa[k] - k_min) / (k_max - k_min)  # [0,1]
             do_pa[k] = 2 * do_pa[k] - 1  # [-1,1]
 
     for k, _v in do_pa.items():
-        do_pa[k] = _v.to(MODELS[dataset_id]['vae_args'].device).float().repeat(n_particles, 1)
+        do_pa[k] = (
+            _v.to(MODELS[dataset_id]["vae_args"].device).float().repeat(n_particles, 1)
+        )
     # infer counterfactual
     out = counterfactual_inference(dataset_id, obs, do_pa)
     # avg cf particles
-    cf_x = out['cf_x'].mean(0)
-    cf_x_std = out['cf_x'].std(0)
-    rec_x = out['rec_x'].mean(0)
-    cf_m = out['cf_pa']['mri_seq'].mean(0)
-    cf_s = out['cf_pa']['sex'].mean(0)
+    cf_x = out["cf_x"].mean(0)
+    cf_x_std = out["cf_x"].std(0)
+    rec_x = out["rec_x"].mean(0)
+    cf_m = out["cf_pa"]["mri_seq"].mean(0)
+    cf_s = out["cf_pa"]["sex"].mean(0)
     # post process
     cf_x = postprocess(cf_x)
     cf_x_std = cf_x_std.squeeze().detach().cpu().numpy()
@@ -290,54 +316,70 @@ def infer_brain_cf(*args):
     cf_m = MRISEQ_CAT[int(cf_m.item())]
     cf_s = SEX_CAT[int(cf_s.item())]
     cf_ = {}
-    for k in ['age', 'brain_volume', 'ventricle_volume']: # unnormalize
+    for k in ["age", "brain_volume", "ventricle_volume"]:  # unnormalize
         k_max, k_min = get_attr_max_min(k)
-        cf_[k] = (out['cf_pa'][k].mean(0).item() + 1) / 2 * (k_max - k_min) + k_min
+        cf_[k] = (out["cf_pa"][k].mean(0).item() + 1) / 2 * (k_max - k_min) + k_min
     # plots
-    # plt.close('all')
+    # plt.close('all')
     effect = cf_x - rec_x
-    effect = get_fig_arr(effect, cmap='RdBu_r',
-                         norm=MidpointNormalize(vmin=effect.min(), midpoint=0, vmax=effect.max()))
+    effect = get_fig_arr(
+        effect,
+        cmap="RdBu_r",
+        norm=MidpointNormalize(vmin=effect.min(), midpoint=0, vmax=effect.max()),
+    )
     cf_x = get_fig_arr(cf_x)
-    cf_x_std = get_fig_arr(cf_x_std, cmap='jet')
-    return (cf_x, cf_x_std, effect, cf_m, cf_s, np.round(cf_['age'], 1), np.round(cf_['brain_volume'] / 1000, 2), np.round(cf_['ventricle_volume'] / 1000, 2))
+    cf_x_std = get_fig_arr(cf_x_std, cmap="jet")
+    return (
+        cf_x,
+        cf_x_std,
+        effect,
+        cf_m,
+        cf_s,
+        np.round(cf_["age"], 1),
+        np.round(cf_["brain_volume"] / 1000, 2),
+        np.round(cf_["ventricle_volume"] / 1000, 2),
+    )
 
 
 def infer_chest_cf(*args):
-    dataset_id = 'Chest X-ray'
+    dataset_id = "Chest X-ray"
     idx, _, r, s, f, a = args[:6]
     do_r, do_s, do_f, do_a = args[6:]
     n_particles = 16
     # preprocessing
-    obs = DATA[dataset_id]['test'].dataset.__getitem__(int(idx))
+    obs = DATA[dataset_id]["test"].dataset.__getitem__(int(idx))
     for k, v in obs.items():
-        obs[k] = v.to(MODELS[dataset_id]['vae_args'].device).float()
+        obs[k] = v.to(MODELS[dataset_id]["vae_args"].device).float()
         if n_particles > 1:
-            ndims = (1,)*3 if k == 'x' else (1,)
+            ndims = (1,) * 3 if k == "x" else (1,)
             obs[k] = obs[k].repeat(n_particles, *ndims)
     # intervention(s)
     do_pa = {}
     with torch.no_grad():
         if do_s:
-            do_pa['sex'] = torch.tensor(SEX_CAT_CHEST.index(s)).view(1, 1)
+            do_pa["sex"] = torch.tensor(SEX_CAT_CHEST.index(s)).view(1, 1)
         if do_f:
-            do_pa['finding'] = torch.tensor(FIND_CAT.index(f)).view(1, 1)
+            do_pa["finding"] = torch.tensor(FIND_CAT.index(f)).view(1, 1)
         if do_r:
-            do_pa['race'] = F.one_hot(torch.tensor(RACE_CAT.index(r)), num_classes=3).view(1, 3)
+            do_pa["race"] = F.one_hot(
+                torch.tensor(RACE_CAT.index(r)), num_classes=3
+            ).view(1, 3)
         if do_a:
-            do_pa['age'] = torch.tensor(a/100*2-1).view(1,1)
+            do_pa["age"] = torch.tensor(a / 100 * 2 - 1).view(1, 1)
     for k, v in do_pa.items():
-        do_pa[k] = v.to(MODELS[dataset_id]['vae_args'].device).float().repeat(n_particles, 1)
+        do_pa[k] = (
+            v.to(MODELS[dataset_id]["vae_args"].device).float().repeat(n_particles, 1)
+        )
     # infer counterfactual
     out = counterfactual_inference(dataset_id, obs, do_pa)
     # avg cf particles
-    cf_x = out['cf_x'].mean(0)
-    cf_x_std = out['cf_x'].std(0)
-    rec_x = out['rec_x'].mean(0)
-    cf_r = out['cf_pa']['race'].mean(0)
-    cf_s = out['cf_pa']['sex'].mean(0)
-    cf_f = out['cf_pa']['finding'].mean(0)
-    cf_a = out['cf_pa']['age'].mean(0)
+    cf_x = out["cf_x"].mean(0)
+    cf_x_std = out["cf_x"].std(0)
+    rec_x = out["rec_x"].mean(0)
+    cf_r = out["cf_pa"]["race"].mean(0)
+    cf_s = out["cf_pa"]["sex"].mean(0)
+    cf_f = out["cf_pa"]["finding"].mean(0)
+    cf_a = out["cf_pa"]["age"].mean(0)
     # post process
     cf_x = postprocess(cf_x)
     cf_x_std = cf_x_std.squeeze().detach().cpu().numpy()
@@ -349,10 +391,13 @@ def infer_chest_cf(*args):
     # plots
     # plt.close('all')
     effect = cf_x - rec_x
-    effect = get_fig_arr(effect, cmap='RdBu_r',
-                         norm=MidpointNormalize(vmin=effect.min(), midpoint=0, vmax=effect.max()))
+    effect = get_fig_arr(
+        effect,
+        cmap="RdBu_r",
+        norm=MidpointNormalize(vmin=effect.min(), midpoint=0, vmax=effect.max()),
+    )
     cf_x = get_fig_arr(cf_x)
-    cf_x_std = get_fig_arr(cf_x_std, cmap='jet')
+    cf_x_std = get_fig_arr(cf_x_std, cmap="jet")
     return (cf_x, cf_x_std, effect, cf_r, cf_s, cf_f, np.round(cf_a, 1))
 
 
@@ -364,33 +409,59 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
             with gr.Row().style(equal_height=True):
                 idx = gr.Number(value=0, visible=False)
                 with gr.Column(scale=1, min_width=200):
-                    x = gr.Image(label='Observation', interactive=False).style(height=HEIGHT)
+                    x = gr.Image(label="Observation", interactive=False).style(
+                        height=HEIGHT
+                    )
                 with gr.Column(scale=1, min_width=200):
-                    cf_x = gr.Image(label='Counterfactual', interactive=False).style(height=HEIGHT)
+                    cf_x = gr.Image(label="Counterfactual", interactive=False).style(
+                        height=HEIGHT
+                    )
                 with gr.Column(scale=1, min_width=200):
-                    cf_x_std = gr.Image(label='Counterfactual Uncertainty', interactive=False).style(height=HEIGHT)
+                    cf_x_std = gr.Image(
+                        label="Counterfactual Uncertainty", interactive=False
+                    ).style(height=HEIGHT)
                 with gr.Column(scale=1, min_width=200):
-                    effect = gr.Image(label='Direct Causal Effect', interactive=False).style(height=HEIGHT)
+                    effect = gr.Image(
+                        label="Direct Causal Effect", interactive=False
+                    ).style(height=HEIGHT)
             with gr.Row().style(equal_height=True):
                 with gr.Column(scale=1.75):
-                    gr.Markdown("#### Intervention")
+                    gr.Markdown(
+                        "#### Intervention"
+                        + 28 * "&emsp;"
+                        + "[arXiv paper](https://arxiv.org/abs/2306.15764) &ensp; | &ensp; [GitHub code](https://github.com/biomedia-mira/causal-gen)"
+                    )
                    with gr.Column():
                         do_y = gr.Checkbox(label="do(digit)", value=False)
                         y = gr.Radio(DIGITS, label="", interactive=False)
                     with gr.Row():
                         with gr.Column(min_width=100):
                             do_t = gr.Checkbox(label="do(thickness)", value=False)
-                            t = gr.Slider(label="\u00A0", minimum=0.9, maximum=5.5, step=0.01, interactive=False)
+                            t = gr.Slider(
+                                label="\u00A0",
+                                minimum=0.9,
+                                maximum=5.5,
+                                step=0.01,
+                                interactive=False,
+                            )
                         with gr.Column(min_width=100):
                             do_i = gr.Checkbox(label="do(intensity)", value=False)
-                            i = gr.Slider(label="\u00A0", minimum=50, maximum=255, step=0.01, interactive=False)
+                            i = gr.Slider(
+                                label="\u00A0",
+                                minimum=50,
+                                maximum=255,
+                                step=0.01,
+                                interactive=False,
+                            )
                     with gr.Row():
                         new = gr.Button("New Observation")
                         reset = gr.Button("Reset", variant="stop")
                         submit = gr.Button("Submit", variant="primary")
                 with gr.Column(scale=1):
                     gr.Markdown("### &nbsp;")
-                    causal_graph = gr.Image(label='Causal Graph', interactive=False).style(height=300)
+                    causal_graph = gr.Image(
+                        label="Causal Graph", interactive=False
+                    ).style(height=300)
 
         with gr.TabItem("Brain MRI") as brain_tab:
             brain_id = gr.Textbox(value=brain_tab.label, visible=False)
@@ -398,40 +469,81 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
            with gr.Row().style(equal_height=True):
                 idx_brain = gr.Number(value=0, visible=False)
                 with gr.Column(scale=1, min_width=200):
-                    x_brain = gr.Image(label='Observation', interactive=False).style(height=HEIGHT)
+                    x_brain = gr.Image(label="Observation", interactive=False).style(
+                        height=HEIGHT
+                    )
                 with gr.Column(scale=1, min_width=200):
-                    cf_x_brain = gr.Image(label='Counterfactual', interactive=False).style(height=HEIGHT)
+                    cf_x_brain = gr.Image(
+                        label="Counterfactual", interactive=False
+                    ).style(height=HEIGHT)
                 with gr.Column(scale=1, min_width=200):
-                    cf_x_std_brain = gr.Image(label='Counterfactual Uncertainty', interactive=False).style(height=HEIGHT)
+                    cf_x_std_brain = gr.Image(
+                        label="Counterfactual Uncertainty", interactive=False
+                    ).style(height=HEIGHT)
                 with gr.Column(scale=1, min_width=200):
-                    effect_brain = gr.Image(label='Direct Causal Effect', interactive=False).style(height=HEIGHT)
+                    effect_brain = gr.Image(
+                        label="Direct Causal Effect", interactive=False
+                    ).style(height=HEIGHT)
             with gr.Row():
                 with gr.Column(scale=2.55):
-                    gr.Markdown("#### Intervention")
+                    gr.Markdown(
+                        "#### Intervention"
+                        + 28 * "&emsp;"
+                        + "[arXiv paper](https://arxiv.org/abs/2306.15764) &ensp; | &ensp; [GitHub code](https://github.com/biomedia-mira/causal-gen)"
+                    )
                     with gr.Row():
                         with gr.Column(min_width=200):
                             do_m = gr.Checkbox(label="do(MRI sequence)", value=False)
-                            m = gr.Radio(["T1", "T2-FLAIR"], label="", interactive=False)
+                            m = gr.Radio(
+                                ["T1", "T2-FLAIR"], label="", interactive=False
+                            )
                         with gr.Column(min_width=200):
                             do_s = gr.Checkbox(label="do(sex)", value=False)
-                            s = gr.Radio(["female", "male"], label="", interactive=False)
+                            s = gr.Radio(
+                                ["female", "male"], label="", interactive=False
+                            )
                     with gr.Row():
                         with gr.Column(min_width=100):
                             do_a = gr.Checkbox(label="do(age)", value=False)
-                            a = gr.Slider(label="\u00A0", value=50, minimum=44, maximum=73, step=1, interactive=False)
+                            a = gr.Slider(
+                                label="\u00A0",
+                                value=50,
+                                minimum=44,
+                                maximum=73,
+                                step=1,
+                                interactive=False,
+                            )
                         with gr.Column(min_width=100):
                             do_b = gr.Checkbox(label="do(brain volume)", value=False)
-                            b = gr.Slider(label="\u00A0", value=1000, minimum=850, maximum=1550, step=20, interactive=False)
+                            b = gr.Slider(
+                                label="\u00A0",
+                                value=1000,
+                                minimum=850,
+                                maximum=1550,
+                                step=20,
+                                interactive=False,
+                            )
                         with gr.Column(min_width=100):
-                            do_v = gr.Checkbox(label="do(ventricle volume)", value=False)
-                            v = gr.Slider(label="\u00A0", value=40, minimum=10, maximum=125, step=2, interactive=False)
+                            do_v = gr.Checkbox(
+                                label="do(ventricle volume)", value=False
+                            )
+                            v = gr.Slider(
+                                label="\u00A0",
+                                value=40,
+                                minimum=10,
+                                maximum=125,
+                                step=2,
+                                interactive=False,
+                            )
                     with gr.Row():
                         new_brain = gr.Button("New Observation")
-                        reset_brain = gr.Button("Reset", variant='stop')
-                        submit_brain = gr.Button("Submit", variant='primary')
+                        reset_brain = gr.Button("Reset", variant="stop")
+                        submit_brain = gr.Button("Submit", variant="primary")
                 with gr.Column(scale=1):
                     # gr.Markdown("### &nbsp;")
-                    causal_graph_brain = gr.Image(label='Causal Graph', interactive=False).style(height=340)
+                    causal_graph_brain = gr.Image(
+                        label="Causal Graph", interactive=False
+                    ).style(height=340)
 
         with gr.TabItem("Chest X-ray") as chest_tab:
             chest_id = gr.Textbox(value=chest_tab.label, visible=False)
@@ -439,40 +551,58 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
            with gr.Row().style(equal_height=True):
                 idx_chest = gr.Number(value=0, visible=False)
                 with gr.Column(scale=1, min_width=200):
-                    x_chest = gr.Image(label='Observation', interactive=False).style(height=HEIGHT)
+                    x_chest = gr.Image(label="Observation", interactive=False).style(
+                        height=HEIGHT
+                    )
                 with gr.Column(scale=1, min_width=200):
-                    cf_x_chest = gr.Image(label='Counterfactual', interactive=False).style(height=HEIGHT)
+                    cf_x_chest = gr.Image(
+                        label="Counterfactual", interactive=False
+                    ).style(height=HEIGHT)
                 with gr.Column(scale=1, min_width=200):
-                    cf_x_std_chest = gr.Image(label='Counterfactual Uncertainty', interactive=False).style(height=HEIGHT)
+                    cf_x_std_chest = gr.Image(
+                        label="Counterfactual Uncertainty", interactive=False
+                    ).style(height=HEIGHT)
                 with gr.Column(scale=1, min_width=200):
-                    effect_chest = gr.Image(label='Direct Causal Effect', interactive=False).style(height=HEIGHT)
+                    effect_chest = gr.Image(
+                        label="Direct Causal Effect", interactive=False
+                    ).style(height=HEIGHT)
 
             with gr.Row():
                 with gr.Column(scale=2.55):
-                    gr.Markdown("#### Intervention")
+                    gr.Markdown(
+                        "#### Intervention"
+                        + 28 * "&emsp;"
+                        + "[arXiv paper](https://arxiv.org/abs/2306.15764) &ensp; | &ensp; [GitHub code](https://github.com/biomedia-mira/causal-gen)"
+                    )
                     with gr.Row().style(equal_height=True):
                         with gr.Column(min_width=200):
                             do_f_chest = gr.Checkbox(label="do(disease)", value=False)
                             f_chest = gr.Radio(FIND_CAT, label="", interactive=False)
                         with gr.Column(min_width=200):
                             do_s_chest = gr.Checkbox(label="do(sex)", value=False)
-                            s_chest = gr.Radio(SEX_CAT_CHEST, label="", interactive=False)
+                            s_chest = gr.Radio(
+                                SEX_CAT_CHEST, label="", interactive=False
+                            )
 
                     with gr.Row():
                         with gr.Column(min_width=200):
                             do_r_chest = gr.Checkbox(label="do(race)", value=False)
-                            r_chest = gr.Radio(RACE_CAT, label="", interactive=False)
+                            r_chest = gr.Radio(RACE_CAT, label="", interactive=False)
                         with gr.Column(min_width=200):
                             do_a_chest = gr.Checkbox(label="do(age)", value=False)
-                            a_chest = gr.Slider(label="\u00A0", minimum=18, maximum=98, step=1)
-
+                            a_chest = gr.Slider(
+                                label="\u00A0", minimum=18, maximum=98, step=1
+                            )
+
                     with gr.Row():
                         new_chest = gr.Button("New Observation")
                         reset_chest = gr.Button("Reset", variant="stop")
                         submit_chest = gr.Button("Submit", variant="primary")
                 with gr.Column(scale=1):
                     # gr.Markdown("### &nbsp;")
-                    causal_graph_chest = gr.Image(label='Causal Graph', interactive=False).style(height=345)
+                    causal_graph_chest = gr.Image(
+                        label="Causal Graph", interactive=False
+                    ).style(height=345)
 
     # morphomnist
     do = [do_t, do_i, do_y]
@@ -514,29 +644,41 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
     new_chest.click(fn=chest_graph, inputs=do_chest, outputs=causal_graph_chest)
 
     # "new" button: reset cf output panels
-    for _k, _v in zip([new, new_brain, new_chest], [cf_out, cf_out_brain, cf_out_chest]):
-        _k.click(fn=lambda: (gr.update(value=None),)*3, inputs=None, outputs=_v)
+    for _k, _v in zip(
+        [new, new_brain, new_chest], [cf_out, cf_out_brain, cf_out_chest]
+    ):
+        _k.click(fn=lambda: (gr.update(value=None),) * 3, inputs=None, outputs=_v)
 
     # "reset" button: reload current observations
     reset.click(fn=get_mnist_obs, inputs=idx, outputs=obs)
     reset_brain.click(fn=get_brain_obs, inputs=idx_brain, outputs=obs_brain)
     reset_chest.click(fn=get_chest_obs, inputs=idx_chest, outputs=obs_chest)
-
+
     # "reset" button: deselect intervention checkboxes
-    reset.click(fn=lambda: (gr.update(value=False),)*len(do), inputs=None, outputs=do)
-    reset_brain.click(fn=lambda: (gr.update(value=False),)*len(do_brain), inputs=None, outputs=do_brain)
-    reset_chest.click(fn=lambda: (gr.update(value=False),)*len(do_chest), inputs=None, outputs=do_chest)
+    reset.click(fn=lambda: (gr.update(value=False),) * len(do), inputs=None, outputs=do)
+    reset_brain.click(
+        fn=lambda: (gr.update(value=False),) * len(do_brain),
+        inputs=None,
+        outputs=do_brain,
+    )
+    reset_chest.click(
+        fn=lambda: (gr.update(value=False),) * len(do_chest),
+        inputs=None,
+        outputs=do_chest,
+    )
 
     # "reset" button: reset cf output panels
-    for _k, _v in zip([reset, reset_brain, reset_chest], [cf_out, cf_out_brain, cf_out_chest]):
-        _k.click(fn=lambda: plt.close('all'), inputs=None, outputs=None)
-        _k.click(fn=lambda: (gr.update(value=None),)*3, inputs=None, outputs=_v)
+    for _k, _v in zip(
+        [reset, reset_brain, reset_chest], [cf_out, cf_out_brain, cf_out_chest]
+    ):
+        _k.click(fn=lambda: plt.close("all"), inputs=None, outputs=None)
+        _k.click(fn=lambda: (gr.update(value=None),) * 3, inputs=None, outputs=_v)
 
     # enable mnist interventions when checkbox is selected & update graph
     for _k, _v in zip(do, [t, i, y]):
         _k.change(fn=lambda x: gr.update(interactive=x), inputs=_k, outputs=_v)
         _k.change(mnist_graph, inputs=do, outputs=causal_graph)
-
+
     # enable brain interventions when checkbox is selected & update graph
     for _k, _v in zip(do_brain, [m, s, a, b, v]):
         _k.change(fn=lambda x: gr.update(interactive=x), inputs=_k, outputs=_v)
@@ -546,11 +688,20 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
     for _k, _v in zip(do_chest, [r_chest, s_chest, f_chest, a_chest]):
         _k.change(fn=lambda x: gr.update(interactive=x), inputs=_k, outputs=_v)
         _k.change(chest_graph, inputs=do_chest, outputs=causal_graph_chest)
-
+
    # "submit" button: infer countefactuals
     submit.click(fn=infer_mnist_cf, inputs=obs + do, outputs=cf_out + [t, i, y])
-    submit_brain.click(fn=infer_brain_cf, inputs=obs_brain + do_brain, outputs=cf_out_brain + [m, s, a, b, v])
-    submit_chest.click(fn=infer_chest_cf, inputs=obs_chest + do_chest, outputs=cf_out_chest + [r_chest, s_chest, f_chest, a_chest])
+    submit_brain.click(
+        fn=infer_brain_cf,
+        inputs=obs_brain + do_brain,
+        outputs=cf_out_brain + [m, s, a, b, v],
+    )
+    submit_chest.click(
+        fn=infer_chest_cf,
+        inputs=obs_chest + do_chest,
+        outputs=cf_out_chest + [r_chest, s_chest, f_chest, a_chest],
+    )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.queue()
+    demo.launch()
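
Aside: counterfactual_inference() in app.py follows the standard abduction-action-prediction recipe for deep structural causal models. The toy sketch below reproduces that mechanism on a one-variable additive-noise model so the three steps are visible in isolation; the mechanism() function and its constants are hypothetical stand-ins for the HVAE decoder and PGM used in this Space, not code from the repository.

# Minimal sketch of the abduction-action-prediction pattern used by
# counterfactual_inference(), on a hypothetical toy additive-noise model.
import torch

torch.manual_seed(0)

def mechanism(pa):
    # stand-in for vae.forward_latents(...): returns a location and scale
    loc = 2.0 * pa
    scale = torch.tensor(0.1)
    return loc, scale

# observed data generated from the model: x = loc(pa) + scale * u
pa = torch.tensor([1.5])
loc, scale = mechanism(pa)
x_obs = loc + scale * torch.randn(1)

# 1) abduction: invert the mechanism to recover the exogenous noise u,
#    mirroring u = (obs['x'] - px_loc) / px_scale.clamp(min=1e-12) in app.py
loc, scale = mechanism(pa)
u = (x_obs - loc) / scale.clamp(min=1e-12)

# 2) action: intervene on the parent, do(pa := 2.0)
cf_pa = torch.tensor([2.0])

# 3) prediction: push the recovered noise through the intervened mechanism
cf_loc, cf_scale = mechanism(cf_pa)
x_cf = cf_loc + cf_scale * u

print(x_obs, x_cf)  # x_cf differs from x_obs only through the new parent
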
app_utils.py CHANGED
@@ -3,16 +3,18 @@ import numpy as np
3
  import networkx as nx
4
  import matplotlib.pyplot as plt
5
 
 
 
6
  from matplotlib import rc, patches, colors
7
- rc('font', **{'family': 'serif', 'serif': ['Roman']})
8
- rc('text', usetex=True)
9
- rc('image', interpolation='none')
10
- rc('text.latex', preamble=r'\usepackage{amsmath} \usepackage{amssymb}')
 
11
 
12
  from datasets import get_attr_max_min
13
 
14
- from PIL import Image
15
- HAMMER = np.array(Image.open('./hammer.png').resize((35, 35))) / 255
16
 
17
 
18
  class MidpointNormalize(colors.Normalize):
@@ -21,7 +23,7 @@ class MidpointNormalize(colors.Normalize):
21
  colors.Normalize.__init__(self, vmin, vmax, clip)
22
 
23
  def __call__(self, value, clip=None):
24
- v_ext = np.max( [ np.abs(self.vmin), np.abs(self.vmax) ] )
25
  x, y = [-v_ext, self.midpoint, v_ext], [0, 0.5, 1]
26
  return np.ma.masked_array(np.interp(value, x, y))
27
 
@@ -31,10 +33,10 @@ def postprocess(x):
31
 
32
 
33
  def mnist_graph(*args):
34
- x, t, i, y = r'$\mathbf{x}$', r'$t$', r'$i$', r'$y$'
35
- ut, ui, uy = r'$\mathbf{U}_t$', r'$\mathbf{U}_i$', r'$\mathbf{U}_y$'
36
- zx, ex = r'$\mathbf{z}_{1:L}$', r'$\boldsymbol{\epsilon}$'
37
-
38
  G = nx.DiGraph()
39
  G.add_edge(t, x)
40
  G.add_edge(i, x)
@@ -47,31 +49,36 @@ def mnist_graph(*args):
47
  G.add_edge(ex, x)
48
 
49
  pos = {
50
- y: (0, 0), uy: (-1, 0),
51
- t: (0, 0.5), ut: (0, 1),
52
- x: (1, 0), zx: (2, 0.375), ex: (2, 0),
53
- i: (1, 0.5), ui: (1, 1),
 
 
 
 
 
54
  }
55
 
56
  node_c = {}
57
  for node in G:
58
- node_c[node] = 'lightgrey' if node in [x, t, i, y] else 'white'
59
- node_line_c = {k: 'black' for k, _ in node_c.items()}
60
- edge_c = {e: 'black' for e in G.edges}
61
 
62
  if args[0]: # do_t
63
- edge_c[(ut, t)] = 'lightgrey'
64
  # G.remove_edge(ut, t)
65
- node_line_c[t] = 'red'
66
  if args[1]: # do_i
67
- edge_c[(ui, i)] = 'lightgrey'
68
- edge_c[(t, i)] = 'lightgrey'
69
  # G.remove_edges_from([(ui, i), (t, i)])
70
- node_line_c[i] = 'red'
71
  if args[2]: # do_y
72
- edge_c[(uy, y)] = 'lightgrey'
73
  # G.remove_edge(uy, y)
74
- node_line_c[y] = 'red'
75
 
76
  fs = 30
77
  options = {
@@ -83,27 +90,36 @@ def mnist_graph(*args):
83
  "linewidths": 2,
84
  "width": 2,
85
  }
86
- plt.close('all')
87
- fig, ax = plt.subplots(1, 1, figsize=(6,4.1))#, constrained_layout=True)
88
  # fig.patch.set_visible(False)
89
  ax.margins(x=0.06, y=0.15, tight=False)
90
  ax.axis("off")
91
- nx.draw_networkx(G, pos, **options, arrowsize=25, arrowstyle='-|>', ax=ax)
92
  # need to reuse x, y limits so that the graphs plot the same way before and after removing edges
93
  x_lim = (-1.348, 2.348)
94
  y_lim = (-0.215, 1.215)
95
  ax.set_xlim(x_lim)
96
  ax.set_ylim(y_lim)
97
- rect = patches.FancyBboxPatch((1.75, -0.16), 0.5, 0.7, boxstyle="round, pad=0.05, rounding_size=0", linewidth=2, edgecolor='black', facecolor='none', linestyle='-')
 
 
 
 
 
 
 
 
 
98
  ax.add_patch(rect)
99
  ax.text(1.85, 0.65, r"$\mathbf{U}_{\mathbf{x}}$", fontsize=fs)
100
 
101
  if args[0]: # do_t
102
- fig.figimage(HAMMER, 0.26*fig.bbox.xmax, 0.525*fig.bbox.ymax, zorder=10)
103
  if args[1]: # do_i
104
- fig.figimage(HAMMER, 0.5175*fig.bbox.xmax, 0.525*fig.bbox.ymax, zorder=11)
105
  if args[2]: # do_y
106
- fig.figimage(HAMMER, 0.26*fig.bbox.xmax, 0.2*fig.bbox.ymax, zorder=12)
107
 
108
  fig.tight_layout()
109
  fig.canvas.draw()
@@ -111,10 +127,16 @@ def mnist_graph(*args):
111
 
112
 
113
  def brain_graph(*args):
114
- x, m, s, a, b, v = r'$\mathbf{x}$', r'$m$', r'$s$', r'$a$', r'$b$', r'$v$'
115
- um, us, ua, ub, uv = r'$\mathbf{U}_m$', r'$\mathbf{U}_s$', r'$\mathbf{U}_a$', r'$\mathbf{U}_b$', r'$\mathbf{U}_v$'
116
- zx, ex = r'$\mathbf{z}_{1:L}$', r'$\boldsymbol{\epsilon}$'
117
-
 
 
 
 
 
 
118
  G = nx.DiGraph()
119
  G.add_edge(m, x)
120
  G.add_edge(s, x)
@@ -132,44 +154,51 @@ def brain_graph(*args):
132
  G.add_edge(uv, v)
133
 
134
  pos = {
135
- x: (0, 0), zx: (-0.25, -1), ex: (0.25, -1),
136
- a: (0, 1), ua: (0, 2),
137
- s: (1, 0), us: (1, -1),
138
- b: (1, 1), ub: (1, 2),
139
- m: (-1, 0), um: (-1, -1),
140
- v: (-1, 1), uv: (-1, 2)
 
 
 
 
 
 
 
141
  }
142
 
143
  node_c = {}
144
  for node in G:
145
- node_c[node] = 'lightgrey' if node in [x, m, s, a, b, v] else 'white'
146
- node_line_c = {k: 'black' for k, _ in node_c.items()}
147
- edge_c = {e: 'black' for e in G.edges}
148
 
149
  if args[0]: # do_m
150
  # G.remove_edge(um, m)
151
- edge_c[(um, m)] = 'lightgrey'
152
- node_line_c[m] = 'red'
153
  if args[1]: # do_s
154
  # G.remove_edge(us, s)
155
- edge_c[(us, s)] = 'lightgrey'
156
- node_line_c[s] = 'red'
157
  if args[2]: # do_a
158
  # G.remove_edge(ua, a)
159
- edge_c[(ua, a)] = 'lightgrey'
160
- node_line_c[a] = 'red'
161
  if args[3]: # do_b
162
  # G.remove_edges_from([(ub, b), (s, b), (a, b)])
163
- edge_c[(ub, b)] = 'lightgrey'
164
- edge_c[(s, b)] = 'lightgrey'
165
- edge_c[(a, b)] = 'lightgrey'
166
- node_line_c[b] = 'red'
167
  if args[4]: # do_v
168
  # G.remove_edges_from([(uv, v), (a, v), (b, v)])
169
- edge_c[(uv, v)] = 'lightgrey'
170
- edge_c[(a, v)] = 'lightgrey'
171
- edge_c[(b, v)] = 'lightgrey'
172
- node_line_c[v] = 'red'
173
 
174
  fs = 30
175
  options = {
@@ -182,33 +211,49 @@ def brain_graph(*args):
182
  "width": 2,
183
  }
184
 
185
- plt.close('all')
186
- fig, ax = plt.subplots(1, 1, figsize=(5,5))#, constrained_layout=True)
187
  # fig.patch.set_visible(False)
188
  ax.margins(x=0.1, y=0.08, tight=False)
189
  ax.axis("off")
190
- nx.draw_networkx(G, pos, **options, arrowsize=25, arrowstyle='-|>', ax=ax)
191
  # need to reuse x, y limits so that the graphs plot the same way before and after removing edges
192
  x_lim = (-1.32, 1.32)
193
  y_lim = (-1.414, 2.414)
194
  ax.set_xlim(x_lim)
195
  ax.set_ylim(y_lim)
196
- rect = patches.FancyBboxPatch((-0.5, -1.325), 1, 0.65, boxstyle="round, pad=0.05, rounding_size=0", linewidth=2, edgecolor='black', facecolor='none', linestyle='-')
 
 
 
 
 
 
 
 
 
197
  ax.add_patch(rect)
198
  # ax.text(1.85, 0.65, r"$\mathbf{U}_{\mathbf{x}}$", fontsize=fs)
199
-
200
  if args[0]: # do_m
201
- fig.figimage(HAMMER, 0.0075*fig.bbox.xmax, 0.395*fig.bbox.ymax, zorder=10)
202
  if args[1]: # do_s
203
- fig.figimage(HAMMER, 0.72*fig.bbox.xmax, 0.395*fig.bbox.ymax, zorder=11)
204
  if args[2]: # do_a
205
- fig.figimage(HAMMER, 0.363*fig.bbox.xmax, 0.64*fig.bbox.ymax, zorder=12)
206
  if args[3]: # do_b
207
- fig.figimage(HAMMER, 0.72*fig.bbox.xmax, 0.64*fig.bbox.ymax, zorder=13)
208
  if args[4]: # do_v
209
- fig.figimage(HAMMER, 0.0075*fig.bbox.xmax, 0.64*fig.bbox.ymax, zorder=14)
210
  else: # b -> v
211
- a3 = patches.FancyArrowPatch((.86, 1.21), (-.86, 1.21), connectionstyle="arc3,rad=.3", linewidth=2, arrowstyle='simple, head_width=10, head_length=10', color='k')
 
 
 
 
 
 
 
212
  ax.add_patch(a3)
213
  # print(ax.get_xlim())
214
  # print(ax.get_ylim())
@@ -217,12 +262,16 @@ def brain_graph(*args):
217
  return np.array(fig.canvas.renderer.buffer_rgba())
218
 
219
 
220
-
221
  def chest_graph(*args):
222
- x, a, d, r, s= r'$\mathbf{x}$', r'$a$', r'$d$', r'$r$', r'$s$'
223
- ua, ud, ur, us = r'$\mathbf{U}_a$', r'$\mathbf{U}_d$', r'$\mathbf{U}_r$', r'$\mathbf{U}_s$'
224
- zx, ex = r'$\mathbf{z}_{1:L}$', r'$\boldsymbol{\epsilon}$'
225
-
 
 
 
 
 
226
  G = nx.DiGraph()
227
  G.add_edge(ua, a)
228
  G.add_edge(ud, d)
@@ -237,7 +286,7 @@ def chest_graph(*args):
237
  G.add_edge(a, x)
238
 
239
  pos = {
240
- x: (0, 0),
241
  a: (-1, 1),
242
  d: (0, 1),
243
  r: (1, 1),
@@ -246,34 +295,34 @@ def chest_graph(*args):
246
  ud: (0, 2),
247
  ur: (1, 2),
248
  us: (1, -1),
249
- zx: (-0.25, -1),
250
  ex: (0.25, -1),
251
  }
252
 
253
  node_c = {}
254
  for node in G:
255
- node_c[node] = 'lightgrey' if node in [x, a, d, r, s] else 'white'
256
 
257
- edge_c = {e: 'black' for e in G.edges}
258
- node_line_c = {k: 'black' for k, _ in node_c.items()}
259
 
260
  if args[0]: # do_r
261
  # G.remove_edge(ur, r)
262
- edge_c[(ur, r)] = 'lightgrey'
263
- node_line_c[r] = 'red'
264
  if args[1]: # do_s
265
  # G.remove_edges_from([(us, s)])
266
- edge_c[(us, s)] = 'lightgrey'
267
- node_line_c[s] = 'red'
268
  if args[2]: # do_f (do_d)
269
  # G.remove_edges_from([(ud, d), (a, d)])
270
- edge_c[(ud, d)] = 'lightgrey'
271
- edge_c[(a, d)] = 'lightgrey'
272
- node_line_c[d] = 'red'
273
  if args[3]: # do_a
274
  # G.remove_edge(ua, a)
275
- edge_c[(ua, a)] = 'lightgrey'
276
- node_line_c[a] = 'red'
277
 
278
  fs = 30
279
  options = {
@@ -285,29 +334,38 @@ def chest_graph(*args):
285
  "linewidths": 2,
286
  "width": 2,
287
  }
288
- plt.close('all')
289
- fig, ax = plt.subplots(1, 1, figsize=(5,5))#, constrained_layout=True)
290
  # fig.patch.set_visible(False)
291
  ax.margins(x=0.1, y=0.08, tight=False)
292
  ax.axis("off")
293
- nx.draw_networkx(G, pos, **options, arrowsize=25, arrowstyle='-|>', ax=ax)
294
  # need to reuse x, y limits so that the graphs plot the same way before and after removing edges
295
  x_lim = (-1.32, 1.32)
296
  y_lim = (-1.414, 2.414)
297
  ax.set_xlim(x_lim)
298
  ax.set_ylim(y_lim)
299
- rect = patches.FancyBboxPatch((-0.5, -1.325), 1, 0.65, boxstyle="round, pad=0.05, rounding_size=0", linewidth=2, edgecolor='black', facecolor='none', linestyle='-')
 
 
 
 
 
 
 
 
 
300
  ax.add_patch(rect)
301
  ax.text(-0.9, -1.075, r"$\mathbf{U}_{\mathbf{x}}$", fontsize=fs)
302
-
303
  if args[0]: # do_r
304
- fig.figimage(HAMMER, 0.72*fig.bbox.xmax, 0.64*fig.bbox.ymax, zorder=10)
305
  if args[1]: # do_s
306
- fig.figimage(HAMMER, 0.72*fig.bbox.xmax, 0.395*fig.bbox.ymax, zorder=11)
307
  if args[2]: # do_f
308
- fig.figimage(HAMMER, 0.363*fig.bbox.xmax, 0.64*fig.bbox.ymax, zorder=12)
309
  if args[3]: # do_a
310
- fig.figimage(HAMMER, 0.0075*fig.bbox.xmax, 0.64*fig.bbox.ymax, zorder=13)
311
 
312
  fig.tight_layout()
313
  fig.canvas.draw()
@@ -315,51 +373,55 @@ def chest_graph(*args):
315
 
316
 
317
  def vae_preprocess(args, pa):
318
- if 'ukbb' in args.hps:
319
  # preprocess the UKBB parents for the VAE, which was trained on
320
  # log-standardized parents; the PGM was trained with [-1,1] normalization
321
  # first undo [-1,1] parent preprocessing back to original range
322
  for k, v in pa.items():
323
- if k != 'mri_seq' and k != 'sex':
324
  pa[k] = (v + 1) / 2 # [-1,1] -> [0,1]
325
  _max, _min = get_attr_max_min(k)
326
  pa[k] = pa[k] * (_max - _min) + _min
327
  # log_standardize parents for vae input
328
  for k, v in pa.items():
329
  logpa_k = torch.log(v.clamp(min=1e-12))
330
- if k == 'age':
331
  pa[k] = (logpa_k - 4.112339973449707) / 0.11769197136163712
332
- elif k == 'brain_volume':
333
  pa[k] = (logpa_k - 13.965583801269531) / 0.09537758678197861
334
- elif k == 'ventricle_volume':
335
  pa[k] = (logpa_k - 10.345998764038086) / 0.43127763271331787
336
  # concatenate parents expand to input res for conditioning the vae
337
- pa = torch.cat([pa[k] if len(pa[k].shape) > 1 else pa[k][..., None]
338
- for k in args.parents_x], dim=1)
339
- pa = pa[..., None, None].repeat(1, 1, *(args.input_res,)*2).to(args.device).float()
340
  return pa
341
 
342
 
343
  def preprocess_brain(args, obs):
344
- obs['x'] = (obs['x'][None,...].float().to(args.device) - 127.5) / 127.5 # [-1,1]
345
  # for all other variables except x
346
- for k in [k for k in obs.keys() if k != 'x']:
347
  obs[k] = obs[k].float().to(args.device).view(1, 1)
348
- if k in ['age', 'brain_volume', 'ventricle_volume']:
349
  k_max, k_min = get_attr_max_min(k)
350
  obs[k] = (obs[k] - k_min) / (k_max - k_min) # [0,1]
351
  obs[k] = 2 * obs[k] - 1 # [-1,1]
352
  return obs
353
 
354
 
355
- def get_fig_arr(x, width=4, height=4, dpi=144, cmap='Greys_r', norm=None):
356
  fig = plt.figure(figsize=(width, height), dpi=dpi)
357
- ax = plt.axes([0,0,1,1], frameon=False)
358
- if cmap == 'Greys_r':
359
  ax.imshow(x, cmap=cmap, vmin=0, vmax=255)
360
  else:
361
  ax.imshow(x, cmap=cmap, norm=norm)
362
- ax.axis('off')
363
  fig.canvas.draw()
364
  return np.array(fig.canvas.renderer.buffer_rgba())
365
 
@@ -370,4 +432,4 @@ def normalize(x, x_min=None, x_max=None, zero_one=False):
370
  if x_max is None:
371
  x_max = x.max()
372
  x = (x - x_min) / (x_max - x_min) # [0,1]
373
- return x if zero_one else 2 * x - 1 # else [-1,1]
 
3
  import networkx as nx
4
  import matplotlib.pyplot as plt
5
 
6
+ from PIL import Image
7
+
8
  from matplotlib import rc, patches, colors
9
+
10
+ rc("font", **{"family": "serif", "serif": ["Roman"]})
11
+ rc("text", usetex=True)
12
+ rc("image", interpolation="none")
13
+ rc("text.latex", preamble=r"\usepackage{amsmath} \usepackage{amssymb}")
14
 
15
  from datasets import get_attr_max_min
16
 
17
+ HAMMER = np.array(Image.open("./hammer.png").resize((35, 35))) / 255
 
18
 
19
 
20
  class MidpointNormalize(colors.Normalize):
 
23
  colors.Normalize.__init__(self, vmin, vmax, clip)
24
 
25
  def __call__(self, value, clip=None):
26
+ v_ext = np.max([np.abs(self.vmin), np.abs(self.vmax)])
27
  x, y = [-v_ext, self.midpoint, v_ext], [0, 0.5, 1]
28
  return np.ma.masked_array(np.interp(value, x, y))
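
A minimal usage sketch of the MidpointNormalize above: it anchors a diverging colormap at `midpoint`, which is useful for signed difference maps (e.g. counterfactual minus observation). The array below is invented, and the (vmin, vmax, midpoint) constructor is assumed from the partially shown __init__.

import numpy as np
import matplotlib.pyplot as plt

diff = np.random.randn(64, 64)  # hypothetical signed effect map
norm = MidpointNormalize(vmin=diff.min(), vmax=diff.max(), midpoint=0)  # assumed signature
plt.imshow(diff, cmap="RdBu_r", norm=norm)  # colormap midpoint pinned to zero
plt.colorbar()
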
29
 
 
33
 
34
 
35
  def mnist_graph(*args):
36
+ x, t, i, y = r"$\mathbf{x}$", r"$t$", r"$i$", r"$y$"
37
+ ut, ui, uy = r"$\mathbf{U}_t$", r"$\mathbf{U}_i$", r"$\mathbf{U}_y$"
38
+ zx, ex = r"$\mathbf{z}_{1:L}$", r"$\boldsymbol{\epsilon}$"
39
+
40
  G = nx.DiGraph()
41
  G.add_edge(t, x)
42
  G.add_edge(i, x)
 
49
  G.add_edge(ex, x)
50
 
51
  pos = {
52
+ y: (0, 0),
53
+ uy: (-1, 0),
54
+ t: (0, 0.5),
55
+ ut: (0, 1),
56
+ x: (1, 0),
57
+ zx: (2, 0.375),
58
+ ex: (2, 0),
59
+ i: (1, 0.5),
60
+ ui: (1, 1),
61
  }
62
 
63
  node_c = {}
64
  for node in G:
65
+ node_c[node] = "lightgrey" if node in [x, t, i, y] else "white"
66
+ node_line_c = {k: "black" for k, _ in node_c.items()}
67
+ edge_c = {e: "black" for e in G.edges}
68
 
69
  if args[0]: # do_t
70
+ edge_c[(ut, t)] = "lightgrey"
71
  # G.remove_edge(ut, t)
72
+ node_line_c[t] = "red"
73
  if args[1]: # do_i
74
+ edge_c[(ui, i)] = "lightgrey"
75
+ edge_c[(t, i)] = "lightgrey"
76
  # G.remove_edges_from([(ui, i), (t, i)])
77
+ node_line_c[i] = "red"
78
  if args[2]: # do_y
79
+ edge_c[(uy, y)] = "lightgrey"
80
  # G.remove_edge(uy, y)
81
+ node_line_c[y] = "red"
82
 
83
  fs = 30
84
  options = {
 
90
  "linewidths": 2,
91
  "width": 2,
92
  }
93
+ plt.close("all")
94
+ fig, ax = plt.subplots(1, 1, figsize=(6, 4.1)) # , constrained_layout=True)
95
  # fig.patch.set_visible(False)
96
  ax.margins(x=0.06, y=0.15, tight=False)
97
  ax.axis("off")
98
+ nx.draw_networkx(G, pos, **options, arrowsize=25, arrowstyle="-|>", ax=ax)
99
  # need to reuse x, y limits so that the graphs plot the same way before and after removing edges
100
  x_lim = (-1.348, 2.348)
101
  y_lim = (-0.215, 1.215)
102
  ax.set_xlim(x_lim)
103
  ax.set_ylim(y_lim)
104
+ rect = patches.FancyBboxPatch(
105
+ (1.75, -0.16),
106
+ 0.5,
107
+ 0.7,
108
+ boxstyle="round, pad=0.05, rounding_size=0",
109
+ linewidth=2,
110
+ edgecolor="black",
111
+ facecolor="none",
112
+ linestyle="-",
113
+ )
114
  ax.add_patch(rect)
115
  ax.text(1.85, 0.65, r"$\mathbf{U}_{\mathbf{x}}$", fontsize=fs)
116
 
117
  if args[0]: # do_t
118
+ fig.figimage(HAMMER, 0.26 * fig.bbox.xmax, 0.525 * fig.bbox.ymax, zorder=10)
119
  if args[1]: # do_i
120
+ fig.figimage(HAMMER, 0.5175 * fig.bbox.xmax, 0.525 * fig.bbox.ymax, zorder=11)
121
  if args[2]: # do_y
122
+ fig.figimage(HAMMER, 0.26 * fig.bbox.xmax, 0.2 * fig.bbox.ymax, zorder=12)
123
 
124
  fig.tight_layout()
125
  fig.canvas.draw()
 
127
 
128
 
129
  def brain_graph(*args):
130
+ x, m, s, a, b, v = r"$\mathbf{x}$", r"$m$", r"$s$", r"$a$", r"$b$", r"$v$"
131
+ um, us, ua, ub, uv = (
132
+ r"$\mathbf{U}_m$",
133
+ r"$\mathbf{U}_s$",
134
+ r"$\mathbf{U}_a$",
135
+ r"$\mathbf{U}_b$",
136
+ r"$\mathbf{U}_v$",
137
+ )
138
+ zx, ex = r"$\mathbf{z}_{1:L}$", r"$\boldsymbol{\epsilon}$"
139
+
140
  G = nx.DiGraph()
141
  G.add_edge(m, x)
142
  G.add_edge(s, x)
 
154
  G.add_edge(uv, v)
155
 
156
  pos = {
157
+ x: (0, 0),
158
+ zx: (-0.25, -1),
159
+ ex: (0.25, -1),
160
+ a: (0, 1),
161
+ ua: (0, 2),
162
+ s: (1, 0),
163
+ us: (1, -1),
164
+ b: (1, 1),
165
+ ub: (1, 2),
166
+ m: (-1, 0),
167
+ um: (-1, -1),
168
+ v: (-1, 1),
169
+ uv: (-1, 2),
170
  }
171
 
172
  node_c = {}
173
  for node in G:
174
+ node_c[node] = "lightgrey" if node in [x, m, s, a, b, v] else "white"
175
+ node_line_c = {k: "black" for k, _ in node_c.items()}
176
+ edge_c = {e: "black" for e in G.edges}
177
 
178
  if args[0]: # do_m
179
  # G.remove_edge(um, m)
180
+ edge_c[(um, m)] = "lightgrey"
181
+ node_line_c[m] = "red"
182
  if args[1]: # do_s
183
  # G.remove_edge(us, s)
184
+ edge_c[(us, s)] = "lightgrey"
185
+ node_line_c[s] = "red"
186
  if args[2]: # do_a
187
  # G.remove_edge(ua, a)
188
+ edge_c[(ua, a)] = "lightgrey"
189
+ node_line_c[a] = "red"
190
  if args[3]: # do_b
191
  # G.remove_edges_from([(ub, b), (s, b), (a, b)])
192
+ edge_c[(ub, b)] = "lightgrey"
193
+ edge_c[(s, b)] = "lightgrey"
194
+ edge_c[(a, b)] = "lightgrey"
195
+ node_line_c[b] = "red"
196
  if args[4]: # do_v
197
  # G.remove_edges_from([(uv, v), (a, v), (b, v)])
198
+ edge_c[(uv, v)] = "lightgrey"
199
+ edge_c[(a, v)] = "lightgrey"
200
+ edge_c[(b, v)] = "lightgrey"
201
+ node_line_c[v] = "red"
202
 
203
  fs = 30
204
  options = {
 
211
  "width": 2,
212
  }
213
 
214
+ plt.close("all")
215
+ fig, ax = plt.subplots(1, 1, figsize=(5, 5)) # , constrained_layout=True)
216
  # fig.patch.set_visible(False)
217
  ax.margins(x=0.1, y=0.08, tight=False)
218
  ax.axis("off")
219
+ nx.draw_networkx(G, pos, **options, arrowsize=25, arrowstyle="-|>", ax=ax)
220
  # need to reuse x, y limits so that the graphs plot the same way before and after removing edges
221
  x_lim = (-1.32, 1.32)
222
  y_lim = (-1.414, 2.414)
223
  ax.set_xlim(x_lim)
224
  ax.set_ylim(y_lim)
225
+ rect = patches.FancyBboxPatch(
226
+ (-0.5, -1.325),
227
+ 1,
228
+ 0.65,
229
+ boxstyle="round, pad=0.05, rounding_size=0",
230
+ linewidth=2,
231
+ edgecolor="black",
232
+ facecolor="none",
233
+ linestyle="-",
234
+ )
235
  ax.add_patch(rect)
236
  # ax.text(1.85, 0.65, r"$\mathbf{U}_{\mathbf{x}}$", fontsize=fs)
237
+
238
  if args[0]: # do_m
239
+ fig.figimage(HAMMER, 0.0075 * fig.bbox.xmax, 0.395 * fig.bbox.ymax, zorder=10)
240
  if args[1]: # do_s
241
+ fig.figimage(HAMMER, 0.72 * fig.bbox.xmax, 0.395 * fig.bbox.ymax, zorder=11)
242
  if args[2]: # do_a
243
+ fig.figimage(HAMMER, 0.363 * fig.bbox.xmax, 0.64 * fig.bbox.ymax, zorder=12)
244
  if args[3]: # do_b
245
+ fig.figimage(HAMMER, 0.72 * fig.bbox.xmax, 0.64 * fig.bbox.ymax, zorder=13)
246
  if args[4]: # do_v
247
+ fig.figimage(HAMMER, 0.0075 * fig.bbox.xmax, 0.64 * fig.bbox.ymax, zorder=14)
248
  else: # b -> v
249
+ a3 = patches.FancyArrowPatch(
250
+ (0.86, 1.21),
251
+ (-0.86, 1.21),
252
+ connectionstyle="arc3,rad=.3",
253
+ linewidth=2,
254
+ arrowstyle="simple, head_width=10, head_length=10",
255
+ color="k",
256
+ )
257
  ax.add_patch(a3)
258
  # print(ax.get_xlim())
259
  # print(ax.get_ylim())
 
262
  return np.array(fig.canvas.renderer.buffer_rgba())
263
 
264
 
 
265
  def chest_graph(*args):
266
+ x, a, d, r, s = r"$\mathbf{x}$", r"$a$", r"$d$", r"$r$", r"$s$"
267
+ ua, ud, ur, us = (
268
+ r"$\mathbf{U}_a$",
269
+ r"$\mathbf{U}_d$",
270
+ r"$\mathbf{U}_r$",
271
+ r"$\mathbf{U}_s$",
272
+ )
273
+ zx, ex = r"$\mathbf{z}_{1:L}$", r"$\boldsymbol{\epsilon}$"
274
+
275
  G = nx.DiGraph()
276
  G.add_edge(ua, a)
277
  G.add_edge(ud, d)
 
286
  G.add_edge(a, x)
287
 
288
  pos = {
289
+ x: (0, 0),
290
  a: (-1, 1),
291
  d: (0, 1),
292
  r: (1, 1),
 
295
  ud: (0, 2),
296
  ur: (1, 2),
297
  us: (1, -1),
298
+ zx: (-0.25, -1),
299
  ex: (0.25, -1),
300
  }
301
 
302
  node_c = {}
303
  for node in G:
304
+ node_c[node] = "lightgrey" if node in [x, a, d, r, s] else "white"
305
 
306
+ edge_c = {e: "black" for e in G.edges}
307
+ node_line_c = {k: "black" for k, _ in node_c.items()}
308
 
309
  if args[0]: # do_r
310
  # G.remove_edge(ur, r)
311
+ edge_c[(ur, r)] = "lightgrey"
312
+ node_line_c[r] = "red"
313
  if args[1]: # do_s
314
  # G.remove_edges_from([(us, s)])
315
+ edge_c[(us, s)] = "lightgrey"
316
+ node_line_c[s] = "red"
317
  if args[2]: # do_f (do_d)
318
  # G.remove_edges_from([(ud, d), (a, d)])
319
+ edge_c[(ud, d)] = "lightgrey"
320
+ edge_c[(a, d)] = "lightgrey"
321
+ node_line_c[d] = "red"
322
  if args[3]: # do_a
323
  # G.remove_edge(ua, a)
324
+ edge_c[(ua, a)] = "lightgrey"
325
+ node_line_c[a] = "red"
326
 
327
  fs = 30
328
  options = {
 
334
  "linewidths": 2,
335
  "width": 2,
336
  }
337
+ plt.close("all")
338
+ fig, ax = plt.subplots(1, 1, figsize=(5, 5)) # , constrained_layout=True)
339
  # fig.patch.set_visible(False)
340
  ax.margins(x=0.1, y=0.08, tight=False)
341
  ax.axis("off")
342
+ nx.draw_networkx(G, pos, **options, arrowsize=25, arrowstyle="-|>", ax=ax)
343
  # need to reuse x, y limits so that the graphs plot the same way before and after removing edges
344
  x_lim = (-1.32, 1.32)
345
  y_lim = (-1.414, 2.414)
346
  ax.set_xlim(x_lim)
347
  ax.set_ylim(y_lim)
348
+ rect = patches.FancyBboxPatch(
349
+ (-0.5, -1.325),
350
+ 1,
351
+ 0.65,
352
+ boxstyle="round, pad=0.05, rounding_size=0",
353
+ linewidth=2,
354
+ edgecolor="black",
355
+ facecolor="none",
356
+ linestyle="-",
357
+ )
358
  ax.add_patch(rect)
359
  ax.text(-0.9, -1.075, r"$\mathbf{U}_{\mathbf{x}}$", fontsize=fs)
360
+
361
  if args[0]: # do_r
362
+ fig.figimage(HAMMER, 0.72 * fig.bbox.xmax, 0.64 * fig.bbox.ymax, zorder=10)
363
  if args[1]: # do_s
364
+ fig.figimage(HAMMER, 0.72 * fig.bbox.xmax, 0.395 * fig.bbox.ymax, zorder=11)
365
  if args[2]: # do_f
366
+ fig.figimage(HAMMER, 0.363 * fig.bbox.xmax, 0.64 * fig.bbox.ymax, zorder=12)
367
  if args[3]: # do_a
368
+ fig.figimage(HAMMER, 0.0075 * fig.bbox.xmax, 0.64 * fig.bbox.ymax, zorder=13)
369
 
370
  fig.tight_layout()
371
  fig.canvas.draw()
 
373
 
374
 
375
  def vae_preprocess(args, pa):
376
+ if "ukbb" in args.hps:
377
  # preprocess the UKBB parents for the VAE, which was trained on
378
  # log-standardized parents; the PGM was trained with [-1,1] normalization
379
  # first undo [-1,1] parent preprocessing back to original range
380
  for k, v in pa.items():
381
+ if k != "mri_seq" and k != "sex":
382
  pa[k] = (v + 1) / 2 # [-1,1] -> [0,1]
383
  _max, _min = get_attr_max_min(k)
384
  pa[k] = pa[k] * (_max - _min) + _min
385
  # log_standardize parents for vae input
386
  for k, v in pa.items():
387
  logpa_k = torch.log(v.clamp(min=1e-12))
388
+ if k == "age":
389
  pa[k] = (logpa_k - 4.112339973449707) / 0.11769197136163712
390
+ elif k == "brain_volume":
391
  pa[k] = (logpa_k - 13.965583801269531) / 0.09537758678197861
392
+ elif k == "ventricle_volume":
393
  pa[k] = (logpa_k - 10.345998764038086) / 0.43127763271331787
394
  # concatenate parents expand to input res for conditioning the vae
395
+ pa = torch.cat(
396
+ [pa[k] if len(pa[k].shape) > 1 else pa[k][..., None] for k in args.parents_x],
397
+ dim=1,
398
+ )
399
+ pa = (
400
+ pa[..., None, None].repeat(1, 1, *(args.input_res,) * 2).to(args.device).float()
401
+ )
402
  return pa
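
The hard-coded constants above are presumably the mean and standard deviation of each log-transformed parent over the training set; a sketch of how such values would be derived, with invented ages:

import torch

age = torch.tensor([48.0, 55.0, 61.0, 70.0])  # hypothetical training ages (years)
log_age = torch.log(age.clamp(min=1e-12))
mu, sigma = log_age.mean(), log_age.std()
age_std = (log_age - mu) / sigma  # log-standardized, as the VAE expects
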
403
 
404
 
405
  def preprocess_brain(args, obs):
406
+ obs["x"] = (obs["x"][None, ...].float().to(args.device) - 127.5) / 127.5 # [-1,1]
407
  # for all other variables except x
408
+ for k in [k for k in obs.keys() if k != "x"]:
409
  obs[k] = obs[k].float().to(args.device).view(1, 1)
410
+ if k in ["age", "brain_volume", "ventricle_volume"]:
411
  k_max, k_min = get_attr_max_min(k)
412
  obs[k] = (obs[k] - k_min) / (k_max - k_min) # [0,1]
413
  obs[k] = 2 * obs[k] - 1 # [-1,1]
414
  return obs
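
A hedged sketch of the observation dict this function expects: one [0,255] image under 'x' plus scalar parents, all moved to args.device and mapped to [-1,1]; shapes and values below are invented.

import torch

obs = {
    "x": torch.randint(0, 256, (1, 192, 192)).float(),  # HxW image in uint8 range
    "sex": torch.tensor(1.0),
    "mri_seq": torch.tensor(0.0),
    "age": torch.tensor(52.0),
    "brain_volume": torch.tensor(1.2e6),
    "ventricle_volume": torch.tensor(2.4e4),
}
obs = preprocess_brain(args, obs)  # args as loaded from a checkpoint (needs .device)
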
415
 
416
 
417
+ def get_fig_arr(x, width=4, height=4, dpi=144, cmap="Greys_r", norm=None):
418
  fig = plt.figure(figsize=(width, height), dpi=dpi)
419
+ ax = plt.axes([0, 0, 1, 1], frameon=False)
420
+ if cmap == "Greys_r":
421
  ax.imshow(x, cmap=cmap, vmin=0, vmax=255)
422
  else:
423
  ax.imshow(x, cmap=cmap, norm=norm)
424
+ ax.axis("off")
425
  fig.canvas.draw()
426
  return np.array(fig.canvas.renderer.buffer_rgba())
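
A quick usage sketch: render a [0,255] grayscale array to the RGBA buffer that the Gradio image outputs consume (input array invented):

import numpy as np

img = (np.random.rand(192, 192) * 255).astype(np.uint8)  # hypothetical image
rgba = get_fig_arr(img)  # uint8 RGBA array of shape (height*dpi, width*dpi, 4)
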
427
 
 
432
  if x_max is None:
433
  x_max = x.max()
434
  x = (x - x_min) / (x_max - x_min) # [0,1]
435
+ return x if zero_one else 2 * x - 1 # else [-1,1]
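
To make the convention concrete, a round-trip sketch using the UKBB age range from get_attr_max_min; the inverse is the same computation vae_preprocess applies:

import torch

age = torch.tensor([44.0, 58.5, 73.0])
age_n = normalize(age, x_min=44, x_max=73)   # -> [-1, 1]
age_back = (age_n + 1) / 2 * (73 - 44) + 44  # undoes the mapping
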
datasets.py CHANGED
@@ -23,44 +23,45 @@ def normalize(x, x_min=None, x_max=None, zero_one=False):
23
  x_min = x.min()
24
  if x_max is None:
25
  x_max = x.max()
26
- print(f'max: {x_max}, min: {x_min}')
27
  x = (x - x_min) / (x_max - x_min) # [0,1]
28
  return x if zero_one else 2 * x - 1 # else [-1,1]
29
 
30
 
31
  class UKBBDataset(Dataset):
32
- def __init__(self, root, csv_file, transform=None, columns=None, norm=None, concat_pa=True):
 
 
33
  super().__init__()
34
  self.root = root
35
  self.transform = transform
36
  self.concat_pa = concat_pa # return concatenated parents
37
 
38
- print(f'\nLoading csv data: {csv_file}')
39
  self.df = pd.read_csv(csv_file)
40
  self.columns = columns
41
  if self.columns is None:
42
  # ['eid', 'sex', 'age', 'brain_volume', 'ventricle_volume', 'mri_seq']
43
  self.columns = list(self.df.columns) # return all
44
  self.columns.pop(0) # remove redundant 'index' column
45
- print(f'columns: {self.columns}')
46
- self.samples = {i: torch.as_tensor(
47
- self.df[i]).float() for i in self.columns}
48
 
49
- for k in ['age', 'brain_volume', 'ventricle_volume']:
50
- print(f'{k} normalization: {norm}')
51
  if k in self.columns:
52
- if norm == '[-1,1]':
53
  self.samples[k] = normalize(self.samples[k])
54
- elif norm == '[0,1]':
55
  self.samples[k] = normalize(self.samples[k], zero_one=True)
56
- elif norm == 'log_standard':
57
  self.samples[k] = log_standardize(self.samples[k])
58
  elif norm is None:
59
  pass
60
  else:
61
- NotImplementedError(f'{norm} not implemented.')
62
- print(f'#samples: {len(self.df)}')
63
- self.return_x = True if 'eid' in self.columns else False
64
 
65
  def __len__(self):
66
  return len(self.df)
@@ -69,31 +70,32 @@ class UKBBDataset(Dataset):
69
  sample = {k: v[idx] for k, v in self.samples.items()}
70
 
71
  if self.return_x:
72
- mri_seq = 'T1' if sample['mri_seq'] == 0. else 'T2_FLAIR'
73
  # Load scan
74
- filename = f'{int(sample["eid"])}_' + \
75
- mri_seq+'_unbiased_brain_rigid_to_mni.png'
76
- x = Image.open(os.path.join(self.root, 'thumbs_192x192', filename))
 
77
 
78
  if self.transform is not None:
79
- sample['x'] = self.transform(x)
80
- sample.pop('eid', None)
81
 
82
  if self.concat_pa:
83
- sample['pa'] = torch.cat([
84
- torch.tensor([sample[k]]) for k in self.columns if k != 'eid'
85
- ], dim=0)
86
 
87
  return sample
88
 
89
 
90
  def get_attr_max_min(attr):
91
  # some ukbb dataset (max, min) stats
92
- if attr == 'age':
93
  return 73, 44
94
- elif attr == 'brain_volume':
95
  return 1629520, 841919
96
- elif attr == 'ventricle_volume':
97
  return 157075, 7613.27001953125
98
  else:
99
  raise NotImplementedError
@@ -102,37 +104,43 @@ def get_attr_max_min(attr):
102
  def ukbb(args):
103
  csv_dir = args.data_dir
104
  augmentation = {
105
- 'train': TF.Compose([
106
- TF.Resize((args.input_res, args.input_res), antialias=None),
107
- TF.RandomCrop(size=(args.input_res, args.input_res),
108
- padding=[2*args.pad, args.pad]),
109
- TF.RandomHorizontalFlip(p=args.hflip),
110
- TF.PILToTensor()
111
- ]),
112
- 'eval': TF.Compose([
113
- TF.Resize((args.input_res, args.input_res), antialias=None),
114
- TF.PILToTensor()
115
- ])
116
  }
117
 
118
  datasets = {}
119
  # for split in ['train', 'valid', 'test']:
120
- for split in ['test']:
121
  datasets[split] = UKBBDataset(
122
  root=args.data_dir,
123
- csv_file=os.path.join(csv_dir, split+'.csv'),
124
- transform=augmentation[('eval' if split != 'train' else split)],
125
- columns=(None if not args.parents_x else ['eid'] + args.parents_x),
126
- norm=(None if not hasattr(args, 'context_norm')
127
- else args.context_norm),
128
- concat_pa=(True if not hasattr(args, 'concat_pa') else args.concat_pa))
129
 
130
  return datasets
131
 
132
 
133
  def _load_uint8(f):
134
- idx_dtype, ndim = struct.unpack('BBBB', f.read(4))[2:]
135
- shape = struct.unpack('>' + 'I' * ndim, f.read(4 * ndim))
136
  buffer_length = int(np.prod(shape))
137
  data = np.frombuffer(f.read(buffer_length), dtype=np.uint8).reshape(shape)
138
  return data
@@ -152,8 +160,8 @@ def load_idx(path: str) -> np.ndarray:
152
  ----------
153
  http://yann.lecun.com/exdb/mnist/
154
  """
155
- open_fcn = gzip.open if path.endswith('.gz') else open
156
- with open_fcn(path, 'rb') as f:
157
  return _load_uint8(f)
158
 
159
 
@@ -168,8 +176,9 @@ def _get_paths(root_dir, train):
168
  return images_path, labels_path, metrics_path
169
 
170
 
171
- def load_morphomnist_like(root_dir, train: bool = True, columns=None) \
172
- -> Tuple[np.ndarray, np.ndarray, pd.DataFrame]:
 
173
  """
174
  Args:
175
  root_dir: path to data directory
@@ -184,68 +193,84 @@ def load_morphomnist_like(root_dir, train: bool = True, columns=None) \
184
  images = load_idx(images_path)
185
  labels = load_idx(labels_path)
186
 
187
- if columns is not None and 'index' not in columns:
188
- usecols = ['index'] + list(columns)
189
  else:
190
  usecols = columns
191
- metrics = pd.read_csv(metrics_path, usecols=usecols, index_col='index')
192
  return images, labels, metrics
193
 
194
 
195
  class MorphoMNIST(Dataset):
196
- def __init__(self, root_dir, train=True, transform=None, columns=None, norm=None, concat_pa=True):
197
  self.train = train
198
  self.transform = transform
199
  self.columns = columns
200
  self.concat_pa = concat_pa
201
  self.norm = norm
202
 
203
- cols_not_digit = [c for c in self.columns if c != 'digit']
204
  images, labels, metrics_df = load_morphomnist_like(
205
- root_dir, train, cols_not_digit)
 
206
  self.images = torch.from_numpy(np.array(images)).unsqueeze(1)
207
  self.labels = F.one_hot(
208
- torch.from_numpy(np.array(labels)).long(), num_classes=10)
 
209
 
210
  if self.columns is None:
211
  self.columns = metrics_df.columns
212
  self.samples = {k: torch.tensor(metrics_df[k]) for k in cols_not_digit}
213
 
214
  self.min_max = {
215
- 'thickness': [0.87598526, 6.255515],
216
- 'intensity': [66.601204, 254.90317]
217
  }
218
 
219
  for k, v in self.samples.items(): # optional preprocessing
220
- print(f'{k} normalization: {norm}')
221
- if norm == '[-1,1]':
222
- self.samples[k] = normalize(v,
223
- x_min=self.min_max[k][0], x_max=self.min_max[k][1])
224
- elif norm == '[0,1]':
225
- self.samples[k] = normalize(v,
226
- x_min=self.min_max[k][0], x_max=self.min_max[k][1], zero_one=True)
 
 
227
  elif norm is None:
228
  pass
229
  else:
230
- NotImplementedError(f'{norm} not implemented.')
231
- print(f'#samples: {len(metrics_df)}\n')
232
 
233
- self.samples.update({'digit': self.labels})
234
 
235
  def __len__(self):
236
  return len(self.images)
237
 
238
  def __getitem__(self, idx):
239
  sample = {}
240
- sample['x'] = self.images[idx]
241
 
242
  if self.transform is not None:
243
- sample['x'] = self.transform(sample['x'])
244
 
245
  if self.concat_pa:
246
- sample['pa'] = torch.cat([
247
- v[idx] if k == 'digit' else torch.tensor([v[idx]]) for k, v in self.samples.items()],
248
- dim=0)
249
  else:
250
  sample.update({k: v[idx] for k, v in self.samples.items()})
251
  return sample
@@ -254,39 +279,43 @@ class MorphoMNIST(Dataset):
254
  def morphomnist(args):
255
  # Load data
256
  augmentation = {
257
- 'train': TF.Compose([
258
- TF.RandomCrop((args.input_res, args.input_res), padding=args.pad),
259
- ]),
260
- 'eval': TF.Compose([
261
- TF.Pad(padding=2), # (32, 32)
262
- ])
 
  }
264
 
265
  datasets = {}
266
  # for split in ['train', 'valid', 'test']:
267
- for split in ['test']:
268
  datasets[split] = MorphoMNIST(
269
  root_dir=args.data_dir,
270
- train=(split == 'train'), # test set is valid set
271
- transform=augmentation[('eval' if split != 'train' else split)],
272
  columns=args.parents_x,
273
  norm=args.context_norm,
274
- concat_pa=args.concat_pa
275
  )
276
  return datasets
277
 
278
 
279
  def preproc_mimic(batch):
280
  for k, v in batch.items():
281
- if k == 'x':
282
- batch['x'] = (batch['x'].float() - 127.5) / 127.5 # [-1,1]
283
- elif k in ['age']:
284
  batch[k] = batch[k].float().unsqueeze(-1)
285
- batch[k] = batch[k] / 100.
286
  batch[k] = batch[k] * 2 - 1 # [-1,1]
287
- elif k in ['race']:
288
  batch[k] = F.one_hot(batch[k], num_classes=3).squeeze().float()
289
- elif k in ['finding']:
290
  batch[k] = batch[k].unsqueeze(-1).float()
291
  else:
292
  batch[k] = batch[k].float().unsqueeze(-1)
@@ -294,39 +323,52 @@ def preproc_mimic(batch):
294
 
295
 
296
  class MIMICDataset(Dataset):
297
- def __init__(self, root, csv_file, transform=None, columns=None, concat_pa=True, only_pleural_eff=True):
298
  self.data = pd.read_csv(csv_file)
299
  self.transform = transform
300
- self.disease_labels = ['No Finding', 'Other', 'Pleural Effusion', 'Lung Opacity']
301
  self.samples = {
302
- 'age': [],
303
- 'sex': [],
304
- 'finding': [],
305
- 'x': [],
306
- 'race': [],
307
- 'lung_opacity': [],
308
- 'pleural_effusion': [],
309
  }
310
 
311
- for idx, _ in enumerate(tqdm(range(len(self.data)), desc='Loading MIMIC Data')):
312
- if only_pleural_eff and self.data.loc[idx, 'disease'] == 'Other':
313
  continue
314
- img_path = os.path.join(root, self.data.loc[idx, 'path_preproc'])
315
 
316
- lung_opacity = self.data.loc[idx, 'Lung Opacity']
317
- self.samples['lung_opacity'].append(lung_opacity)
318
 
319
- pleural_effusion = self.data.loc[idx, 'Pleural Effusion']
320
- self.samples['pleural_effusion'].append(pleural_effusion)
321
 
322
- disease = self.data.loc[idx, 'disease']
323
- finding = 0 if disease == 'No Finding' else 1
324
 
325
- self.samples['x'].append(img_path)
326
- self.samples['finding'].append(finding)
327
- self.samples['age'].append(self.data.loc[idx, 'age'])
328
- self.samples['race'].append(self.data.loc[idx, 'race_label'])
329
- self.samples['sex'].append(self.data.loc[idx, 'sex_label'])
330
 
331
  self.columns = columns
332
  if self.columns is None:
@@ -336,33 +378,36 @@ class MIMICDataset(Dataset):
336
  self.concat_pa = concat_pa
337
 
338
  def __len__(self):
339
- return len(self.samples['x'])
340
 
341
  def __getitem__(self, idx):
342
  sample = {k: v[idx] for k, v in self.samples.items()}
343
- sample['x'] = imread(sample['x']).astype(np.float32)[None, ...]
344
 
345
  for k, v in sample.items():
346
  sample[k] = torch.tensor(v)
347
 
348
  if self.transform:
349
- sample['x'] = self.transform(sample['x'])
350
 
351
  sample = preproc_mimic(sample)
352
  if self.concat_pa:
353
- sample['pa'] = torch.cat([sample[k] for k in self.columns], dim=0)
354
  return sample
355
 
356
 
357
  def mimic(args):
358
  args.csv_dir = args.data_dir
359
  datasets = {}
360
- datasets['test'] = MIMICDataset(
361
  root=args.data_dir,
362
- csv_file=os.path.join(args.csv_dir, 'mimic.sample.test.csv'),
363
  columns=args.parents_x,
364
- transform=TF.Compose([
365
- TF.Resize((args.input_res, args.input_res), antialias=None),
366
- ])
 
 
 
367
  )
368
- return datasets
 
23
  x_min = x.min()
24
  if x_max is None:
25
  x_max = x.max()
26
+ print(f"max: {x_max}, min: {x_min}")
27
  x = (x - x_min) / (x_max - x_min) # [0,1]
28
  return x if zero_one else 2 * x - 1 # else [-1,1]
29
 
30
 
31
  class UKBBDataset(Dataset):
32
+ def __init__(
33
+ self, root, csv_file, transform=None, columns=None, norm=None, concat_pa=True
34
+ ):
35
  super().__init__()
36
  self.root = root
37
  self.transform = transform
38
  self.concat_pa = concat_pa # return concatenated parents
39
 
40
+ print(f"\nLoading csv data: {csv_file}")
41
  self.df = pd.read_csv(csv_file)
42
  self.columns = columns
43
  if self.columns is None:
44
  # ['eid', 'sex', 'age', 'brain_volume', 'ventricle_volume', 'mri_seq']
45
  self.columns = list(self.df.columns) # return all
46
  self.columns.pop(0) # remove redundant 'index' column
47
+ print(f"columns: {self.columns}")
48
+ self.samples = {i: torch.as_tensor(self.df[i]).float() for i in self.columns}
 
49
 
50
+ for k in ["age", "brain_volume", "ventricle_volume"]:
51
+ print(f"{k} normalization: {norm}")
52
  if k in self.columns:
53
+ if norm == "[-1,1]":
54
  self.samples[k] = normalize(self.samples[k])
55
+ elif norm == "[0,1]":
56
  self.samples[k] = normalize(self.samples[k], zero_one=True)
57
+ elif norm == "log_standard":
58
  self.samples[k] = log_standardize(self.samples[k])
59
  elif norm is None:
60
  pass
61
  else:
62
+ raise NotImplementedError(f"{norm} not implemented.")
63
+ print(f"#samples: {len(self.df)}")
64
+ self.return_x = True if "eid" in self.columns else False
65
 
66
  def __len__(self):
67
  return len(self.df)
 
70
  sample = {k: v[idx] for k, v in self.samples.items()}
71
 
72
  if self.return_x:
73
+ mri_seq = "T1" if sample["mri_seq"] == 0.0 else "T2_FLAIR"
74
  # Load scan
75
+ filename = (
76
+ f'{int(sample["eid"])}_' + mri_seq + "_unbiased_brain_rigid_to_mni.png"
77
+ )
78
+ x = Image.open(os.path.join(self.root, "thumbs_192x192", filename))
79
 
80
  if self.transform is not None:
81
+ sample["x"] = self.transform(x)
82
+ sample.pop("eid", None)
83
 
84
  if self.concat_pa:
85
+ sample["pa"] = torch.cat(
86
+ [torch.tensor([sample[k]]) for k in self.columns if k != "eid"], dim=0
87
+ )
88
 
89
  return sample
90
 
91
 
92
  def get_attr_max_min(attr):
93
  # some ukbb dataset (max, min) stats
94
+ if attr == "age":
95
  return 73, 44
96
+ elif attr == "brain_volume":
97
  return 1629520, 841919
98
+ elif attr == "ventricle_volume":
99
  return 157075, 7613.27001953125
100
  else:
101
  raise NotImplementedError
 
104
  def ukbb(args):
105
  csv_dir = args.data_dir
106
  augmentation = {
107
+ "train": TF.Compose(
108
+ [
109
+ TF.Resize((args.input_res, args.input_res), antialias=None),
110
+ TF.RandomCrop(
111
+ size=(args.input_res, args.input_res),
112
+ padding=[2 * args.pad, args.pad],
113
+ ),
114
+ TF.RandomHorizontalFlip(p=args.hflip),
115
+ TF.PILToTensor(),
116
+ ]
117
+ ),
118
+ "eval": TF.Compose(
119
+ [
120
+ TF.Resize((args.input_res, args.input_res), antialias=None),
121
+ TF.PILToTensor(),
122
+ ]
123
+ ),
124
  }
125
 
126
  datasets = {}
127
  # for split in ['train', 'valid', 'test']:
128
+ for split in ["test"]:
129
  datasets[split] = UKBBDataset(
130
  root=args.data_dir,
131
+ csv_file=os.path.join(csv_dir, split + ".csv"),
132
+ transform=augmentation[("eval" if split != "train" else split)],
133
+ columns=(None if not args.parents_x else ["eid"] + args.parents_x),
134
+ norm=(None if not hasattr(args, "context_norm") else args.context_norm),
135
+ concat_pa=False,
136
+ )
137
 
138
  return datasets
139
 
140
 
141
  def _load_uint8(f):
142
+ idx_dtype, ndim = struct.unpack("BBBB", f.read(4))[2:]
143
+ shape = struct.unpack(">" + "I" * ndim, f.read(4 * ndim))
144
  buffer_length = int(np.prod(shape))
145
  data = np.frombuffer(f.read(buffer_length), dtype=np.uint8).reshape(shape)
146
  return data
 
160
  ----------
161
  http://yann.lecun.com/exdb/mnist/
162
  """
163
+ open_fcn = gzip.open if path.endswith(".gz") else open
164
+ with open_fcn(path, "rb") as f:
165
  return _load_uint8(f)
166
 
167
 
 
176
  return images_path, labels_path, metrics_path
177
 
178
 
179
+ def load_morphomnist_like(
180
+ root_dir, train: bool = True, columns=None
181
+ ) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame]:
182
  """
183
  Args:
184
  root_dir: path to data directory
 
193
  images = load_idx(images_path)
194
  labels = load_idx(labels_path)
195
 
196
+ if columns is not None and "index" not in columns:
197
+ usecols = ["index"] + list(columns)
198
  else:
199
  usecols = columns
200
+ metrics = pd.read_csv(metrics_path, usecols=usecols, index_col="index")
201
  return images, labels, metrics
202
 
203
 
204
  class MorphoMNIST(Dataset):
205
+ def __init__(
206
+ self,
207
+ root_dir,
208
+ train=True,
209
+ transform=None,
210
+ columns=None,
211
+ norm=None,
212
+ concat_pa=True,
213
+ ):
214
  self.train = train
215
  self.transform = transform
216
  self.columns = columns
217
  self.concat_pa = concat_pa
218
  self.norm = norm
219
 
220
+ cols_not_digit = [c for c in self.columns if c != "digit"]
221
  images, labels, metrics_df = load_morphomnist_like(
222
+ root_dir, train, cols_not_digit
223
+ )
224
  self.images = torch.from_numpy(np.array(images)).unsqueeze(1)
225
  self.labels = F.one_hot(
226
+ torch.from_numpy(np.array(labels)).long(), num_classes=10
227
+ )
228
 
229
  if self.columns is None:
230
  self.columns = metrics_df.columns
231
  self.samples = {k: torch.tensor(metrics_df[k]) for k in cols_not_digit}
232
 
233
  self.min_max = {
234
+ "thickness": [0.87598526, 6.255515],
235
+ "intensity": [66.601204, 254.90317],
236
  }
237
 
238
  for k, v in self.samples.items(): # optional preprocessing
239
+ print(f"{k} normalization: {norm}")
240
+ if norm == "[-1,1]":
241
+ self.samples[k] = normalize(
242
+ v, x_min=self.min_max[k][0], x_max=self.min_max[k][1]
243
+ )
244
+ elif norm == "[0,1]":
245
+ self.samples[k] = normalize(
246
+ v, x_min=self.min_max[k][0], x_max=self.min_max[k][1], zero_one=True
247
+ )
248
  elif norm is None:
249
  pass
250
  else:
251
+ raise NotImplementedError(f"{norm} not implemented.")
252
+ print(f"#samples: {len(metrics_df)}\n")
253
 
254
+ self.samples.update({"digit": self.labels})
255
 
256
  def __len__(self):
257
  return len(self.images)
258
 
259
  def __getitem__(self, idx):
260
  sample = {}
261
+ sample["x"] = self.images[idx]
262
 
263
  if self.transform is not None:
264
+ sample["x"] = self.transform(sample["x"])
265
 
266
  if self.concat_pa:
267
+ sample["pa"] = torch.cat(
268
+ [
269
+ v[idx] if k == "digit" else torch.tensor([v[idx]])
270
+ for k, v in self.samples.items()
271
+ ],
272
+ dim=0,
273
+ )
274
  else:
275
  sample.update({k: v[idx] for k, v in self.samples.items()})
276
  return sample
 
279
  def morphomnist(args):
280
  # Load data
281
  augmentation = {
282
+ "train": TF.Compose(
283
+ [
284
+ TF.RandomCrop((args.input_res, args.input_res), padding=args.pad),
285
+ ]
286
+ ),
287
+ "eval": TF.Compose(
288
+ [
289
+ TF.Pad(padding=2), # (32, 32)
290
+ ]
291
+ ),
292
  }
293
 
294
  datasets = {}
295
  # for split in ['train', 'valid', 'test']:
296
+ for split in ["test"]:
297
  datasets[split] = MorphoMNIST(
298
  root_dir=args.data_dir,
299
+ train=(split == "train"), # test set is valid set
300
+ transform=augmentation[("eval" if split != "train" else split)],
301
  columns=args.parents_x,
302
  norm=args.context_norm,
303
+ concat_pa=False,
304
  )
305
  return datasets
306
 
307
 
308
  def preproc_mimic(batch):
309
  for k, v in batch.items():
310
+ if k == "x":
311
+ batch["x"] = (batch["x"].float() - 127.5) / 127.5 # [-1,1]
312
+ elif k in ["age"]:
313
  batch[k] = batch[k].float().unsqueeze(-1)
314
+ batch[k] = batch[k] / 100.0
315
  batch[k] = batch[k] * 2 - 1 # [-1,1]
316
+ elif k in ["race"]:
317
  batch[k] = F.one_hot(batch[k], num_classes=3).squeeze().float()
318
+ elif k in ["finding"]:
319
  batch[k] = batch[k].unsqueeze(-1).float()
320
  else:
321
  batch[k] = batch[k].float().unsqueeze(-1)
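
As a sanity check of the age convention above (years / 100, then affinely mapped to [-1,1]); the batch is invented and the function mutates it in place:

import torch

batch = {"age": torch.tensor([0.0, 50.0, 100.0])}
preproc_mimic(batch)
# batch["age"] is now tensor([[-1.], [0.], [1.]])
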
 
323
 
324
 
325
  class MIMICDataset(Dataset):
326
+ def __init__(
327
+ self,
328
+ root,
329
+ csv_file,
330
+ transform=None,
331
+ columns=None,
332
+ concat_pa=True,
333
+ only_pleural_eff=True,
334
+ ):
335
  self.data = pd.read_csv(csv_file)
336
  self.transform = transform
337
+ self.disease_labels = [
338
+ "No Finding",
339
+ "Other",
340
+ "Pleural Effusion",
341
+ # "Lung Opacity",
342
+ ]
343
  self.samples = {
344
+ "age": [],
345
+ "sex": [],
346
+ "finding": [],
347
+ "x": [],
348
+ "race": [],
349
+ # "lung_opacity": [],
350
+ # "pleural_effusion": [],
351
  }
352
 
353
+ for idx, _ in enumerate(tqdm(range(len(self.data)), desc="Loading MIMIC Data")):
354
+ if only_pleural_eff and self.data.loc[idx, "disease"] == "Other":
355
  continue
356
+ img_path = os.path.join(root, self.data.loc[idx, "path_preproc"])
357
 
358
+ # lung_opacity = self.data.loc[idx, "Lung Opacity"]
359
+ # self.samples["lung_opacity"].append(lung_opacity)
360
 
361
+ # pleural_effusion = self.data.loc[idx, "Pleural Effusion"]
362
+ # self.samples["pleural_effusion"].append(pleural_effusion)
363
 
364
+ disease = self.data.loc[idx, "disease"]
365
+ finding = 0 if disease == "No Finding" else 1
366
 
367
+ self.samples["x"].append(img_path)
368
+ self.samples["finding"].append(finding)
369
+ self.samples["age"].append(self.data.loc[idx, "age"])
370
+ self.samples["race"].append(self.data.loc[idx, "race_label"])
371
+ self.samples["sex"].append(self.data.loc[idx, "sex_label"])
372
 
373
  self.columns = columns
374
  if self.columns is None:
 
378
  self.concat_pa = concat_pa
379
 
380
  def __len__(self):
381
+ return len(self.samples["x"])
382
 
383
  def __getitem__(self, idx):
384
  sample = {k: v[idx] for k, v in self.samples.items()}
385
+ sample["x"] = imread(sample["x"]).astype(np.float32)[None, ...]
386
 
387
  for k, v in sample.items():
388
  sample[k] = torch.tensor(v)
389
 
390
  if self.transform:
391
+ sample["x"] = self.transform(sample["x"])
392
 
393
  sample = preproc_mimic(sample)
394
  if self.concat_pa:
395
+ sample["pa"] = torch.cat([sample[k] for k in self.columns], dim=0)
396
  return sample
397
 
398
 
399
  def mimic(args):
400
  args.csv_dir = args.data_dir
401
  datasets = {}
402
+ datasets["test"] = MIMICDataset(
403
  root=args.data_dir,
404
+ csv_file=os.path.join(args.csv_dir, "mimic.sample.test.csv"),
405
  columns=args.parents_x,
406
+ transform=TF.Compose(
407
+ [
408
+ TF.Resize((args.input_res, args.input_res), antialias=None),
409
+ ]
410
+ ),
411
+ concat_pa=False,
412
  )
413
+ return datasets
pgm/flow_pgm.py CHANGED
@@ -1,7 +1,8 @@
1
- import numpy as np
2
 
 
3
  import torch
4
- import torch.nn as nn
5
  import torch.nn.functional as F
6
 
7
  import pyro
@@ -15,53 +16,69 @@ from pyro.distributions.conditional import ConditionalTransformedDistribution
15
  from .layers import (
16
  ConditionalTransformedDistributionGumbelMax,
17
  ConditionalGumbelMax,
18
- ConditionalAffineTransform, MLP, CNN,
 
 
19
  )
20
 
21
 
22
  class BasePGM(nn.Module):
23
  def __init__(self):
24
  super().__init__()
25
 
26
  def scm(self, *args, **kwargs):
27
  def config(msg):
28
- if isinstance(msg['fn'], dist.TransformedDistribution):
29
  return TransformReparam()
30
  else:
31
  return None
 
32
  return pyro.poutine.reparam(self.model, config=config)(*args, **kwargs)
33
 
34
- def sample_scm(self, n_samples=1, t=None):
35
- with pyro.plate('obs', n_samples):
36
- samples = self.scm(t)
37
  return samples
38
 
39
- def sample(self, n_samples=1, t=None):
40
- with pyro.plate('obs', n_samples):
41
- samples = self.model(t) # model defined in parent class
42
  return samples
43
 
44
- def infer_exogeneous(self, obs):
45
  batch_size = list(obs.values())[0].shape[0]
46
  # assuming that we use transformed distributions for everything:
47
  cond_model = pyro.condition(self.sample, data=obs)
48
- cond_trace = pyro.poutine.trace(
49
- cond_model).get_trace(batch_size)
50
 
51
  output = {}
52
  for name, node in cond_trace.nodes.items():
53
- if 'z' in name or 'fn' not in node.keys():
54
  continue
55
- fn = node['fn']
56
  if isinstance(fn, dist.Independent):
57
  fn = fn.base_dist
58
  if isinstance(fn, dist.TransformedDistribution):
59
  # compute exogenous base dist (created with TransformReparam) at all sites
60
- output[name + '_base'] = T.ComposeTransform(
61
- fn.transforms).inv(node['value'])
 
62
  return output
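
The inversion this performs, in isolation: for a site generated as x = f(u) through a TransformedDistribution, abduction recovers u = f^{-1}(x). A standalone toy example with a single affine transform (numbers invented):

import torch
import pyro.distributions as dist
import pyro.distributions.transforms as T

f = T.AffineTransform(loc=2.0, scale=3.0)  # x = 2 + 3u
px = dist.TransformedDistribution(dist.Normal(0.0, 1.0), [f])
x = torch.tensor(5.0)
u = T.ComposeTransform(px.transforms).inv(x)  # (5 - 2) / 3 = 1.0
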
63
 
64
- def counterfactual(self, obs, intervention, num_particles=1, detach=True, t=None):
65
  dag_variables = self.variables.keys()
66
  assert set(obs.keys()) == set(dag_variables)
67
  avg_cfs = {k: torch.zeros_like(obs[k]) for k in obs.keys()}
@@ -70,43 +87,50 @@ class BasePGM(nn.Module):
70
  for _ in range(num_particles):
71
  # Abduction
72
  exo_noise = self.infer_exogeneous(obs)
73
- exo_noise = {k: v.detach() if detach else v for k,
74
- v in exo_noise.items()}
75
  # condition on root node variables (no exogeneous noise available)
76
  for k in dag_variables:
77
  if k not in intervention.keys():
78
- if k not in [i.split('_base')[0] for i in exo_noise.keys()]:
79
  exo_noise[k] = obs[k]
80
  # Abducted SCM
81
- abducted_scm = pyro.poutine.condition(
82
- self.sample_scm, data=exo_noise)
83
  # Action
84
- counterfactual_scm = pyro.poutine.do(
85
- abducted_scm, data=intervention)
86
  # Prediction
87
- counterfactuals = counterfactual_scm(batch_size, t)
88
 
89
  for k, v in counterfactuals.items():
90
- avg_cfs[k] += (v / num_particles)
91
  return avg_cfs
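
A hedged usage sketch of the abduction-action-prediction loop above, assuming `pgm` is a trained FlowPGM and every variable is already in the PGM's [-1,1] convention (all numbers are placeholders):

import torch

obs = {
    "sex": torch.ones(1, 1),
    "mri_seq": torch.zeros(1, 1),
    "age": torch.zeros(1, 1),
    "brain_volume": torch.zeros(1, 1),
    "ventricle_volume": torch.zeros(1, 1),
}
do = {"age": torch.full((1, 1), 0.5)}  # intervene on age only
cfs = pgm.counterfactual(obs, intervention=do, num_particles=4)
# descendants of age (brain/ventricle volume) respond; non-descendants persist
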
92
 
93
 
94
  class FlowPGM(BasePGM):
95
- def __init__(self, args):
96
  super().__init__()
97
  self.variables = {
98
- 'sex': 'binary',
99
- 'mri_seq': 'binary',
100
- 'age': 'continuous',
101
- 'brain_volume': 'continuous',
102
- 'ventricle_volume': 'continuous'
103
  }
104
  # priors: s, m, a, b and v
105
  self.s_logit = nn.Parameter(torch.zeros(1))
106
  self.m_logit = nn.Parameter(torch.zeros(1))
107
- for k in ['a', 'b', 'v']:
108
- self.register_buffer(f'{k}_base_loc', torch.zeros(1))
109
- self.register_buffer(f'{k}_base_scale', torch.ones(1))
110
 
111
  # constraint, assumes data is [-1,1] normalized
112
  # normalize_transform = T.ComposeTransform([
@@ -116,23 +140,19 @@ class FlowPGM(BasePGM):
116
 
117
  # age flow
118
  self.age_module = T.ComposeTransformModule(
119
- [T.Spline(1, count_bins=4, order='linear')])
120
- self.age_flow = T.ComposeTransform([
121
- self.age_module])
122
  # self.age_module, normalize_transform])
123
 
124
  # brain volume (conditional) flow: (sex, age) -> brain_vol
125
- bvol_net = DenseNN(
126
- 2, args.widths, [1, 1], nonlinearity=nn.LeakyReLU(0.1))
127
- self.bvol_flow = ConditionalAffineTransform(
128
- context_nn=bvol_net, event_dim=0)
129
  # self.bvol_flow = [self.bvol_flow, normalize_transform]
130
 
131
  # ventricle volume (conditional) flow: (brain_vol, age) -> ventricle_vol
132
- vvol_net = DenseNN(
133
- 2, args.widths, [1, 1], nonlinearity=nn.LeakyReLU(0.1))
134
- self.vvol_flow = ConditionalAffineTransform(
135
- context_nn=vvol_net, event_dim=0)
136
  # self.vvol_flow = [self.vvol_transf, normalize_transform]
137
 
138
  # if args.setup != 'sup_pgm':
@@ -148,148 +168,152 @@ class FlowPGM(BasePGM):
148
  self.encoder_b = CNN(input_shape, num_outputs=2, context_dim=1)
149
  # q(v | x) = Normal(mu(x), sigma(x))
150
  self.encoder_v = CNN(input_shape, num_outputs=2)
151
- self.f = lambda x: args.std_fixed * \
152
- torch.ones_like(x) if args.std_fixed > 0 else F.softplus(x)
 
 
 
153
 
154
- def model(self, t=None):
155
  # p(s), sex dist
156
  ps = dist.Bernoulli(logits=self.s_logit).to_event(1)
157
- sex = pyro.sample('sex', ps)
158
 
159
  # p(m), mri_seq dist
160
  pm = dist.Bernoulli(logits=self.m_logit).to_event(1)
161
- mri_seq = pyro.sample('mri_seq', pm)
162
 
163
  # p(a), age flow
164
  pa_base = dist.Normal(self.a_base_loc, self.a_base_scale).to_event(1)
165
  pa = dist.TransformedDistribution(pa_base, self.age_flow)
166
- age = pyro.sample('age', pa)
167
 
168
  # p(b | s, a), brain volume flow
169
- pb_sa_base = dist.Normal(
170
- self.b_base_loc, self.b_base_scale).to_event(1)
171
  pb_sa = ConditionalTransformedDistribution(
172
- pb_sa_base, [self.bvol_flow]).condition(torch.cat([sex, age], dim=1))
173
- bvol = pyro.sample('brain_volume', pb_sa)
 
174
  # _ = self.bvol_transf # register with pyro
175
 
176
  # p(v | b, a), ventricle volume flow
177
- pv_ba_base = dist.Normal(
178
- self.v_base_loc, self.v_base_scale).to_event(1)
179
  pv_ba = ConditionalTransformedDistribution(
180
- pv_ba_base, [self.vvol_flow]).condition(torch.cat([bvol, age], dim=1))
181
- vvol = pyro.sample('ventricle_volume', pv_ba)
 
182
  # _ = self.vvol_transf # register with pyro
183
 
184
  return {
185
- 'sex': sex,
186
- 'mri_seq': mri_seq,
187
- 'age': age,
188
- 'brain_volume': bvol,
189
- 'ventricle_volume': vvol,
190
  }
191
 
192
- def guide(self, **obs):
193
  # guide for (optional) semi-supervised learning
194
- pyro.module('FlowPGM', self)
195
- with pyro.plate('observations', obs['x'].shape[0]):
196
  # q(m | x)
197
- if obs['mri_seq'] is None:
198
- m_prob = torch.sigmoid(self.encoder_m(obs['x']))
199
- m = pyro.sample('mri_seq', dist.Bernoulli(
200
- probs=m_prob).to_event(1))
201
 
202
  # q(v | x)
203
- if obs['ventricle_volume'] is None:
204
- v_loc, v_logscale = self.encoder_v(obs['x']).chunk(2, dim=-1)
205
  qv_x = dist.Normal(v_loc, self.f(v_logscale)).to_event(1)
206
- obs['ventricle_volume'] = pyro.sample('ventricle_volume', qv_x)
207
 
208
  # q(b | x, v)
209
- if obs['brain_volume'] is None:
210
  b_loc, b_logscale = self.encoder_b(
211
- obs['x'], y=obs['ventricle_volume']).chunk(2, dim=-1)
 
212
  qb_xv = dist.Normal(b_loc, self.f(b_logscale)).to_event(1)
213
- obs['brain_volume'] = pyro.sample('brain_volume', qb_xv)
214
 
215
  # q(s | x, b)
216
- if obs['sex'] is None:
217
- s_prob = torch.sigmoid(self.encoder_s(
218
- obs['x'], y=obs['brain_volume'])) # .squeeze()
219
- pyro.sample('sex', dist.Bernoulli(probs=s_prob).to_event(1))
 
220
 
221
  # q(a | b, v)
222
- if obs['age'] is None:
223
- ctx = torch.cat(
224
- [obs['brain_volume'], obs['ventricle_volume']], dim=-1)
225
  a_loc, a_logscale = self.encoder_a(ctx).chunk(2, dim=-1)
226
- pyro.sample('age', dist.Normal(
227
- a_loc, self.f(a_logscale)).to_event(1))
228
 
229
- def model_anticausal(self, **obs):
230
  # assumes all variables are observed
231
- pyro.module('FlowPGM', self)
232
- with pyro.plate('observations', obs['x'].shape[0]):
233
  # q(v | x)
234
- v_loc, v_logscale = self.encoder_v(obs['x']).chunk(2, dim=-1)
235
  qv_x = dist.Normal(v_loc, self.f(v_logscale)).to_event(1)
236
- pyro.sample('ventricle_volume_aux', qv_x,
237
- obs=obs['ventricle_volume'])
238
 
239
  # q(b | x, v)
240
  b_loc, b_logscale = self.encoder_b(
241
- obs['x'], y=obs['ventricle_volume']).chunk(2, dim=-1)
 
242
  qb_xv = dist.Normal(b_loc, self.f(b_logscale)).to_event(1)
243
- pyro.sample('brain_volume_aux', qb_xv, obs=obs['brain_volume'])
244
 
245
  # q(a | b, v)
246
- ctx = torch.cat(
247
- [obs['brain_volume'], obs['ventricle_volume']], dim=-1)
248
  a_loc, a_logscale = self.encoder_a(ctx).chunk(2, dim=-1)
249
- pyro.sample('age_aux', dist.Normal(
250
- a_loc, self.f(a_logscale)).to_event(1), obs=obs['age'])
 
 
 
251
 
252
  # q(s | x, b)
253
- s_prob = torch.sigmoid(self.encoder_s(
254
- obs['x'], y=obs['brain_volume']))
255
  qs_xb = dist.Bernoulli(probs=s_prob).to_event(1)
256
- pyro.sample('sex_aux', qs_xb, obs=obs['sex'])
257
 
258
  # q(m | x)
259
- m_prob = torch.sigmoid(self.encoder_m(obs['x']))
260
  qm_x = dist.Bernoulli(probs=m_prob).to_event(1)
261
- pyro.sample('mri_seq_aux', qm_x, obs=obs['mri_seq'])
262
 
263
- def predict(self, **obs):
264
  # q(v | x)
265
- v_loc, v_logscale = self.encoder_v(obs['x']).chunk(2, dim=-1)
266
  # v_loc = torch.tanh(v_loc)
267
  # q(b | x, v)
268
- b_loc, b_logscale = self.encoder_b(
269
- obs['x'], y=obs['ventricle_volume']).chunk(2, dim=-1)
 
270
  # b_loc = torch.tanh(b_loc)
271
  # q(a | b, v)
272
- ctx = torch.cat([obs['brain_volume'], obs['ventricle_volume']], dim=-1)
273
  a_loc, a_logscale = self.encoder_a(ctx).chunk(2, dim=-1)
274
  # a_loc = torch.tanh(b_loc)
275
  # q(s | x, b)
276
- s_prob = torch.sigmoid(self.encoder_s(obs['x'], y=obs['brain_volume']))
277
  # q(m | x)
278
- m_prob = torch.sigmoid(self.encoder_m(obs['x']))
279
 
280
  return {
281
- 'sex': s_prob,
282
- 'mri_seq': m_prob,
283
- 'age': a_loc,
284
- 'brain_volume': b_loc,
285
- 'ventricle_volume': v_loc,
286
  }
287
 
288
- def svi_model(self, **obs):
289
- with pyro.plate('observations', obs['x'].shape[0]):
290
  pyro.condition(self.model, data=obs)()
291
 
292
- def guide_pass(self, **obs):
293
  pass
294
 
295
 
@@ -297,173 +321,174 @@ class MorphoMNISTPGM(BasePGM):
297
  def __init__(self, args):
298
  super().__init__()
299
  self.variables = {
300
- 'thickness': 'continuous',
301
- 'intensity': 'continuous',
302
- 'digit': 'categorical',
303
  }
304
  # priors
305
  self.digit_logits = nn.Parameter(torch.zeros(1, 10)) # uniform prior
306
- for k in ['t', 'i']: # thickness, intensity, standard Gaussian
307
- self.register_buffer(f'{k}_base_loc', torch.zeros(1))
308
- self.register_buffer(f'{k}_base_scale', torch.ones(1))
309
 
310
  # constraint, assumes data is [-1,1] normalized
311
- normalize_transform = T.ComposeTransform([
312
- T.SigmoidTransform(), T.AffineTransform(loc=-1, scale=2)])
 
313
 
314
  # thickness flow
315
  self.thickness_module = T.ComposeTransformModule(
316
- [T.Spline(1, count_bins=4, order='linear')])
317
- self.thickness_flow = T.ComposeTransform([
318
- self.thickness_module, normalize_transform])
 
 
319
 
320
  # intensity (conditional) flow: thickness -> intensity
321
- intensity_net = DenseNN(
322
- 1, args.widths, [1, 1], nonlinearity=nn.GELU())
323
  self.context_nn = ConditionalAffineTransform(
324
- context_nn=intensity_net, event_dim=0)
 
325
  self.intensity_flow = [self.context_nn, normalize_transform]
326
 
327
- if args.setup != 'sup_pgm':
328
  # anticausal predictors
329
  input_shape = (args.input_channels, args.input_res, args.input_res)
330
  # q(t | x, i) = Normal(mu(x, i), sigma(x, i)), 2 outputs: loc & scale
331
- self.encoder_t = CNN(input_shape, num_outputs=2,
332
- context_dim=1, width=8)
333
  # q(i | x) = Normal(mu(x), sigma(x))
334
  self.encoder_i = CNN(input_shape, num_outputs=2, width=8)
335
  # q(y | x) = Categorical(pi(x))
336
  self.encoder_y = CNN(input_shape, num_outputs=10, width=8)
337
- self.f = lambda x: args.std_fixed * \
338
- torch.ones_like(x) if args.std_fixed > 0 else F.softplus(x)
339
-
340
- def model(self, t=None):
341
- pyro.module('MorphoMNISTPGM', self)
 
 
 
342
  # p(y), digit label prior dist
343
  py = dist.OneHotCategorical(
344
- probs=F.softmax(self.digit_logits, dim=-1)).to_event(1)
 
345
  # with pyro.poutine.scale(scale=0.05):
346
- digit = pyro.sample('digit', py)
347
 
348
  # p(t), thickness flow
349
  pt_base = dist.Normal(self.t_base_loc, self.t_base_scale).to_event(1)
350
  pt = dist.TransformedDistribution(pt_base, self.thickness_flow)
351
- thickness = pyro.sample('thickness', pt)
352
 
353
  # p(i | t), intensity conditional flow
354
  pi_t_base = dist.Normal(self.i_base_loc, self.i_base_scale).to_event(1)
355
  pi_t = ConditionalTransformedDistribution(
356
- pi_t_base, self.intensity_flow).condition(thickness)
357
- intensity = pyro.sample('intensity', pi_t)
 
358
  _ = self.context_nn
359
 
360
- return {'thickness': thickness, 'intensity': intensity, 'digit': digit}
361
 
362
- def guide(self, **obs):
363
  # guide for (optional) semi-supervised learning
364
- with pyro.plate('observations', obs['x'].shape[0]):
365
  # q(i | x)
366
- if obs['intensity'] is None:
367
- i_loc, i_logscale = self.encoder_i(obs['x']).chunk(2, dim=-1)
368
- qi_t = dist.Normal(torch.tanh(
369
- i_loc), self.f(i_logscale)).to_event(1)
370
- obs['intensity'] = pyro.sample('intensity', qi_t)
371
 
372
  # q(t | x, i)
373
- if obs['thickness'] is None:
374
- t_loc, t_logscale = self.encoder_t(
375
- obs['x'], y=obs['intensity']).chunk(2, dim=-1)
376
- qt_x = dist.Normal(torch.tanh(
377
- t_loc), self.f(t_logscale)).to_event(1)
378
- obs['thickness'] = pyro.sample('thickness', qt_x)
379
 
380
  # q(y | x)
381
- if obs['digit'] is None:
382
- y_prob = F.softmax(self.encoder_y(obs['x']), dim=-1)
383
- qy_x = dist.OneHotCategorical(probs=y_prob).to_event(1)
384
- pyro.sample('digit', qy_x)
385
 
386
- def model_anticausal(self, **obs):
387
  # assumes all variables are observed & continuous ones are in [-1,1]
388
- pyro.module('MorphoMNISTPGM', self)
389
- with pyro.plate('observations', obs['x'].shape[0]):
390
  # q(t | x, i)
391
- t_loc, t_logscale = self.encoder_t(
392
- obs['x'], y=obs['intensity']).chunk(2, dim=-1)
393
- qt_x = dist.Normal(torch.tanh(
394
- t_loc), self.f(t_logscale)).to_event(1)
395
- pyro.sample('thickness_aux', qt_x, obs=obs['thickness'])
396
 
397
  # q(i | x)
398
- i_loc, i_logscale = self.encoder_i(obs['x']).chunk(2, dim=-1)
399
- qi_t = dist.Normal(torch.tanh(
400
- i_loc), self.f(i_logscale)).to_event(1)
401
- pyro.sample('intensity_aux', qi_t, obs=obs['intensity'])
402
 
403
  # q(y | x)
404
- y_prob = F.softmax(self.encoder_y(obs['x']), dim=-1)
405
- qy_x = dist.OneHotCategorical(probs=y_prob).to_event(1)
406
- pyro.sample('digit_aux', qy_x, obs=obs['digit'])
407
 
408
- def predict(self, **obs):
409
  # q(t | x, i)
410
- t_loc, t_logscale = self.encoder_t(
411
- obs['x'], y=obs['intensity']).chunk(2, dim=-1)
 
412
  t_loc = torch.tanh(t_loc)
413
  # q(i | x)
414
- i_loc, i_logscale = self.encoder_i(obs['x']).chunk(2, dim=-1)
415
  i_loc = torch.tanh(i_loc)
416
  # q(y | x)
417
- y_prob = F.softmax(self.encoder_y(obs['x']), dim=-1)
418
- return {'thickness': t_loc, 'intensity': i_loc, 'digit': y_prob}
419
 
420
- def svi_model(self, **obs):
421
- with pyro.plate('observations', obs['x'].shape[0]):
422
  pyro.condition(self.model, data=obs)()
423
 
424
- def guide_pass(self, **obs):
425
  pass
426
 
427
 
428
- class ChestPGM(nn.Module):
429
- def __init__(self, args):
430
  super().__init__()
431
  self.variables = {
432
- 'race': 'categorical',
433
- 'sex': 'binary',
434
- 'finding': 'binary',
435
- 'age': 'continuous',
436
  }
437
  # Discrete variables that are not root nodes
438
- self.discrete_variables = {
439
- 'finding': 'binary',
440
- }
441
-
442
- # prior age
443
- for k in ['a']:
444
- self.register_buffer(f'{k}_base_loc', torch.zeros(1))
445
- self.register_buffer(f'{k}_base_scale', torch.ones(1))
446
-
447
- # age flow
448
  self.age_flow_components = T.ComposeTransformModule([T.Spline(1)])
449
  # self.age_constraints = T.ComposeTransform([
450
  # T.AffineTransform(loc=4.09541458484, scale=0.32548387126),
451
  # T.ExpTransform()])
452
- self.age_flow = T.ComposeTransform([
453
- self.age_flow_components,
454
- # self.age_constraints,
455
- ])
456
-
 
457
  # Finding (conditional) via MLP, a -> f
458
- finding_net = DenseNN(
459
- 1, [8, 16], param_dims=[2], nonlinearity=nn.Sigmoid())#.cuda()
460
  self.finding_transform_GumbelMax = ConditionalGumbelMax(
461
- context_nn=finding_net,
462
- event_dim=0)
463
  # log space for sex and race
464
- self.sex_logit = nn.Parameter(torch.zeros(1))
465
- # self.sex_logit = pyro.param(torch.zeros(1))
466
- self.race_logits = nn.Parameter(np.log(1/3)*torch.ones(1, 3))
467
 
468
  input_shape = (args.input_channels, args.input_res, args.input_res)
469
 
@@ -477,207 +502,112 @@ class ChestPGM(nn.Module):
477
  # q(a | x, f) ~ Normal(mu(x), sigma(x))
478
  self.encoder_a = CNN(input_shape, num_outputs=1, context_dim=1)
479
 
480
- def model(self, t=None):
 
481
  # p(s), sex dist
482
  ps = dist.Bernoulli(logits=self.sex_logit).to_event(1)
483
- sex = pyro.sample('sex', ps)
484
 
485
  # p(a), age flow
486
  pa_base = dist.Normal(self.a_base_loc, self.a_base_scale).to_event(1)
487
  pa = dist.TransformedDistribution(pa_base, self.age_flow)
488
- age = pyro.sample('age', pa)
489
  # age_ = self.age_constraints.inv(age)
490
  _ = self.age_flow_components # register with pyro
491
 
492
  # p(r), race dist
493
- race_dist = dist.OneHotCategorical(logits=self.race_logits).to_event(0)
494
- race = pyro.sample('race', race_dist)
495
 
496
  # p(f | a), finding as OneHotCategorical conditioned on age
497
- finding_dist_base = dist.Gumbel(
498
- torch.zeros(1), torch.ones(1)).to_event(1)
 
499
  finding_dist = ConditionalTransformedDistributionGumbelMax(
500
- finding_dist_base,
501
- [self.finding_transform_GumbelMax]).condition(age)
502
  finding = pyro.sample("finding", finding_dist)
503
 
504
  return {
505
- 'sex': sex,
506
- 'race': race,
507
- 'age': age,
508
- 'finding': finding,
509
  }
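
For reference, the Gumbel-max construction the conditional finding distribution relies on: a categorical draw equals the argmax of logits plus independent Gumbel(0,1) noise, which keeps the exogenous noise explicit and hence abducible. A standalone sketch with invented logits:

import torch

logits = torch.tensor([0.2, 1.5])  # e.g. [no finding, finding]
g = -torch.log(-torch.log(torch.rand_like(logits)))  # Gumbel(0, 1) samples
y = torch.argmax(logits + g)  # distributed as Categorical(softmax(logits))
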
510
 
511
- def guide(self, **obs):
512
- # print([k for k, v in obs.items() if v is not None])
513
- pyro.module('ChestPGM', self)
514
- with pyro.plate('observations', obs['x'].shape[0]):
515
  # q(s | x)
516
- if obs['sex'] is None:
517
- s_prob = torch.sigmoid(self.encoder_s(obs['x']))
518
- s = pyro.sample('sex', dist.Bernoulli(
519
- probs=s_prob).to_event(1))
520
  # q(r | x)
521
- if obs['race'] is None:
522
- r_logits = F.softmax(self.encoder_r(
523
- obs['x']), dim=-1) # .squeeze()
524
- r = pyro.sample('race', dist.OneHotCategorical(
525
- logits=r_logits).to_event(1))
526
  # q(f | x)
527
- if obs['finding'] is None:
528
- f_prob = torch.sigmoid(self.encoder_ff(obs['x']))
529
- f = pyro.sample('finding', dist.Bernoulli(
530
- probs=f_prob).to_event(1))
531
  # q(a | x, f)
532
- if obs['age'] is None:
533
- a_loc = self.encoder_a(
534
- obs['x'], y=obs['finding'])
535
- pyro.sample('age', dist.Normal(
536
- a_loc, torch.ones_like(a_loc)).to_event(1))
537
-
538
- def model_anticausal(self, **obs):
 
539
  # assumes all variables are observed; trains the anticausal classifiers
540
- pyro.module('ChestPGM', self)
541
- with pyro.plate('observations', obs['x'].shape[0]):
542
- # q(s | x)
543
- s_prob = torch.sigmoid(self.encoder_s(obs['x']))
544
- s = pyro.sample('sex', dist.Bernoulli(
545
- probs=s_prob).to_event(1))
 
546
 
547
  # q(r | x)
548
- r_logits = F.softmax(self.encoder_r(
549
- obs['x']), dim=-1) # .squeeze()
550
- r = pyro.sample('race', dist.OneHotCategorical(
551
- logits=r_logits).to_event(1))
552
 
553
  # q(f | x)
554
- f_prob = torch.sigmoid(self.encoder_f(obs['x']))
555
  qf_x = dist.Bernoulli(probs=f_prob).to_event(1)
556
- obs['finding'] = pyro.sample('finding', qf_x)
557
 
558
  # q(a | x, f)
559
- a_loc = self.encoder_a(
560
- obs['x'], y=obs['finding'])
561
- pyro.sample('age', dist.Normal(
562
- a_loc, torch.ones_like(a_loc)).to_event(1))
563
-
564
- def predict(self, **obs):
565
- # q(s | x)
566
- s_prob = torch.sigmoid(self.encoder_s(obs['x']))
567
- # q(r | x)
568
- r_logits = F.softmax(self.encoder_r(obs['x']), dim=-1) # .squeeze()
569
- # q(f | x)
570
- f_prob = torch.sigmoid(self.encoder_f(obs['x']))
571
- # q(a | x, f)
572
- a_loc = self.encoder_a(
573
- obs['x'], y=obs['finding'])
574
-
575
- return {
576
- 'sex': s_prob,
577
- 'race': r_logits,
578
- 'age': a_loc,
579
- 'finding': f_prob,
580
- }
581
-
582
- def predict_unnorm(self, **obs):
583
  # q(s | x)
584
- s_prob = self.encoder_s(obs['x'])
585
  # q(r | x)
586
- r_logits = self.encoder_r(obs['x'])
587
  # q(f | x)
588
- f_prob = self.encoder_f(obs['x'])
589
- qf_x = dist.Bernoulli(probs=torch.sigmoid(f_prob)).to_event(1)
590
- obs_finding = pyro.sample('finding', qf_x)
591
  # q(a | x, f)
592
- a_loc = self.encoder_a(
593
- obs['x'],
594
- # y=obs['finding'],
595
- y=obs_finding,
596
- )
597
 
598
  return {
599
- 'sex': s_prob,
600
- 'race': r_logits,
601
- 'age': a_loc,
602
- 'finding': f_prob,
603
  }
604
 
605
- def svi_model(self, **obs):
606
- with pyro.plate('observations', obs['x'].shape[0]):
607
  pyro.condition(self.model, data=obs)()
608
 
609
- def guide_pass(self, **obs):
610
  pass
611
-
612
- def infer_exogeneous(self, obs):
613
- batch_size = list(obs.values())[0].shape[0]
614
- # assuming that we use transformed distributions for everything:
615
- cond_model = pyro.condition(self.sample, data=obs)
616
- cond_trace = pyro.poutine.trace(
617
- cond_model).get_trace(batch_size)
618
-
619
- output = {}
620
- for name, node in cond_trace.nodes.items():
621
- if 'z' in name or 'fn' not in node.keys():
622
- continue
623
- fn = node['fn']
624
- if isinstance(fn, dist.Independent):
625
- fn = fn.base_dist
626
- if isinstance(fn, dist.TransformedDistribution):
627
- # compute exogenous base dist (created with TransformReparam) at all sites
628
- output[name + '_base'] = T.ComposeTransform(
629
- fn.transforms).inv(node['value'])
630
- return output
631
-
632
- def scm(self, *args, **kwargs):
633
- def config(msg):
634
- if isinstance(msg['fn'], dist.TransformedDistribution):
635
- return TransformReparam()
636
- else:
637
- return None
638
- return pyro.poutine.reparam(self.model, config=config)(*args, **kwargs)
639
-
640
- def sample_scm(self, n_samples=1, t=None):
641
- with pyro.plate('obs', n_samples):
642
- samples = self.scm(t)
643
- return samples
644
-
645
- def sample(self, n_samples=1, t=None):
646
- with pyro.plate('obs', n_samples):
647
- samples = self.model(t)
648
- return samples
649
-
650
- def counterfactual(self, obs, intervention, num_particles=1, detach=True, t=None):
651
- dag_variables = self.variables.keys()
652
- obs_ = {k: v for k, v in obs.items() if k in dag_variables}
653
- assert set(obs_.keys()) == set(dag_variables)
654
- # For continuous variables
655
- avg_cfs = {k: torch.zeros_like(obs_[k]) for k in obs_.keys()}
656
- batch_size = list(obs_.values())[0].shape[0]
657
-
658
- for _ in range(num_particles):
659
- # Abduction
660
- exo_noise = self.infer_exogeneous(obs_)
661
- exo_noise = {k: v.detach() if detach else v for k,
662
- v in exo_noise.items()}
663
- # condition on root node variables (no exogenous noise available)
664
- for k in dag_variables:
665
- if k not in intervention.keys():
666
- if k not in [i.split('_base')[0] for i in exo_noise.keys()]:
667
- exo_noise[k] = obs_[k]
668
- # Abducted SCM
669
- abducted_scm = pyro.poutine.condition(
670
- self.sample_scm, data=exo_noise)
671
- # Action
672
- counterfactual_scm = pyro.poutine.do(
673
- abducted_scm, data=intervention)
674
- # Prediction
675
- counterfactuals = counterfactual_scm(batch_size, t)
676
- # Check if we should change "finding", i.e. if its parents and itself are not intervened,
677
- # then we use its observed value. This is needed due to stochastic abduction of discrete variables.
678
- if 'age' not in intervention.keys() and 'finding' not in intervention.keys():
679
- counterfactuals['finding'] = obs_['finding']
680
-
681
- for k, v in counterfactuals.items():
682
- avg_cfs[k] += (v / num_particles)
683
- return avg_cfs
 
1
+ from typing import Dict
2
 
3
+ import numpy as np
4
  import torch
5
+ from torch import nn, Tensor
6
  import torch.nn.functional as F
7
 
8
  import pyro
 
16
  from .layers import (
17
  ConditionalTransformedDistributionGumbelMax,
18
  ConditionalGumbelMax,
19
+ ConditionalAffineTransform,
20
+ MLP,
21
+ CNN,
22
  )
23
 
24
 
25
+ class Hparams:
26
+ def update(self, dict):
27
+ for k, v in dict.items():
28
+ setattr(self, k, v)
29
+
30
+
31
  class BasePGM(nn.Module):
32
  def __init__(self):
33
  super().__init__()
34
 
35
  def scm(self, *args, **kwargs):
36
  def config(msg):
37
+ if isinstance(msg["fn"], dist.TransformedDistribution):
38
  return TransformReparam()
39
  else:
40
  return None
41
+
42
  return pyro.poutine.reparam(self.model, config=config)(*args, **kwargs)
43
 
44
+ def sample_scm(self, n_samples: int = 1):
45
+ with pyro.plate("obs", n_samples):
46
+ samples = self.scm()
47
  return samples
48
 
49
+ def sample(self, n_samples: int = 1):
50
+ with pyro.plate("obs", n_samples):
51
+ samples = self.model() # NOTE: not ideal as model is defined in child class
52
  return samples
53
 
54
+ def infer_exogeneous(self, obs: Dict[str, Tensor]) -> Dict[str, Tensor]:
55
  batch_size = list(obs.values())[0].shape[0]
56
  # assuming that we use transformed distributions for everything:
57
  cond_model = pyro.condition(self.sample, data=obs)
58
+ cond_trace = pyro.poutine.trace(cond_model).get_trace(batch_size)
 
59
 
60
  output = {}
61
  for name, node in cond_trace.nodes.items():
62
+ if "z" in name or "fn" not in node.keys():
63
  continue
64
+ fn = node["fn"]
65
  if isinstance(fn, dist.Independent):
66
  fn = fn.base_dist
67
  if isinstance(fn, dist.TransformedDistribution):
68
  # compute exogenous base dist (created with TransformReparam) at all sites
69
+ output[name + "_base"] = T.ComposeTransform(fn.transforms).inv(
70
+ node["value"]
71
+ )
72
  return output
73
 
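For intuition, infer_exogeneous recovers each site's base-distribution noise by inverting that site's transform stack; below is a minimal, self-contained sketch of the same inversion with a hypothetical affine flow (not one of the repo's transforms).

import torch
from pyro.distributions import transforms as T

# x = loc + scale * u, so inverting the transform abducts u = (x - loc) / scale
flow = T.ComposeTransform(
    [torch.distributions.transforms.AffineTransform(loc=2.0, scale=3.0)]
)
x = torch.tensor([5.0])
u = flow.inv(x)                    # abducted exogenous noise: (5 - 2) / 3 = 1
assert torch.allclose(flow(u), x)  # re-applying the flow reproduces the observation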
74
+ def counterfactual(
75
+ self,
76
+ obs: Dict[str, Tensor],
77
+ intervention: Dict[str, Tensor],
78
+ num_particles: int = 1,
79
+ detach: bool = True,
80
+ ) -> Dict[str, Tensor]:
81
+ # NOTE: not ideal as "variables" is defined in child class
82
  dag_variables = self.variables.keys()
83
  assert set(obs.keys()) == set(dag_variables)
84
  avg_cfs = {k: torch.zeros_like(obs[k]) for k in obs.keys()}
 
87
  for _ in range(num_particles):
88
  # Abduction
89
  exo_noise = self.infer_exogeneous(obs)
90
+ exo_noise = {k: v.detach() if detach else v for k, v in exo_noise.items()}
 
91
  # condition on root node variables (no exogenous noise available)
92
  for k in dag_variables:
93
  if k not in intervention.keys():
94
+ if k not in [i.split("_base")[0] for i in exo_noise.keys()]:
95
  exo_noise[k] = obs[k]
96
  # Abducted SCM
97
+ abducted_scm = pyro.poutine.condition(self.sample_scm, data=exo_noise)
 
98
  # Action
99
+ counterfactual_scm = pyro.poutine.do(abducted_scm, data=intervention)
 
100
  # Prediction
101
+ counterfactuals = counterfactual_scm(batch_size)
102
+
103
+ if hasattr(self, "discrete_variables"): # hack for MIMIC
104
+ # Check if we should change "finding", i.e. if its parents and/or
105
+ # itself are not intervened on, then we use its observed value.
106
+ # This is used due to stochastic abduction of discrete variables
107
+ if (
108
+ "age" not in intervention.keys()
109
+ and "finding" not in intervention.keys()
110
+ ):
111
+ counterfactuals["finding"] = obs["finding"]
112
 
113
  for k, v in counterfactuals.items():
114
+ avg_cfs[k] += v / num_particles
115
  return avg_cfs
116
 
117
 
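A usage sketch for the abduction-action-prediction loop above, assuming a trained ChestPGM instance named pgm (tensor shapes and values are illustrative only):

import torch
import torch.nn.functional as F

obs = {
    "sex": torch.ones(8, 1),
    "race": F.one_hot(torch.zeros(8, dtype=torch.long), 3).float(),
    "age": torch.randn(8, 1),               # already normalized
    "finding": torch.zeros(8, 1),
}
do_age = {"age": torch.full((8, 1), 1.5)}   # intervene on age only
cfs = pgm.counterfactual(obs=obs, intervention=do_age, num_particles=4)
# root nodes (sex, race) keep their observed values, while "finding" is
# re-derived from its abducted Gumbel noise because its parent (age) changed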
118
  class FlowPGM(BasePGM):
119
+ def __init__(self, args: Hparams):
120
  super().__init__()
121
  self.variables = {
122
+ "sex": "binary",
123
+ "mri_seq": "binary",
124
+ "age": "continuous",
125
+ "brain_volume": "continuous",
126
+ "ventricle_volume": "continuous",
127
  }
128
  # priors: s, m, a, b and v
129
  self.s_logit = nn.Parameter(torch.zeros(1))
130
  self.m_logit = nn.Parameter(torch.zeros(1))
131
+ for k in ["a", "b", "v"]:
132
+ self.register_buffer(f"{k}_base_loc", torch.zeros(1))
133
+ self.register_buffer(f"{k}_base_scale", torch.ones(1))
134
 
135
  # constraint, assumes data is [-1,1] normalized
136
  # normalize_transform = T.ComposeTransform([
 
140
 
141
  # age flow
142
  self.age_module = T.ComposeTransformModule(
143
+ [T.Spline(1, count_bins=4, order="linear")]
144
+ )
145
+ self.age_flow = T.ComposeTransform([self.age_module])
146
  # self.age_module, normalize_transform])
147
 
148
  # brain volume (conditional) flow: (sex, age) -> brain_vol
149
+ bvol_net = DenseNN(2, args.widths, [1, 1], nonlinearity=nn.LeakyReLU(0.1))
150
+ self.bvol_flow = ConditionalAffineTransform(context_nn=bvol_net, event_dim=0)
 
 
151
  # self.bvol_flow = [self.bvol_flow, normalize_transform]
152
 
153
  # ventricle volume (conditional) flow: (brain_vol, age) -> ventricle_vol
154
+ vvol_net = DenseNN(2, args.widths, [1, 1], nonlinearity=nn.LeakyReLU(0.1))
155
+ self.vvol_flow = ConditionalAffineTransform(context_nn=vvol_net, event_dim=0)
 
 
156
  # self.vvol_flow = [self.vvol_transf, normalize_transform]
157
 
158
  # if args.setup != 'sup_pgm':
 
168
  self.encoder_b = CNN(input_shape, num_outputs=2, context_dim=1)
169
  # q(v | x) = Normal(mu(x), sigma(x))
170
  self.encoder_v = CNN(input_shape, num_outputs=2)
171
+ self.f = (
172
+ lambda x: args.std_fixed * torch.ones_like(x)
173
+ if args.std_fixed > 0
174
+ else F.softplus(x)
175
+ )
176
 
177
+ def model(self) -> Dict[str, Tensor]:
178
  # p(s), sex dist
179
  ps = dist.Bernoulli(logits=self.s_logit).to_event(1)
180
+ sex = pyro.sample("sex", ps)
181
 
182
  # p(m), mri_seq dist
183
  pm = dist.Bernoulli(logits=self.m_logit).to_event(1)
184
+ mri_seq = pyro.sample("mri_seq", pm)
185
 
186
  # p(a), age flow
187
  pa_base = dist.Normal(self.a_base_loc, self.a_base_scale).to_event(1)
188
  pa = dist.TransformedDistribution(pa_base, self.age_flow)
189
+ age = pyro.sample("age", pa)
190
 
191
  # p(b | s, a), brain volume flow
192
+ pb_sa_base = dist.Normal(self.b_base_loc, self.b_base_scale).to_event(1)
 
193
  pb_sa = ConditionalTransformedDistribution(
194
+ pb_sa_base, [self.bvol_flow]
195
+ ).condition(torch.cat([sex, age], dim=1))
196
+ bvol = pyro.sample("brain_volume", pb_sa)
197
  # _ = self.bvol_transf # register with pyro
198
 
199
  # p(v | b, a), ventricle volume flow
200
+ pv_ba_base = dist.Normal(self.v_base_loc, self.v_base_scale).to_event(1)
 
201
  pv_ba = ConditionalTransformedDistribution(
202
+ pv_ba_base, [self.vvol_flow]
203
+ ).condition(torch.cat([bvol, age], dim=1))
204
+ vvol = pyro.sample("ventricle_volume", pv_ba)
205
  # _ = self.vvol_transf # register with pyro
206
 
207
  return {
208
+ "sex": sex,
209
+ "mri_seq": mri_seq,
210
+ "age": age,
211
+ "brain_volume": bvol,
212
+ "ventricle_volume": vvol,
213
  }
214
 
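The mechanisms above encode the DAG sex -> brain_volume, age -> brain_volume, age -> ventricle_volume, brain_volume -> ventricle_volume, with mri_seq an independent root; drawing from the joint prior is then a one-liner (illustrative, pgm assumed to be a trained FlowPGM):

import torch

with torch.no_grad():
    prior = pgm.sample(n_samples=32)   # dict with all five variables above
print(prior["brain_volume"].shape)     # torch.Size([32, 1])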
215
+ def guide(self, **obs) -> None:
216
  # guide for (optional) semi-supervised learning
217
+ pyro.module("FlowPGM", self)
218
+ with pyro.plate("observations", obs["x"].shape[0]):
219
  # q(m | x)
220
+ if obs["mri_seq"] is None:
221
+ m_prob = torch.sigmoid(self.encoder_m(obs["x"]))
222
+ m = pyro.sample("mri_seq", dist.Bernoulli(probs=m_prob).to_event(1))
 
223
 
224
  # q(v | x)
225
+ if obs["ventricle_volume"] is None:
226
+ v_loc, v_logscale = self.encoder_v(obs["x"]).chunk(2, dim=-1)
227
  qv_x = dist.Normal(v_loc, self.f(v_logscale)).to_event(1)
228
+ obs["ventricle_volume"] = pyro.sample("ventricle_volume", qv_x)
229
 
230
  # q(b | x, v)
231
+ if obs["brain_volume"] is None:
232
  b_loc, b_logscale = self.encoder_b(
233
+ obs["x"], y=obs["ventricle_volume"]
234
+ ).chunk(2, dim=-1)
235
  qb_xv = dist.Normal(b_loc, self.f(b_logscale)).to_event(1)
236
+ obs["brain_volume"] = pyro.sample("brain_volume", qb_xv)
237
 
238
  # q(s | x, b)
239
+ if obs["sex"] is None:
240
+ s_prob = torch.sigmoid(
241
+ self.encoder_s(obs["x"], y=obs["brain_volume"])
242
+ ) # .squeeze()
243
+ pyro.sample("sex", dist.Bernoulli(probs=s_prob).to_event(1))
244
 
245
  # q(a | b, v)
246
+ if obs["age"] is None:
247
+ ctx = torch.cat([obs["brain_volume"], obs["ventricle_volume"]], dim=-1)
 
248
  a_loc, a_logscale = self.encoder_a(ctx).chunk(2, dim=-1)
249
+ pyro.sample("age", dist.Normal(a_loc, self.f(a_logscale)).to_event(1))
 
250
 
251
+ def model_anticausal(self, **obs) -> None:
252
  # assumes all variables are observed
253
+ pyro.module("FlowPGM", self)
254
+ with pyro.plate("observations", obs["x"].shape[0]):
255
  # q(v | x)
256
+ v_loc, v_logscale = self.encoder_v(obs["x"]).chunk(2, dim=-1)
257
  qv_x = dist.Normal(v_loc, self.f(v_logscale)).to_event(1)
258
+ pyro.sample("ventricle_volume_aux", qv_x, obs=obs["ventricle_volume"])
 
259
 
260
  # q(b | x, v)
261
  b_loc, b_logscale = self.encoder_b(
262
+ obs["x"], y=obs["ventricle_volume"]
263
+ ).chunk(2, dim=-1)
264
  qb_xv = dist.Normal(b_loc, self.f(b_logscale)).to_event(1)
265
+ pyro.sample("brain_volume_aux", qb_xv, obs=obs["brain_volume"])
266
 
267
  # q(a | b, v)
268
+ ctx = torch.cat([obs["brain_volume"], obs["ventricle_volume"]], dim=-1)
 
269
  a_loc, a_logscale = self.encoder_a(ctx).chunk(2, dim=-1)
270
+ pyro.sample(
271
+ "age_aux",
272
+ dist.Normal(a_loc, self.f(a_logscale)).to_event(1),
273
+ obs=obs["age"],
274
+ )
275
 
276
  # q(s | x, b)
277
+ s_prob = torch.sigmoid(self.encoder_s(obs["x"], y=obs["brain_volume"]))
 
278
  qs_xb = dist.Bernoulli(probs=s_prob).to_event(1)
279
+ pyro.sample("sex_aux", qs_xb, obs=obs["sex"])
280
 
281
  # q(m | x)
282
+ m_prob = torch.sigmoid(self.encoder_m(obs["x"]))
283
  qm_x = dist.Bernoulli(probs=m_prob).to_event(1)
284
+ pyro.sample("mri_seq_aux", qm_x, obs=obs["mri_seq"])
285
 
286
+ def predict(self, **obs) -> Dict[str, Tensor]:
287
  # q(v | x)
288
+ v_loc, v_logscale = self.encoder_v(obs["x"]).chunk(2, dim=-1)
289
  # v_loc = torch.tanh(v_loc)
290
  # q(b | x, v)
291
+ b_loc, b_logscale = self.encoder_b(obs["x"], y=obs["ventricle_volume"]).chunk(
292
+ 2, dim=-1
293
+ )
294
  # b_loc = torch.tanh(b_loc)
295
  # q(a | b, v)
296
+ ctx = torch.cat([obs["brain_volume"], obs["ventricle_volume"]], dim=-1)
297
  a_loc, a_logscale = self.encoder_a(ctx).chunk(2, dim=-1)
298
  # a_loc = torch.tanh(b_loc)
299
  # q(s | x, b)
300
+ s_prob = torch.sigmoid(self.encoder_s(obs["x"], y=obs["brain_volume"]))
301
  # q(m | x)
302
+ m_prob = torch.sigmoid(self.encoder_m(obs["x"]))
303
 
304
  return {
305
+ "sex": s_prob,
306
+ "mri_seq": m_prob,
307
+ "age": a_loc,
308
+ "brain_volume": b_loc,
309
+ "ventricle_volume": v_loc,
310
  }
311
 
312
+ def svi_model(self, **obs) -> None:
313
+ with pyro.plate("observations", obs["x"].shape[0]):
314
  pyro.condition(self.model, data=obs)()
315
 
316
+ def guide_pass(self, **obs) -> None:
317
  pass
318
 
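The model_anticausal/guide_pass pair is what Pyro's SVI consumes when training the anticausal predictors; a minimal training-step sketch (optimizer settings are placeholders; pgm is a FlowPGM and batch a dict of observed tensors):

import pyro
from pyro.infer import SVI, Trace_ELBO

svi = SVI(
    model=pgm.model_anticausal,  # scores each q(. | ...) head against its label
    guide=pgm.guide_pass,        # empty guide: the ELBO reduces to a supervised log-likelihood
    optim=pyro.optim.Adam({"lr": 1e-3}),
    loss=Trace_ELBO(),
)
loss = svi.step(**batch)  # batch: x, sex, mri_seq, age, brain_volume, ventricle_volume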
319
 
320
  class MorphoMNISTPGM(BasePGM):
321
  def __init__(self, args):
322
  super().__init__()
323
  self.variables = {
324
+ "thickness": "continuous",
325
+ "intensity": "continuous",
326
+ "digit": "categorical",
327
  }
328
  # priors
329
  self.digit_logits = nn.Parameter(torch.zeros(1, 10)) # uniform prior
330
+ for k in ["t", "i"]: # thickness, intensity, standard Gaussian
331
+ self.register_buffer(f"{k}_base_loc", torch.zeros(1))
332
+ self.register_buffer(f"{k}_base_scale", torch.ones(1))
333
 
334
  # constraint, assumes data is [-1,1] normalized
335
+ normalize_transform = T.ComposeTransform(
336
+ [T.SigmoidTransform(), T.AffineTransform(loc=-1, scale=2)]
337
+ )
338
 
339
  # thickness flow
340
  self.thickness_module = T.ComposeTransformModule(
341
+ [T.Spline(1, count_bins=4, order="linear")]
342
+ )
343
+ self.thickness_flow = T.ComposeTransform(
344
+ [self.thickness_module, normalize_transform]
345
+ )
346
 
347
  # intensity (conditional) flow: thickness -> intensity
348
+ intensity_net = DenseNN(1, args.widths, [1, 1], nonlinearity=nn.GELU())
 
349
  self.context_nn = ConditionalAffineTransform(
350
+ context_nn=intensity_net, event_dim=0
351
+ )
352
  self.intensity_flow = [self.context_nn, normalize_transform]
353
 
354
+ if args.setup != "sup_pgm":
355
  # anticausal predictors
356
  input_shape = (args.input_channels, args.input_res, args.input_res)
357
  # q(t | x, i) = Normal(mu(x, i), sigma(x, i)), 2 outputs: loc & scale
358
+ self.encoder_t = CNN(input_shape, num_outputs=2, context_dim=1, width=8)
 
359
  # q(i | x) = Normal(mu(x), sigma(x))
360
  self.encoder_i = CNN(input_shape, num_outputs=2, width=8)
361
  # q(y | x) = Categorical(pi(x))
362
  self.encoder_y = CNN(input_shape, num_outputs=10, width=8)
363
+ self.f = (
364
+ lambda x: args.std_fixed * torch.ones_like(x)
365
+ if args.std_fixed > 0
366
+ else F.softplus(x)
367
+ )
368
+
369
+ def model(self) -> Dict[str, Tensor]:
370
+ pyro.module("MorphoMNISTPGM", self)
371
  # p(y), digit label prior dist
372
  py = dist.OneHotCategorical(
373
+ probs=F.softmax(self.digit_logits, dim=-1)
374
+ ) # .to_event(1)
375
  # with pyro.poutine.scale(scale=0.05):
376
+ digit = pyro.sample("digit", py)
377
 
378
  # p(t), thickness flow
379
  pt_base = dist.Normal(self.t_base_loc, self.t_base_scale).to_event(1)
380
  pt = dist.TransformedDistribution(pt_base, self.thickness_flow)
381
+ thickness = pyro.sample("thickness", pt)
382
 
383
  # p(i | t), intensity conditional flow
384
  pi_t_base = dist.Normal(self.i_base_loc, self.i_base_scale).to_event(1)
385
  pi_t = ConditionalTransformedDistribution(
386
+ pi_t_base, self.intensity_flow
387
+ ).condition(thickness)
388
+ intensity = pyro.sample("intensity", pi_t)
389
  _ = self.context_nn
390
 
391
+ return {"thickness": thickness, "intensity": intensity, "digit": digit}
392
 
393
+ def guide(self, **obs) -> None:
394
  # guide for (optional) semi-supervised learning
395
+ with pyro.plate("observations", obs["x"].shape[0]):
396
  # q(i | x)
397
+ if obs["intensity"] is None:
398
+ i_loc, i_logscale = self.encoder_i(obs["x"]).chunk(2, dim=-1)
399
+ qi_t = dist.Normal(torch.tanh(i_loc), self.f(i_logscale)).to_event(1)
400
+ obs["intensity"] = pyro.sample("intensity", qi_t)
 
401
 
402
  # q(t | x, i)
403
+ if obs["thickness"] is None:
404
+ t_loc, t_logscale = self.encoder_t(obs["x"], y=obs["intensity"]).chunk(
405
+ 2, dim=-1
406
+ )
407
+ qt_x = dist.Normal(torch.tanh(t_loc), self.f(t_logscale)).to_event(1)
408
+ obs["thickness"] = pyro.sample("thickness", qt_x)
409
 
410
  # q(y | x)
411
+ if obs["digit"] is None:
412
+ y_prob = F.softmax(self.encoder_y(obs["x"]), dim=-1)
413
+ qy_x = dist.OneHotCategorical(probs=y_prob) # .to_event(1)
414
+ pyro.sample("digit", qy_x)
415
 
416
+ def model_anticausal(self, **obs) -> None:
417
  # assumes all variables are observed & continuous ones are in [-1,1]
418
+ pyro.module("MorphoMNISTPGM", self)
419
+ with pyro.plate("observations", obs["x"].shape[0]):
420
  # q(t | x, i)
421
+ t_loc, t_logscale = self.encoder_t(obs["x"], y=obs["intensity"]).chunk(
422
+ 2, dim=-1
423
+ )
424
+ qt_x = dist.Normal(torch.tanh(t_loc), self.f(t_logscale)).to_event(1)
425
+ pyro.sample("thickness_aux", qt_x, obs=obs["thickness"])
426
 
427
  # q(i | x)
428
+ i_loc, i_logscale = self.encoder_i(obs["x"]).chunk(2, dim=-1)
429
+ qi_t = dist.Normal(torch.tanh(i_loc), self.f(i_logscale)).to_event(1)
430
+ pyro.sample("intensity_aux", qi_t, obs=obs["intensity"])
 
431
 
432
  # q(y | x)
433
+ y_prob = F.softmax(self.encoder_y(obs["x"]), dim=-1)
434
+ qy_x = dist.OneHotCategorical(probs=y_prob) # .to_event(1)
435
+ pyro.sample("digit_aux", qy_x, obs=obs["digit"])
436
 
437
+ def predict(self, **obs) -> Dict[str, Tensor]:
438
  # q(t | x, i)
439
+ t_loc, t_logscale = self.encoder_t(obs["x"], y=obs["intensity"]).chunk(
440
+ 2, dim=-1
441
+ )
442
  t_loc = torch.tanh(t_loc)
443
  # q(i | x)
444
+ i_loc, i_logscale = self.encoder_i(obs["x"]).chunk(2, dim=-1)
445
  i_loc = torch.tanh(i_loc)
446
  # q(y | x)
447
+ y_prob = F.softmax(self.encoder_y(obs["x"]), dim=-1)
448
+ return {"thickness": t_loc, "intensity": i_loc, "digit": y_prob}
449
 
450
+ def svi_model(self, **obs) -> None:
451
+ with pyro.plate("observations", obs["x"].shape[0]):
452
  pyro.condition(self.model, data=obs)()
453
 
454
+ def guide_pass(self, **obs) -> None:
455
  pass
456
 
457
 
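Sampling the learned MorphoMNIST mechanisms end-to-end (illustrative; pgm is a trained MorphoMNISTPGM):

import torch

with torch.no_grad():
    s = pgm.sample(n_samples=16)  # {"thickness": ..., "intensity": ..., "digit": ...}
# thickness and intensity land in [-1, 1] via the Sigmoid/Affine normalize
# transform, so they are mapped back through the dataset's attribute min/max
# (cf. get_attr_max_min) before plotting.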
458
+ class ChestPGM(BasePGM):
459
+ def __init__(self, args: Hparams):
460
  super().__init__()
461
  self.variables = {
462
+ "race": "categorical",
463
+ "sex": "binary",
464
+ "finding": "binary",
465
+ "age": "continuous",
466
  }
467
  # Discrete variables that are not root nodes
468
+ self.discrete_variables = {"finding": "binary"}
469
+ # define base distributions
470
+ for k in ["a"]: # , "f"]:
471
+ self.register_buffer(f"{k}_base_loc", torch.zeros(1))
472
+ self.register_buffer(f"{k}_base_scale", torch.ones(1))
473
+ # age spline flow
 
 
 
 
474
  self.age_flow_components = T.ComposeTransformModule([T.Spline(1)])
475
  # self.age_constraints = T.ComposeTransform([
476
  # T.AffineTransform(loc=4.09541458484, scale=0.32548387126),
477
  # T.ExpTransform()])
478
+ self.age_flow = T.ComposeTransform(
479
+ [
480
+ self.age_flow_components,
481
+ # self.age_constraints,
482
+ ]
483
+ )
484
  # Finding (conditional) via MLP, a -> f
485
+ finding_net = DenseNN(1, [8, 16], param_dims=[2], nonlinearity=nn.Sigmoid())
 
486
  self.finding_transform_GumbelMax = ConditionalGumbelMax(
487
+ context_nn=finding_net, event_dim=0
488
+ )
489
  # log space for sex and race
490
+ self.sex_logit = nn.Parameter(np.log(1 / 2) * torch.ones(1))
491
+ self.race_logits = nn.Parameter(np.log(1 / 3) * torch.ones(1, 3))
 
492
 
493
  input_shape = (args.input_channels, args.input_res, args.input_res)
494
 
 
502
  # q(a | x, f) = Normal(mu(x, f), sigma)
503
  self.encoder_a = CNN(input_shape, num_outputs=1, context_dim=1)
504
 
505
+ def model(self) -> Dict[str, Tensor]:
506
+ pyro.module("ChestPGM", self)
507
  # p(s), sex dist
508
  ps = dist.Bernoulli(logits=self.sex_logit).to_event(1)
509
+ sex = pyro.sample("sex", ps)
510
 
511
  # p(a), age flow
512
  pa_base = dist.Normal(self.a_base_loc, self.a_base_scale).to_event(1)
513
  pa = dist.TransformedDistribution(pa_base, self.age_flow)
514
+ age = pyro.sample("age", pa)
515
  # age_ = self.age_constraints.inv(age)
516
  _ = self.age_flow_components # register with pyro
517
 
518
  # p(r), race dist
519
+ pr = dist.OneHotCategorical(logits=self.race_logits) # .to_event(1)
520
+ race = pyro.sample("race", pr)
521
 
522
  # p(f | a), finding as OneHotCategorical conditioned on age
523
+ # finding_dist_base = dist.Gumbel(self.f_base_loc, self.f_base_scale).to_event(1)
524
+ finding_dist_base = dist.Gumbel(torch.zeros(1), torch.ones(1)).to_event(1)
525
+
526
  finding_dist = ConditionalTransformedDistributionGumbelMax(
527
+ finding_dist_base, [self.finding_transform_GumbelMax]
528
+ ).condition(age)
529
  finding = pyro.sample("finding", finding_dist)
530
 
531
  return {
532
+ "sex": sex,
533
+ "race": race,
534
+ "age": age,
535
+ "finding": finding,
536
  }
537
 
538
+ def guide(self, **obs) -> None:
539
+ with pyro.plate("observations", obs["x"].shape[0]):
 
 
540
  # q(s | x)
541
+ if obs["sex"] is None:
542
+ s_prob = torch.sigmoid(self.encoder_s(obs["x"]))
543
+ pyro.sample("sex", dist.Bernoulli(probs=s_prob).to_event(1))
 
544
  # q(r | x)
545
+ if obs["race"] is None:
546
+ r_probs = F.softmax(self.encoder_r(obs["x"]), dim=-1)
547
+ qr_x = dist.OneHotCategorical(probs=r_probs) # .to_event(1)
548
+ pyro.sample("race", qr_x)
 
549
  # q(f | x)
550
+ if obs["finding"] is None:
551
+ f_prob = torch.sigmoid(self.encoder_f(obs["x"]))
552
+ qf_x = dist.Bernoulli(probs=f_prob).to_event(1)
553
+ obs["finding"] = pyro.sample("finding", qf_x)
554
  # q(a | x, f)
555
+ if obs["age"] is None:
556
+ a_loc, a_logscale = self.encoder_a(obs["x"], y=obs["finding"]).chunk(
557
+ 2, dim=-1
558
+ )
559
+ qa_xf = dist.Normal(a_loc, self.f(a_logscale)).to_event(1)
560
+ pyro.sample("age_aux", qa_xf)
561
+
562
+ def model_anticausal(self, **obs) -> None:
563
  # assumes all variables are observed; train classifiers
564
+ pyro.module("ChestPGM", self)
565
+ with pyro.plate("observations", obs["x"].shape[0]):
566
+ # q(s | x)
567
+ s_prob = torch.sigmoid(self.encoder_s(obs["x"]))
568
+ qs_x = dist.Bernoulli(probs=s_prob).to_event(1)
569
+ # with pyro.poutine.scale(scale=0.8):
570
+ pyro.sample("sex_aux", qs_x, obs=obs["sex"])
571
 
572
  # q(r | x)
573
+ r_probs = F.softmax(self.encoder_r(obs["x"]), dim=-1)
574
+ qr_x = dist.OneHotCategorical(probs=r_probs) # .to_event(1)
575
+ # with pyro.poutine.scale(scale=0.5):
576
+ pyro.sample("race_aux", qr_x, obs=obs["race"])
577
 
578
  # q(f | x)
579
+ f_prob = torch.sigmoid(self.encoder_f(obs["x"]))
580
  qf_x = dist.Bernoulli(probs=f_prob).to_event(1)
581
+ pyro.sample("finding_aux", qf_x, obs=obs["finding"])
582
 
583
  # q(a | x, f)
584
+ a_loc, a_logscale = self.encoder_a(obs["x"], y=obs["finding"]).chunk(
585
+ 2, dim=-1
586
+ )
587
+ qa_xf = dist.Normal(a_loc, self.f(a_logscale)).to_event(1)
588
+ # with pyro.poutine.scale(scale=2):
589
+ pyro.sample("age_aux", qa_xf, obs=obs["age"])
590
+
591
+ def predict(self, **obs) -> Dict[str, Tensor]:
 
 
592
  # q(s | x)
593
+ s_prob = torch.sigmoid(self.encoder_s(obs["x"]))
594
  # q(r | x)
595
+ r_probs = F.softmax(self.encoder_r(obs["x"]), dim=-1)
596
  # q(f | x)
597
+ f_prob = torch.sigmoid(self.encoder_f(obs["x"]))
 
 
598
  # q(a | x, f)
599
+ a_loc, _ = self.encoder_a(obs["x"], y=obs["finding"]).chunk(2, dim=-1)
 
 
 
 
600
 
601
  return {
602
+ "sex": s_prob,
603
+ "race": r_probs,
604
+ "finding": f_prob,
605
+ "age": a_loc,
606
  }
607
 
608
+ def svi_model(self, **obs) -> None:
609
+ with pyro.plate("observations", obs["x"].shape[0]):
610
  pyro.condition(self.model, data=obs)()
611
 
612
+ def guide_pass(self, **obs) -> None:
613
  pass
 
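Why parameterizing p(finding | age) with Gumbel noise matters for counterfactuals, in miniature (a self-contained sketch, not repo code):

import torch

logits = torch.tensor([2.0, 0.5])          # logits(age): [no disease, effusion]
g = -torch.log(-torch.log(torch.rand(2)))  # exogenous Gumbel(0, 1) noise
f = torch.argmax(g + logits)               # observed finding
logits_cf = torch.tensor([0.5, 2.0])       # logits(age*) after do(age)
f_cf = torch.argmax(g + logits_cf)         # counterfactual finding, same noise g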
 
 
pgm/layers.py CHANGED
@@ -7,7 +7,7 @@ from typing import Dict
7
  from pyro.distributions.conditional import (
8
  ConditionalTransformModule,
9
  ConditionalTransformedDistribution,
10
- TransformedDistribution
11
  )
12
  from pyro.distributions.torch_distribution import TorchDistributionMixin
13
 
@@ -25,7 +25,8 @@ class ConditionalAffineTransform(ConditionalTransformModule):
25
  def condition(self, context):
26
  loc, log_scale = self.context_nn(context)
27
  return torch.distributions.transforms.AffineTransform(
28
- loc, log_scale.exp(), event_dim=self.event_dim)
 
29
 
30
 
31
  class MLP(nn.Module):
@@ -58,27 +59,27 @@ class CNN(nn.Module):
58
  nn.BatchNorm2d(width),
59
  activation,
60
  (nn.MaxPool2d(2, 2) if res > 32 else nn.Identity()),
61
- nn.Conv2d(width, 2*width, 3, 2, 1, bias=False),
62
- nn.BatchNorm2d(2*width),
 
 
 
63
  activation,
64
- nn.Conv2d(2*width, 2*width, 3, 1, 1, bias=False),
65
- nn.BatchNorm2d(2*width),
66
  activation,
67
- nn.Conv2d(2*width, 4*width, 3, 2, 1, bias=False),
68
- nn.BatchNorm2d(4*width),
69
  activation,
70
- nn.Conv2d(4*width, 4*width, 3, 1, 1, bias=False),
71
- nn.BatchNorm2d(4*width),
72
  activation,
73
- nn.Conv2d(4*width, 8*width, 3, 2, 1, bias=False),
74
- nn.BatchNorm2d(8*width),
75
- activation
76
  )
77
  self.fc = nn.Sequential(
78
- nn.Linear(8*width + context_dim, 8*width, bias=False),
79
- nn.BatchNorm1d(8*width),
80
  activation,
81
- nn.Linear(8*width, num_outputs)
82
  )
83
 
84
  def forward(self, x, y=None):
@@ -96,7 +97,9 @@ class ArgMaxGumbelMax(Transform):
96
  super(ArgMaxGumbelMax, self).__init__(cache_size=cache_size)
97
  self.logits = logits
98
  self._event_dim = event_dim
99
- self._categorical = pyro.distributions.torch.Categorical(logits=self.logits).to_event(0)
 
 
100
 
101
  @property
102
  def event_dim(self):
@@ -104,7 +107,7 @@ class ArgMaxGumbelMax(Transform):
104
 
105
  def __call__(self, gumbels):
106
  """
107
- Computes the forward transform
108
  """
109
  assert self.logits is not None, "Logits not defined."
110
 
@@ -126,8 +129,8 @@ class ArgMaxGumbelMax(Transform):
126
 
127
  @property
128
  def domain(self):
129
- """"
130
- Domain of input(gumbel variables), Real
131
  """
132
  if self.event_dim == 0:
133
  return constraints.real
@@ -135,8 +138,8 @@ class ArgMaxGumbelMax(Transform):
135
 
136
  @property
137
  def codomain(self):
138
- """"
139
- Domain of output(categorical variables), should be natural numbers, but set to Real for now
140
  """
141
  if self.event_dim == 0:
142
  return constraints.real
@@ -147,28 +150,32 @@ class ArgMaxGumbelMax(Transform):
147
  assert self.logits is not None, "Logits not defined."
148
 
149
  uniforms = torch.rand(
150
- self.logits.shape, dtype=self.logits.dtype, device=self.logits.device)
 
151
  gumbels = -((-(uniforms.log())).log())
152
  # print(f'gumbels: {gumbels.size()}, {gumbels.dtype}')
153
  # (batch_size, num_classes) mask to select kth class
154
  # print(f'k : {k.size()}')
155
- mask = F.one_hot(k.squeeze(-1).to(torch.int64),
156
- num_classes=self.logits.shape[-1])
 
157
  # print(f'mask: {mask.size()}, {mask.dtype}')
158
  # (batch_size, 1) select topgumbel for truncation of other classes
159
- topgumbel = (mask * gumbels).sum(dim=-1, keepdim=True) - \
160
- (mask * self.logits).sum(dim=-1, keepdim=True)
 
161
  mask = 1 - mask # invert mask to select other != k classes
162
  g = gumbels + self.logits
163
  # (batch_size, num_classes)
164
- epsilons = -torch.log(mask * torch.exp(-g) +
165
- torch.exp(-topgumbel)) - (mask * self.logits)
 
166
  return epsilons
167
 
168
  def log_abs_det_jacobian(self, x, y):
169
  """We use the log_abs_det_jacobian to account for the categorical prob
170
- x: Gumbels; y: argmax(x+logits)
171
- P(y) = softmax
172
  """
173
  # print(f"logits: {torch.log(F.softmax(self.logits, dim=-1)).size()}")
174
  # print(f'y: {y.size()} ')
@@ -188,7 +195,8 @@ class ConditionalGumbelMax(ConditionalTransformModule):
188
  def condition(self, context):
189
  """Given context (age), output the Categorical results"""
190
  logits = self.context_nn(
191
- context) # The logits for calculating argmax(Gumbel + logits)
 
192
  return ArgMaxGumbelMax(logits)
193
 
194
  def _logits(self, context):
@@ -197,8 +205,8 @@ class ConditionalGumbelMax(ConditionalTransformModule):
197
 
198
  @property
199
  def domain(self):
200
- """"
201
- Domain of input(gumbel variables), Real
202
  """
203
  if self.event_dim == 0:
204
  return constraints.real
@@ -206,8 +214,8 @@ class ConditionalGumbelMax(ConditionalTransformModule):
206
 
207
  @property
208
  def codomain(self):
209
- """"
210
- Domain of output(categorical variables), should be natural numbers, but set to Real for now
211
  """
212
  if self.event_dim == 0:
213
  return constraints.real
@@ -215,8 +223,7 @@ class ConditionalGumbelMax(ConditionalTransformModule):
215
 
216
 
217
  class TransformedDistributionGumbelMax(TransformedDistribution, TorchDistributionMixin):
218
- r""" Define a TransformedDistribution class for Gumbel max
219
- """
220
  arg_constraints: Dict[str, constraints.Constraint] = {}
221
 
222
  def log_prob(self, value):
@@ -233,15 +240,16 @@ class TransformedDistributionGumbelMax(TransformedDistribution, TorchDistributio
233
  for transform in reversed(self.transforms):
234
  x = transform.inv(y)
235
  event_dim += transform.domain.event_dim - transform.codomain.event_dim
236
- log_prob = log_prob - _sum_rightmost(transform.log_abs_det_jacobian(x, y),
237
- event_dim - transform.domain.event_dim)
 
 
238
  y = x
239
  # print(f"log_prob: {log_prob.size()}")
240
  return log_prob
241
 
242
 
243
  class ConditionalTransformedDistributionGumbelMax(ConditionalTransformedDistribution):
244
-
245
  def condition(self, context):
246
  base_dist = self.base_dist.condition(context)
247
  transforms = [t.condition(context) for t in self.transforms]
@@ -249,4 +257,4 @@ class ConditionalTransformedDistributionGumbelMax(ConditionalTransformedDistribu
249
  return TransformedDistributionGumbelMax(base_dist, transforms)
250
 
251
  def clear_cache(self):
252
- pass
 
7
  from pyro.distributions.conditional import (
8
  ConditionalTransformModule,
9
  ConditionalTransformedDistribution,
10
+ TransformedDistribution,
11
  )
12
  from pyro.distributions.torch_distribution import TorchDistributionMixin
13
 
 
25
  def condition(self, context):
26
  loc, log_scale = self.context_nn(context)
27
  return torch.distributions.transforms.AffineTransform(
28
+ loc, log_scale.exp(), event_dim=self.event_dim
29
+ )
30
 
31
 
32
  class MLP(nn.Module):
 
59
  nn.BatchNorm2d(width),
60
  activation,
61
  (nn.MaxPool2d(2, 2) if res > 32 else nn.Identity()),
62
+ nn.Conv2d(width, 2 * width, 3, 2, 1, bias=False),
63
+ nn.BatchNorm2d(2 * width),
64
+ activation,
65
+ nn.Conv2d(2 * width, 2 * width, 3, 1, 1, bias=False),
66
+ nn.BatchNorm2d(2 * width),
67
  activation,
68
+ nn.Conv2d(2 * width, 4 * width, 3, 2, 1, bias=False),
69
+ nn.BatchNorm2d(4 * width),
70
  activation,
71
+ nn.Conv2d(4 * width, 4 * width, 3, 1, 1, bias=False),
72
+ nn.BatchNorm2d(4 * width),
73
  activation,
74
+ nn.Conv2d(4 * width, 8 * width, 3, 2, 1, bias=False),
75
+ nn.BatchNorm2d(8 * width),
76
  activation,
 
 
 
77
  )
78
  self.fc = nn.Sequential(
79
+ nn.Linear(8 * width + context_dim, 8 * width, bias=False),
80
+ nn.BatchNorm1d(8 * width),
81
  activation,
82
+ nn.Linear(8 * width, num_outputs),
83
  )
84
 
85
  def forward(self, x, y=None):
 
97
  super(ArgMaxGumbelMax, self).__init__(cache_size=cache_size)
98
  self.logits = logits
99
  self._event_dim = event_dim
100
+ self._categorical = pyro.distributions.torch.Categorical(
101
+ logits=self.logits
102
+ ).to_event(0)
103
 
104
  @property
105
  def event_dim(self):
 
107
 
108
  def __call__(self, gumbels):
109
  """
110
+ Computes the forward transform
111
  """
112
  assert self.logits is not None, "Logits not defined."
113
 
 
129
 
130
  @property
131
  def domain(self):
132
+ """ "
133
+ Domain of input(gumbel variables), Real
134
  """
135
  if self.event_dim == 0:
136
  return constraints.real
 
138
 
139
  @property
140
  def codomain(self):
141
+ """ "
142
+ Domain of output(categorical variables), should be natural numbers, but set to Real for now
143
  """
144
  if self.event_dim == 0:
145
  return constraints.real
 
150
  assert self.logits is not None, "Logits not defined."
151
 
152
  uniforms = torch.rand(
153
+ self.logits.shape, dtype=self.logits.dtype, device=self.logits.device
154
+ )
155
  gumbels = -((-(uniforms.log())).log())
156
  # print(f'gumbels: {gumbels.size()}, {gumbels.dtype}')
157
  # (batch_size, num_classes) mask to select kth class
158
  # print(f'k : {k.size()}')
159
+ mask = F.one_hot(
160
+ k.squeeze(-1).to(torch.int64), num_classes=self.logits.shape[-1]
161
+ )
162
  # print(f'mask: {mask.size()}, {mask.dtype}')
163
  # (batch_size, 1) select topgumbel for truncation of other classes
164
+ topgumbel = (mask * gumbels).sum(dim=-1, keepdim=True) - (
165
+ mask * self.logits
166
+ ).sum(dim=-1, keepdim=True)
167
  mask = 1 - mask # invert mask to select other != k classes
168
  g = gumbels + self.logits
169
  # (batch_size, num_classes)
170
+ epsilons = -torch.log(mask * torch.exp(-g) + torch.exp(-topgumbel)) - (
171
+ mask * self.logits
172
+ )
173
  return epsilons
174
 
175
  def log_abs_det_jacobian(self, x, y):
176
  """We use the log_abs_det_jacobian to account for the categorical prob
177
+ x: Gumbels; y: argmax(x+logits)
178
+ P(y) = softmax
179
  """
180
  # print(f"logits: {torch.log(F.softmax(self.logits, dim=-1)).size()}")
181
  # print(f'y: {y.size()} ')
 
195
  def condition(self, context):
196
  """Given context (age), output the Categorical results"""
197
  logits = self.context_nn(
198
+ context
199
+ ) # The logits for calculating argmax(Gumbel + logits)
200
  return ArgMaxGumbelMax(logits)
201
 
202
  def _logits(self, context):
 
205
 
206
  @property
207
  def domain(self):
208
+ """ "
209
+ Domain of input(gumbel variables), Real
210
  """
211
  if self.event_dim == 0:
212
  return constraints.real
 
214
 
215
  @property
216
  def codomain(self):
217
+ """ "
218
+ Domain of output(categorical variables), should be natural numbers, but set to Real for now
219
  """
220
  if self.event_dim == 0:
221
  return constraints.real
 
223
 
224
 
225
  class TransformedDistributionGumbelMax(TransformedDistribution, TorchDistributionMixin):
226
+ r"""Define a TransformedDistribution class for Gumbel max"""
 
227
  arg_constraints: Dict[str, constraints.Constraint] = {}
228
 
229
  def log_prob(self, value):
 
240
  for transform in reversed(self.transforms):
241
  x = transform.inv(y)
242
  event_dim += transform.domain.event_dim - transform.codomain.event_dim
243
+ log_prob = log_prob - _sum_rightmost(
244
+ transform.log_abs_det_jacobian(x, y),
245
+ event_dim - transform.domain.event_dim,
246
+ )
247
  y = x
248
  # print(f"log_prob: {log_prob.size()}")
249
  return log_prob
250
 
251
 
252
  class ConditionalTransformedDistributionGumbelMax(ConditionalTransformedDistribution):
 
253
  def condition(self, context):
254
  base_dist = self.base_dist.condition(context)
255
  transforms = [t.condition(context) for t in self.transforms]
 
257
  return TransformedDistributionGumbelMax(base_dist, transforms)
258
 
259
  def clear_cache(self):
260
+ pass
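The _inverse pass of ArgMaxGumbelMax follows the top-down (truncated) Gumbel construction used in Gumbel-max SCMs; an equivalent, self-contained sketch of the idea (not the exact repo formula):

import torch

logits = torch.tensor([1.0, 0.0, -1.0])
k = 1                                                      # observed argmax class
logZ = torch.logsumexp(logits, dim=-1)
top = logZ - torch.log(-torch.log(torch.rand(())))         # max value ~ Gumbel(logZ)
fresh = logits - torch.log(-torch.log(torch.rand(3)))      # fresh Gumbel(logits_j) draws
shifted = -torch.log(torch.exp(-fresh) + torch.exp(-top))  # truncate below the max
shifted[k] = top                                 # the observed class attains the max
noise = shifted - logits                         # abducted exogenous noise
assert (noise + logits).argmax().item() == k     # consistent with the observation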
vae.py CHANGED
@@ -6,9 +6,17 @@ import torch.distributions as dist
6
 
7
  EPS = -9 # minimum logscale
8
 
 
9
  @torch.jit.script
10
  def gaussian_kl(q_loc, q_logscale, p_loc, p_logscale):
11
- return -0.5 + p_logscale - q_logscale + 0.5 * (q_logscale.exp().pow(2) + (q_loc - p_loc).pow(2)) / p_logscale.exp().pow(2)
 
 
12
 
13
 
14
  @torch.jit.script
@@ -17,20 +25,28 @@ def sample_gaussian(loc, logscale):
17
 
18
 
19
  class Block(nn.Module):
20
- def __init__(self, in_width, bottleneck, out_width, kernel_size=3, residual=True,
21
- down_rate=None, version=None):
 
 
 
22
  super().__init__()
23
  self.d = down_rate
24
  self.residual = residual
25
  padding = 0 if kernel_size == 1 else 1
26
 
27
- if version == 'light': # for ukbb
28
  activation = nn.ReLU()
29
  self.conv = nn.Sequential(
30
  activation,
31
  nn.Conv2d(in_width, bottleneck, kernel_size, 1, padding),
32
  activation,
33
- nn.Conv2d(bottleneck, out_width, kernel_size, 1, padding)
34
  )
35
  else: # for morphomnist
36
  activation = nn.GELU()
@@ -42,7 +58,7 @@ class Block(nn.Module):
42
  activation,
43
  nn.Conv2d(bottleneck, bottleneck, kernel_size, 1, padding),
44
  activation,
45
- nn.Conv2d(bottleneck, out_width, 1, 1)
46
  )
47
 
48
  if self.residual and (self.d or in_width > out_width):
@@ -67,30 +83,41 @@ class Encoder(nn.Module):
67
  super().__init__()
68
  # parse architecture
69
  stages = []
70
- for i, stage in enumerate(args.enc_arch.split(',')):
71
- start = stage.index('b') + 1
72
- end = stage.index('d') if 'd' in stage else None
73
  n_blocks = int(stage[start:end])
74
 
75
  if i == 0: # define network stem
76
- if n_blocks == 0 and 'd' not in stage:
77
- print('Using stride=2 conv encoder stem.')
78
- self.stem = nn.Conv2d(args.input_channels, args.widths[1],
79
- kernel_size=7, stride=2, padding=3)
 
 
 
 
 
80
  continue
81
  else:
82
- self.stem = nn.Conv2d(args.input_channels, args.widths[0],
83
- kernel_size=7, stride=1, padding=3)
 
 
 
 
 
84
 
85
  stages += [(args.widths[i], None) for _ in range(n_blocks)]
86
- if 'd' in stage: # downsampling block
87
- stages += [(args.widths[i+1], int(stage[stage.index('d') + 1]))]
88
  blocks = []
89
  for i, (width, d) in enumerate(stages):
90
- prev_width = stages[max(0, i-1)][0]
91
  bottleneck = int(prev_width / args.bottleneck)
92
- blocks.append(Block(prev_width, bottleneck, width, down_rate=d,
93
- version=args.vr))
 
94
  # scale weights of last conv layer in each block
95
  for b in blocks:
96
  b.conv[-1].weight.data *= np.sqrt(1 / len(blocks))
@@ -113,7 +140,7 @@ class DecoderBlock(nn.Module):
113
  super().__init__()
114
  bottleneck = int(in_width / args.bottleneck)
115
  self.res = resolution
116
- self.stochastic = (self.res <= args.z_max_res)
117
  self.z_dim = args.z_dim
118
  self.cond_prior = args.cond_prior
119
  k = 3 if self.res > 2 else 1
@@ -125,21 +152,35 @@ class DecoderBlock(nn.Module):
125
  # self.z_feat_proj = nn.Conv2d(self.z_dim + in_width, out_width, 1)
126
  self.z_feat_proj = nn.Conv2d(self.z_dim + in_width, out_width, 1)
127
 
128
- self.prior = Block(p_in_width, bottleneck, 2*self.z_dim + in_width,
129
- kernel_size=k, residual=False, version=args.vr)
 
 
 
 
 
 
130
  if self.stochastic:
131
- self.posterior = Block(2*in_width + args.context_dim, bottleneck, 2*self.z_dim,
132
- kernel_size=k, residual=False, version=args.vr)
 
 
 
 
 
 
133
  self.z_proj = nn.Conv2d(self.z_dim + args.context_dim, in_width, 1)
134
- self.conv = Block(in_width, bottleneck, out_width, kernel_size=k, version=args.vr)
 
 
135
 
136
  def forward_prior(self, z, pa=None, t=None):
137
  if self.cond_prior:
138
  z = torch.cat([z, pa], dim=1)
139
  z = self.prior(z)
140
- p_loc = z[:, :self.z_dim, ...]
141
- p_logscale = z[:, self.z_dim:2*self.z_dim, ...]
142
- p_features = z[:, 2*self.z_dim:, ...]
143
  if t is not None:
144
  p_logscale = p_logscale + torch.tensor(t).to(z.device).log()
145
  return p_loc, p_logscale, p_features
@@ -157,28 +198,27 @@ class Decoder(nn.Module):
157
  super().__init__()
158
  # parse architecture
159
  stages = []
160
- for i, stage in enumerate(args.dec_arch.split(',')):
161
- res = int(stage.split('b')[0])
162
- n_blocks = int(stage[stage.index('b') + 1:])
163
  stages += [(res, args.widths[::-1][i]) for _ in range(n_blocks)]
164
  self.blocks = []
165
  for i, (res, width) in enumerate(stages):
166
- next_width = stages[min(len(stages)-1, i+1)][1]
167
  self.blocks.append(DecoderBlock(args, width, next_width, res))
168
  self._scale_weights()
169
  self.blocks = nn.ModuleList(self.blocks)
170
  # bias params
171
- self.all_res = list(np.unique([stages[i][0]
172
- for i in range(len(stages))]))
173
  bias = []
174
  for i, res in enumerate(self.all_res):
175
  if res <= args.bias_max_res:
176
- bias.append(nn.Parameter(
177
- torch.zeros(1, args.widths[::-1][i], res, res)
178
- ))
179
  self.bias = nn.ParameterList(bias)
180
  self.cond_prior = args.cond_prior
181
- self.is_drop_cond = True if 'mnist' in args.hps else False # hacky
182
 
183
  def _scale_weights(self):
184
  scale = np.sqrt(1 / len(self.blocks))
@@ -200,28 +240,29 @@ class Decoder(nn.Module):
200
  res = block.res # current block resolution, e.g. 64x64
201
  pa = parents[..., :res, :res].clone() # select parents @ res
202
 
203
- if self.is_drop_cond: # for morphomnist w/ conditioning dropout. Hacky, clean up later
 
 
204
  pa_drop1 = pa.clone()
205
- pa_drop1[:,2:,...] = pa_drop1[:,2:,...] * p1
206
  pa_drop2 = pa.clone()
207
- pa_drop2[:,2:,...] = pa_drop2[:,2:,...] * p2
208
  else: # for ukbb
209
  pa_drop1 = pa_drop2 = pa
210
 
211
  if h.size(-1) < res: # upsample previous layer output
212
  b = bias[res] if res in bias.keys() else 0 # broadcasting
213
- h = b + F.interpolate(h, scale_factor=res/h.shape[-1])
214
 
215
  if block.cond_prior: # conditional prior: p(z_i | z_<i, pa_x)
216
  # w/ posterior correction
217
  # p_loc, p_logscale, p_feat = block.forward_prior(h, pa_drop1, t=t)
218
  if z.size(-1) < res: # w/o posterior correction
219
- z = b + F.interpolate(z, scale_factor=res/z.shape[-1])
220
- p_loc, p_logscale, p_feat = block.forward_prior(
221
- z, pa_drop1, t=t)
222
  else: # exogenous prior: p(z_i | z_<i)
223
  if z.size(-1) < res:
224
- z = b + F.interpolate(z, scale_factor=res/z.shape[-1])
225
  p_loc, p_logscale, p_feat = block.forward_prior(z, t=t)
226
 
227
  # computation tree:
@@ -239,16 +280,17 @@ class Decoder(nn.Module):
239
 
240
  if block.stochastic:
241
  if x is not None: # z_i ~ q(z_i | z_<i, pa_x, x)
242
- q_loc, q_logscale = block.forward_posterior(
243
- h, pa, x[res], t=t)
244
  z = sample_gaussian(q_loc, q_logscale)
245
- stat = dict(kl=gaussian_kl(
246
- q_loc, q_logscale, p_loc, p_logscale))
247
  # abduct exogenous noise
248
  if abduct:
249
  if block.cond_prior: # z* if conditional prior
250
- stat.update(dict(z={
251
- 'z': z, 'q_loc': q_loc, 'q_logscale': q_logscale}))
 
 
 
252
  else: # z if exogenous prior
253
  # stat.update(dict(z=z.detach()))
254
  stat.update(dict(z=z)) # if cf training
@@ -258,8 +300,9 @@ class Decoder(nn.Module):
258
  z = sample_gaussian(p_loc, p_logscale)
259
 
260
  if abduct and block.cond_prior: # for abducting z*
261
- stats.append(dict(z={
262
- 'p_loc': p_loc, 'p_logscale': p_logscale}))
 
263
  else:
264
  try: # forward fixed latents z or z*
265
  z = latents[i]
@@ -267,8 +310,9 @@ class Decoder(nn.Module):
267
  z = sample_gaussian(p_loc, p_logscale)
268
 
269
  if abduct and block.cond_prior: # for abducting z*
270
- stats.append(dict(z={
271
- 'p_loc': p_loc, 'p_logscale': p_logscale}))
 
272
  else:
273
  z = p_loc # deterministic path
274
 
@@ -276,7 +320,7 @@ class Decoder(nn.Module):
276
  h = self.forward_merge(block, h, z, pa_drop2)
277
 
278
  # if not block.cond_prior:
279
- if (i+1) < len(self.blocks):
280
  # z independent of pa_x for next layer prior
281
  z = block.z_feat_proj(torch.cat([z, p_feat], dim=1))
282
  return h, stats
@@ -287,7 +331,7 @@ class Decoder(nn.Module):
287
  return block.conv(h)
288
 
289
  def drop_cond(self):
290
- opt = dist.Categorical(1/3*torch.ones(3)).sample()
291
  if opt == 0: # drop stochastic path
292
  p1, p2 = 0, 1
293
  elif opt == 1: # drop deterministic path
@@ -301,30 +345,31 @@ class DGaussNet(nn.Module):
301
  def __init__(self, args):
302
  super(DGaussNet, self).__init__()
303
  self.x_loc = nn.Conv2d(
304
- args.widths[0], args.input_channels, kernel_size=1, stride=1)
 
305
  self.x_logscale = nn.Conv2d(
306
- args.widths[0], args.input_channels, kernel_size=1, stride=1)
 
307
 
308
  if args.input_channels == 3:
309
- self.channel_coeffs = nn.Conv2d(
310
- args.widths[0], 3, kernel_size=1, stride=1)
311
 
312
  if args.std_init > 0: # if std_init=0, random init weights for diag cov
313
  nn.init.zeros_(self.x_logscale.weight)
314
  nn.init.constant_(self.x_logscale.bias, np.log(args.std_init))
315
 
316
- covariance = args.x_like.split('_')[0]
317
- if covariance == 'fixed':
318
  self.x_logscale.weight.requires_grad = False
319
  self.x_logscale.bias.requires_grad = False
320
- elif covariance == 'shared':
321
  self.x_logscale.weight.requires_grad = False
322
  self.x_logscale.bias.requires_grad = True
323
- elif covariance == 'diag':
324
  self.x_logscale.weight.requires_grad = True
325
  self.x_logscale.bias.requires_grad = True
326
  else:
327
- NotImplementedError(f'{args.x_like} not implemented.')
328
 
329
  def forward(self, h, x=None, t=None):
330
  loc, logscale = self.x_loc(h), self.x_logscale(h).clamp(min=EPS)
@@ -351,7 +396,9 @@ class DGaussNet(nn.Module):
351
  return loc, logscale
352
 
353
  def approx_cdf(self, x):
354
- return 0.5 * (1.0 + torch.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * torch.pow(x, 3))))
 
 
355
 
356
  def nll(self, h, x):
357
  loc, logscale = self.forward(h, x)
@@ -367,10 +414,11 @@ class DGaussNet(nn.Module):
367
  log_probs = torch.where(
368
  x < -0.999,
369
  log_cdf_plus,
370
- torch.where(x > 0.999, log_one_minus_cdf_min,
371
- torch.log(cdf_delta.clamp(min=1e-12))),
 
372
  )
373
- return -1. * log_probs.mean(dim=(1, 2, 3))
374
 
375
  def sample(self, h, return_loc=True, t=None):
376
  if return_loc:
@@ -378,20 +426,20 @@ class DGaussNet(nn.Module):
378
  else:
379
  loc, logscale = self.forward(h, t)
380
  x = loc + torch.exp(logscale) * torch.randn_like(loc)
381
- x = torch.clamp(x, min=-1., max=1.)
382
  return x, logscale.exp()
383
 
384
 
385
  class HVAE(nn.Module):
386
  def __init__(self, args):
387
  super().__init__()
388
- args.vr = 'light' if 'ukbb' in args.hps else None # hacky
389
  self.encoder = Encoder(args)
390
  self.decoder = Decoder(args)
391
- if args.x_like.split('_')[1] == 'dgauss':
392
  self.likelihood = DGaussNet(args)
393
  else:
394
- NotImplementedError(f'{args.x_like} not implemented.')
395
  self.cond_prior = args.cond_prior
396
  self.free_bits = args.kl_free_bits
397
 
@@ -404,12 +452,12 @@ class HVAE(nn.Module):
404
  kl_pp = 0.0
405
  for stat in stats:
406
  kl_pp += torch.maximum(
407
- free_bits, stat['kl'].sum(dim=(2, 3)).mean(dim=0)
408
  ).sum()
409
  else:
410
  kl_pp = torch.zeros_like(nll_pp)
411
  for i, stat in enumerate(stats):
412
- kl_pp += stat['kl'].sum(dim=(1, 2, 3))
413
  kl_pp = kl_pp / np.prod(x.shape[1:]) # per pixel
414
  elbo = nll_pp.mean() + beta * kl_pp.mean() # negative elbo (free energy)
415
  return dict(elbo=elbo, nll=nll_pp.mean(), kl=kl_pp.mean())
@@ -421,26 +469,26 @@ class HVAE(nn.Module):
421
  def abduct(self, x, parents, cf_parents=None, alpha=0.5, t=None):
422
  acts = self.encoder(x)
423
  _, q_stats = self.decoder(
424
- x=acts, parents=parents, abduct=True, t=t) # q(z|x,pa)
425
- q_stats = [s['z'] for s in q_stats]
 
426
 
427
  if self.cond_prior and cf_parents is not None:
428
- _, p_stats = self.decoder(
429
- parents=cf_parents, abduct=True, t=t) # p(z|pa*)
430
- p_stats = [s['z'] for s in p_stats]
431
 
432
  cf_zs = []
433
  t = torch.tensor(t).to(x.device) # z* sampling temperature
434
 
435
  for i in range(len(q_stats)):
436
  # from z_i ~ q(z_i | z_{<i}, x, pa)
437
- q_loc = q_stats[i]['q_loc']
438
- q_scale = q_stats[i]['q_logscale'].exp()
439
  # abduct exogenous noise u ~ N(0, I)
440
- u = (q_stats[i]['z'] - q_loc) / q_scale
441
  # p(z_i | z_{<i}, pa*)
442
- p_loc = p_stats[i]['p_loc']
443
- p_var = p_stats[i]['p_logscale'].exp().pow(2)
444
 
445
  # Option1: mixture distribution: r(z_i | z_{<i}, x, pa, pa*)
446
  # = a*q(z_i | z_{<i}, x, pa) + (1-a)*p(z_i | z_{<i}, pa*)
 
6
 
7
  EPS = -9 # minimum logscale
8
 
9
+
10
  @torch.jit.script
11
  def gaussian_kl(q_loc, q_logscale, p_loc, p_logscale):
12
+ return (
13
+ -0.5
14
+ + p_logscale
15
+ - q_logscale
16
+ + 0.5
17
+ * (q_logscale.exp().pow(2) + (q_loc - p_loc).pow(2))
18
+ / p_logscale.exp().pow(2)
19
+ )
20
 
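This is the closed-form KL between diagonal Gaussians, log(sigma_p/sigma_q) + (sigma_q^2 + (mu_q - mu_p)^2) / (2 sigma_p^2) - 1/2; a quick sanity check against torch.distributions (illustrative, using gaussian_kl as defined above):

import torch
import torch.distributions as dist

q = dist.Normal(torch.tensor(0.3), torch.tensor(0.5))
p = dist.Normal(torch.tensor(0.0), torch.tensor(1.0))
manual = gaussian_kl(q.loc, q.scale.log(), p.loc, p.scale.log())
assert torch.allclose(manual, dist.kl_divergence(q, p))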
21
 
22
  @torch.jit.script
 
25
 
26
 
27
  class Block(nn.Module):
28
+ def __init__(
29
+ self,
30
+ in_width,
31
+ bottleneck,
32
+ out_width,
33
+ kernel_size=3,
34
+ residual=True,
35
+ down_rate=None,
36
+ version=None,
37
+ ):
38
  super().__init__()
39
  self.d = down_rate
40
  self.residual = residual
41
  padding = 0 if kernel_size == 1 else 1
42
 
43
+ if version == "light": # for ukbb
44
  activation = nn.ReLU()
45
  self.conv = nn.Sequential(
46
  activation,
47
  nn.Conv2d(in_width, bottleneck, kernel_size, 1, padding),
48
  activation,
49
+ nn.Conv2d(bottleneck, out_width, kernel_size, 1, padding),
50
  )
51
  else: # for morphomnist
52
  activation = nn.GELU()
 
58
  activation,
59
  nn.Conv2d(bottleneck, bottleneck, kernel_size, 1, padding),
60
  activation,
61
+ nn.Conv2d(bottleneck, out_width, 1, 1),
62
  )
63
 
64
  if self.residual and (self.d or in_width > out_width):
 
83
  super().__init__()
84
  # parse architecture
85
  stages = []
86
+ for i, stage in enumerate(args.enc_arch.split(",")):
87
+ start = stage.index("b") + 1
88
+ end = stage.index("d") if "d" in stage else None
89
  n_blocks = int(stage[start:end])
90
 
91
  if i == 0: # define network stem
92
+ if n_blocks == 0 and "d" not in stage:
93
+ print("Using stride=2 conv encoder stem.")
94
+ self.stem = nn.Conv2d(
95
+ args.input_channels,
96
+ args.widths[1],
97
+ kernel_size=7,
98
+ stride=2,
99
+ padding=3,
100
+ )
101
  continue
102
  else:
103
+ self.stem = nn.Conv2d(
104
+ args.input_channels,
105
+ args.widths[0],
106
+ kernel_size=7,
107
+ stride=1,
108
+ padding=3,
109
+ )
110
 
111
  stages += [(args.widths[i], None) for _ in range(n_blocks)]
112
+ if "d" in stage: # downsampling block
113
+ stages += [(args.widths[i + 1], int(stage[stage.index("d") + 1]))]
114
  blocks = []
115
  for i, (width, d) in enumerate(stages):
116
+ prev_width = stages[max(0, i - 1)][0]
117
  bottleneck = int(prev_width / args.bottleneck)
118
+ blocks.append(
119
+ Block(prev_width, bottleneck, width, down_rate=d, version=args.vr)
120
+ )
121
  # scale weights of last conv layer in each block
122
  for b in blocks:
123
  b.conv[-1].weight.data *= np.sqrt(1 / len(blocks))
 
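The architecture-string grammar implied by the parser above, spelled out on a hypothetical value (the real enc_arch strings live in the hyperparameter configs):

# enc_arch = "64b2d2,32b2"   (hypothetical)
#   "64b2d2" -> two Blocks at args.widths[0], then a downsampling Block
#               (down_rate=2) at args.widths[1]
#   "32b2"   -> two Blocks at args.widths[1]
# "b<N>" sets the block count; an optional "d<R>" appends a down_rate=R block.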
140
  super().__init__()
141
  bottleneck = int(in_width / args.bottleneck)
142
  self.res = resolution
143
+ self.stochastic = self.res <= args.z_max_res
144
  self.z_dim = args.z_dim
145
  self.cond_prior = args.cond_prior
146
  k = 3 if self.res > 2 else 1
 
152
  # self.z_feat_proj = nn.Conv2d(self.z_dim + in_width, out_width, 1)
153
  self.z_feat_proj = nn.Conv2d(self.z_dim + in_width, out_width, 1)
154
 
155
+ self.prior = Block(
156
+ p_in_width,
157
+ bottleneck,
158
+ 2 * self.z_dim + in_width,
159
+ kernel_size=k,
160
+ residual=False,
161
+ version=args.vr,
162
+ )
163
  if self.stochastic:
164
+ self.posterior = Block(
165
+ 2 * in_width + args.context_dim,
166
+ bottleneck,
167
+ 2 * self.z_dim,
168
+ kernel_size=k,
169
+ residual=False,
170
+ version=args.vr,
171
+ )
172
  self.z_proj = nn.Conv2d(self.z_dim + args.context_dim, in_width, 1)
173
+ self.conv = Block(
174
+ in_width, bottleneck, out_width, kernel_size=k, version=args.vr
175
+ )
176
 
177
  def forward_prior(self, z, pa=None, t=None):
178
  if self.cond_prior:
179
  z = torch.cat([z, pa], dim=1)
180
  z = self.prior(z)
181
+ p_loc = z[:, : self.z_dim, ...]
182
+ p_logscale = z[:, self.z_dim : 2 * self.z_dim, ...]
183
+ p_features = z[:, 2 * self.z_dim :, ...]
184
  if t is not None:
185
  p_logscale = p_logscale + torch.tensor(t).to(z.device).log()
186
  return p_loc, p_logscale, p_features
 
198
  super().__init__()
199
  # parse architecture
200
  stages = []
201
+ for i, stage in enumerate(args.dec_arch.split(",")):
202
+ res = int(stage.split("b")[0])
203
+ n_blocks = int(stage[stage.index("b") + 1 :])
204
  stages += [(res, args.widths[::-1][i]) for _ in range(n_blocks)]
205
  self.blocks = []
206
  for i, (res, width) in enumerate(stages):
207
+ next_width = stages[min(len(stages) - 1, i + 1)][1]
208
  self.blocks.append(DecoderBlock(args, width, next_width, res))
209
  self._scale_weights()
210
  self.blocks = nn.ModuleList(self.blocks)
211
  # bias params
212
+ self.all_res = list(np.unique([stages[i][0] for i in range(len(stages))]))
 
213
  bias = []
214
  for i, res in enumerate(self.all_res):
215
  if res <= args.bias_max_res:
216
+ bias.append(
217
+ nn.Parameter(torch.zeros(1, args.widths[::-1][i], res, res))
218
+ )
219
  self.bias = nn.ParameterList(bias)
220
  self.cond_prior = args.cond_prior
221
+ self.is_drop_cond = True if "mnist" in args.hps else False # hacky
222
 
223
  def _scale_weights(self):
224
  scale = np.sqrt(1 / len(self.blocks))
 
240
  res = block.res # current block resolution, e.g. 64x64
241
  pa = parents[..., :res, :res].clone() # select parents @ res
242
 
243
+ if (
244
+ self.is_drop_cond
245
+ ): # for morphomnist w/ conditioning dropout. Hacky, clean up later
246
  pa_drop1 = pa.clone()
247
+ pa_drop1[:, 2:, ...] = pa_drop1[:, 2:, ...] * p1
248
  pa_drop2 = pa.clone()
249
+ pa_drop2[:, 2:, ...] = pa_drop2[:, 2:, ...] * p2
250
  else: # for ukbb
251
  pa_drop1 = pa_drop2 = pa
252
 
253
  if h.size(-1) < res: # upsample previous layer output
254
  b = bias[res] if res in bias.keys() else 0 # broadcasting
255
+ h = b + F.interpolate(h, scale_factor=res / h.shape[-1])
256
 
257
  if block.cond_prior: # conditional prior: p(z_i | z_<i, pa_x)
258
  # w/ posterior correction
259
  # p_loc, p_logscale, p_feat = block.forward_prior(h, pa_drop1, t=t)
260
  if z.size(-1) < res: # w/o posterior correction
261
+ z = b + F.interpolate(z, scale_factor=res / z.shape[-1])
262
+ p_loc, p_logscale, p_feat = block.forward_prior(z, pa_drop1, t=t)
 
263
  else: # exogenous prior: p(z_i | z_<i)
264
  if z.size(-1) < res:
265
+ z = b + F.interpolate(z, scale_factor=res / z.shape[-1])
266
  p_loc, p_logscale, p_feat = block.forward_prior(z, t=t)
267
 
268
  # computation tree:
 
280
 
281
  if block.stochastic:
282
  if x is not None: # z_i ~ q(z_i | z_<i, pa_x, x)
283
+ q_loc, q_logscale = block.forward_posterior(h, pa, x[res], t=t)
 
284
  z = sample_gaussian(q_loc, q_logscale)
285
+ stat = dict(kl=gaussian_kl(q_loc, q_logscale, p_loc, p_logscale))
 
286
  # abduct exogenous noise
287
  if abduct:
288
  if block.cond_prior: # z* if conditional prior
289
+ stat.update(
290
+ dict(
291
+ z={"z": z, "q_loc": q_loc, "q_logscale": q_logscale}
292
+ )
293
+ )
294
  else: # z if exogenous prior
295
  # stat.update(dict(z=z.detach()))
296
  stat.update(dict(z=z)) # if cf training
 
300
  z = sample_gaussian(p_loc, p_logscale)
301
 
302
  if abduct and block.cond_prior: # for abducting z*
303
+ stats.append(
304
+ dict(z={"p_loc": p_loc, "p_logscale": p_logscale})
305
+ )
306
  else:
307
  try: # forward fixed latents z or z*
308
  z = latents[i]
 
310
  z = sample_gaussian(p_loc, p_logscale)
311
 
312
  if abduct and block.cond_prior: # for abducting z*
313
+ stats.append(
314
+ dict(z={"p_loc": p_loc, "p_logscale": p_logscale})
315
+ )
316
  else:
317
  z = p_loc # deterministic path
318
 
 
320
  h = self.forward_merge(block, h, z, pa_drop2)
321
 
322
  # if not block.cond_prior:
323
+ if (i + 1) < len(self.blocks):
324
  # z independent of pa_x for next layer prior
325
  z = block.z_feat_proj(torch.cat([z, p_feat], dim=1))
326
  return h, stats
 
331
  return block.conv(h)
332
 
333
  def drop_cond(self):
334
+ opt = dist.Categorical(1 / 3 * torch.ones(3)).sample()
335
  if opt == 0: # drop stochastic path
336
  p1, p2 = 0, 1
337
  elif opt == 1: # drop deterministic path
 
345
  def __init__(self, args):
346
  super(DGaussNet, self).__init__()
347
  self.x_loc = nn.Conv2d(
348
+ args.widths[0], args.input_channels, kernel_size=1, stride=1
349
+ )
350
  self.x_logscale = nn.Conv2d(
351
+ args.widths[0], args.input_channels, kernel_size=1, stride=1
352
+ )
353
 
354
  if args.input_channels == 3:
355
+ self.channel_coeffs = nn.Conv2d(args.widths[0], 3, kernel_size=1, stride=1)
 
356
 
357
  if args.std_init > 0: # if std_init=0, random init weights for diag cov
358
  nn.init.zeros_(self.x_logscale.weight)
359
  nn.init.constant_(self.x_logscale.bias, np.log(args.std_init))
360
 
361
+ covariance = args.x_like.split("_")[0]
362
+ if covariance == "fixed":
363
  self.x_logscale.weight.requires_grad = False
364
  self.x_logscale.bias.requires_grad = False
365
+ elif covariance == "shared":
366
  self.x_logscale.weight.requires_grad = False
367
  self.x_logscale.bias.requires_grad = True
368
+ elif covariance == "diag":
369
  self.x_logscale.weight.requires_grad = True
370
  self.x_logscale.bias.requires_grad = True
371
  else:
372
+ raise NotImplementedError(f"{args.x_like} not implemented.")
373
 
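The covariance mode is the prefix of args.x_like; with the dgauss likelihood used here, the recognised prefixes are, as parsed above:

# "fixed_dgauss"  -> logscale frozen at log(std_init)
# "shared_dgauss" -> conv weights frozen, a single learnable logscale bias
# "diag_dgauss"   -> fully learnable per-pixel diagonal covariance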
374
  def forward(self, h, x=None, t=None):
375
  loc, logscale = self.x_loc(h), self.x_logscale(h).clamp(min=EPS)
 
396
  return loc, logscale
397
 
398
  def approx_cdf(self, x):
399
+ return 0.5 * (
400
+ 1.0 + torch.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * torch.pow(x, 3)))
401
+ )
402
 
403
  def nll(self, h, x):
404
  loc, logscale = self.forward(h, x)
 
414
  log_probs = torch.where(
415
  x < -0.999,
416
  log_cdf_plus,
417
+ torch.where(
418
+ x > 0.999, log_one_minus_cdf_min, torch.log(cdf_delta.clamp(min=1e-12))
419
+ ),
420
  )
421
+ return -1.0 * log_probs.mean(dim=(1, 2, 3))
422
 
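Note that nll() scores a discretized Gaussian likelihood: each pixel's probability is a CDF difference over its intensity bin (via the tanh approximation in approx_cdf), with the edge bins at x ≈ ±1 absorbing the full tails, in the spirit of discretized-logistic likelihoods; the mean over (C, H, W) makes the result a per-pixel value.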
423
  def sample(self, h, return_loc=True, t=None):
424
  if return_loc:
 
426
  else:
427
  loc, logscale = self.forward(h, t)
428
  x = loc + torch.exp(logscale) * torch.randn_like(loc)
429
+ x = torch.clamp(x, min=-1.0, max=1.0)
430
  return x, logscale.exp()
431
 
432
 
433
  class HVAE(nn.Module):
434
  def __init__(self, args):
435
  super().__init__()
436
+ args.vr = "light" if "ukbb" in args.hps else None # hacky
437
  self.encoder = Encoder(args)
438
  self.decoder = Decoder(args)
439
+ if args.x_like.split("_")[1] == "dgauss":
440
  self.likelihood = DGaussNet(args)
441
  else:
442
+ raise NotImplementedError(f"{args.x_like} not implemented.")
443
  self.cond_prior = args.cond_prior
444
  self.free_bits = args.kl_free_bits
445
 
 
452
  kl_pp = 0.0
453
  for stat in stats:
454
  kl_pp += torch.maximum(
455
+ free_bits, stat["kl"].sum(dim=(2, 3)).mean(dim=0)
456
  ).sum()
457
  else:
458
  kl_pp = torch.zeros_like(nll_pp)
459
  for i, stat in enumerate(stats):
460
+ kl_pp += stat["kl"].sum(dim=(1, 2, 3))
461
  kl_pp = kl_pp / np.prod(x.shape[1:]) # per pixel
462
  elbo = nll_pp.mean() + beta * kl_pp.mean() # negative elbo (free energy)
463
  return dict(elbo=elbo, nll=nll_pp.mean(), kl=kl_pp.mean())
 
469
  def abduct(self, x, parents, cf_parents=None, alpha=0.5, t=None):
470
  acts = self.encoder(x)
471
  _, q_stats = self.decoder(
472
+ x=acts, parents=parents, abduct=True, t=t
473
+ ) # q(z|x,pa)
474
+ q_stats = [s["z"] for s in q_stats]
475
 
476
  if self.cond_prior and cf_parents is not None:
477
+ _, p_stats = self.decoder(parents=cf_parents, abduct=True, t=t) # p(z|pa*)
478
+ p_stats = [s["z"] for s in p_stats]
 
479
 
480
  cf_zs = []
481
  t = torch.tensor(t).to(x.device) # z* sampling temperature
482
 
483
  for i in range(len(q_stats)):
484
  # from z_i ~ q(z_i | z_{<i}, x, pa)
485
+ q_loc = q_stats[i]["q_loc"]
486
+ q_scale = q_stats[i]["q_logscale"].exp()
487
  # abduct exogenous noise u ~ N(0, I)
488
+ u = (q_stats[i]["z"] - q_loc) / q_scale
489
  # p(z_i | z_{<i}, pa*)
490
+ p_loc = p_stats[i]["p_loc"]
491
+ p_var = p_stats[i]["p_logscale"].exp().pow(2)
492
 
493
  # Option1: mixture distribution: r(z_i | z_{<i}, x, pa, pa*)
494
  # = a*q(z_i | z_{<i}, x, pa) + (1-a)*p(z_i | z_{<i}, pa*)