Last commit not found
# Number of GPUs per GPU worker | |
export GPUS_PER_NODE=8 | |
# Number of GPU workers, for single-worker training, please set to 1 | |
export NUM_NODES=$SLURM_NNODES | |
# The ip address of the rank-0 worker, for single-worker training, please set to localhost | |
master_addr=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1) | |
export MASTER_ADDR=$master_addr | |
# The port for communication | |
export MASTER_PORT=12350 | |
# The rank of this worker, should be in {0, ..., WORKER_CNT-1}, for single-worker training, please set to 0 | |
export RANK=$SLURM_NODEID | |
echo "MASTER_ADDR: $MASTER_ADDR" | |
echo "RANK :$RANK" | |
echo "NUM_NODES :$NUM_NODES" | |
echo "GPUS_PER_NODE :$GPUS_PER_NODE" | |
export MIOPEN_USER_DB_PATH=/lus/home/NAT/gda2204/mshukor/.config/miopen_${MASTER_ADDR}_${SLURM_PROCID}/ | |
echo "MIOPEN_USER_DB_PATH :$MIOPEN_USER_DB_PATH" | |
num_workers=0 | |
ofa_dir=/lus/home/NAT/gda2204/mshukor/code/unival | |
base_data_dir=/lus/scratch/NAT/gda2204/SHARED/data | |
base_log_dir=/work/NAT/gda2204/mshukor/logs | |
bpe_dir=${ofa_dir}/utils/BPE | |
user_dir=${ofa_dir}/ofa_module | |
image_dir=${base_data_dir} | |
data_dir=${base_data_dir}/ofa/image_gen_data | |
data=${data_dir}/coco_vqgan_full_test.tsv | |
# data=${data_dir}/coco_vqgan_dev.tsv | |
model_name=unival_image_gen_ofa_stage_2 | |
path=/lus/scratch/NAT/gda2204/SHARED/logs/ofa/checkpoints/image_gen/unival_image_gen_ofa_stage_2/checkpoint_best1.pt | |
selected_cols=0,2,1 | |
split='test' | |
VQGAN_MODEL_PATH=${base_log_dir}/ofa/pretrained_models/vqgan/last.ckpt | |
VQGAN_CONFIG_PATH=${base_log_dir}/ofa/pretrained_models/vqgan/model.yaml | |
CLIP_MODEL_PATH=${base_log_dir}/ofa/pretrained_models/clip/ViT-B-16.pt | |
GEN_IMAGES_PATH=/lus/scratch/NAT/gda2204/SHARED/tmp/results/image_gen/${model_name} | |
mkdir -p $GEN_IMAGES_PATH | |
echo "torch_home_path" | |
echo $TORCH_HOME | |
export TORCH_HOME=/home/mshukor/.cache/torch | |
python3 -m torch.distributed.launch \ | |
--nnodes=${NUM_NODES} \ | |
--nproc_per_node=${GPUS_PER_NODE} \ | |
--master_port=${MASTER_PORT} \ | |
--node_rank=${RANK} \ | |
--master_addr=${MASTER_ADDR} \ | |
--use_env ${ofa_dir}/evaluate.py \ | |
${data} \ | |
--path=${path} \ | |
--bpe-dir=${bpe_dir} \ | |
--user-dir=${user_dir} \ | |
--task=image_gen \ | |
--batch-size=1 \ | |
--log-format=simple --log-interval=1 \ | |
--seed=42 \ | |
--gen-subset=${split} \ | |
--beam=16 \ | |
--min-len=1024 \ | |
--max-len-a=0 \ | |
--max-len-b=1024 \ | |
--sampling-topk=256 \ | |
--temperature=1.0 \ | |
--code-image-size=256 \ | |
--constraint-range=50265,58457 \ | |
--fp16 \ | |
--num-workers=0 \ | |
--model-overrides="{\"data\":\"${data}\",\"bpe_dir\":\"${bpe_dir}\",\"selected_cols\":\"${selected_cols}\",\"clip_model_path\":\"${CLIP_MODEL_PATH}\",\"vqgan_model_path\":\"${VQGAN_MODEL_PATH}\",\"vqgan_config_path\":\"${VQGAN_CONFIG_PATH}\",\"gen_images_path\":\"${GEN_IMAGES_PATH}\"}" | |
# compute IS | |
python image_gen/inception_score.py --gpu 4 --batch-size 128 --path1 ${GEN_IMAGES_PATH}/top1 | |
# compute FID, download statistics for test dataset first. | |
python image_gen/fid_score.py --gpu 4 --batch-size 128 --path1 ${GEN_IMAGES_PATH}/top1 --path2 /lus/scratch/NAT/gda2204/SHARED/tmp/ofa/image_gen_data/gt_stat.npz | |