diff --git "a/train.log" "b/train.log" new file mode 100644--- /dev/null +++ "b/train.log" @@ -0,0 +1,8528 @@ +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_functional_video.py:6: UserWarning: The 'torchvision.transforms._functional_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms.functional' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_transforms_video.py:22: UserWarning: The 'torchvision.transforms._transforms_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional_tensor.py:5: UserWarning: The torchvision.transforms.functional_tensor module is deprecated in 0.15 and will be **removed in 0.17**. Please don't rely on it. You probably just need to use APIs in torchvision.transforms.functional or in torchvision.transforms.v2.functional. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_functional_video.py:6: UserWarning: The 'torchvision.transforms._functional_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms.functional' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_transforms_video.py:22: UserWarning: The 'torchvision.transforms._transforms_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional_tensor.py:5: UserWarning: The torchvision.transforms.functional_tensor module is deprecated in 0.15 and will be **removed in 0.17**. Please don't rely on it. You probably just need to use APIs in torchvision.transforms.functional or in torchvision.transforms.v2.functional. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_functional_video.py:6: UserWarning: The 'torchvision.transforms._functional_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms.functional' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_transforms_video.py:22: UserWarning: The 'torchvision.transforms._transforms_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional_tensor.py:5: UserWarning: The torchvision.transforms.functional_tensor module is deprecated in 0.15 and will be **removed in 0.17**. Please don't rely on it. You probably just need to use APIs in torchvision.transforms.functional or in torchvision.transforms.v2.functional. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_functional_video.py:6: UserWarning: The 'torchvision.transforms._functional_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms.functional' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_transforms_video.py:22: UserWarning: The 'torchvision.transforms._transforms_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional_tensor.py:5: UserWarning: The torchvision.transforms.functional_tensor module is deprecated in 0.15 and will be **removed in 0.17**. Please don't rely on it. You probably just need to use APIs in torchvision.transforms.functional or in torchvision.transforms.v2.functional. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_functional_video.py:6: UserWarning: The 'torchvision.transforms._functional_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms.functional' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_transforms_video.py:22: UserWarning: The 'torchvision.transforms._transforms_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional_tensor.py:5: UserWarning: The torchvision.transforms.functional_tensor module is deprecated in 0.15 and will be **removed in 0.17**. Please don't rely on it. You probably just need to use APIs in torchvision.transforms.functional or in torchvision.transforms.v2.functional. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_functional_video.py:6: UserWarning: The 'torchvision.transforms._functional_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms.functional' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_transforms_video.py:22: UserWarning: The 'torchvision.transforms._transforms_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional_tensor.py:5: UserWarning: The torchvision.transforms.functional_tensor module is deprecated in 0.15 and will be **removed in 0.17**. Please don't rely on it. You probably just need to use APIs in torchvision.transforms.functional or in torchvision.transforms.v2.functional. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_functional_video.py:6: UserWarning: The 'torchvision.transforms._functional_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms.functional' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_transforms_video.py:22: UserWarning: The 'torchvision.transforms._transforms_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional_tensor.py:5: UserWarning: The torchvision.transforms.functional_tensor module is deprecated in 0.15 and will be **removed in 0.17**. Please don't rely on it. You probably just need to use APIs in torchvision.transforms.functional or in torchvision.transforms.v2.functional. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_functional_video.py:6: UserWarning: The 'torchvision.transforms._functional_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms.functional' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/_transforms_video.py:22: UserWarning: The 'torchvision.transforms._transforms_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms' module instead. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional_tensor.py:5: UserWarning: The torchvision.transforms.functional_tensor module is deprecated in 0.15 and will be **removed in 0.17**. Please don't rely on it. You probably just need to use APIs in torchvision.transforms.functional or in torchvision.transforms.v2.functional. + warnings.warn( +[2024-03-09 06:24:37,357] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-03-09 06:24:37,704] [INFO] [comm.py:637:init_distributed] cdb=None +[2024-03-09 06:24:37,927] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-03-09 06:24:38,102] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-03-09 06:24:38,115] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-03-09 06:24:38,140] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-03-09 06:24:38,148] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-03-09 06:24:38,158] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-03-09 06:24:38,181] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-03-09 06:24:38,238] [INFO] [comm.py:637:init_distributed] cdb=None +[2024-03-09 06:24:38,458] [INFO] [comm.py:637:init_distributed] cdb=None +[2024-03-09 06:24:38,458] [INFO] [comm.py:668:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +[2024-03-09 06:24:38,470] [INFO] [comm.py:637:init_distributed] cdb=None +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[2024-03-09 06:24:38,483] [INFO] [comm.py:637:init_distributed] cdb=None +[2024-03-09 06:24:38,487] [INFO] [comm.py:637:init_distributed] cdb=None +[2024-03-09 06:24:38,493] [INFO] [comm.py:637:init_distributed] cdb=None +[2024-03-09 06:24:38,536] [INFO] [comm.py:637:init_distributed] cdb=None + Loading checkpoint shards: 0%| | 0/4 [00:00 +2024-03-09 06:25:01.984 n213-017-210:2252776:2252776 [0] NCCL INFO NET/Plugin : dlerror=libnccl-net.so: cannot open shared object file: No such file or directory No plugin found (libnccl-net.so), using internal implementation +2024-03-09 06:25:01.991 n213-017-210:2252776:2252776 [0] NCCL INFO cudaDriverVersion 12010 +NCCL version 2.19.3+cuda12.1 +2024-03-09 06:25:01.995 n213-017-210:2252781:2252781 [5] NCCL INFO cudaDriverVersion 12010 +2024-03-09 06:25:01.996 n213-017-210:2252781:2252781 [5] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:01.996 n213-017-210:2252781:2252781 [5] NCCL INFO Bootstrap : Using eth0:10.213.17.210<0> +2024-03-09 06:25:02.000 n213-017-210:2252779:2252779 [3] NCCL INFO cudaDriverVersion 12010 +2024-03-09 06:25:02.000 n213-017-210:2252779:2252779 [3] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:02.001 n213-017-210:2252779:2252779 [3] NCCL INFO Bootstrap : Using eth0:10.213.17.210<0> +2024-03-09 06:25:02.004 n213-017-210:2252781:2252781 [5] NCCL INFO NET/Plugin : dlerror=libnccl-net.so: cannot open shared object file: No such file or directory No plugin found (libnccl-net.so), using internal implementation +2024-03-09 06:25:02.007 n213-017-210:2252779:2252779 [3] NCCL INFO NET/Plugin : dlerror=libnccl-net.so: cannot open shared object file: No such file or directory No plugin found (libnccl-net.so), using internal implementation +2024-03-09 06:25:02.015 n213-017-210:2252776:2253572 [0] NCCL INFO NCCL_IB_DISABLE set by environment to 0. +2024-03-09 06:25:02.021 n213-017-210:2252781:2253577 [5] NCCL INFO NCCL_IB_DISABLE set by environment to 0. +2024-03-09 06:25:02.022 n213-017-210:2252779:2253578 [3] NCCL INFO NCCL_IB_DISABLE set by environment to 0. +2024-03-09 06:25:02.035 n213-017-210:2252776:2253572 [0] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:02.036 n213-017-210:2252776:2253572 [0] NCCL INFO NCCL_IB_HCA set to mlx5 +2024-03-09 06:25:02.048 n213-017-210:2252776:2253572 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [1]mlx5_1:1/RoCE [2]mlx5_2:1/RoCE [3]mlx5_3:1/RoCE [RO]; OOB eth0:10.213.17.210<0> +2024-03-09 06:25:02.049 n213-017-210:2252776:2253572 [0] NCCL INFO Using non-device net plugin version 0 +2024-03-09 06:25:02.049 n213-017-210:2252776:2253572 [0] NCCL INFO Using network IB +2024-03-09 06:25:02.051 n213-017-210:2252781:2253577 [5] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:02.052 n213-017-210:2252781:2253577 [5] NCCL INFO NCCL_IB_HCA set to mlx5 +2024-03-09 06:25:02.054 n213-017-210:2252779:2253578 [3] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:02.054 n213-017-210:2252779:2253578 [3] NCCL INFO NCCL_IB_HCA set to mlx5 +2024-03-09 06:25:02.070 n213-017-210:2252779:2253578 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [1]mlx5_1:1/RoCE [2]mlx5_2:1/RoCE [3]mlx5_3:1/RoCE [RO]; OOB eth0:10.213.17.210<0> +2024-03-09 06:25:02.070 n213-017-210:2252781:2253577 [5] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [1]mlx5_1:1/RoCE [2]mlx5_2:1/RoCE [3]mlx5_3:1/RoCE [RO]; OOB eth0:10.213.17.210<0> +2024-03-09 06:25:02.070 n213-017-210:2252779:2253578 [3] NCCL INFO Using non-device net plugin version 0 +2024-03-09 06:25:02.070 n213-017-210:2252779:2253578 [3] NCCL INFO Using network IB +2024-03-09 06:25:02.070 n213-017-210:2252781:2253577 [5] NCCL INFO Using non-device net plugin version 0 +2024-03-09 06:25:02.070 n213-017-210:2252781:2253577 [5] NCCL INFO Using network IB +2024-03-09 06:25:02.951 n213-017-210:2252778:2252778 [2] NCCL INFO cudaDriverVersion 12010 +2024-03-09 06:25:02.951 n213-017-210:2252778:2252778 [2] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:02.951 n213-017-210:2252778:2252778 [2] NCCL INFO Bootstrap : Using eth0:10.213.17.210<0> +2024-03-09 06:25:02.956 n213-017-210:2252778:2252778 [2] NCCL INFO NET/Plugin : dlerror=libnccl-net.so: cannot open shared object file: No such file or directory No plugin found (libnccl-net.so), using internal implementation +2024-03-09 06:25:02.978 n213-017-210:2252778:2253608 [2] NCCL INFO NCCL_IB_DISABLE set by environment to 0. +2024-03-09 06:25:02.992 n213-017-210:2252778:2253608 [2] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:02.993 n213-017-210:2252778:2253608 [2] NCCL INFO NCCL_IB_HCA set to mlx5 +2024-03-09 06:25:03.006 n213-017-210:2252778:2253608 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [1]mlx5_1:1/RoCE [2]mlx5_2:1/RoCE [3]mlx5_3:1/RoCE [RO]; OOB eth0:10.213.17.210<0> +2024-03-09 06:25:03.006 n213-017-210:2252778:2253608 [2] NCCL INFO Using non-device net plugin version 0 +2024-03-09 06:25:03.006 n213-017-210:2252778:2253608 [2] NCCL INFO Using network IB +2024-03-09 06:25:03.333 n213-017-210:2252782:2252782 [6] NCCL INFO cudaDriverVersion 12010 +2024-03-09 06:25:03.333 n213-017-210:2252782:2252782 [6] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:03.334 n213-017-210:2252782:2252782 [6] NCCL INFO Bootstrap : Using eth0:10.213.17.210<0> +2024-03-09 06:25:03.339 n213-017-210:2252782:2252782 [6] NCCL INFO NET/Plugin : dlerror=libnccl-net.so: cannot open shared object file: No such file or directory No plugin found (libnccl-net.so), using internal implementation +2024-03-09 06:25:03.358 n213-017-210:2252782:2253624 [6] NCCL INFO NCCL_IB_DISABLE set by environment to 0. +2024-03-09 06:25:03.373 n213-017-210:2252782:2253624 [6] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:03.373 n213-017-210:2252782:2253624 [6] NCCL INFO NCCL_IB_HCA set to mlx5 +2024-03-09 06:25:03.386 n213-017-210:2252782:2253624 [6] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [1]mlx5_1:1/RoCE [2]mlx5_2:1/RoCE [3]mlx5_3:1/RoCE [RO]; OOB eth0:10.213.17.210<0> +2024-03-09 06:25:03.386 n213-017-210:2252782:2253624 [6] NCCL INFO Using non-device net plugin version 0 +2024-03-09 06:25:03.386 n213-017-210:2252782:2253624 [6] NCCL INFO Using network IB +2024-03-09 06:25:04.291 n213-017-210:2252777:2252777 [1] NCCL INFO cudaDriverVersion 12010 +2024-03-09 06:25:04.291 n213-017-210:2252777:2252777 [1] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:04.292 n213-017-210:2252777:2252777 [1] NCCL INFO Bootstrap : Using eth0:10.213.17.210<0> +2024-03-09 06:25:04.296 n213-017-210:2252777:2252777 [1] NCCL INFO NET/Plugin : dlerror=libnccl-net.so: cannot open shared object file: No such file or directory No plugin found (libnccl-net.so), using internal implementation +2024-03-09 06:25:04.308 n213-017-210:2252777:2253640 [1] NCCL INFO NCCL_IB_DISABLE set by environment to 0. +2024-03-09 06:25:04.321 n213-017-210:2252777:2253640 [1] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:04.322 n213-017-210:2252777:2253640 [1] NCCL INFO NCCL_IB_HCA set to mlx5 +2024-03-09 06:25:04.335 n213-017-210:2252777:2253640 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [1]mlx5_1:1/RoCE [2]mlx5_2:1/RoCE [3]mlx5_3:1/RoCE [RO]; OOB eth0:10.213.17.210<0> +2024-03-09 06:25:04.335 n213-017-210:2252777:2253640 [1] NCCL INFO Using non-device net plugin version 0 +2024-03-09 06:25:04.335 n213-017-210:2252777:2253640 [1] NCCL INFO Using network IB +2024-03-09 06:25:04.433 n213-017-210:2252780:2252780 [4] NCCL INFO cudaDriverVersion 12010 +2024-03-09 06:25:04.434 n213-017-210:2252780:2252780 [4] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:04.435 n213-017-210:2252780:2252780 [4] NCCL INFO Bootstrap : Using eth0:10.213.17.210<0> +2024-03-09 06:25:04.440 n213-017-210:2252780:2252780 [4] NCCL INFO NET/Plugin : dlerror=libnccl-net.so: cannot open shared object file: No such file or directory No plugin found (libnccl-net.so), using internal implementation +2024-03-09 06:25:04.459 n213-017-210:2252780:2253646 [4] NCCL INFO NCCL_IB_DISABLE set by environment to 0. +2024-03-09 06:25:04.473 n213-017-210:2252780:2253646 [4] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:04.473 n213-017-210:2252780:2253646 [4] NCCL INFO NCCL_IB_HCA set to mlx5 +2024-03-09 06:25:04.486 n213-017-210:2252780:2253646 [4] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [1]mlx5_1:1/RoCE [2]mlx5_2:1/RoCE [3]mlx5_3:1/RoCE [RO]; OOB eth0:10.213.17.210<0> +2024-03-09 06:25:04.486 n213-017-210:2252780:2253646 [4] NCCL INFO Using non-device net plugin version 0 +2024-03-09 06:25:04.486 n213-017-210:2252780:2253646 [4] NCCL INFO Using network IB +2024-03-09 06:25:04.781 n213-017-210:2252783:2252783 [7] NCCL INFO cudaDriverVersion 12010 +2024-03-09 06:25:04.781 n213-017-210:2252783:2252783 [7] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:04.782 n213-017-210:2252783:2252783 [7] NCCL INFO Bootstrap : Using eth0:10.213.17.210<0> +2024-03-09 06:25:04.786 n213-017-210:2252783:2252783 [7] NCCL INFO NET/Plugin : dlerror=libnccl-net.so: cannot open shared object file: No such file or directory No plugin found (libnccl-net.so), using internal implementation +2024-03-09 06:25:04.805 n213-017-210:2252783:2253658 [7] NCCL INFO NCCL_IB_DISABLE set by environment to 0. +2024-03-09 06:25:04.819 n213-017-210:2252783:2253658 [7] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth0 +2024-03-09 06:25:04.820 n213-017-210:2252783:2253658 [7] NCCL INFO NCCL_IB_HCA set to mlx5 +2024-03-09 06:25:04.833 n213-017-210:2252783:2253658 [7] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [1]mlx5_1:1/RoCE [2]mlx5_2:1/RoCE [3]mlx5_3:1/RoCE [RO]; OOB eth0:10.213.17.210<0> +2024-03-09 06:25:04.833 n213-017-210:2252783:2253658 [7] NCCL INFO Using non-device net plugin version 0 +2024-03-09 06:25:04.833 n213-017-210:2252783:2253658 [7] NCCL INFO Using network IB +2024-03-09 06:25:04.863 n213-017-210:2252783:2253658 [7] NCCL INFO comm 0x754554d0 rank 7 nranks 8 cudaDev 7 nvmlDev 7 busId c9000 commId 0x5adbfca4a74bfde - Init START +2024-03-09 06:25:04.863 n213-017-210:2252780:2253646 [4] NCCL INFO comm 0x77ae8fb0 rank 4 nranks 8 cudaDev 4 nvmlDev 4 busId 89000 commId 0x5adbfca4a74bfde - Init START +2024-03-09 06:25:04.863 n213-017-210:2252778:2253608 [2] NCCL INFO comm 0x7a3a2bd0 rank 2 nranks 8 cudaDev 2 nvmlDev 2 busId 4a000 commId 0x5adbfca4a74bfde - Init START +2024-03-09 06:25:04.863 n213-017-210:2252779:2253578 [3] NCCL INFO comm 0x799eb500 rank 3 nranks 8 cudaDev 3 nvmlDev 3 busId 4e000 commId 0x5adbfca4a74bfde - Init START +2024-03-09 06:25:04.863 n213-017-210:2252777:2253640 [1] NCCL INFO comm 0x75bb64c0 rank 1 nranks 8 cudaDev 1 nvmlDev 1 busId 16000 commId 0x5adbfca4a74bfde - Init START +2024-03-09 06:25:04.863 n213-017-210:2252782:2253624 [6] NCCL INFO comm 0x91380c80 rank 6 nranks 8 cudaDev 6 nvmlDev 6 busId c5000 commId 0x5adbfca4a74bfde - Init START +2024-03-09 06:25:04.863 n213-017-210:2252776:2253572 [0] NCCL INFO comm 0x18afec00 rank 0 nranks 8 cudaDev 0 nvmlDev 0 busId 10000 commId 0x5adbfca4a74bfde - Init START +2024-03-09 06:25:04.863 n213-017-210:2252781:2253577 [5] NCCL INFO comm 0xa08bc940 rank 5 nranks 8 cudaDev 5 nvmlDev 5 busId 8e000 commId 0x5adbfca4a74bfde - Init START +2024-03-09 06:25:06.929 n213-017-210:2252783:2253658 [7] NCCL INFO Setting affinity for GPU 7 to ffffffff,00000000,ffffffff,00000000 +2024-03-09 06:25:06.929 n213-017-210:2252783:2253658 [7] NCCL INFO NVLS multicast support is not available on dev 7 +2024-03-09 06:25:06.930 n213-017-210:2252780:2253646 [4] NCCL INFO Setting affinity for GPU 4 to ffffffff,00000000,ffffffff,00000000 +2024-03-09 06:25:06.930 n213-017-210:2252780:2253646 [4] NCCL INFO NVLS multicast support is not available on dev 4 +2024-03-09 06:25:06.930 n213-017-210:2252782:2253624 [6] NCCL INFO Setting affinity for GPU 6 to ffffffff,00000000,ffffffff,00000000 +2024-03-09 06:25:06.930 n213-017-210:2252782:2253624 [6] NCCL INFO NVLS multicast support is not available on dev 6 +2024-03-09 06:25:06.931 n213-017-210:2252777:2253640 [1] NCCL INFO Setting affinity for GPU 1 to ffffffff,00000000,ffffffff +2024-03-09 06:25:06.931 n213-017-210:2252777:2253640 [1] NCCL INFO NVLS multicast support is not available on dev 1 +2024-03-09 06:25:06.936 n213-017-210:2252778:2253608 [2] NCCL INFO Setting affinity for GPU 2 to ffffffff,00000000,ffffffff +2024-03-09 06:25:06.936 n213-017-210:2252778:2253608 [2] NCCL INFO NVLS multicast support is not available on dev 2 +2024-03-09 06:25:06.937 n213-017-210:2252779:2253578 [3] NCCL INFO Setting affinity for GPU 3 to ffffffff,00000000,ffffffff +2024-03-09 06:25:06.937 n213-017-210:2252779:2253578 [3] NCCL INFO NVLS multicast support is not available on dev 3 +2024-03-09 06:25:06.937 n213-017-210:2252781:2253577 [5] NCCL INFO Setting affinity for GPU 5 to ffffffff,00000000,ffffffff,00000000 +2024-03-09 06:25:06.938 n213-017-210:2252781:2253577 [5] NCCL INFO NVLS multicast support is not available on dev 5 +2024-03-09 06:25:06.940 n213-017-210:2252776:2253572 [0] NCCL INFO Setting affinity for GPU 0 to ffffffff,00000000,ffffffff +2024-03-09 06:25:06.942 n213-017-210:2252776:2253572 [0] NCCL INFO NVLS multicast support is not available on dev 0 +2024-03-09 06:25:06.942 n213-017-210:2252780:2253646 [4] NCCL INFO Trees [0] 5/-1/-1->4->3 [1] 5/-1/-1->4->3 [2] 5/-1/-1->4->3 [3] 5/-1/-1->4->3 [4] 5/-1/-1->4->3 [5] 5/-1/-1->4->3 [6] 5/-1/-1->4->3 [7] 5/-1/-1->4->3 [8] 5/-1/-1->4->3 [9] 5/-1/-1->4->3 [10] 5/-1/-1->4->3 [11] 5/-1/-1->4->3 [12] 5/-1/-1->4->3 [13] 5/-1/-1->4->3 [14] 5/-1/-1->4->3 [15] 5/-1/-1->4->3 [16] 5/-1/-1->4->3 [17] 5/-1/-1->4->3 [18] 5/-1/-1->4->3 [19] 5/-1/-1->4->3 [20] 5/-1/-1->4->3 [21] 5/-1/-1->4->3 [22] 5/-1/-1->4->3 [23] 5/-1/-1->4->3 +2024-03-09 06:25:06.942 n213-017-210:2252779:2253578 [3] NCCL INFO Trees [0] 4/-1/-1->3->2 [1] 4/-1/-1->3->2 [2] 4/-1/-1->3->2 [3] 4/-1/-1->3->2 [4] 4/-1/-1->3->2 [5] 4/-1/-1->3->2 [6] 4/-1/-1->3->2 [7] 4/-1/-1->3->2 [8] 4/-1/-1->3->2 [9] 4/-1/-1->3->2 [10] 4/-1/-1->3->2 [11] 4/-1/-1->3->2 [12] 4/-1/-1->3->2 [13] 4/-1/-1->3->2 [14] 4/-1/-1->3->2 [15] 4/-1/-1->3->2 [16] 4/-1/-1->3->2 [17] 4/-1/-1->3->2 [18] 4/-1/-1->3->2 [19] 4/-1/-1->3->2 [20] 4/-1/-1->3->2 [21] 4/-1/-1->3->2 [22] 4/-1/-1->3->2 [23] 4/-1/-1->3->2 +2024-03-09 06:25:06.942 n213-017-210:2252780:2253646 [4] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:06.942 n213-017-210:2252779:2253578 [3] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:06.943 n213-017-210:2252782:2253624 [6] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 [2] 7/-1/-1->6->5 [3] 7/-1/-1->6->5 [4] 7/-1/-1->6->5 [5] 7/-1/-1->6->5 [6] 7/-1/-1->6->5 [7] 7/-1/-1->6->5 [8] 7/-1/-1->6->5 [9] 7/-1/-1->6->5 [10] 7/-1/-1->6->5 [11] 7/-1/-1->6->5 [12] 7/-1/-1->6->5 [13] 7/-1/-1->6->5 [14] 7/-1/-1->6->5 [15] 7/-1/-1->6->5 [16] 7/-1/-1->6->5 [17] 7/-1/-1->6->5 [18] 7/-1/-1->6->5 [19] 7/-1/-1->6->5 [20] 7/-1/-1->6->5 [21] 7/-1/-1->6->5 [22] 7/-1/-1->6->5 [23] 7/-1/-1->6->5 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 00/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252783:2253658 [7] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 [2] -1/-1/-1->7->6 [3] -1/-1/-1->7->6 [4] -1/-1/-1->7->6 [5] -1/-1/-1->7->6 [6] -1/-1/-1->7->6 [7] -1/-1/-1->7->6 [8] -1/-1/-1->7->6 [9] -1/-1/-1->7->6 [10] -1/-1/-1->7->6 [11] -1/-1/-1->7->6 [12] -1/-1/-1->7->6 [13] -1/-1/-1->7->6 [14] -1/-1/-1->7->6 [15] -1/-1/-1->7->6 [16] -1/-1/-1->7->6 [17] -1/-1/-1->7->6 [18] -1/-1/-1->7->6 [19] -1/-1/-1->7->6 [20] -1/-1/-1->7->6 [21] -1/-1/-1->7->6 [22] -1/-1/-1->7->6 [23] -1/-1/-1->7->6 +2024-03-09 06:25:06.943 n213-017-210:2252778:2253608 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 [2] 3/-1/-1->2->1 [3] 3/-1/-1->2->1 [4] 3/-1/-1->2->1 [5] 3/-1/-1->2->1 [6] 3/-1/-1->2->1 [7] 3/-1/-1->2->1 [8] 3/-1/-1->2->1 [9] 3/-1/-1->2->1 [10] 3/-1/-1->2->1 [11] 3/-1/-1->2->1 [12] 3/-1/-1->2->1 [13] 3/-1/-1->2->1 [14] 3/-1/-1->2->1 [15] 3/-1/-1->2->1 [16] 3/-1/-1->2->1 [17] 3/-1/-1->2->1 [18] 3/-1/-1->2->1 [19] 3/-1/-1->2->1 [20] 3/-1/-1->2->1 [21] 3/-1/-1->2->1 [22] 3/-1/-1->2->1 [23] 3/-1/-1->2->1 +2024-03-09 06:25:06.943 n213-017-210:2252781:2253577 [5] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/-1/-1->5->4 [2] 6/-1/-1->5->4 [3] 6/-1/-1->5->4 [4] 6/-1/-1->5->4 [5] 6/-1/-1->5->4 [6] 6/-1/-1->5->4 [7] 6/-1/-1->5->4 [8] 6/-1/-1->5->4 [9] 6/-1/-1->5->4 [10] 6/-1/-1->5->4 [11] 6/-1/-1->5->4 [12] 6/-1/-1->5->4 [13] 6/-1/-1->5->4 [14] 6/-1/-1->5->4 [15] 6/-1/-1->5->4 [16] 6/-1/-1->5->4 [17] 6/-1/-1->5->4 [18] 6/-1/-1->5->4 [19] 6/-1/-1->5->4 [20] 6/-1/-1->5->4 [21] 6/-1/-1->5->4 [22] 6/-1/-1->5->4 [23] 6/-1/-1->5->4 +2024-03-09 06:25:06.943 n213-017-210:2252782:2253624 [6] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:06.943 n213-017-210:2252783:2253658 [7] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 01/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252778:2253608 [2] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:06.943 n213-017-210:2252781:2253577 [5] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 02/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 03/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252777:2253640 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 [2] 2/-1/-1->1->0 [3] 2/-1/-1->1->0 [4] 2/-1/-1->1->0 [5] 2/-1/-1->1->0 [6] 2/-1/-1->1->0 [7] 2/-1/-1->1->0 [8] 2/-1/-1->1->0 [9] 2/-1/-1->1->0 [10] 2/-1/-1->1->0 [11] 2/-1/-1->1->0 [12] 2/-1/-1->1->0 [13] 2/-1/-1->1->0 [14] 2/-1/-1->1->0 [15] 2/-1/-1->1->0 [16] 2/-1/-1->1->0 [17] 2/-1/-1->1->0 [18] 2/-1/-1->1->0 [19] 2/-1/-1->1->0 [20] 2/-1/-1->1->0 [21] 2/-1/-1->1->0 [22] 2/-1/-1->1->0 [23] 2/-1/-1->1->0 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 04/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252777:2253640 [1] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 05/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 06/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 07/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 08/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 09/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 10/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 11/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 12/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 13/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 14/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 15/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 16/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 17/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 18/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 19/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 20/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 21/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 22/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 23/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] 1/-1/-1->0->-1 [2] 1/-1/-1->0->-1 [3] 1/-1/-1->0->-1 [4] 1/-1/-1->0->-1 [5] 1/-1/-1->0->-1 [6] 1/-1/-1->0->-1 [7] 1/-1/-1->0->-1 [8] 1/-1/-1->0->-1 [9] 1/-1/-1->0->-1 [10] 1/-1/-1->0->-1 [11] 1/-1/-1->0->-1 [12] 1/-1/-1->0->-1 [13] 1/-1/-1->0->-1 [14] 1/-1/-1->0->-1 [15] 1/-1/-1->0->-1 [16] 1/-1/-1->0->-1 [17] 1/-1/-1->0->-1 [18] 1/-1/-1->0->-1 [19] 1/-1/-1->0->-1 [20] 1/-1/-1->0->-1 [21] 1/-1/-1->0->-1 [22] 1/-1/-1->0->-1 [23] 1/-1/-1->0->-1 +2024-03-09 06:25:06.943 n213-017-210:2252776:2253572 [0] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:07.331 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 00/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.331 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 00/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.332 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 00/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.332 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 00/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.333 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 00/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.333 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 01/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.334 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 01/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.334 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 01/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.334 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 01/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.335 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 00/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.335 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 00/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.335 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 01/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.336 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 00/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.336 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 02/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.336 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 02/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.337 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 02/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.337 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 02/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.337 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 01/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.338 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 01/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.338 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 02/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.338 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 01/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.338 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 03/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.339 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 03/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.339 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 03/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.339 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 03/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.339 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 02/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.340 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 02/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.340 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 03/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.340 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 02/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.340 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 04/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.341 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 04/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.341 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 04/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.342 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 04/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.342 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 03/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.343 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 03/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.343 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 04/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.343 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 03/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.343 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 05/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.344 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 05/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.344 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 05/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.344 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 05/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.344 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 04/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.345 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 04/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.345 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 05/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.345 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 04/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.345 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 06/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.346 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 06/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.346 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 06/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.346 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 06/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.347 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 05/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.347 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 05/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.348 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 06/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.348 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 05/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.348 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 07/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.348 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 07/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.348 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 07/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.349 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 07/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.349 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 06/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.350 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 06/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.350 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 07/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.350 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 06/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.350 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 08/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.351 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 08/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.351 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 08/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.351 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 08/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.352 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 07/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.352 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 07/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.352 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 08/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.352 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 07/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.352 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 09/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.353 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 09/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.353 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 09/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.353 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 09/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.354 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 08/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.354 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 08/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.355 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 09/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.355 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 08/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.355 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 10/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.355 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 10/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.356 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 10/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.356 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 10/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.357 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 09/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.357 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 09/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.357 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 10/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.357 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 09/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.357 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 11/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.358 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 11/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.358 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 11/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.358 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 11/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.360 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 10/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.360 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 10/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.360 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 11/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.360 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 10/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.360 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 12/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.361 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 12/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.361 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 12/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.361 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 12/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.362 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 11/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.363 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 11/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.363 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 12/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.363 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 11/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.363 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 13/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.364 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 13/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.364 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 13/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.364 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 13/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.366 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 12/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.366 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 12/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.366 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 13/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.366 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 12/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.366 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 14/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.367 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 14/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.367 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 14/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.367 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 14/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.369 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 13/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.369 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 13/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.369 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 13/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.369 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 14/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.369 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 15/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.370 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 15/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.370 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 15/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.371 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 15/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.371 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 14/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.372 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 14/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.372 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 14/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.372 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 15/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.372 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 16/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.373 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 16/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.373 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 16/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.374 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 16/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.374 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 15/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.375 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 15/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.375 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 15/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.375 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 16/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.375 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 17/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.376 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 17/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.376 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 17/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.377 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 17/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.378 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 16/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.378 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 16/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.378 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 16/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.378 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 17/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.378 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 18/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.379 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 18/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.379 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 18/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.380 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 18/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.380 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 17/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.381 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 17/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.381 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 17/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.381 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 18/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.381 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 19/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.382 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 19/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.382 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 19/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.383 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 19/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.383 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 18/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.384 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 18/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.384 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 18/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.384 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 19/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.384 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 20/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.385 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 20/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.385 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 20/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.385 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 20/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.386 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 19/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.386 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 19/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.386 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 19/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.387 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 20/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.387 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 21/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.387 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 21/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.388 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 21/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.388 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 21/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.389 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 20/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.389 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 20/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.389 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 20/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.390 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 21/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.390 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 22/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.390 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 22/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.391 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 22/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.391 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 22/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.392 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 21/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.392 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 21/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.392 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 21/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.393 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 22/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:07.393 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 23/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:07.393 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 23/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:07.394 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 23/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:07.394 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 23/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:07.395 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 22/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.395 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 22/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.395 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 22/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.397 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 23/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:07.397 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 23/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:07.397 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 23/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:07.411 n213-017-210:2252776:2253572 [0] NCCL INFO Channel 23/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.021 n213-017-210:2252778:2253608 [2] NCCL INFO Connected all rings +2024-03-09 06:25:08.047 n213-017-210:2252779:2253578 [3] NCCL INFO Connected all rings +2024-03-09 06:25:08.052 n213-017-210:2252777:2253640 [1] NCCL INFO Connected all rings +2024-03-09 06:25:08.052 n213-017-210:2252776:2253572 [0] NCCL INFO Connected all rings +2024-03-09 06:25:08.062 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 00/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.064 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 01/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.067 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 02/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.068 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 03/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.070 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 04/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.072 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 05/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.073 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 06/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.075 n213-017-210:2252783:2253658 [7] NCCL INFO Connected all rings +2024-03-09 06:25:08.075 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 00/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.075 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 07/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.078 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 01/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.078 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 08/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.080 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 02/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.080 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 09/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.082 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 03/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.083 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 10/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.084 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 04/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.085 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 11/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.086 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 05/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.087 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 12/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.088 n213-017-210:2252782:2253624 [6] NCCL INFO Connected all rings +2024-03-09 06:25:08.089 n213-017-210:2252780:2253646 [4] NCCL INFO Connected all rings +2024-03-09 06:25:08.089 n213-017-210:2252781:2253577 [5] NCCL INFO Connected all rings +2024-03-09 06:25:08.089 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 06/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.089 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 13/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.091 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 07/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.092 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 14/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.095 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 15/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.098 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 16/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.098 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 08/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.099 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 00/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.100 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 17/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.101 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 09/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.102 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 01/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.103 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 18/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.104 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 10/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.104 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 02/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.106 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 19/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.106 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 11/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.107 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 03/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.108 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 20/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.108 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 00/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.109 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 12/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.110 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 21/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.111 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 01/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.111 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 13/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.111 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 04/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.113 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 22/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.113 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 02/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.114 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 14/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.114 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 05/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.115 n213-017-210:2252778:2253608 [2] NCCL INFO Channel 23/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:08.116 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 03/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.116 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 15/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.116 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 06/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.118 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 04/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.119 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 16/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.119 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 07/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.121 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 05/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.121 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 17/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.121 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 08/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.123 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 06/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.123 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 18/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.123 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 09/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.125 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 07/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.125 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 19/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.125 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 10/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.127 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 08/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.127 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 20/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.127 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 11/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.129 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 09/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.129 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 21/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.129 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 12/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.131 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 10/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.132 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 22/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.132 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 13/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.133 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 11/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.134 n213-017-210:2252783:2253658 [7] NCCL INFO Channel 23/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:08.134 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 14/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.136 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 12/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.136 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 15/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.137 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 13/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.138 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 16/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.139 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 14/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.140 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 17/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.141 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 15/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.142 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 18/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.144 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 16/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.144 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 19/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.146 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 00/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.147 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 17/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.147 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 20/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.148 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 01/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.150 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 00/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.150 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 18/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.150 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 00/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.150 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 21/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.151 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 02/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.152 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 01/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.152 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 19/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.153 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 01/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.153 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 22/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.154 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 03/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.155 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 02/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.156 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 20/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.156 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 02/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.156 n213-017-210:2252779:2253578 [3] NCCL INFO Channel 23/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:08.157 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 04/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.158 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 03/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.159 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 21/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.159 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 03/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.160 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 05/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.161 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 04/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.161 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 22/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.161 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 04/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.163 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 06/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.164 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 05/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.164 n213-017-210:2252777:2253640 [1] NCCL INFO Channel 23/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:08.164 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 05/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.165 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 07/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.166 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 06/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.166 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 06/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.168 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 08/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.169 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 07/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.169 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 07/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.170 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 09/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.172 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 08/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.172 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 08/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.173 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 10/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.175 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 09/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.175 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 09/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.176 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 11/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.177 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 10/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.177 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 10/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.179 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 12/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.181 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 11/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.181 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 11/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.183 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 13/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.184 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 12/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.185 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 12/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.186 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 14/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.187 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 13/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.188 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 13/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.189 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 15/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.190 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 14/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.190 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 14/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.191 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 16/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.192 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 15/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.192 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 15/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.193 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 17/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.194 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 16/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.194 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 16/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.195 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 18/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.195 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 17/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.195 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 17/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.196 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 19/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.197 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 18/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.198 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 18/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.199 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 20/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.199 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 19/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.199 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 19/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.200 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 21/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.200 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 20/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.201 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 20/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.202 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 22/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.203 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 21/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.203 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 21/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.204 n213-017-210:2252781:2253577 [5] NCCL INFO Channel 23/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:08.204 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 22/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.204 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 22/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.206 n213-017-210:2252780:2253646 [4] NCCL INFO Channel 23/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:08.206 n213-017-210:2252782:2253624 [6] NCCL INFO Channel 23/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:08.670 n213-017-210:2252776:2253572 [0] NCCL INFO Connected all trees +2024-03-09 06:25:08.671 n213-017-210:2252776:2253572 [0] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:08.671 n213-017-210:2252776:2253572 [0] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:08.680 n213-017-210:2252777:2253640 [1] NCCL INFO Connected all trees +2024-03-09 06:25:08.680 n213-017-210:2252777:2253640 [1] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:08.680 n213-017-210:2252777:2253640 [1] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:08.751 n213-017-210:2252778:2253608 [2] NCCL INFO Connected all trees +2024-03-09 06:25:08.751 n213-017-210:2252778:2253608 [2] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:08.751 n213-017-210:2252778:2253608 [2] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:08.755 n213-017-210:2252783:2253658 [7] NCCL INFO Connected all trees +2024-03-09 06:25:08.755 n213-017-210:2252783:2253658 [7] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:08.755 n213-017-210:2252783:2253658 [7] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:08.760 n213-017-210:2252779:2253578 [3] NCCL INFO Connected all trees +2024-03-09 06:25:08.760 n213-017-210:2252779:2253578 [3] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:08.760 n213-017-210:2252779:2253578 [3] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:08.764 n213-017-210:2252780:2253646 [4] NCCL INFO Connected all trees +2024-03-09 06:25:08.764 n213-017-210:2252780:2253646 [4] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:08.764 n213-017-210:2252780:2253646 [4] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:08.764 n213-017-210:2252782:2253624 [6] NCCL INFO Connected all trees +2024-03-09 06:25:08.764 n213-017-210:2252782:2253624 [6] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:08.764 n213-017-210:2252782:2253624 [6] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:08.764 n213-017-210:2252781:2253577 [5] NCCL INFO Connected all trees +2024-03-09 06:25:08.764 n213-017-210:2252781:2253577 [5] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:08.764 n213-017-210:2252781:2253577 [5] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:08.789 n213-017-210:2252778:2253608 [2] NCCL INFO comm 0x7a3a2bd0 rank 2 nranks 8 cudaDev 2 nvmlDev 2 busId 4a000 commId 0x5adbfca4a74bfde - Init COMPLETE +2024-03-09 06:25:08.789 n213-017-210:2252782:2253624 [6] NCCL INFO comm 0x91380c80 rank 6 nranks 8 cudaDev 6 nvmlDev 6 busId c5000 commId 0x5adbfca4a74bfde - Init COMPLETE +2024-03-09 06:25:08.789 n213-017-210:2252780:2253646 [4] NCCL INFO comm 0x77ae8fb0 rank 4 nranks 8 cudaDev 4 nvmlDev 4 busId 89000 commId 0x5adbfca4a74bfde - Init COMPLETE +2024-03-09 06:25:08.789 n213-017-210:2252776:2253572 [0] NCCL INFO comm 0x18afec00 rank 0 nranks 8 cudaDev 0 nvmlDev 0 busId 10000 commId 0x5adbfca4a74bfde - Init COMPLETE +2024-03-09 06:25:08.790 n213-017-210:2252783:2253658 [7] NCCL INFO comm 0x754554d0 rank 7 nranks 8 cudaDev 7 nvmlDev 7 busId c9000 commId 0x5adbfca4a74bfde - Init COMPLETE +2024-03-09 06:25:08.790 n213-017-210:2252779:2253578 [3] NCCL INFO comm 0x799eb500 rank 3 nranks 8 cudaDev 3 nvmlDev 3 busId 4e000 commId 0x5adbfca4a74bfde - Init COMPLETE +2024-03-09 06:25:08.791 n213-017-210:2252777:2253640 [1] NCCL INFO comm 0x75bb64c0 rank 1 nranks 8 cudaDev 1 nvmlDev 1 busId 16000 commId 0x5adbfca4a74bfde - Init COMPLETE +2024-03-09 06:25:08.792 n213-017-210:2252781:2253577 [5] NCCL INFO comm 0xa08bc940 rank 5 nranks 8 cudaDev 5 nvmlDev 5 busId 8e000 commId 0x5adbfca4a74bfde - Init COMPLETE +/usr/local/lib/python3.9/dist-packages/bytedmetrics/__init__.py:10: UserWarning: bytedmetrics is renamed to bytedance.metrics, please using `bytedance.metrics` instead of `bytedmetrics` + warnings.warn("bytedmetrics is renamed to bytedance.metrics, please using `bytedance.metrics` instead of `bytedmetrics`") +wandb: ⭐️ View project at https://ml.byteintl.net/experiment/tracking/detail?Id=project_20230126_e9daa974 +wandb: 🚀 View run at https://ml.byteintl.net/experiment/tracking/detail?Id=project_20230126_e9daa974&selectedTrial=run_20240309_98cb39ab +wandb: - Waiting for wandb.init()... wandb: \ Waiting for wandb.init()... wandb: | Waiting for wandb.init()... wandb: Tracking run with wandb version 0.13.69 +wandb: Run data is saved locally in /mnt/bn/liangkeg/ruohongz/vllm/video_llava/wandb/run-20240309_062525-run_20240309_98cb39ab +wandb: Run `wandb offline` to turn off syncing. + 0%| | 0/7045 [00:007->6 [1] -1/-1/-1->7->6 [2] -1/-1/-1->7->6 [3] -1/-1/-1->7->6 [4] -1/-1/-1->7->6 [5] -1/-1/-1->7->6 [6] -1/-1/-1->7->6 [7] -1/-1/-1->7->6 [8] -1/-1/-1->7->6 [9] -1/-1/-1->7->6 [10] -1/-1/-1->7->6 [11] -1/-1/-1->7->6 [12] -1/-1/-1->7->6 [13] -1/-1/-1->7->6 [14] -1/-1/-1->7->6 [15] -1/-1/-1->7->6 [16] -1/-1/-1->7->6 [17] -1/-1/-1->7->6 [18] -1/-1/-1->7->6 [19] -1/-1/-1->7->6 [20] -1/-1/-1->7->6 [21] -1/-1/-1->7->6 [22] -1/-1/-1->7->6 [23] -1/-1/-1->7->6 +2024-03-09 06:25:35.611 n213-017-210:2252783:2254206 [7] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:35.611 n213-017-210:2252782:2254213 [6] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 [2] 7/-1/-1->6->5 [3] 7/-1/-1->6->5 [4] 7/-1/-1->6->5 [5] 7/-1/-1->6->5 [6] 7/-1/-1->6->5 [7] 7/-1/-1->6->5 [8] 7/-1/-1->6->5 [9] 7/-1/-1->6->5 [10] 7/-1/-1->6->5 [11] 7/-1/-1->6->5 [12] 7/-1/-1->6->5 [13] 7/-1/-1->6->5 [14] 7/-1/-1->6->5 [15] 7/-1/-1->6->5 [16] 7/-1/-1->6->5 [17] 7/-1/-1->6->5 [18] 7/-1/-1->6->5 [19] 7/-1/-1->6->5 [20] 7/-1/-1->6->5 [21] 7/-1/-1->6->5 [22] 7/-1/-1->6->5 [23] 7/-1/-1->6->5 +2024-03-09 06:25:35.611 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 00/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.611 n213-017-210:2252782:2254213 [6] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:35.611 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 01/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.611 n213-017-210:2252781:2254212 [5] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/-1/-1->5->4 [2] 6/-1/-1->5->4 [3] 6/-1/-1->5->4 [4] 6/-1/-1->5->4 [5] 6/-1/-1->5->4 [6] 6/-1/-1->5->4 [7] 6/-1/-1->5->4 [8] 6/-1/-1->5->4 [9] 6/-1/-1->5->4 [10] 6/-1/-1->5->4 [11] 6/-1/-1->5->4 [12] 6/-1/-1->5->4 [13] 6/-1/-1->5->4 [14] 6/-1/-1->5->4 [15] 6/-1/-1->5->4 [16] 6/-1/-1->5->4 [17] 6/-1/-1->5->4 [18] 6/-1/-1->5->4 [19] 6/-1/-1->5->4 [20] 6/-1/-1->5->4 [21] 6/-1/-1->5->4 [22] 6/-1/-1->5->4 [23] 6/-1/-1->5->4 +2024-03-09 06:25:35.611 n213-017-210:2252780:2254210 [4] NCCL INFO Trees [0] 5/-1/-1->4->3 [1] 5/-1/-1->4->3 [2] 5/-1/-1->4->3 [3] 5/-1/-1->4->3 [4] 5/-1/-1->4->3 [5] 5/-1/-1->4->3 [6] 5/-1/-1->4->3 [7] 5/-1/-1->4->3 [8] 5/-1/-1->4->3 [9] 5/-1/-1->4->3 [10] 5/-1/-1->4->3 [11] 5/-1/-1->4->3 [12] 5/-1/-1->4->3 [13] 5/-1/-1->4->3 [14] 5/-1/-1->4->3 [15] 5/-1/-1->4->3 [16] 5/-1/-1->4->3 [17] 5/-1/-1->4->3 [18] 5/-1/-1->4->3 [19] 5/-1/-1->4->3 [20] 5/-1/-1->4->3 [21] 5/-1/-1->4->3 [22] 5/-1/-1->4->3 [23] 5/-1/-1->4->3 +2024-03-09 06:25:35.611 n213-017-210:2252777:2254209 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 [2] 2/-1/-1->1->0 [3] 2/-1/-1->1->0 [4] 2/-1/-1->1->0 [5] 2/-1/-1->1->0 [6] 2/-1/-1->1->0 [7] 2/-1/-1->1->0 [8] 2/-1/-1->1->0 [9] 2/-1/-1->1->0 [10] 2/-1/-1->1->0 [11] 2/-1/-1->1->0 [12] 2/-1/-1->1->0 [13] 2/-1/-1->1->0 [14] 2/-1/-1->1->0 [15] 2/-1/-1->1->0 [16] 2/-1/-1->1->0 [17] 2/-1/-1->1->0 [18] 2/-1/-1->1->0 [19] 2/-1/-1->1->0 [20] 2/-1/-1->1->0 [21] 2/-1/-1->1->0 [22] 2/-1/-1->1->0 [23] 2/-1/-1->1->0 +2024-03-09 06:25:35.611 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 02/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.611 n213-017-210:2252780:2254210 [4] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:35.611 n213-017-210:2252781:2254212 [5] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:35.611 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 03/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.611 n213-017-210:2252777:2254209 [1] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:35.611 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 04/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.611 n213-017-210:2252778:2254211 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 [2] 3/-1/-1->2->1 [3] 3/-1/-1->2->1 [4] 3/-1/-1->2->1 [5] 3/-1/-1->2->1 [6] 3/-1/-1->2->1 [7] 3/-1/-1->2->1 [8] 3/-1/-1->2->1 [9] 3/-1/-1->2->1 [10] 3/-1/-1->2->1 [11] 3/-1/-1->2->1 [12] 3/-1/-1->2->1 [13] 3/-1/-1->2->1 [14] 3/-1/-1->2->1 [15] 3/-1/-1->2->1 [16] 3/-1/-1->2->1 [17] 3/-1/-1->2->1 [18] 3/-1/-1->2->1 [19] 3/-1/-1->2->1 [20] 3/-1/-1->2->1 [21] 3/-1/-1->2->1 [22] 3/-1/-1->2->1 [23] 3/-1/-1->2->1 +2024-03-09 06:25:35.611 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 05/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.611 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 06/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.611 n213-017-210:2252779:2254208 [3] NCCL INFO Trees [0] 4/-1/-1->3->2 [1] 4/-1/-1->3->2 [2] 4/-1/-1->3->2 [3] 4/-1/-1->3->2 [4] 4/-1/-1->3->2 [5] 4/-1/-1->3->2 [6] 4/-1/-1->3->2 [7] 4/-1/-1->3->2 [8] 4/-1/-1->3->2 [9] 4/-1/-1->3->2 [10] 4/-1/-1->3->2 [11] 4/-1/-1->3->2 [12] 4/-1/-1->3->2 [13] 4/-1/-1->3->2 [14] 4/-1/-1->3->2 [15] 4/-1/-1->3->2 [16] 4/-1/-1->3->2 [17] 4/-1/-1->3->2 [18] 4/-1/-1->3->2 [19] 4/-1/-1->3->2 [20] 4/-1/-1->3->2 [21] 4/-1/-1->3->2 [22] 4/-1/-1->3->2 [23] 4/-1/-1->3->2 +2024-03-09 06:25:35.611 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 07/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.611 n213-017-210:2252779:2254208 [3] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:35.611 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 08/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.611 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 09/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.611 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 10/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 11/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.612 n213-017-210:2252778:2254211 [2] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 12/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 13/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 14/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 15/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 16/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 17/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 18/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 19/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 20/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 21/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 22/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 23/24 : 0 1 2 3 4 5 6 7 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] 1/-1/-1->0->-1 [2] 1/-1/-1->0->-1 [3] 1/-1/-1->0->-1 [4] 1/-1/-1->0->-1 [5] 1/-1/-1->0->-1 [6] 1/-1/-1->0->-1 [7] 1/-1/-1->0->-1 [8] 1/-1/-1->0->-1 [9] 1/-1/-1->0->-1 [10] 1/-1/-1->0->-1 [11] 1/-1/-1->0->-1 [12] 1/-1/-1->0->-1 [13] 1/-1/-1->0->-1 [14] 1/-1/-1->0->-1 [15] 1/-1/-1->0->-1 [16] 1/-1/-1->0->-1 [17] 1/-1/-1->0->-1 [18] 1/-1/-1->0->-1 [19] 1/-1/-1->0->-1 [20] 1/-1/-1->0->-1 [21] 1/-1/-1->0->-1 [22] 1/-1/-1->0->-1 [23] 1/-1/-1->0->-1 +2024-03-09 06:25:35.612 n213-017-210:2252776:2254207 [0] NCCL INFO P2P Chunksize set to 524288 +2024-03-09 06:25:35.984 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 00/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:35.984 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 00/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:35.985 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 00/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:35.985 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 00/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:35.985 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 00/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:35.985 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 00/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:35.985 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 00/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:35.986 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 01/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:35.986 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 01/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:35.987 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 01/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:35.987 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 01/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:35.987 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 01/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:35.987 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 01/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:35.987 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 01/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:35.988 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 02/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:35.988 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 02/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:35.989 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 02/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:35.989 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 02/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:35.989 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 02/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:35.989 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 02/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:35.989 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 02/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:35.990 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 03/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:35.990 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 03/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:35.991 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 03/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:35.991 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 03/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:35.991 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 03/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:35.991 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 03/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:35.991 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 03/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:35.992 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 04/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:35.992 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 04/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:35.992 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 04/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:35.993 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 04/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:35.993 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 04/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:35.993 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 04/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:35.993 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 04/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:35.994 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 05/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:35.994 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 05/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:35.994 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 05/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:35.995 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 05/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:35.995 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 05/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:35.995 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 05/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:35.995 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 05/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:35.996 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 06/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:35.996 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 06/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:35.996 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 06/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:35.997 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 06/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:35.997 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 06/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:35.997 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 06/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:35.997 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 06/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:35.998 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 00/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:35.998 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 07/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:35.998 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 07/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:35.998 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 07/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:35.998 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 07/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:35.999 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 07/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:35.999 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 07/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:35.999 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 07/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.000 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 08/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.000 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 01/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.000 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 08/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.000 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 08/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.000 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 08/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.001 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 08/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.001 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 08/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.001 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 08/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.002 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 09/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.002 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 02/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.002 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 09/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.003 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 09/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.003 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 09/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.003 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 09/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.003 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 09/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.003 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 09/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.004 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 10/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.004 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 03/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.004 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 10/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.005 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 10/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.005 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 10/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.005 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 10/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.006 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 10/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.006 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 11/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.006 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 04/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.006 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 11/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.007 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 11/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.007 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 11/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.007 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 10/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.007 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 11/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.008 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 11/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.009 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 12/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.009 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 05/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.009 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 12/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.009 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 12/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.009 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 11/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.010 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 12/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.010 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 12/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.011 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 13/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.011 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 06/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.011 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 13/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.011 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 12/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.011 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 13/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.012 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 12/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.012 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 13/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.013 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 13/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.013 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 14/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.013 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 14/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.013 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 07/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.014 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 13/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.014 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 14/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.014 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 13/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.014 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 14/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.015 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 14/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.015 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 15/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.015 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 15/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.016 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 08/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.016 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 14/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.016 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 15/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.016 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 14/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.017 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 15/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.017 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 15/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.017 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 16/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.018 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 16/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.018 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 09/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.018 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 15/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.019 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 16/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.019 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 15/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.019 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 16/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.019 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 16/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.020 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 17/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.020 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 17/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.021 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 10/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.021 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 16/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.021 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 17/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.021 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 16/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.021 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 17/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.022 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 17/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.022 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 18/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.022 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 18/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.023 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 11/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.023 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 17/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.023 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 18/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.024 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 17/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.024 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 18/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.024 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 18/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.025 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 19/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.025 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 19/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.026 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 12/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.026 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 18/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.026 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 19/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.026 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 18/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.026 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 19/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.026 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 19/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.027 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 20/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.027 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 20/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.028 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 13/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.028 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 19/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.028 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 20/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.028 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 19/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.029 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 20/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.029 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 20/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.029 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 21/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.030 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 21/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.030 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 14/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.030 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 20/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.030 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 21/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.031 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 20/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.031 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 21/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.031 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 21/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.032 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 22/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.032 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 22/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.033 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 15/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.033 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 21/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.033 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 22/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.033 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 21/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.034 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 22/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.034 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 22/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.034 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 23/0 : 7[7] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.035 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 23/0 : 6[6] -> 7[7] via P2P/CUMEM/read +2024-03-09 06:25:36.035 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 16/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.035 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 22/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.036 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 23/0 : 4[4] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.036 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 22/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.036 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 23/0 : 1[1] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.036 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 23/0 : 2[2] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.038 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 17/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.038 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 23/0 : 5[5] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.038 n213-017-210:2252776:2254207 [0] NCCL INFO Channel 23/0 : 0[0] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.040 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 18/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.041 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 19/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.043 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 20/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.045 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 21/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.047 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 22/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.049 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 23/0 : 3[3] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.624 n213-017-210:2252777:2254209 [1] NCCL INFO Connected all rings +2024-03-09 06:25:36.629 n213-017-210:2252776:2254207 [0] NCCL INFO Connected all rings +2024-03-09 06:25:36.636 n213-017-210:2252778:2254211 [2] NCCL INFO Connected all rings +2024-03-09 06:25:36.661 n213-017-210:2252779:2254208 [3] NCCL INFO Connected all rings +2024-03-09 06:25:36.664 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 00/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.666 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 01/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.667 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 02/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.669 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 03/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.671 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 04/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.672 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 05/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.673 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 06/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.674 n213-017-210:2252780:2254210 [4] NCCL INFO Connected all rings +2024-03-09 06:25:36.674 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 07/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.676 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 08/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.677 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 09/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.678 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 10/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.679 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 11/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.680 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 12/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.681 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 13/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.682 n213-017-210:2252783:2254206 [7] NCCL INFO Connected all rings +2024-03-09 06:25:36.682 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 00/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.682 n213-017-210:2252781:2254212 [5] NCCL INFO Connected all rings +2024-03-09 06:25:36.682 n213-017-210:2252782:2254213 [6] NCCL INFO Connected all rings +2024-03-09 06:25:36.682 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 14/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.683 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 01/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.684 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 15/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.685 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 02/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.686 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 16/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.687 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 17/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.689 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 18/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.690 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 19/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.691 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 03/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.692 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 20/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.692 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 00/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.693 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 04/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.693 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 21/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.694 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 01/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.694 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 05/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.695 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 22/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.696 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 02/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.696 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 06/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.697 n213-017-210:2252777:2254209 [1] NCCL INFO Channel 23/0 : 1[1] -> 0[0] via P2P/CUMEM/read +2024-03-09 06:25:36.698 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 03/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.698 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 07/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.699 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 00/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.699 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 04/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.700 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 08/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.700 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 01/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.701 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 05/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.701 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 09/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.702 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 02/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.703 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 06/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.703 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 10/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.705 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 07/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.705 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 11/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.706 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 03/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.707 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 08/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.707 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 12/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.708 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 04/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.708 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 09/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.709 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 13/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.710 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 05/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.710 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 10/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.710 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 14/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.711 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 06/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.712 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 11/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.712 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 15/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.713 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 00/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.713 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 07/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.714 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 12/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.714 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 16/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.714 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 01/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.715 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 08/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.716 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 13/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.716 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 17/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.717 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 02/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.717 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 09/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.717 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 14/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.718 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 18/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.718 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 03/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.719 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 10/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.719 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 15/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.719 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 19/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.720 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 04/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.720 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 11/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.721 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 16/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.721 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 20/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.722 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 05/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.722 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 12/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.723 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 17/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.723 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 21/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.723 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 00/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.724 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 06/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.724 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 13/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.725 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 18/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.725 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 22/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.725 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 01/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.726 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 07/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.726 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 14/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.726 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 00/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.727 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 19/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.727 n213-017-210:2252783:2254206 [7] NCCL INFO Channel 23/0 : 7[7] -> 6[6] via P2P/CUMEM/read +2024-03-09 06:25:36.727 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 02/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.728 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 08/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.728 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 15/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.728 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 01/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.729 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 20/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.729 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 03/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.729 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 09/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.730 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 16/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.730 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 02/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.730 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 21/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.731 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 04/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.731 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 10/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.731 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 17/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.731 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 03/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.732 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 22/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.732 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 05/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.733 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 11/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.733 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 18/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.733 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 04/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.734 n213-017-210:2252778:2254211 [2] NCCL INFO Channel 23/0 : 2[2] -> 1[1] via P2P/CUMEM/read +2024-03-09 06:25:36.734 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 06/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.735 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 12/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.735 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 19/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.735 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 05/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.736 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 07/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.736 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 13/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.736 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 20/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.737 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 06/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.737 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 08/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.738 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 14/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.738 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 21/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.738 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 07/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.739 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 09/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.739 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 15/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.739 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 22/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.739 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 08/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.740 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 10/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.741 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 16/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.741 n213-017-210:2252779:2254208 [3] NCCL INFO Channel 23/0 : 3[3] -> 2[2] via P2P/CUMEM/read +2024-03-09 06:25:36.742 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 09/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.742 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 11/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.742 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 17/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.743 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 10/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.744 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 12/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.744 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 18/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.745 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 11/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.745 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 13/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.746 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 19/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.746 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 12/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.746 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 14/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.747 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 20/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.747 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 13/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.748 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 15/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.749 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 21/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.750 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 14/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.750 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 16/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.750 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 22/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.751 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 15/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.751 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 17/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.752 n213-017-210:2252780:2254210 [4] NCCL INFO Channel 23/0 : 4[4] -> 3[3] via P2P/CUMEM/read +2024-03-09 06:25:36.752 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 16/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.752 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 18/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.754 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 17/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.754 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 19/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.756 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 18/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.756 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 20/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.757 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 19/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.758 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 21/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.759 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 20/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.759 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 22/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.761 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 21/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.761 n213-017-210:2252782:2254213 [6] NCCL INFO Channel 23/0 : 6[6] -> 5[5] via P2P/CUMEM/read +2024-03-09 06:25:36.762 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 22/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:36.765 n213-017-210:2252781:2254212 [5] NCCL INFO Channel 23/0 : 5[5] -> 4[4] via P2P/CUMEM/read +2024-03-09 06:25:37.194 n213-017-210:2252776:2254207 [0] NCCL INFO Connected all trees +2024-03-09 06:25:37.194 n213-017-210:2252776:2254207 [0] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:37.194 n213-017-210:2252776:2254207 [0] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:37.230 n213-017-210:2252777:2254209 [1] NCCL INFO Connected all trees +2024-03-09 06:25:37.230 n213-017-210:2252777:2254209 [1] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:37.230 n213-017-210:2252777:2254209 [1] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:37.278 n213-017-210:2252778:2254211 [2] NCCL INFO Connected all trees +2024-03-09 06:25:37.278 n213-017-210:2252778:2254211 [2] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:37.278 n213-017-210:2252778:2254211 [2] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:37.286 n213-017-210:2252779:2254208 [3] NCCL INFO Connected all trees +2024-03-09 06:25:37.286 n213-017-210:2252779:2254208 [3] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:37.286 n213-017-210:2252779:2254208 [3] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:37.294 n213-017-210:2252780:2254210 [4] NCCL INFO Connected all trees +2024-03-09 06:25:37.294 n213-017-210:2252780:2254210 [4] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:37.294 n213-017-210:2252780:2254210 [4] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:37.294 n213-017-210:2252782:2254213 [6] NCCL INFO Connected all trees +2024-03-09 06:25:37.295 n213-017-210:2252781:2254212 [5] NCCL INFO Connected all trees +2024-03-09 06:25:37.295 n213-017-210:2252782:2254213 [6] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:37.295 n213-017-210:2252781:2254212 [5] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:37.295 n213-017-210:2252781:2254212 [5] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:37.295 n213-017-210:2252782:2254213 [6] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:37.298 n213-017-210:2252783:2254206 [7] NCCL INFO Connected all trees +2024-03-09 06:25:37.298 n213-017-210:2252783:2254206 [7] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512 +2024-03-09 06:25:37.298 n213-017-210:2252783:2254206 [7] NCCL INFO 24 coll channels, 0 nvls channels, 32 p2p channels, 32 p2p channels per peer +2024-03-09 06:25:37.321 n213-017-210:2252781:2254212 [5] NCCL INFO comm 0x6f5eb560 rank 5 nranks 8 cudaDev 5 nvmlDev 5 busId 8e000 commId 0x44ef0d75b7331d7c - Init COMPLETE +2024-03-09 06:25:37.321 n213-017-210:2252783:2254206 [7] NCCL INFO comm 0x6f584bc0 rank 7 nranks 8 cudaDev 7 nvmlDev 7 busId c9000 commId 0x44ef0d75b7331d7c - Init COMPLETE +2024-03-09 06:25:37.321 n213-017-210:2252780:2254210 [4] NCCL INFO comm 0x6fd97400 rank 4 nranks 8 cudaDev 4 nvmlDev 4 busId 89000 commId 0x44ef0d75b7331d7c - Init COMPLETE +2024-03-09 06:25:37.321 n213-017-210:2252782:2254213 [6] NCCL INFO comm 0x6f5c01c0 rank 6 nranks 8 cudaDev 6 nvmlDev 6 busId c5000 commId 0x44ef0d75b7331d7c - Init COMPLETE +2024-03-09 06:25:37.321 n213-017-210:2252779:2254208 [3] NCCL INFO comm 0x70282cc0 rank 3 nranks 8 cudaDev 3 nvmlDev 3 busId 4e000 commId 0x44ef0d75b7331d7c - Init COMPLETE +2024-03-09 06:25:37.321 n213-017-210:2252777:2254209 [1] NCCL INFO comm 0x6ece5cc0 rank 1 nranks 8 cudaDev 1 nvmlDev 1 busId 16000 commId 0x44ef0d75b7331d7c - Init COMPLETE +2024-03-09 06:25:37.321 n213-017-210:2252778:2254211 [2] NCCL INFO comm 0x6f970a00 rank 2 nranks 8 cudaDev 2 nvmlDev 2 busId 4a000 commId 0x44ef0d75b7331d7c - Init COMPLETE +2024-03-09 06:25:37.321 n213-017-210:2252776:2254207 [0] NCCL INFO comm 0x19413970 rank 0 nranks 8 cudaDev 0 nvmlDev 0 busId 10000 commId 0x44ef0d75b7331d7c - Init COMPLETE +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True). + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( +/usr/local/lib/python3.9/dist-packages/deepspeed/runtime/zero/stage_1_and_2.py:1586: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:85.) + total_norm_cuda = get_accelerator().FloatTensor([float(total_norm)]) +/usr/local/lib/python3.9/dist-packages/deepspeed/runtime/zero/stage_1_and_2.py:1586: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:85.) + total_norm_cuda = get_accelerator().FloatTensor([float(total_norm)]) +/usr/local/lib/python3.9/dist-packages/deepspeed/runtime/zero/stage_1_and_2.py:1586: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:85.) + total_norm_cuda = get_accelerator().FloatTensor([float(total_norm)]) +/usr/local/lib/python3.9/dist-packages/deepspeed/runtime/zero/stage_1_and_2.py:1586: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:85.) + total_norm_cuda = get_accelerator().FloatTensor([float(total_norm)]) +/usr/local/lib/python3.9/dist-packages/deepspeed/runtime/zero/stage_1_and_2.py:1586: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:85.) + total_norm_cuda = get_accelerator().FloatTensor([float(total_norm)]) +/usr/local/lib/python3.9/dist-packages/deepspeed/runtime/zero/stage_1_and_2.py:1586: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:85.) + total_norm_cuda = get_accelerator().FloatTensor([float(total_norm)]) +/usr/local/lib/python3.9/dist-packages/deepspeed/runtime/zero/stage_1_and_2.py:1586: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:85.) + total_norm_cuda = get_accelerator().FloatTensor([float(total_norm)]) +/usr/local/lib/python3.9/dist-packages/deepspeed/runtime/zero/stage_1_and_2.py:1586: UserWarning: The torch.cuda.*DtypeTensor constructors are no longer recommended. It's best to use methods such as torch.tensor(data, dtype=*, device='cuda') to create tensors. (Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:85.) + total_norm_cuda = get_accelerator().FloatTensor([float(total_norm)]) + 0%| | 1/7045 [00:23<45:24:42, 23.21s/it] {'loss': 1.3232, 'learning_rate': 2.358490566037736e-08, 'epoch': 0.0} + 0%| | 1/7045 [00:23<45:24:42, 23.21s/it] 0%| | 2/7045 [00:35<32:49:55, 16.78s/it] {'loss': 1.3271, 'learning_rate': 4.716981132075472e-08, 'epoch': 0.0} + 0%| | 2/7045 [00:35<32:49:55, 16.78s/it] 0%| | 3/7045 [00:46<27:39:41, 14.14s/it] {'loss': 1.3477, 'learning_rate': 7.075471698113208e-08, 'epoch': 0.0} + 0%| | 3/7045 [00:46<27:39:41, 14.14s/it] 0%| | 4/7045 [00:57<25:18:43, 12.94s/it] {'loss': 1.334, 'learning_rate': 9.433962264150944e-08, 'epoch': 0.0} + 0%| | 4/7045 [00:57<25:18:43, 12.94s/it] 0%| | 5/7045 [01:08<23:56:43, 12.24s/it] {'loss': 1.3379, 'learning_rate': 1.179245283018868e-07, 'epoch': 0.0} + 0%| | 5/7045 [01:08<23:56:43, 12.24s/it] 0%| | 6/7045 [01:19<23:05:28, 11.81s/it] {'loss': 1.3525, 'learning_rate': 1.4150943396226417e-07, 'epoch': 0.0} + 0%| | 6/7045 [01:19<23:05:28, 11.81s/it] 0%| | 7/7045 [01:30<22:44:05, 11.63s/it] {'loss': 1.3887, 'learning_rate': 1.6509433962264153e-07, 'epoch': 0.0} + 0%| | 7/7045 [01:30<22:44:05, 11.63s/it] 0%| | 8/7045 [01:41<22:22:43, 11.45s/it] {'loss': 1.3379, 'learning_rate': 1.886792452830189e-07, 'epoch': 0.0} + 0%| | 8/7045 [01:41<22:22:43, 11.45s/it] 0%| | 9/7045 [01:53<22:18:36, 11.42s/it] {'loss': 1.3311, 'learning_rate': 2.1226415094339622e-07, 'epoch': 0.0} + 0%| | 9/7045 [01:53<22:18:36, 11.42s/it] 0%| | 10/7045 [02:04<22:06:57, 11.32s/it] {'loss': 1.3672, 'learning_rate': 2.358490566037736e-07, 'epoch': 0.0} + 0%| | 10/7045 [02:04<22:06:57, 11.32s/it] 0%| | 11/7045 [02:16<22:33:47, 11.55s/it] {'loss': 1.3203, 'learning_rate': 2.59433962264151e-07, 'epoch': 0.0} + 0%| | 11/7045 [02:16<22:33:47, 11.55s/it] 0%| | 12/7045 [02:27<22:28:51, 11.51s/it] {'loss': 1.3379, 'learning_rate': 2.8301886792452833e-07, 'epoch': 0.0} + 0%| | 12/7045 [02:27<22:28:51, 11.51s/it] 0%| | 13/7045 [02:40<23:12:11, 11.88s/it] {'loss': 1.3027, 'learning_rate': 3.0660377358490567e-07, 'epoch': 0.0} + 0%| | 13/7045 [02:40<23:12:11, 11.88s/it] 0%| | 14/7045 [02:51<22:51:06, 11.70s/it] {'loss': 1.3564, 'learning_rate': 3.3018867924528305e-07, 'epoch': 0.0} + 0%| | 14/7045 [02:51<22:51:06, 11.70s/it] 0%| | 15/7045 [03:02<22:29:45, 11.52s/it] {'loss': 1.3711, 'learning_rate': 3.537735849056604e-07, 'epoch': 0.0} + 0%| | 15/7045 [03:02<22:29:45, 11.52s/it] 0%| | 16/7045 [03:14<22:15:17, 11.40s/it] {'loss': 1.3555, 'learning_rate': 3.773584905660378e-07, 'epoch': 0.0} + 0%| | 16/7045 [03:14<22:15:17, 11.40s/it] 0%| | 17/7045 [03:24<21:58:35, 11.26s/it] {'loss': 1.3701, 'learning_rate': 4.009433962264151e-07, 'epoch': 0.0} + 0%| | 17/7045 [03:24<21:58:35, 11.26s/it] 0%| | 18/7045 [03:35<21:45:49, 11.15s/it] {'loss': 1.3154, 'learning_rate': 4.2452830188679244e-07, 'epoch': 0.0} + 0%| | 18/7045 [03:35<21:45:49, 11.15s/it] 0%| | 19/7045 [03:47<22:12:10, 11.38s/it] {'loss': 1.333, 'learning_rate': 4.4811320754716983e-07, 'epoch': 0.0} + 0%| | 19/7045 [03:47<22:12:10, 11.38s/it] 0%| | 20/7045 [04:02<24:26:52, 12.53s/it] {'loss': 1.3467, 'learning_rate': 4.716981132075472e-07, 'epoch': 0.0} + 0%| | 20/7045 [04:03<24:26:52, 12.53s/it] 0%| | 21/7045 [04:14<23:35:47, 12.09s/it] {'loss': 1.2949, 'learning_rate': 4.952830188679246e-07, 'epoch': 0.0} + 0%| | 21/7045 [04:14<23:35:47, 12.09s/it] 0%| | 22/7045 [04:25<23:09:03, 11.87s/it] {'loss': 1.3311, 'learning_rate': 5.18867924528302e-07, 'epoch': 0.0} + 0%| | 22/7045 [04:25<23:09:03, 11.87s/it] 0%| | 23/7045 [04:36<22:36:22, 11.59s/it] {'loss': 1.3164, 'learning_rate': 5.424528301886793e-07, 'epoch': 0.0} + 0%| | 23/7045 [04:36<22:36:22, 11.59s/it] 0%| | 24/7045 [04:47<22:21:38, 11.47s/it] {'loss': 1.3096, 'learning_rate': 5.660377358490567e-07, 'epoch': 0.0} + 0%| | 24/7045 [04:47<22:21:38, 11.47s/it] 0%| | 25/7045 [04:58<22:07:52, 11.35s/it] {'loss': 1.3213, 'learning_rate': 5.89622641509434e-07, 'epoch': 0.0} + 0%| | 25/7045 [04:58<22:07:52, 11.35s/it] 0%| | 26/7045 [05:09<21:54:01, 11.23s/it] {'loss': 1.3467, 'learning_rate': 6.132075471698113e-07, 'epoch': 0.0} + 0%| | 26/7045 [05:09<21:54:01, 11.23s/it] 0%| | 27/7045 [05:21<22:19:47, 11.45s/it] {'loss': 1.3213, 'learning_rate': 6.367924528301888e-07, 'epoch': 0.0} + 0%| | 27/7045 [05:21<22:19:47, 11.45s/it] 0%| | 28/7045 [05:32<22:06:41, 11.34s/it] {'loss': 1.3271, 'learning_rate': 6.603773584905661e-07, 'epoch': 0.0} + 0%| | 28/7045 [05:32<22:06:41, 11.34s/it] 0%| | 29/7045 [05:43<22:05:58, 11.34s/it] {'loss': 1.3594, 'learning_rate': 6.839622641509434e-07, 'epoch': 0.0} + 0%| | 29/7045 [05:43<22:05:58, 11.34s/it] 0%| | 30/7045 [05:56<22:50:01, 11.72s/it] {'loss': 1.2861, 'learning_rate': 7.075471698113208e-07, 'epoch': 0.0} + 0%| | 30/7045 [05:56<22:50:01, 11.72s/it] 0%| | 31/7045 [06:07<22:33:23, 11.58s/it] {'loss': 1.3086, 'learning_rate': 7.311320754716981e-07, 'epoch': 0.0} + 0%| | 31/7045 [06:07<22:33:23, 11.58s/it] 0%| | 32/7045 [06:21<23:40:16, 12.15s/it] {'loss': 1.333, 'learning_rate': 7.547169811320755e-07, 'epoch': 0.0} + 0%| | 32/7045 [06:21<23:40:16, 12.15s/it] 0%| | 33/7045 [06:33<23:56:50, 12.29s/it] {'loss': 1.2344, 'learning_rate': 7.783018867924529e-07, 'epoch': 0.0} + 0%| | 33/7045 [06:33<23:56:50, 12.29s/it] 0%| | 34/7045 [06:45<23:27:43, 12.05s/it] {'loss': 1.2988, 'learning_rate': 8.018867924528302e-07, 'epoch': 0.0} + 0%| | 34/7045 [06:45<23:27:43, 12.05s/it] 0%| | 35/7045 [06:56<23:07:45, 11.88s/it] {'loss': 1.2959, 'learning_rate': 8.254716981132076e-07, 'epoch': 0.0} + 0%| | 35/7045 [06:56<23:07:45, 11.88s/it] 1%| | 36/7045 [07:08<22:48:17, 11.71s/it] {'loss': 1.3242, 'learning_rate': 8.490566037735849e-07, 'epoch': 0.01} + 1%| | 36/7045 [07:08<22:48:17, 11.71s/it] 1%| | 37/7045 [07:19<22:41:30, 11.66s/it] {'loss': 1.3135, 'learning_rate': 8.726415094339623e-07, 'epoch': 0.01} + 1%| | 37/7045 [07:19<22:41:30, 11.66s/it] 1%| | 38/7045 [07:30<22:17:08, 11.45s/it] {'loss': 1.3145, 'learning_rate': 8.962264150943397e-07, 'epoch': 0.01} + 1%| | 38/7045 [07:30<22:17:08, 11.45s/it] 1%| | 39/7045 [07:42<22:13:36, 11.42s/it] {'loss': 1.2529, 'learning_rate': 9.19811320754717e-07, 'epoch': 0.01} + 1%| | 39/7045 [07:42<22:13:36, 11.42s/it] 1%| | 40/7045 [07:52<21:54:19, 11.26s/it] {'loss': 1.2734, 'learning_rate': 9.433962264150944e-07, 'epoch': 0.01} + 1%| | 40/7045 [07:52<21:54:19, 11.26s/it] 1%| | 41/7045 [08:04<21:56:46, 11.28s/it] {'loss': 1.2686, 'learning_rate': 9.669811320754719e-07, 'epoch': 0.01} + 1%| | 41/7045 [08:04<21:56:46, 11.28s/it] 1%| | 42/7045 [08:16<22:37:50, 11.63s/it] {'loss': 1.2588, 'learning_rate': 9.90566037735849e-07, 'epoch': 0.01} + 1%| | 42/7045 [08:16<22:37:50, 11.63s/it] 1%| | 43/7045 [08:28<22:49:13, 11.73s/it] {'loss': 1.2676, 'learning_rate': 1.0141509433962265e-06, 'epoch': 0.01} + 1%| | 43/7045 [08:28<22:49:13, 11.73s/it] 1%| | 44/7045 [08:41<23:18:23, 11.98s/it] {'loss': 1.249, 'learning_rate': 1.037735849056604e-06, 'epoch': 0.01} + 1%| | 44/7045 [08:41<23:18:23, 11.98s/it] 1%| | 45/7045 [08:52<22:55:31, 11.79s/it] {'loss': 1.2314, 'learning_rate': 1.0613207547169812e-06, 'epoch': 0.01} + 1%| | 45/7045 [08:52<22:55:31, 11.79s/it] 1%| | 46/7045 [09:03<22:30:11, 11.57s/it] {'loss': 1.2705, 'learning_rate': 1.0849056603773587e-06, 'epoch': 0.01} + 1%| | 46/7045 [09:03<22:30:11, 11.57s/it] 1%| | 47/7045 [09:16<23:07:37, 11.90s/it] {'loss': 1.2188, 'learning_rate': 1.1084905660377359e-06, 'epoch': 0.01} + 1%| | 47/7045 [09:16<23:07:37, 11.90s/it] 1%| | 48/7045 [09:31<25:17:44, 13.01s/it] {'loss': 1.252, 'learning_rate': 1.1320754716981133e-06, 'epoch': 0.01} + 1%| | 48/7045 [09:31<25:17:44, 13.01s/it] 1%| | 49/7045 [09:44<24:57:11, 12.84s/it] {'loss': 1.2383, 'learning_rate': 1.1556603773584908e-06, 'epoch': 0.01} + 1%| | 49/7045 [09:44<24:57:11, 12.84s/it] 1%| | 50/7045 [09:57<24:58:43, 12.86s/it] {'loss': 1.2236, 'learning_rate': 1.179245283018868e-06, 'epoch': 0.01} + 1%| | 50/7045 [09:57<24:58:43, 12.86s/it] 1%| | 51/7045 [10:08<23:48:54, 12.26s/it] {'loss': 1.2666, 'learning_rate': 1.2028301886792454e-06, 'epoch': 0.01} + 1%| | 51/7045 [10:08<23:48:54, 12.26s/it] 1%| | 52/7045 [10:19<23:14:36, 11.97s/it] {'loss': 1.2314, 'learning_rate': 1.2264150943396227e-06, 'epoch': 0.01} + 1%| | 52/7045 [10:19<23:14:36, 11.97s/it] 1%| | 53/7045 [10:30<22:56:32, 11.81s/it] {'loss': 1.2158, 'learning_rate': 1.25e-06, 'epoch': 0.01} + 1%| | 53/7045 [10:30<22:56:32, 11.81s/it] 1%| | 54/7045 [10:42<22:44:43, 11.71s/it] {'loss': 1.2344, 'learning_rate': 1.2735849056603775e-06, 'epoch': 0.01} + 1%| | 54/7045 [10:42<22:44:43, 11.71s/it] 1%| | 55/7045 [10:53<22:40:52, 11.68s/it] {'loss': 1.2578, 'learning_rate': 1.2971698113207548e-06, 'epoch': 0.01} + 1%| | 55/7045 [10:53<22:40:52, 11.68s/it] 1%| | 56/7045 [11:05<22:26:28, 11.56s/it] {'loss': 1.2168, 'learning_rate': 1.3207547169811322e-06, 'epoch': 0.01} + 1%| | 56/7045 [11:05<22:26:28, 11.56s/it] 1%| | 57/7045 [11:16<22:19:01, 11.50s/it] {'loss': 1.2246, 'learning_rate': 1.3443396226415094e-06, 'epoch': 0.01} + 1%| | 57/7045 [11:16<22:19:01, 11.50s/it] 1%| | 58/7045 [11:28<22:18:00, 11.49s/it] {'loss': 1.2295, 'learning_rate': 1.3679245283018869e-06, 'epoch': 0.01} + 1%| | 58/7045 [11:28<22:18:00, 11.49s/it] 1%| | 59/7045 [11:39<22:17:09, 11.48s/it] {'loss': 1.2646, 'learning_rate': 1.3915094339622643e-06, 'epoch': 0.01} + 1%| | 59/7045 [11:39<22:17:09, 11.48s/it] 1%| | 60/7045 [11:52<22:53:39, 11.80s/it] {'loss': 1.2422, 'learning_rate': 1.4150943396226415e-06, 'epoch': 0.01} + 1%| | 60/7045 [11:52<22:53:39, 11.80s/it] 1%| | 61/7045 [12:03<22:53:17, 11.80s/it] {'loss': 1.2354, 'learning_rate': 1.438679245283019e-06, 'epoch': 0.01} + 1%| | 61/7045 [12:03<22:53:17, 11.80s/it] 1%| | 62/7045 [12:14<22:29:26, 11.59s/it] {'loss': 1.2217, 'learning_rate': 1.4622641509433962e-06, 'epoch': 0.01} + 1%| | 62/7045 [12:14<22:29:26, 11.59s/it] 1%| | 63/7045 [12:26<22:17:22, 11.49s/it] {'loss': 1.2334, 'learning_rate': 1.4858490566037737e-06, 'epoch': 0.01} + 1%| | 63/7045 [12:26<22:17:22, 11.49s/it] 1%| | 64/7045 [12:38<22:53:56, 11.81s/it] {'loss': 1.2275, 'learning_rate': 1.509433962264151e-06, 'epoch': 0.01} + 1%| | 64/7045 [12:38<22:53:56, 11.81s/it] 1%| | 65/7045 [12:50<22:33:37, 11.64s/it] {'loss': 1.2305, 'learning_rate': 1.5330188679245283e-06, 'epoch': 0.01} + 1%| | 65/7045 [12:50<22:33:37, 11.64s/it] 1%| | 66/7045 [13:01<22:11:56, 11.45s/it] {'loss': 1.2324, 'learning_rate': 1.5566037735849058e-06, 'epoch': 0.01} + 1%| | 66/7045 [13:01<22:11:56, 11.45s/it] 1%| | 67/7045 [13:15<23:42:04, 12.23s/it] {'loss': 1.1934, 'learning_rate': 1.580188679245283e-06, 'epoch': 0.01} + 1%| | 67/7045 [13:15<23:42:04, 12.23s/it] 1%| | 68/7045 [13:27<24:05:04, 12.43s/it] {'loss': 1.1729, 'learning_rate': 1.6037735849056604e-06, 'epoch': 0.01} + 1%| | 68/7045 [13:27<24:05:04, 12.43s/it] 1%| | 69/7045 [13:41<24:32:45, 12.67s/it] {'loss': 1.1987, 'learning_rate': 1.6273584905660379e-06, 'epoch': 0.01} + 1%| | 69/7045 [13:41<24:32:45, 12.67s/it] 1%| | 70/7045 [13:52<23:34:38, 12.17s/it] {'loss': 1.1855, 'learning_rate': 1.650943396226415e-06, 'epoch': 0.01} + 1%| | 70/7045 [13:52<23:34:38, 12.17s/it] 1%| | 71/7045 [14:04<23:24:45, 12.09s/it] {'loss': 1.1992, 'learning_rate': 1.6745283018867925e-06, 'epoch': 0.01} + 1%| | 71/7045 [14:04<23:24:45, 12.09s/it] 1%| | 72/7045 [14:15<22:59:33, 11.87s/it] {'loss': 1.209, 'learning_rate': 1.6981132075471698e-06, 'epoch': 0.01} + 1%| | 72/7045 [14:15<22:59:33, 11.87s/it] 1%| | 73/7045 [14:26<22:35:38, 11.67s/it] {'loss': 1.2334, 'learning_rate': 1.7216981132075472e-06, 'epoch': 0.01} + 1%| | 73/7045 [14:26<22:35:38, 11.67s/it] 1%| | 74/7045 [14:38<22:38:28, 11.69s/it] {'loss': 1.2217, 'learning_rate': 1.7452830188679247e-06, 'epoch': 0.01} + 1%| | 74/7045 [14:38<22:38:28, 11.69s/it] 1%| | 75/7045 [14:50<22:43:01, 11.73s/it] {'loss': 1.2227, 'learning_rate': 1.7688679245283019e-06, 'epoch': 0.01} + 1%| | 75/7045 [14:50<22:43:01, 11.73s/it] 1%| | 76/7045 [15:01<22:25:21, 11.58s/it] {'loss': 1.21, 'learning_rate': 1.7924528301886793e-06, 'epoch': 0.01} + 1%| | 76/7045 [15:01<22:25:21, 11.58s/it] 1%| | 77/7045 [15:13<22:51:32, 11.81s/it] {'loss': 1.2188, 'learning_rate': 1.8160377358490566e-06, 'epoch': 0.01} + 1%| | 77/7045 [15:13<22:51:32, 11.81s/it] 1%| | 78/7045 [15:25<22:33:25, 11.66s/it] {'loss': 1.2383, 'learning_rate': 1.839622641509434e-06, 'epoch': 0.01} + 1%| | 78/7045 [15:25<22:33:25, 11.66s/it] 1%| | 79/7045 [15:37<22:53:28, 11.83s/it] {'loss': 1.2119, 'learning_rate': 1.8632075471698114e-06, 'epoch': 0.01} + 1%| | 79/7045 [15:37<22:53:28, 11.83s/it] 1%| | 80/7045 [15:48<22:27:14, 11.61s/it] {'loss': 1.2168, 'learning_rate': 1.8867924528301889e-06, 'epoch': 0.01} + 1%| | 80/7045 [15:48<22:27:14, 11.61s/it] 1%| | 81/7045 [15:59<22:07:23, 11.44s/it] {'loss': 1.208, 'learning_rate': 1.9103773584905665e-06, 'epoch': 0.01} + 1%| | 81/7045 [15:59<22:07:23, 11.44s/it] 1%| | 82/7045 [16:10<21:52:17, 11.31s/it] {'loss': 1.2451, 'learning_rate': 1.9339622641509438e-06, 'epoch': 0.01} + 1%| | 82/7045 [16:10<21:52:17, 11.31s/it] 1%| | 83/7045 [16:22<22:30:10, 11.64s/it] {'loss': 1.1904, 'learning_rate': 1.957547169811321e-06, 'epoch': 0.01} + 1%| | 83/7045 [16:22<22:30:10, 11.64s/it] 1%| | 84/7045 [16:35<22:59:43, 11.89s/it] {'loss': 1.1729, 'learning_rate': 1.981132075471698e-06, 'epoch': 0.01} + 1%| | 84/7045 [16:35<22:59:43, 11.89s/it] 1%| | 85/7045 [16:46<22:35:50, 11.69s/it] {'loss': 1.2148, 'learning_rate': 2.004716981132076e-06, 'epoch': 0.01} + 1%| | 85/7045 [16:46<22:35:50, 11.69s/it] 1%| | 86/7045 [16:57<22:11:25, 11.48s/it] {'loss': 1.1816, 'learning_rate': 2.028301886792453e-06, 'epoch': 0.01} + 1%| | 86/7045 [16:57<22:11:25, 11.48s/it] 1%| | 87/7045 [17:08<21:44:29, 11.25s/it] {'loss': 1.2305, 'learning_rate': 2.0518867924528303e-06, 'epoch': 0.01} + 1%| | 87/7045 [17:08<21:44:29, 11.25s/it] 1%| | 88/7045 [17:19<21:41:54, 11.23s/it] {'loss': 1.2305, 'learning_rate': 2.075471698113208e-06, 'epoch': 0.01} + 1%| | 88/7045 [17:19<21:41:54, 11.23s/it] 1%|▏ | 89/7045 [17:30<21:50:16, 11.30s/it] {'loss': 1.2148, 'learning_rate': 2.099056603773585e-06, 'epoch': 0.01} + 1%|▏ | 89/7045 [17:30<21:50:16, 11.30s/it] 1%|▏ | 90/7045 [17:43<22:48:07, 11.80s/it] {'loss': 1.2383, 'learning_rate': 2.1226415094339624e-06, 'epoch': 0.01} + 1%|▏ | 90/7045 [17:43<22:48:07, 11.80s/it] 1%|▏ | 91/7045 [17:55<22:46:40, 11.79s/it] {'loss': 1.1836, 'learning_rate': 2.14622641509434e-06, 'epoch': 0.01} + 1%|▏ | 91/7045 [17:55<22:46:40, 11.79s/it] 1%|▏ | 92/7045 [18:06<22:17:37, 11.54s/it] {'loss': 1.2344, 'learning_rate': 2.1698113207547173e-06, 'epoch': 0.01} + 1%|▏ | 92/7045 [18:06<22:17:37, 11.54s/it] 1%|▏ | 93/7045 [18:20<23:47:18, 12.32s/it] {'loss': 1.1982, 'learning_rate': 2.1933962264150945e-06, 'epoch': 0.01} + 1%|▏ | 93/7045 [18:20<23:47:18, 12.32s/it] 1%|▏ | 94/7045 [18:32<23:09:59, 12.00s/it] {'loss': 1.2256, 'learning_rate': 2.2169811320754718e-06, 'epoch': 0.01} + 1%|▏ | 94/7045 [18:32<23:09:59, 12.00s/it] 1%|▏ | 95/7045 [18:43<22:46:34, 11.80s/it] {'loss': 1.2266, 'learning_rate': 2.2405660377358494e-06, 'epoch': 0.01} + 1%|▏ | 95/7045 [18:43<22:46:34, 11.80s/it] 1%|▏ | 96/7045 [18:56<23:27:16, 12.15s/it] {'loss': 1.1953, 'learning_rate': 2.2641509433962266e-06, 'epoch': 0.01} + 1%|▏ | 96/7045 [18:56<23:27:16, 12.15s/it] 1%|▏ | 97/7045 [19:07<22:49:44, 11.83s/it] {'loss': 1.2207, 'learning_rate': 2.287735849056604e-06, 'epoch': 0.01} + 1%|▏ | 97/7045 [19:07<22:49:44, 11.83s/it] 1%|▏ | 98/7045 [19:18<22:21:00, 11.58s/it] {'loss': 1.1904, 'learning_rate': 2.3113207547169815e-06, 'epoch': 0.01} + 1%|▏ | 98/7045 [19:18<22:21:00, 11.58s/it] 1%|▏ | 99/7045 [19:32<23:33:41, 12.21s/it] {'loss': 1.168, 'learning_rate': 2.3349056603773588e-06, 'epoch': 0.01} + 1%|▏ | 99/7045 [19:32<23:33:41, 12.21s/it] 1%|▏ | 100/7045 [19:44<23:52:17, 12.37s/it] {'loss': 1.1689, 'learning_rate': 2.358490566037736e-06, 'epoch': 0.01} + 1%|▏ | 100/7045 [19:44<23:52:17, 12.37s/it] 1%|▏ | 101/7045 [19:57<23:55:17, 12.40s/it] {'loss': 1.1807, 'learning_rate': 2.3820754716981136e-06, 'epoch': 0.01} + 1%|▏ | 101/7045 [19:57<23:55:17, 12.40s/it] 1%|▏ | 102/7045 [20:10<24:12:13, 12.55s/it] {'loss': 1.2002, 'learning_rate': 2.405660377358491e-06, 'epoch': 0.01} + 1%|▏ | 102/7045 [20:10<24:12:13, 12.55s/it] 1%|▏ | 103/7045 [20:21<23:18:08, 12.08s/it] {'loss': 1.1953, 'learning_rate': 2.429245283018868e-06, 'epoch': 0.01} + 1%|▏ | 103/7045 [20:21<23:18:08, 12.08s/it] 1%|▏ | 104/7045 [20:33<23:08:47, 12.01s/it] {'loss': 1.2168, 'learning_rate': 2.4528301886792453e-06, 'epoch': 0.01} + 1%|▏ | 104/7045 [20:33<23:08:47, 12.01s/it] 1%|▏ | 105/7045 [20:43<22:31:25, 11.68s/it] {'loss': 1.1855, 'learning_rate': 2.476415094339623e-06, 'epoch': 0.01} + 1%|▏ | 105/7045 [20:43<22:31:25, 11.68s/it] 2%|▏ | 106/7045 [20:55<22:23:51, 11.62s/it] {'loss': 1.2197, 'learning_rate': 2.5e-06, 'epoch': 0.02} + 2%|▏ | 106/7045 [20:55<22:23:51, 11.62s/it] 2%|▏ | 107/7045 [21:06<22:06:50, 11.47s/it] {'loss': 1.2285, 'learning_rate': 2.523584905660378e-06, 'epoch': 0.02} + 2%|▏ | 107/7045 [21:06<22:06:50, 11.47s/it] 2%|▏ | 108/7045 [21:18<22:11:10, 11.51s/it] {'loss': 1.2217, 'learning_rate': 2.547169811320755e-06, 'epoch': 0.02} + 2%|▏ | 108/7045 [21:18<22:11:10, 11.51s/it] 2%|▏ | 109/7045 [21:30<22:23:48, 11.62s/it] {'loss': 1.1982, 'learning_rate': 2.5707547169811327e-06, 'epoch': 0.02} + 2%|▏ | 109/7045 [21:30<22:23:48, 11.62s/it] 2%|▏ | 110/7045 [21:43<23:18:53, 12.10s/it] {'loss': 1.1758, 'learning_rate': 2.5943396226415095e-06, 'epoch': 0.02} + 2%|▏ | 110/7045 [21:43<23:18:53, 12.10s/it] 2%|▏ | 111/7045 [21:54<22:51:50, 11.87s/it] {'loss': 1.2197, 'learning_rate': 2.617924528301887e-06, 'epoch': 0.02} + 2%|▏ | 111/7045 [21:54<22:51:50, 11.87s/it] 2%|▏ | 112/7045 [22:07<23:17:46, 12.10s/it] {'loss': 1.1943, 'learning_rate': 2.6415094339622644e-06, 'epoch': 0.02} + 2%|▏ | 112/7045 [22:07<23:17:46, 12.10s/it] 2%|▏ | 113/7045 [22:19<23:39:44, 12.29s/it] {'loss': 1.2002, 'learning_rate': 2.665094339622642e-06, 'epoch': 0.02} + 2%|▏ | 113/7045 [22:19<23:39:44, 12.29s/it] 2%|▏ | 114/7045 [22:31<23:17:52, 12.10s/it] {'loss': 1.1953, 'learning_rate': 2.688679245283019e-06, 'epoch': 0.02} + 2%|▏ | 114/7045 [22:31<23:17:52, 12.10s/it] 2%|▏ | 115/7045 [22:43<22:55:08, 11.91s/it] {'loss': 1.1807, 'learning_rate': 2.7122641509433965e-06, 'epoch': 0.02} + 2%|▏ | 115/7045 [22:43<22:55:08, 11.91s/it] 2%|▏ | 116/7045 [22:54<22:30:28, 11.69s/it] {'loss': 1.208, 'learning_rate': 2.7358490566037738e-06, 'epoch': 0.02} + 2%|▏ | 116/7045 [22:54<22:30:28, 11.69s/it] 2%|▏ | 117/7045 [23:05<22:21:49, 11.62s/it] {'loss': 1.2354, 'learning_rate': 2.7594339622641514e-06, 'epoch': 0.02} + 2%|▏ | 117/7045 [23:05<22:21:49, 11.62s/it] 2%|▏ | 118/7045 [23:16<21:59:31, 11.43s/it] {'loss': 1.2061, 'learning_rate': 2.7830188679245286e-06, 'epoch': 0.02} + 2%|▏ | 118/7045 [23:16<21:59:31, 11.43s/it] 2%|▏ | 119/7045 [23:28<21:57:05, 11.41s/it] {'loss': 1.2041, 'learning_rate': 2.8066037735849063e-06, 'epoch': 0.02} + 2%|▏ | 119/7045 [23:28<21:57:05, 11.41s/it] 2%|▏ | 120/7045 [23:39<21:45:40, 11.31s/it] {'loss': 1.1592, 'learning_rate': 2.830188679245283e-06, 'epoch': 0.02} + 2%|▏ | 120/7045 [23:39<21:45:40, 11.31s/it] 2%|▏ | 121/7045 [23:50<21:31:36, 11.19s/it] {'loss': 1.1973, 'learning_rate': 2.8537735849056608e-06, 'epoch': 0.02} + 2%|▏ | 121/7045 [23:50<21:31:36, 11.19s/it] 2%|▏ | 122/7045 [24:01<21:23:42, 11.13s/it] {'loss': 1.1768, 'learning_rate': 2.877358490566038e-06, 'epoch': 0.02} + 2%|▏ | 122/7045 [24:01<21:23:42, 11.13s/it] 2%|▏ | 123/7045 [24:12<21:19:54, 11.09s/it] {'loss': 1.207, 'learning_rate': 2.9009433962264156e-06, 'epoch': 0.02} + 2%|▏ | 123/7045 [24:12<21:19:54, 11.09s/it] 2%|▏ | 124/7045 [24:24<22:11:56, 11.55s/it] {'loss': 1.1787, 'learning_rate': 2.9245283018867924e-06, 'epoch': 0.02} + 2%|▏ | 124/7045 [24:24<22:11:56, 11.55s/it] 2%|▏ | 125/7045 [24:35<21:58:26, 11.43s/it] {'loss': 1.1953, 'learning_rate': 2.94811320754717e-06, 'epoch': 0.02} + 2%|▏ | 125/7045 [24:35<21:58:26, 11.43s/it] 2%|▏ | 126/7045 [24:46<21:49:42, 11.36s/it] {'loss': 1.2002, 'learning_rate': 2.9716981132075473e-06, 'epoch': 0.02} + 2%|▏ | 126/7045 [24:46<21:49:42, 11.36s/it] 2%|▏ | 127/7045 [24:58<22:05:01, 11.49s/it] {'loss': 1.2012, 'learning_rate': 2.995283018867925e-06, 'epoch': 0.02} + 2%|▏ | 127/7045 [24:58<22:05:01, 11.49s/it] 2%|▏ | 128/7045 [25:10<22:10:59, 11.55s/it] {'loss': 1.2109, 'learning_rate': 3.018867924528302e-06, 'epoch': 0.02} + 2%|▏ | 128/7045 [25:10<22:10:59, 11.55s/it] 2%|▏ | 129/7045 [25:21<21:56:37, 11.42s/it] {'loss': 1.2363, 'learning_rate': 3.04245283018868e-06, 'epoch': 0.02} + 2%|▏ | 129/7045 [25:21<21:56:37, 11.42s/it] 2%|▏ | 130/7045 [25:32<21:40:02, 11.28s/it] {'loss': 1.1885, 'learning_rate': 3.0660377358490567e-06, 'epoch': 0.02} + 2%|▏ | 130/7045 [25:32<21:40:02, 11.28s/it] 2%|▏ | 131/7045 [25:44<22:15:22, 11.59s/it] {'loss': 1.1523, 'learning_rate': 3.0896226415094343e-06, 'epoch': 0.02} + 2%|▏ | 131/7045 [25:44<22:15:22, 11.59s/it] 2%|▏ | 132/7045 [25:56<22:03:58, 11.49s/it] {'loss': 1.2217, 'learning_rate': 3.1132075471698115e-06, 'epoch': 0.02} + 2%|▏ | 132/7045 [25:56<22:03:58, 11.49s/it] 2%|▏ | 133/7045 [26:07<22:12:29, 11.57s/it] {'loss': 1.1865, 'learning_rate': 3.136792452830189e-06, 'epoch': 0.02} + 2%|▏ | 133/7045 [26:07<22:12:29, 11.57s/it] 2%|▏ | 134/7045 [26:19<21:58:05, 11.44s/it] {'loss': 1.1992, 'learning_rate': 3.160377358490566e-06, 'epoch': 0.02} + 2%|▏ | 134/7045 [26:19<21:58:05, 11.44s/it] 2%|▏ | 135/7045 [26:30<21:47:33, 11.35s/it] {'loss': 1.1963, 'learning_rate': 3.1839622641509436e-06, 'epoch': 0.02} + 2%|▏ | 135/7045 [26:30<21:47:33, 11.35s/it] 2%|▏ | 136/7045 [26:41<21:41:00, 11.30s/it] {'loss': 1.1934, 'learning_rate': 3.207547169811321e-06, 'epoch': 0.02} + 2%|▏ | 136/7045 [26:41<21:41:00, 11.30s/it] 2%|▏ | 137/7045 [26:53<22:14:21, 11.59s/it] {'loss': 1.1895, 'learning_rate': 3.2311320754716985e-06, 'epoch': 0.02} + 2%|▏ | 137/7045 [26:53<22:14:21, 11.59s/it] 2%|▏ | 138/7045 [27:04<22:00:35, 11.47s/it] {'loss': 1.1768, 'learning_rate': 3.2547169811320758e-06, 'epoch': 0.02} + 2%|▏ | 138/7045 [27:04<22:00:35, 11.47s/it] 2%|▏ | 139/7045 [27:16<22:00:18, 11.47s/it] {'loss': 1.2002, 'learning_rate': 3.2783018867924534e-06, 'epoch': 0.02} + 2%|▏ | 139/7045 [27:16<22:00:18, 11.47s/it] 2%|▏ | 140/7045 [27:29<22:43:40, 11.85s/it] {'loss': 1.1748, 'learning_rate': 3.30188679245283e-06, 'epoch': 0.02} + 2%|▏ | 140/7045 [27:29<22:43:40, 11.85s/it] 2%|▏ | 141/7045 [27:40<22:26:13, 11.70s/it] {'loss': 1.2344, 'learning_rate': 3.325471698113208e-06, 'epoch': 0.02} + 2%|▏ | 141/7045 [27:40<22:26:13, 11.70s/it] 2%|▏ | 142/7045 [27:52<22:35:43, 11.78s/it] {'loss': 1.1924, 'learning_rate': 3.349056603773585e-06, 'epoch': 0.02} + 2%|▏ | 142/7045 [27:52<22:35:43, 11.78s/it] 2%|▏ | 143/7045 [28:05<23:29:47, 12.26s/it] {'loss': 1.1523, 'learning_rate': 3.3726415094339627e-06, 'epoch': 0.02} + 2%|▏ | 143/7045 [28:05<23:29:47, 12.26s/it] 2%|▏ | 144/7045 [28:16<22:48:27, 11.90s/it] {'loss': 1.2168, 'learning_rate': 3.3962264150943395e-06, 'epoch': 0.02} + 2%|▏ | 144/7045 [28:16<22:48:27, 11.90s/it] 2%|▏ | 145/7045 [28:28<22:25:53, 11.70s/it] {'loss': 1.1748, 'learning_rate': 3.419811320754717e-06, 'epoch': 0.02} + 2%|▏ | 145/7045 [28:28<22:25:53, 11.70s/it] 2%|▏ | 146/7045 [28:40<23:06:20, 12.06s/it] {'loss': 1.1719, 'learning_rate': 3.4433962264150944e-06, 'epoch': 0.02} + 2%|▏ | 146/7045 [28:40<23:06:20, 12.06s/it] 2%|▏ | 147/7045 [28:51<22:21:07, 11.67s/it] {'loss': 1.1768, 'learning_rate': 3.466981132075472e-06, 'epoch': 0.02} + 2%|▏ | 147/7045 [28:51<22:21:07, 11.67s/it] 2%|▏ | 148/7045 [29:04<22:53:33, 11.95s/it] {'loss': 1.1826, 'learning_rate': 3.4905660377358493e-06, 'epoch': 0.02} + 2%|▏ | 148/7045 [29:04<22:53:33, 11.95s/it] 2%|▏ | 149/7045 [29:15<22:28:56, 11.74s/it] {'loss': 1.2031, 'learning_rate': 3.514150943396227e-06, 'epoch': 0.02} + 2%|▏ | 149/7045 [29:15<22:28:56, 11.74s/it] 2%|▏ | 150/7045 [29:26<22:03:17, 11.52s/it] {'loss': 1.165, 'learning_rate': 3.5377358490566038e-06, 'epoch': 0.02} + 2%|▏ | 150/7045 [29:26<22:03:17, 11.52s/it] 2%|▏ | 151/7045 [29:37<21:48:18, 11.39s/it] {'loss': 1.1826, 'learning_rate': 3.5613207547169814e-06, 'epoch': 0.02} + 2%|▏ | 151/7045 [29:37<21:48:18, 11.39s/it] 2%|▏ | 152/7045 [29:49<22:13:00, 11.60s/it] {'loss': 1.2041, 'learning_rate': 3.5849056603773586e-06, 'epoch': 0.02} + 2%|▏ | 152/7045 [29:49<22:13:00, 11.60s/it] 2%|▏ | 153/7045 [30:02<22:57:40, 11.99s/it] {'loss': 1.1689, 'learning_rate': 3.6084905660377363e-06, 'epoch': 0.02} + 2%|▏ | 153/7045 [30:02<22:57:40, 11.99s/it] 2%|▏ | 154/7045 [30:14<22:43:16, 11.87s/it] {'loss': 1.1934, 'learning_rate': 3.632075471698113e-06, 'epoch': 0.02} + 2%|▏ | 154/7045 [30:14<22:43:16, 11.87s/it] 2%|▏ | 155/7045 [30:27<23:36:31, 12.34s/it] {'loss': 1.1709, 'learning_rate': 3.6556603773584908e-06, 'epoch': 0.02} + 2%|▏ | 155/7045 [30:27<23:36:31, 12.34s/it] 2%|▏ | 156/7045 [30:38<22:51:15, 11.94s/it] {'loss': 1.1885, 'learning_rate': 3.679245283018868e-06, 'epoch': 0.02} + 2%|▏ | 156/7045 [30:38<22:51:15, 11.94s/it] 2%|▏ | 157/7045 [30:50<22:32:04, 11.78s/it] {'loss': 1.2148, 'learning_rate': 3.7028301886792456e-06, 'epoch': 0.02} + 2%|▏ | 157/7045 [30:50<22:32:04, 11.78s/it] 2%|▏ | 158/7045 [31:01<22:07:27, 11.56s/it] {'loss': 1.2354, 'learning_rate': 3.726415094339623e-06, 'epoch': 0.02} + 2%|▏ | 158/7045 [31:01<22:07:27, 11.56s/it] 2%|▏ | 159/7045 [31:12<22:16:53, 11.65s/it] {'loss': 1.21, 'learning_rate': 3.7500000000000005e-06, 'epoch': 0.02} + 2%|▏ | 159/7045 [31:12<22:16:53, 11.65s/it] 2%|▏ | 160/7045 [31:25<22:35:09, 11.81s/it] {'loss': 1.1914, 'learning_rate': 3.7735849056603777e-06, 'epoch': 0.02} + 2%|▏ | 160/7045 [31:25<22:35:09, 11.81s/it] 2%|▏ | 161/7045 [31:37<22:57:38, 12.01s/it] {'loss': 1.1582, 'learning_rate': 3.797169811320755e-06, 'epoch': 0.02} + 2%|▏ | 161/7045 [31:37<22:57:38, 12.01s/it] 2%|▏ | 162/7045 [31:48<22:31:34, 11.78s/it] {'loss': 1.1846, 'learning_rate': 3.820754716981133e-06, 'epoch': 0.02} + 2%|▏ | 162/7045 [31:48<22:31:34, 11.78s/it] 2%|▏ | 163/7045 [31:59<22:06:56, 11.57s/it] {'loss': 1.2158, 'learning_rate': 3.8443396226415094e-06, 'epoch': 0.02} + 2%|▏ | 163/7045 [31:59<22:06:56, 11.57s/it] 2%|▏ | 164/7045 [32:10<21:50:16, 11.43s/it] {'loss': 1.2354, 'learning_rate': 3.8679245283018875e-06, 'epoch': 0.02} + 2%|▏ | 164/7045 [32:10<21:50:16, 11.43s/it] 2%|▏ | 165/7045 [32:21<21:30:20, 11.25s/it] {'loss': 1.2002, 'learning_rate': 3.891509433962265e-06, 'epoch': 0.02} + 2%|▏ | 165/7045 [32:21<21:30:20, 11.25s/it] 2%|▏ | 166/7045 [32:33<21:49:34, 11.42s/it] {'loss': 1.1445, 'learning_rate': 3.915094339622642e-06, 'epoch': 0.02} + 2%|▏ | 166/7045 [32:33<21:49:34, 11.42s/it] 2%|▏ | 167/7045 [32:45<21:58:52, 11.51s/it] {'loss': 1.1572, 'learning_rate': 3.938679245283019e-06, 'epoch': 0.02} + 2%|▏ | 167/7045 [32:45<21:58:52, 11.51s/it] 2%|▏ | 168/7045 [32:57<22:08:15, 11.59s/it] {'loss': 1.1816, 'learning_rate': 3.962264150943396e-06, 'epoch': 0.02} + 2%|▏ | 168/7045 [32:57<22:08:15, 11.59s/it] 2%|▏ | 169/7045 [33:08<21:44:10, 11.38s/it] {'loss': 1.2197, 'learning_rate': 3.985849056603774e-06, 'epoch': 0.02} + 2%|▏ | 169/7045 [33:08<21:44:10, 11.38s/it] 2%|▏ | 170/7045 [33:20<22:11:35, 11.62s/it] {'loss': 1.1787, 'learning_rate': 4.009433962264152e-06, 'epoch': 0.02} + 2%|▏ | 170/7045 [33:20<22:11:35, 11.62s/it] 2%|▏ | 171/7045 [33:31<21:58:16, 11.51s/it] {'loss': 1.1826, 'learning_rate': 4.033018867924529e-06, 'epoch': 0.02} + 2%|▏ | 171/7045 [33:31<21:58:16, 11.51s/it] 2%|▏ | 172/7045 [33:42<21:49:57, 11.44s/it] {'loss': 1.166, 'learning_rate': 4.056603773584906e-06, 'epoch': 0.02} + 2%|▏ | 172/7045 [33:42<21:49:57, 11.44s/it] 2%|▏ | 173/7045 [33:54<21:44:35, 11.39s/it] {'loss': 1.1699, 'learning_rate': 4.080188679245283e-06, 'epoch': 0.02} + 2%|▏ | 173/7045 [33:54<21:44:35, 11.39s/it] 2%|▏ | 174/7045 [34:06<22:36:34, 11.85s/it] {'loss': 1.167, 'learning_rate': 4.103773584905661e-06, 'epoch': 0.02} + 2%|▏ | 174/7045 [34:06<22:36:34, 11.85s/it] 2%|▏ | 175/7045 [34:18<22:25:59, 11.76s/it] {'loss': 1.2236, 'learning_rate': 4.127358490566038e-06, 'epoch': 0.02} + 2%|▏ | 175/7045 [34:18<22:25:59, 11.76s/it] 2%|▏ | 176/7045 [34:29<22:00:58, 11.54s/it] {'loss': 1.1631, 'learning_rate': 4.150943396226416e-06, 'epoch': 0.02} + 2%|▏ | 176/7045 [34:29<22:00:58, 11.54s/it] 3%|▎ | 177/7045 [34:40<21:45:14, 11.40s/it] {'loss': 1.1963, 'learning_rate': 4.174528301886792e-06, 'epoch': 0.03} + 3%|▎ | 177/7045 [34:40<21:45:14, 11.40s/it] 3%|▎ | 178/7045 [34:51<21:36:28, 11.33s/it] {'loss': 1.207, 'learning_rate': 4.19811320754717e-06, 'epoch': 0.03} + 3%|▎ | 178/7045 [34:51<21:36:28, 11.33s/it] 3%|▎ | 179/7045 [35:02<21:25:54, 11.24s/it] {'loss': 1.1924, 'learning_rate': 4.221698113207548e-06, 'epoch': 0.03} + 3%|▎ | 179/7045 [35:02<21:25:54, 11.24s/it] 3%|▎ | 180/7045 [35:14<21:38:11, 11.35s/it] {'loss': 1.1816, 'learning_rate': 4.245283018867925e-06, 'epoch': 0.03} + 3%|▎ | 180/7045 [35:14<21:38:11, 11.35s/it] 3%|▎ | 181/7045 [35:27<22:26:00, 11.77s/it] {'loss': 1.165, 'learning_rate': 4.268867924528302e-06, 'epoch': 0.03} + 3%|▎ | 181/7045 [35:27<22:26:00, 11.77s/it] 3%|▎ | 182/7045 [35:38<22:10:05, 11.63s/it] {'loss': 1.1875, 'learning_rate': 4.29245283018868e-06, 'epoch': 0.03} + 3%|▎ | 182/7045 [35:38<22:10:05, 11.63s/it] 3%|▎ | 183/7045 [35:49<21:40:03, 11.37s/it] {'loss': 1.1846, 'learning_rate': 4.3160377358490565e-06, 'epoch': 0.03} + 3%|▎ | 183/7045 [35:49<21:40:03, 11.37s/it] 3%|▎ | 184/7045 [36:00<21:30:25, 11.28s/it] {'loss': 1.2217, 'learning_rate': 4.339622641509435e-06, 'epoch': 0.03} + 3%|▎ | 184/7045 [36:00<21:30:25, 11.28s/it] 3%|▎ | 185/7045 [36:11<21:35:42, 11.33s/it] {'loss': 1.1865, 'learning_rate': 4.363207547169812e-06, 'epoch': 0.03} + 3%|▎ | 185/7045 [36:11<21:35:42, 11.33s/it] 3%|▎ | 186/7045 [36:22<21:28:45, 11.27s/it] {'loss': 1.208, 'learning_rate': 4.386792452830189e-06, 'epoch': 0.03} + 3%|▎ | 186/7045 [36:22<21:28:45, 11.27s/it] 3%|▎ | 187/7045 [36:35<22:20:00, 11.72s/it] {'loss': 1.1611, 'learning_rate': 4.410377358490566e-06, 'epoch': 0.03} + 3%|▎ | 187/7045 [36:35<22:20:00, 11.72s/it] 3%|▎ | 188/7045 [36:46<21:57:09, 11.53s/it] {'loss': 1.2227, 'learning_rate': 4.4339622641509435e-06, 'epoch': 0.03} + 3%|▎ | 188/7045 [36:46<21:57:09, 11.53s/it] 3%|▎ | 189/7045 [36:57<21:42:50, 11.40s/it] {'loss': 1.1455, 'learning_rate': 4.457547169811321e-06, 'epoch': 0.03} + 3%|▎ | 189/7045 [36:57<21:42:50, 11.40s/it] 3%|▎ | 190/7045 [37:09<22:04:12, 11.59s/it] {'loss': 1.126, 'learning_rate': 4.481132075471699e-06, 'epoch': 0.03} + 3%|▎ | 190/7045 [37:09<22:04:12, 11.59s/it] 3%|▎ | 191/7045 [37:23<23:02:56, 12.11s/it] {'loss': 1.207, 'learning_rate': 4.504716981132076e-06, 'epoch': 0.03} + 3%|▎ | 191/7045 [37:23<23:02:56, 12.11s/it] 3%|▎ | 192/7045 [37:36<23:28:48, 12.33s/it] {'loss': 1.209, 'learning_rate': 4.528301886792453e-06, 'epoch': 0.03} + 3%|▎ | 192/7045 [37:36<23:28:48, 12.33s/it] 3%|▎ | 193/7045 [37:47<23:10:49, 12.18s/it] {'loss': 1.1689, 'learning_rate': 4.5518867924528305e-06, 'epoch': 0.03} + 3%|▎ | 193/7045 [37:47<23:10:49, 12.18s/it] 3%|▎ | 194/7045 [37:59<23:09:15, 12.17s/it] {'loss': 1.1963, 'learning_rate': 4.575471698113208e-06, 'epoch': 0.03} + 3%|▎ | 194/7045 [37:59<23:09:15, 12.17s/it] 3%|▎ | 195/7045 [38:11<22:31:25, 11.84s/it] {'loss': 1.1943, 'learning_rate': 4.599056603773585e-06, 'epoch': 0.03} + 3%|▎ | 195/7045 [38:11<22:31:25, 11.84s/it] 3%|▎ | 196/7045 [38:22<22:21:33, 11.75s/it] {'loss': 1.1523, 'learning_rate': 4.622641509433963e-06, 'epoch': 0.03} + 3%|▎ | 196/7045 [38:22<22:21:33, 11.75s/it] 3%|▎ | 197/7045 [38:33<22:04:12, 11.60s/it] {'loss': 1.1592, 'learning_rate': 4.6462264150943394e-06, 'epoch': 0.03} + 3%|▎ | 197/7045 [38:33<22:04:12, 11.60s/it] 3%|▎ | 198/7045 [38:45<22:19:49, 11.74s/it] {'loss': 1.1699, 'learning_rate': 4.6698113207547175e-06, 'epoch': 0.03} + 3%|▎ | 198/7045 [38:45<22:19:49, 11.74s/it] 3%|▎ | 199/7045 [38:57<22:11:30, 11.67s/it] {'loss': 1.1758, 'learning_rate': 4.693396226415095e-06, 'epoch': 0.03} + 3%|▎ | 199/7045 [38:57<22:11:30, 11.67s/it] 3%|▎ | 200/7045 [39:08<22:01:14, 11.58s/it] {'loss': 1.1777, 'learning_rate': 4.716981132075472e-06, 'epoch': 0.03} + 3%|▎ | 200/7045 [39:08<22:01:14, 11.58s/it] 3%|▎ | 201/7045 [39:20<22:01:25, 11.58s/it] {'loss': 1.2109, 'learning_rate': 4.740566037735849e-06, 'epoch': 0.03} + 3%|▎ | 201/7045 [39:20<22:01:25, 11.58s/it] 3%|▎ | 202/7045 [39:31<21:50:53, 11.49s/it] {'loss': 1.2061, 'learning_rate': 4.764150943396227e-06, 'epoch': 0.03} + 3%|▎ | 202/7045 [39:31<21:50:53, 11.49s/it] 3%|▎ | 203/7045 [39:43<22:04:24, 11.61s/it] {'loss': 1.1729, 'learning_rate': 4.787735849056604e-06, 'epoch': 0.03} + 3%|▎ | 203/7045 [39:43<22:04:24, 11.61s/it] 3%|▎ | 204/7045 [39:54<21:44:52, 11.44s/it] {'loss': 1.2227, 'learning_rate': 4.811320754716982e-06, 'epoch': 0.03} + 3%|▎ | 204/7045 [39:54<21:44:52, 11.44s/it] 3%|▎ | 205/7045 [40:05<21:34:06, 11.35s/it] {'loss': 1.209, 'learning_rate': 4.834905660377359e-06, 'epoch': 0.03} + 3%|▎ | 205/7045 [40:05<21:34:06, 11.35s/it] 3%|▎ | 206/7045 [40:17<22:03:08, 11.61s/it] {'loss': 1.1865, 'learning_rate': 4.858490566037736e-06, 'epoch': 0.03} + 3%|▎ | 206/7045 [40:17<22:03:08, 11.61s/it] 3%|▎ | 207/7045 [40:29<22:14:55, 11.71s/it] {'loss': 1.21, 'learning_rate': 4.882075471698113e-06, 'epoch': 0.03} + 3%|▎ | 207/7045 [40:29<22:14:55, 11.71s/it] 3%|▎ | 208/7045 [40:42<22:31:01, 11.86s/it] {'loss': 1.1631, 'learning_rate': 4.905660377358491e-06, 'epoch': 0.03} + 3%|▎ | 208/7045 [40:42<22:31:01, 11.86s/it] 3%|▎ | 209/7045 [40:53<21:59:43, 11.58s/it] {'loss': 1.1406, 'learning_rate': 4.929245283018868e-06, 'epoch': 0.03} + 3%|▎ | 209/7045 [40:53<21:59:43, 11.58s/it] 3%|▎ | 210/7045 [41:04<21:50:37, 11.51s/it] {'loss': 1.168, 'learning_rate': 4.952830188679246e-06, 'epoch': 0.03} + 3%|▎ | 210/7045 [41:04<21:50:37, 11.51s/it] 3%|▎ | 211/7045 [41:16<22:23:22, 11.79s/it] {'loss': 1.1807, 'learning_rate': 4.976415094339623e-06, 'epoch': 0.03} + 3%|▎ | 211/7045 [41:16<22:23:22, 11.79s/it] 3%|▎ | 212/7045 [41:27<21:57:34, 11.57s/it] {'loss': 1.1797, 'learning_rate': 5e-06, 'epoch': 0.03} + 3%|▎ | 212/7045 [41:27<21:57:34, 11.57s/it] 3%|▎ | 213/7045 [41:40<22:41:47, 11.96s/it] {'loss': 1.1309, 'learning_rate': 4.999999735767089e-06, 'epoch': 0.03} + 3%|▎ | 213/7045 [41:40<22:41:47, 11.96s/it] 3%|▎ | 214/7045 [41:52<22:24:59, 11.81s/it] {'loss': 1.1699, 'learning_rate': 4.999998943068413e-06, 'epoch': 0.03} + 3%|▎ | 214/7045 [41:52<22:24:59, 11.81s/it] 3%|▎ | 215/7045 [42:03<21:53:33, 11.54s/it] {'loss': 1.1943, 'learning_rate': 4.999997621904137e-06, 'epoch': 0.03} + 3%|▎ | 215/7045 [42:03<21:53:33, 11.54s/it] 3%|▎ | 216/7045 [42:14<22:02:34, 11.62s/it] {'loss': 1.1924, 'learning_rate': 4.999995772274542e-06, 'epoch': 0.03} + 3%|▎ | 216/7045 [42:14<22:02:34, 11.62s/it] 3%|▎ | 217/7045 [42:26<21:59:45, 11.60s/it] {'loss': 1.1885, 'learning_rate': 4.999993394180018e-06, 'epoch': 0.03} + 3%|▎ | 217/7045 [42:26<21:59:45, 11.60s/it] 3%|▎ | 218/7045 [42:38<22:29:16, 11.86s/it] {'loss': 1.1836, 'learning_rate': 4.9999904876210694e-06, 'epoch': 0.03} + 3%|▎ | 218/7045 [42:38<22:29:16, 11.86s/it] 3%|▎ | 219/7045 [42:50<22:09:30, 11.69s/it] {'loss': 1.1289, 'learning_rate': 4.99998705259831e-06, 'epoch': 0.03} + 3%|▎ | 219/7045 [42:50<22:09:30, 11.69s/it] 3%|▎ | 220/7045 [43:02<22:14:58, 11.74s/it] {'loss': 1.1572, 'learning_rate': 4.999983089112464e-06, 'epoch': 0.03} + 3%|▎ | 220/7045 [43:02<22:14:58, 11.74s/it] 3%|▎ | 221/7045 [43:13<21:47:52, 11.50s/it] {'loss': 1.2119, 'learning_rate': 4.999978597164372e-06, 'epoch': 0.03} + 3%|▎ | 221/7045 [43:13<21:47:52, 11.50s/it] 3%|▎ | 222/7045 [43:24<21:30:40, 11.35s/it] {'loss': 1.1729, 'learning_rate': 4.999973576754983e-06, 'epoch': 0.03} + 3%|▎ | 222/7045 [43:24<21:30:40, 11.35s/it] 3%|▎ | 223/7045 [43:35<21:19:26, 11.25s/it] {'loss': 1.1914, 'learning_rate': 4.999968027885356e-06, 'epoch': 0.03} + 3%|▎ | 223/7045 [43:35<21:19:26, 11.25s/it] 3%|▎ | 224/7045 [43:47<21:48:23, 11.51s/it] {'loss': 1.1709, 'learning_rate': 4.999961950556666e-06, 'epoch': 0.03} + 3%|▎ | 224/7045 [43:47<21:48:23, 11.51s/it] 3%|▎ | 225/7045 [43:58<21:32:10, 11.37s/it] {'loss': 1.1777, 'learning_rate': 4.999955344770198e-06, 'epoch': 0.03} + 3%|▎ | 225/7045 [43:58<21:32:10, 11.37s/it] 3%|▎ | 226/7045 [44:11<22:27:48, 11.86s/it] {'loss': 1.1963, 'learning_rate': 4.999948210527345e-06, 'epoch': 0.03} + 3%|▎ | 226/7045 [44:11<22:27:48, 11.86s/it] 3%|▎ | 227/7045 [44:22<22:15:24, 11.75s/it] {'loss': 1.1533, 'learning_rate': 4.9999405478296205e-06, 'epoch': 0.03} + 3%|▎ | 227/7045 [44:22<22:15:24, 11.75s/it] 3%|▎ | 228/7045 [44:33<21:52:12, 11.55s/it] {'loss': 1.2158, 'learning_rate': 4.99993235667864e-06, 'epoch': 0.03} + 3%|▎ | 228/7045 [44:33<21:52:12, 11.55s/it] 3%|▎ | 229/7045 [44:44<21:37:20, 11.42s/it] {'loss': 1.1758, 'learning_rate': 4.999923637076137e-06, 'epoch': 0.03} + 3%|▎ | 229/7045 [44:44<21:37:20, 11.42s/it] 3%|▎ | 230/7045 [44:56<21:43:51, 11.48s/it] {'loss': 1.1934, 'learning_rate': 4.999914389023953e-06, 'epoch': 0.03} + 3%|▎ | 230/7045 [44:56<21:43:51, 11.48s/it] 3%|▎ | 231/7045 [45:09<22:22:02, 11.82s/it] {'loss': 1.1699, 'learning_rate': 4.999904612524045e-06, 'epoch': 0.03} + 3%|▎ | 231/7045 [45:09<22:22:02, 11.82s/it] 3%|▎ | 232/7045 [45:20<22:01:53, 11.64s/it] {'loss': 1.1816, 'learning_rate': 4.999894307578479e-06, 'epoch': 0.03} + 3%|▎ | 232/7045 [45:20<22:01:53, 11.64s/it] 3%|▎ | 233/7045 [45:31<21:53:25, 11.57s/it] {'loss': 1.1982, 'learning_rate': 4.999883474189432e-06, 'epoch': 0.03} + 3%|▎ | 233/7045 [45:31<21:53:25, 11.57s/it] 3%|▎ | 234/7045 [45:44<22:24:08, 11.84s/it] {'loss': 1.1582, 'learning_rate': 4.999872112359195e-06, 'epoch': 0.03} + 3%|▎ | 234/7045 [45:44<22:24:08, 11.84s/it] 3%|▎ | 235/7045 [45:55<21:59:58, 11.63s/it] {'loss': 1.2051, 'learning_rate': 4.99986022209017e-06, 'epoch': 0.03} + 3%|▎ | 235/7045 [45:55<21:59:58, 11.63s/it] 3%|▎ | 236/7045 [46:07<22:29:53, 11.90s/it] {'loss': 1.1768, 'learning_rate': 4.9998478033848704e-06, 'epoch': 0.03} + 3%|▎ | 236/7045 [46:07<22:29:53, 11.90s/it] 3%|▎ | 237/7045 [46:20<23:07:07, 12.22s/it] {'loss': 1.1602, 'learning_rate': 4.999834856245922e-06, 'epoch': 0.03} + 3%|▎ | 237/7045 [46:20<23:07:07, 12.22s/it] 3%|▎ | 238/7045 [46:32<22:36:41, 11.96s/it] {'loss': 1.1738, 'learning_rate': 4.999821380676059e-06, 'epoch': 0.03} + 3%|▎ | 238/7045 [46:32<22:36:41, 11.96s/it] 3%|▎ | 239/7045 [46:43<22:05:49, 11.69s/it] {'loss': 1.1846, 'learning_rate': 4.999807376678133e-06, 'epoch': 0.03} + 3%|▎ | 239/7045 [46:43<22:05:49, 11.69s/it] 3%|▎ | 240/7045 [46:54<21:54:20, 11.59s/it] {'loss': 1.1523, 'learning_rate': 4.9997928442551025e-06, 'epoch': 0.03} + 3%|▎ | 240/7045 [46:54<21:54:20, 11.59s/it] 3%|▎ | 241/7045 [47:05<21:32:40, 11.40s/it] {'loss': 1.166, 'learning_rate': 4.99977778341004e-06, 'epoch': 0.03} + 3%|▎ | 241/7045 [47:05<21:32:40, 11.40s/it] 3%|▎ | 242/7045 [47:17<21:43:04, 11.49s/it] {'loss': 1.1787, 'learning_rate': 4.999762194146128e-06, 'epoch': 0.03} + 3%|▎ | 242/7045 [47:17<21:43:04, 11.49s/it] 3%|▎ | 243/7045 [47:29<22:14:09, 11.77s/it] {'loss': 1.1729, 'learning_rate': 4.999746076466665e-06, 'epoch': 0.03} + 3%|▎ | 243/7045 [47:29<22:14:09, 11.77s/it] 3%|▎ | 244/7045 [47:41<22:20:11, 11.82s/it] {'loss': 1.2139, 'learning_rate': 4.999729430375054e-06, 'epoch': 0.03} + 3%|▎ | 244/7045 [47:41<22:20:11, 11.82s/it] 3%|▎ | 245/7045 [47:52<21:57:36, 11.63s/it] {'loss': 1.2129, 'learning_rate': 4.999712255874817e-06, 'epoch': 0.03} + 3%|▎ | 245/7045 [47:52<21:57:36, 11.63s/it] 3%|▎ | 246/7045 [48:04<21:59:53, 11.65s/it] {'loss': 1.1934, 'learning_rate': 4.999694552969583e-06, 'epoch': 0.03} + 3%|▎ | 246/7045 [48:04<21:59:53, 11.65s/it] 4%|▎ | 247/7045 [48:16<22:17:43, 11.81s/it] {'loss': 1.1011, 'learning_rate': 4.9996763216630935e-06, 'epoch': 0.04} + 4%|▎ | 247/7045 [48:16<22:17:43, 11.81s/it] 4%|▎ | 248/7045 [48:28<22:12:03, 11.76s/it] {'loss': 1.1504, 'learning_rate': 4.999657561959205e-06, 'epoch': 0.04} + 4%|▎ | 248/7045 [48:28<22:12:03, 11.76s/it] 4%|▎ | 249/7045 [48:40<22:20:28, 11.83s/it] {'loss': 1.1665, 'learning_rate': 4.9996382738618795e-06, 'epoch': 0.04} + 4%|▎ | 249/7045 [48:40<22:20:28, 11.83s/it] 4%|▎ | 250/7045 [48:51<21:56:25, 11.62s/it] {'loss': 1.1797, 'learning_rate': 4.9996184573751965e-06, 'epoch': 0.04} + 4%|▎ | 250/7045 [48:51<21:56:25, 11.62s/it] 4%|▎ | 251/7045 [49:04<22:39:25, 12.01s/it] {'loss': 1.1611, 'learning_rate': 4.9995981125033446e-06, 'epoch': 0.04} + 4%|▎ | 251/7045 [49:04<22:39:25, 12.01s/it] 4%|▎ | 252/7045 [49:15<22:11:11, 11.76s/it] {'loss': 1.1279, 'learning_rate': 4.999577239250625e-06, 'epoch': 0.04} + 4%|▎ | 252/7045 [49:15<22:11:11, 11.76s/it] 4%|▎ | 253/7045 [49:26<21:56:58, 11.63s/it] {'loss': 1.1963, 'learning_rate': 4.999555837621448e-06, 'epoch': 0.04} + 4%|▎ | 253/7045 [49:26<21:56:58, 11.63s/it] 4%|▎ | 254/7045 [49:38<22:00:05, 11.66s/it] {'loss': 1.1309, 'learning_rate': 4.999533907620339e-06, 'epoch': 0.04} + 4%|▎ | 254/7045 [49:38<22:00:05, 11.66s/it] 4%|▎ | 255/7045 [49:49<21:39:00, 11.48s/it] {'loss': 1.1484, 'learning_rate': 4.999511449251934e-06, 'epoch': 0.04} + 4%|▎ | 255/7045 [49:49<21:39:00, 11.48s/it] 4%|▎ | 256/7045 [50:01<22:02:58, 11.69s/it] {'loss': 1.1621, 'learning_rate': 4.99948846252098e-06, 'epoch': 0.04} + 4%|▎ | 256/7045 [50:01<22:02:58, 11.69s/it] 4%|▎ | 257/7045 [50:13<22:13:01, 11.78s/it] {'loss': 1.1621, 'learning_rate': 4.999464947432337e-06, 'epoch': 0.04} + 4%|▎ | 257/7045 [50:13<22:13:01, 11.78s/it] 4%|▎ | 258/7045 [50:26<22:36:59, 12.00s/it] {'loss': 1.1299, 'learning_rate': 4.999440903990973e-06, 'epoch': 0.04} + 4%|▎ | 258/7045 [50:26<22:36:59, 12.00s/it] 4%|▎ | 259/7045 [50:43<25:43:13, 13.64s/it] {'loss': 1.1328, 'learning_rate': 4.999416332201973e-06, 'epoch': 0.04} + 4%|▎ | 259/7045 [50:43<25:43:13, 13.64s/it] 4%|▎ | 260/7045 [50:55<24:28:02, 12.98s/it] {'loss': 1.1895, 'learning_rate': 4.999391232070531e-06, 'epoch': 0.04} + 4%|▎ | 260/7045 [50:55<24:28:02, 12.98s/it] 4%|▎ | 261/7045 [51:08<24:31:40, 13.02s/it] {'loss': 1.126, 'learning_rate': 4.999365603601951e-06, 'epoch': 0.04} + 4%|▎ | 261/7045 [51:08<24:31:40, 13.02s/it] 4%|▎ | 262/7045 [51:19<23:36:27, 12.53s/it] {'loss': 1.1621, 'learning_rate': 4.999339446801653e-06, 'epoch': 0.04} + 4%|▎ | 262/7045 [51:19<23:36:27, 12.53s/it] 4%|▎ | 263/7045 [51:32<23:56:37, 12.71s/it] {'loss': 1.1846, 'learning_rate': 4.999312761675165e-06, 'epoch': 0.04} + 4%|▎ | 263/7045 [51:32<23:56:37, 12.71s/it] 4%|▎ | 264/7045 [51:44<23:04:29, 12.25s/it] {'loss': 1.2119, 'learning_rate': 4.9992855482281265e-06, 'epoch': 0.04} + 4%|▎ | 264/7045 [51:44<23:04:29, 12.25s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 12 bytes but only got 8. + warnings.warn(str(msg)) + 4%|▍ | 265/7045 [51:57<23:39:20, 12.56s/it] {'loss': 1.1621, 'learning_rate': 4.999257806466292e-06, 'epoch': 0.04} + 4%|▍ | 265/7045 [51:57<23:39:20, 12.56s/it] 4%|▍ | 266/7045 [52:08<22:54:11, 12.16s/it] {'loss': 1.2021, 'learning_rate': 4.999229536395525e-06, 'epoch': 0.04} + 4%|▍ | 266/7045 [52:08<22:54:11, 12.16s/it] 4%|▍ | 267/7045 [52:21<23:07:27, 12.28s/it] {'loss': 1.1455, 'learning_rate': 4.999200738021802e-06, 'epoch': 0.04} + 4%|▍ | 267/7045 [52:21<23:07:27, 12.28s/it] 4%|▍ | 268/7045 [52:33<22:58:36, 12.21s/it] {'loss': 1.1943, 'learning_rate': 4.9991714113512104e-06, 'epoch': 0.04} + 4%|▍ | 268/7045 [52:33<22:58:36, 12.21s/it] 4%|▍ | 269/7045 [52:44<22:31:18, 11.97s/it] {'loss': 1.2002, 'learning_rate': 4.999141556389948e-06, 'epoch': 0.04} + 4%|▍ | 269/7045 [52:44<22:31:18, 11.97s/it] 4%|▍ | 270/7045 [52:56<22:36:18, 12.01s/it] {'loss': 1.1865, 'learning_rate': 4.9991111731443275e-06, 'epoch': 0.04} + 4%|▍ | 270/7045 [52:56<22:36:18, 12.01s/it] 4%|▍ | 271/7045 [53:09<22:50:38, 12.14s/it] {'loss': 1.1787, 'learning_rate': 4.999080261620771e-06, 'epoch': 0.04} + 4%|▍ | 271/7045 [53:09<22:50:38, 12.14s/it] 4%|▍ | 272/7045 [53:20<22:07:01, 11.76s/it] {'loss': 1.1953, 'learning_rate': 4.999048821825813e-06, 'epoch': 0.04} + 4%|▍ | 272/7045 [53:20<22:07:01, 11.76s/it] 4%|▍ | 273/7045 [53:31<21:43:15, 11.55s/it] {'loss': 1.1807, 'learning_rate': 4.999016853766098e-06, 'epoch': 0.04} + 4%|▍ | 273/7045 [53:31<21:43:15, 11.55s/it] 4%|▍ | 274/7045 [53:41<21:21:02, 11.35s/it] {'loss': 1.2012, 'learning_rate': 4.998984357448385e-06, 'epoch': 0.04} + 4%|▍ | 274/7045 [53:41<21:21:02, 11.35s/it] 4%|▍ | 275/7045 [53:53<21:28:31, 11.42s/it] {'loss': 1.1689, 'learning_rate': 4.998951332879544e-06, 'epoch': 0.04} + 4%|▍ | 275/7045 [53:53<21:28:31, 11.42s/it] 4%|▍ | 276/7045 [54:04<21:22:32, 11.37s/it] {'loss': 1.21, 'learning_rate': 4.998917780066554e-06, 'epoch': 0.04} + 4%|▍ | 276/7045 [54:04<21:22:32, 11.37s/it] 4%|▍ | 277/7045 [54:17<21:51:14, 11.62s/it] {'loss': 1.1768, 'learning_rate': 4.998883699016509e-06, 'epoch': 0.04} + 4%|▍ | 277/7045 [54:17<21:51:14, 11.62s/it] 4%|▍ | 278/7045 [54:29<22:05:18, 11.75s/it] {'loss': 1.1807, 'learning_rate': 4.998849089736612e-06, 'epoch': 0.04} + 4%|▍ | 278/7045 [54:29<22:05:18, 11.75s/it] 4%|▍ | 279/7045 [54:40<21:40:16, 11.53s/it] {'loss': 1.1777, 'learning_rate': 4.99881395223418e-06, 'epoch': 0.04} + 4%|▍ | 279/7045 [54:40<21:40:16, 11.53s/it] 4%|▍ | 280/7045 [54:51<21:26:50, 11.41s/it] {'loss': 1.1865, 'learning_rate': 4.9987782865166415e-06, 'epoch': 0.04} + 4%|▍ | 280/7045 [54:51<21:26:50, 11.41s/it] 4%|▍ | 281/7045 [55:02<21:21:21, 11.37s/it] {'loss': 1.1562, 'learning_rate': 4.998742092591533e-06, 'epoch': 0.04} + 4%|▍ | 281/7045 [55:02<21:21:21, 11.37s/it] 4%|▍ | 282/7045 [55:13<21:13:35, 11.30s/it] {'loss': 1.1787, 'learning_rate': 4.9987053704665075e-06, 'epoch': 0.04} + 4%|▍ | 282/7045 [55:13<21:13:35, 11.30s/it] 4%|▍ | 283/7045 [55:25<21:44:58, 11.58s/it] {'loss': 1.1768, 'learning_rate': 4.998668120149327e-06, 'epoch': 0.04} + 4%|▍ | 283/7045 [55:25<21:44:58, 11.58s/it] 4%|▍ | 284/7045 [55:37<21:38:59, 11.53s/it] {'loss': 1.1621, 'learning_rate': 4.998630341647866e-06, 'epoch': 0.04} + 4%|▍ | 284/7045 [55:37<21:38:59, 11.53s/it] 4%|▍ | 285/7045 [55:48<21:28:19, 11.43s/it] {'loss': 1.1934, 'learning_rate': 4.99859203497011e-06, 'epoch': 0.04} + 4%|▍ | 285/7045 [55:48<21:28:19, 11.43s/it] 4%|▍ | 286/7045 [55:59<21:16:00, 11.33s/it] {'loss': 1.1426, 'learning_rate': 4.998553200124157e-06, 'epoch': 0.04} + 4%|▍ | 286/7045 [55:59<21:16:00, 11.33s/it] 4%|▍ | 287/7045 [56:12<21:55:33, 11.68s/it] {'loss': 1.1758, 'learning_rate': 4.998513837118214e-06, 'epoch': 0.04} + 4%|▍ | 287/7045 [56:12<21:55:33, 11.68s/it] 4%|▍ | 288/7045 [56:23<21:43:18, 11.57s/it] {'loss': 1.1797, 'learning_rate': 4.998473945960605e-06, 'epoch': 0.04} + 4%|▍ | 288/7045 [56:23<21:43:18, 11.57s/it] 4%|▍ | 289/7045 [56:34<21:36:23, 11.51s/it] {'loss': 1.1748, 'learning_rate': 4.998433526659761e-06, 'epoch': 0.04} + 4%|▍ | 289/7045 [56:34<21:36:23, 11.51s/it] 4%|▍ | 290/7045 [56:47<22:20:58, 11.91s/it] {'loss': 1.1396, 'learning_rate': 4.998392579224226e-06, 'epoch': 0.04} + 4%|▍ | 290/7045 [56:47<22:20:58, 11.91s/it] 4%|▍ | 291/7045 [56:58<21:59:00, 11.72s/it] {'loss': 1.2168, 'learning_rate': 4.9983511036626555e-06, 'epoch': 0.04} + 4%|▍ | 291/7045 [56:58<21:59:00, 11.72s/it] 4%|▍ | 292/7045 [57:12<22:51:47, 12.19s/it] {'loss': 1.1104, 'learning_rate': 4.998309099983817e-06, 'epoch': 0.04} + 4%|▍ | 292/7045 [57:12<22:51:47, 12.19s/it] 4%|▍ | 293/7045 [57:24<23:02:09, 12.28s/it] {'loss': 1.1953, 'learning_rate': 4.99826656819659e-06, 'epoch': 0.04} + 4%|▍ | 293/7045 [57:24<23:02:09, 12.28s/it] 4%|▍ | 294/7045 [57:37<23:12:31, 12.38s/it] {'loss': 1.1504, 'learning_rate': 4.998223508309965e-06, 'epoch': 0.04} + 4%|▍ | 294/7045 [57:37<23:12:31, 12.38s/it] 4%|▍ | 295/7045 [57:48<22:49:40, 12.17s/it] {'loss': 1.1523, 'learning_rate': 4.998179920333044e-06, 'epoch': 0.04} + 4%|▍ | 295/7045 [57:48<22:49:40, 12.17s/it] 4%|▍ | 296/7045 [58:00<22:15:40, 11.87s/it] {'loss': 1.1973, 'learning_rate': 4.99813580427504e-06, 'epoch': 0.04} + 4%|▍ | 296/7045 [58:00<22:15:40, 11.87s/it] 4%|▍ | 297/7045 [58:11<21:42:39, 11.58s/it] {'loss': 1.1943, 'learning_rate': 4.998091160145281e-06, 'epoch': 0.04} + 4%|▍ | 297/7045 [58:11<21:42:39, 11.58s/it] 4%|▍ | 298/7045 [58:22<21:37:33, 11.54s/it] {'loss': 1.2119, 'learning_rate': 4.998045987953202e-06, 'epoch': 0.04} + 4%|▍ | 298/7045 [58:22<21:37:33, 11.54s/it] 4%|▍ | 299/7045 [58:34<22:00:57, 11.75s/it] {'loss': 1.1523, 'learning_rate': 4.998000287708353e-06, 'epoch': 0.04} + 4%|▍ | 299/7045 [58:34<22:00:57, 11.75s/it] 4%|▍ | 300/7045 [58:45<21:38:20, 11.55s/it] {'loss': 1.1602, 'learning_rate': 4.997954059420393e-06, 'epoch': 0.04} + 4%|▍ | 300/7045 [58:45<21:38:20, 11.55s/it] 4%|▍ | 301/7045 [58:56<21:18:21, 11.37s/it] {'loss': 1.1768, 'learning_rate': 4.997907303099095e-06, 'epoch': 0.04} + 4%|▍ | 301/7045 [58:56<21:18:21, 11.37s/it] 4%|▍ | 302/7045 [59:08<21:14:36, 11.34s/it] {'loss': 1.1279, 'learning_rate': 4.9978600187543435e-06, 'epoch': 0.04} + 4%|▍ | 302/7045 [59:08<21:14:36, 11.34s/it] 4%|▍ | 303/7045 [59:19<21:27:12, 11.46s/it] {'loss': 1.1367, 'learning_rate': 4.997812206396132e-06, 'epoch': 0.04} + 4%|▍ | 303/7045 [59:19<21:27:12, 11.46s/it] 4%|▍ | 304/7045 [59:31<21:22:49, 11.42s/it] {'loss': 1.1787, 'learning_rate': 4.997763866034568e-06, 'epoch': 0.04} + 4%|▍ | 304/7045 [59:31<21:22:49, 11.42s/it] 4%|▍ | 305/7045 [59:42<21:14:34, 11.35s/it] {'loss': 1.1475, 'learning_rate': 4.9977149976798715e-06, 'epoch': 0.04} + 4%|▍ | 305/7045 [59:42<21:14:34, 11.35s/it] 4%|▍ | 306/7045 [59:55<22:20:04, 11.93s/it] {'loss': 1.1143, 'learning_rate': 4.9976656013423695e-06, 'epoch': 0.04} + 4%|▍ | 306/7045 [59:55<22:20:04, 11.93s/it] 4%|▍ | 307/7045 [1:00:06<21:55:01, 11.71s/it] {'loss': 1.1963, 'learning_rate': 4.997615677032507e-06, 'epoch': 0.04} + 4%|▍ | 307/7045 [1:00:06<21:55:01, 11.71s/it] 4%|▍ | 308/7045 [1:00:17<21:27:58, 11.47s/it] {'loss': 1.1699, 'learning_rate': 4.997565224760835e-06, 'epoch': 0.04} + 4%|▍ | 308/7045 [1:00:17<21:27:58, 11.47s/it] 4%|▍ | 309/7045 [1:00:28<21:13:19, 11.34s/it] {'loss': 1.165, 'learning_rate': 4.997514244538019e-06, 'epoch': 0.04} + 4%|▍ | 309/7045 [1:00:28<21:13:19, 11.34s/it] 4%|▍ | 310/7045 [1:00:40<21:18:33, 11.39s/it] {'loss': 1.1426, 'learning_rate': 4.997462736374836e-06, 'epoch': 0.04} + 4%|▍ | 310/7045 [1:00:40<21:18:33, 11.39s/it] 4%|▍ | 311/7045 [1:00:52<21:46:52, 11.64s/it] {'loss': 1.1504, 'learning_rate': 4.997410700282174e-06, 'epoch': 0.04} + 4%|▍ | 311/7045 [1:00:52<21:46:52, 11.64s/it] 4%|▍ | 312/7045 [1:01:06<22:54:54, 12.25s/it] {'loss': 1.168, 'learning_rate': 4.997358136271032e-06, 'epoch': 0.04} + 4%|▍ | 312/7045 [1:01:06<22:54:54, 12.25s/it] 4%|▍ | 313/7045 [1:01:18<23:01:03, 12.31s/it] {'loss': 1.1553, 'learning_rate': 4.997305044352523e-06, 'epoch': 0.04} + 4%|▍ | 313/7045 [1:01:18<23:01:03, 12.31s/it] 4%|▍ | 314/7045 [1:01:30<23:00:38, 12.31s/it] {'loss': 1.1831, 'learning_rate': 4.997251424537868e-06, 'epoch': 0.04} + 4%|▍ | 314/7045 [1:01:30<23:00:38, 12.31s/it] 4%|▍ | 315/7045 [1:01:44<23:32:10, 12.59s/it] {'loss': 1.126, 'learning_rate': 4.997197276838403e-06, 'epoch': 0.04} + 4%|▍ | 315/7045 [1:01:44<23:32:10, 12.59s/it] 4%|▍ | 316/7045 [1:01:56<23:36:43, 12.63s/it] {'loss': 1.1855, 'learning_rate': 4.997142601265573e-06, 'epoch': 0.04} + 4%|▍ | 316/7045 [1:01:56<23:36:43, 12.63s/it] 4%|▍ | 317/7045 [1:02:10<24:00:54, 12.85s/it] {'loss': 1.1426, 'learning_rate': 4.997087397830936e-06, 'epoch': 0.04} + 4%|▍ | 317/7045 [1:02:10<24:00:54, 12.85s/it] 5%|▍ | 318/7045 [1:02:21<23:04:51, 12.35s/it] {'loss': 1.167, 'learning_rate': 4.997031666546161e-06, 'epoch': 0.05} + 5%|▍ | 318/7045 [1:02:21<23:04:51, 12.35s/it] 5%|▍ | 319/7045 [1:02:32<22:27:22, 12.02s/it] {'loss': 1.1943, 'learning_rate': 4.9969754074230294e-06, 'epoch': 0.05} + 5%|▍ | 319/7045 [1:02:32<22:27:22, 12.02s/it] 5%|▍ | 320/7045 [1:02:44<22:32:47, 12.07s/it] {'loss': 1.1816, 'learning_rate': 4.9969186204734336e-06, 'epoch': 0.05} + 5%|▍ | 320/7045 [1:02:44<22:32:47, 12.07s/it] 5%|▍ | 321/7045 [1:02:57<22:48:23, 12.21s/it] {'loss': 1.1484, 'learning_rate': 4.9968613057093775e-06, 'epoch': 0.05} + 5%|▍ | 321/7045 [1:02:57<22:48:23, 12.21s/it] 5%|▍ | 322/7045 [1:03:08<22:09:48, 11.87s/it] {'loss': 1.1514, 'learning_rate': 4.996803463142976e-06, 'epoch': 0.05} + 5%|▍ | 322/7045 [1:03:08<22:09:48, 11.87s/it] 5%|▍ | 323/7045 [1:03:19<21:45:47, 11.66s/it] {'loss': 1.2002, 'learning_rate': 4.996745092786457e-06, 'epoch': 0.05} + 5%|▍ | 323/7045 [1:03:19<21:45:47, 11.66s/it] 5%|▍ | 324/7045 [1:03:30<21:28:28, 11.50s/it] {'loss': 1.1738, 'learning_rate': 4.996686194652158e-06, 'epoch': 0.05} + 5%|▍ | 324/7045 [1:03:30<21:28:28, 11.50s/it] 5%|▍ | 325/7045 [1:03:41<21:13:27, 11.37s/it] {'loss': 1.1689, 'learning_rate': 4.9966267687525314e-06, 'epoch': 0.05} + 5%|▍ | 325/7045 [1:03:41<21:13:27, 11.37s/it] 5%|▍ | 326/7045 [1:03:52<21:03:33, 11.28s/it] {'loss': 1.1406, 'learning_rate': 4.996566815100137e-06, 'epoch': 0.05} + 5%|▍ | 326/7045 [1:03:52<21:03:33, 11.28s/it] 5%|▍ | 327/7045 [1:04:04<21:09:51, 11.34s/it] {'loss': 1.1475, 'learning_rate': 4.99650633370765e-06, 'epoch': 0.05} + 5%|▍ | 327/7045 [1:04:04<21:09:51, 11.34s/it] 5%|▍ | 328/7045 [1:04:15<21:09:19, 11.34s/it] {'loss': 1.1855, 'learning_rate': 4.996445324587853e-06, 'epoch': 0.05} + 5%|▍ | 328/7045 [1:04:15<21:09:19, 11.34s/it] 5%|▍ | 329/7045 [1:04:27<21:18:38, 11.42s/it] {'loss': 1.166, 'learning_rate': 4.996383787753645e-06, 'epoch': 0.05} + 5%|▍ | 329/7045 [1:04:27<21:18:38, 11.42s/it] 5%|▍ | 330/7045 [1:04:40<22:29:52, 12.06s/it] {'loss': 1.1465, 'learning_rate': 4.996321723218032e-06, 'epoch': 0.05} + 5%|▍ | 330/7045 [1:04:40<22:29:52, 12.06s/it] 5%|▍ | 331/7045 [1:04:52<22:29:19, 12.06s/it] {'loss': 1.1494, 'learning_rate': 4.996259130994135e-06, 'epoch': 0.05} + 5%|▍ | 331/7045 [1:04:52<22:29:19, 12.06s/it] 5%|▍ | 332/7045 [1:05:06<23:10:01, 12.42s/it] {'loss': 1.1514, 'learning_rate': 4.996196011095184e-06, 'epoch': 0.05} + 5%|▍ | 332/7045 [1:05:06<23:10:01, 12.42s/it] 5%|▍ | 333/7045 [1:05:17<22:22:02, 12.00s/it] {'loss': 1.1416, 'learning_rate': 4.996132363534524e-06, 'epoch': 0.05} + 5%|▍ | 333/7045 [1:05:17<22:22:02, 12.00s/it] 5%|▍ | 334/7045 [1:05:27<21:42:25, 11.64s/it] {'loss': 1.1641, 'learning_rate': 4.996068188325606e-06, 'epoch': 0.05} + 5%|▍ | 334/7045 [1:05:27<21:42:25, 11.64s/it] 5%|▍ | 335/7045 [1:05:39<21:28:13, 11.52s/it] {'loss': 1.1914, 'learning_rate': 4.996003485481996e-06, 'epoch': 0.05} + 5%|▍ | 335/7045 [1:05:39<21:28:13, 11.52s/it] 5%|▍ | 336/7045 [1:05:50<21:09:16, 11.35s/it] {'loss': 1.1709, 'learning_rate': 4.9959382550173755e-06, 'epoch': 0.05} + 5%|▍ | 336/7045 [1:05:50<21:09:16, 11.35s/it] 5%|▍ | 337/7045 [1:06:01<20:57:37, 11.25s/it] {'loss': 1.1689, 'learning_rate': 4.9958724969455285e-06, 'epoch': 0.05} + 5%|▍ | 337/7045 [1:06:01<20:57:37, 11.25s/it] 5%|▍ | 338/7045 [1:06:14<22:06:41, 11.87s/it] {'loss': 1.1504, 'learning_rate': 4.995806211280357e-06, 'epoch': 0.05} + 5%|▍ | 338/7045 [1:06:14<22:06:41, 11.87s/it] 5%|▍ | 339/7045 [1:06:25<21:40:43, 11.64s/it] {'loss': 1.1445, 'learning_rate': 4.995739398035874e-06, 'epoch': 0.05} + 5%|▍ | 339/7045 [1:06:25<21:40:43, 11.64s/it] 5%|▍ | 340/7045 [1:06:37<21:51:15, 11.73s/it] {'loss': 1.1045, 'learning_rate': 4.995672057226202e-06, 'epoch': 0.05} + 5%|▍ | 340/7045 [1:06:37<21:51:15, 11.73s/it] 5%|▍ | 341/7045 [1:06:49<21:57:01, 11.79s/it] {'loss': 1.1553, 'learning_rate': 4.995604188865576e-06, 'epoch': 0.05} + 5%|▍ | 341/7045 [1:06:49<21:57:01, 11.79s/it] 5%|▍ | 342/7045 [1:07:02<22:34:44, 12.13s/it] {'loss': 1.1641, 'learning_rate': 4.995535792968342e-06, 'epoch': 0.05} + 5%|▍ | 342/7045 [1:07:02<22:34:44, 12.13s/it] 5%|▍ | 343/7045 [1:07:13<21:59:03, 11.81s/it] {'loss': 1.165, 'learning_rate': 4.9954668695489585e-06, 'epoch': 0.05} + 5%|▍ | 343/7045 [1:07:13<21:59:03, 11.81s/it] 5%|▍ | 344/7045 [1:07:26<22:48:10, 12.25s/it] {'loss': 1.1514, 'learning_rate': 4.995397418621994e-06, 'epoch': 0.05} + 5%|▍ | 344/7045 [1:07:26<22:48:10, 12.25s/it] 5%|▍ | 345/7045 [1:07:37<22:09:12, 11.90s/it] {'loss': 1.1475, 'learning_rate': 4.995327440202132e-06, 'epoch': 0.05} + 5%|▍ | 345/7045 [1:07:37<22:09:12, 11.90s/it] 5%|▍ | 346/7045 [1:07:51<22:57:50, 12.34s/it] {'loss': 1.1318, 'learning_rate': 4.995256934304162e-06, 'epoch': 0.05} + 5%|▍ | 346/7045 [1:07:51<22:57:50, 12.34s/it] 5%|▍ | 347/7045 [1:08:02<22:15:23, 11.96s/it] {'loss': 1.1777, 'learning_rate': 4.99518590094299e-06, 'epoch': 0.05} + 5%|▍ | 347/7045 [1:08:02<22:15:23, 11.96s/it] 5%|▍ | 348/7045 [1:08:14<22:28:51, 12.08s/it] {'loss': 1.2002, 'learning_rate': 4.995114340133631e-06, 'epoch': 0.05} + 5%|▍ | 348/7045 [1:08:14<22:28:51, 12.08s/it] 5%|▍ | 349/7045 [1:08:25<21:45:05, 11.69s/it] {'loss': 1.1279, 'learning_rate': 4.995042251891211e-06, 'epoch': 0.05} + 5%|▍ | 349/7045 [1:08:25<21:45:05, 11.69s/it] 5%|▍ | 350/7045 [1:08:38<22:18:40, 12.00s/it] {'loss': 1.125, 'learning_rate': 4.99496963623097e-06, 'epoch': 0.05} + 5%|▍ | 350/7045 [1:08:38<22:18:40, 12.00s/it] 5%|▍ | 351/7045 [1:08:48<21:39:53, 11.65s/it] {'loss': 1.1133, 'learning_rate': 4.9948964931682564e-06, 'epoch': 0.05} + 5%|▍ | 351/7045 [1:08:48<21:39:53, 11.65s/it] 5%|▍ | 352/7045 [1:09:00<21:47:53, 11.72s/it] {'loss': 1.1494, 'learning_rate': 4.994822822718532e-06, 'epoch': 0.05} + 5%|▍ | 352/7045 [1:09:00<21:47:53, 11.72s/it] 5%|▌ | 353/7045 [1:09:11<21:14:47, 11.43s/it] {'loss': 1.2168, 'learning_rate': 4.994748624897371e-06, 'epoch': 0.05} + 5%|▌ | 353/7045 [1:09:11<21:14:47, 11.43s/it] 5%|▌ | 354/7045 [1:09:22<21:02:30, 11.32s/it] {'loss': 1.1738, 'learning_rate': 4.994673899720457e-06, 'epoch': 0.05} + 5%|▌ | 354/7045 [1:09:22<21:02:30, 11.32s/it] 5%|▌ | 355/7045 [1:09:35<21:51:35, 11.76s/it] {'loss': 1.1357, 'learning_rate': 4.9945986472035855e-06, 'epoch': 0.05} + 5%|▌ | 355/7045 [1:09:35<21:51:35, 11.76s/it] 5%|▌ | 356/7045 [1:09:46<21:42:30, 11.68s/it] {'loss': 1.1846, 'learning_rate': 4.994522867362664e-06, 'epoch': 0.05} + 5%|▌ | 356/7045 [1:09:46<21:42:30, 11.68s/it] 5%|▌ | 357/7045 [1:09:58<21:53:32, 11.78s/it] {'loss': 1.1377, 'learning_rate': 4.994446560213712e-06, 'epoch': 0.05} + 5%|▌ | 357/7045 [1:09:58<21:53:32, 11.78s/it] 5%|▌ | 358/7045 [1:10:10<21:37:11, 11.64s/it] {'loss': 1.1719, 'learning_rate': 4.9943697257728584e-06, 'epoch': 0.05} + 5%|▌ | 358/7045 [1:10:10<21:37:11, 11.64s/it] 5%|▌ | 359/7045 [1:10:22<21:51:37, 11.77s/it] {'loss': 1.1729, 'learning_rate': 4.994292364056346e-06, 'epoch': 0.05} + 5%|▌ | 359/7045 [1:10:22<21:51:37, 11.77s/it] 5%|▌ | 360/7045 [1:10:33<21:23:37, 11.52s/it] {'loss': 1.1572, 'learning_rate': 4.994214475080529e-06, 'epoch': 0.05} + 5%|▌ | 360/7045 [1:10:33<21:23:37, 11.52s/it] 5%|▌ | 361/7045 [1:10:44<21:04:08, 11.35s/it] {'loss': 1.1621, 'learning_rate': 4.994136058861869e-06, 'epoch': 0.05} + 5%|▌ | 361/7045 [1:10:44<21:04:08, 11.35s/it] 5%|▌ | 362/7045 [1:10:55<21:03:03, 11.34s/it] {'loss': 1.166, 'learning_rate': 4.994057115416946e-06, 'epoch': 0.05} + 5%|▌ | 362/7045 [1:10:55<21:03:03, 11.34s/it] 5%|▌ | 363/7045 [1:11:06<21:03:01, 11.34s/it] {'loss': 1.1729, 'learning_rate': 4.993977644762445e-06, 'epoch': 0.05} + 5%|▌ | 363/7045 [1:11:06<21:03:01, 11.34s/it] 5%|▌ | 364/7045 [1:11:19<21:29:41, 11.58s/it] {'loss': 1.1289, 'learning_rate': 4.993897646915165e-06, 'epoch': 0.05} + 5%|▌ | 364/7045 [1:11:19<21:29:41, 11.58s/it] 5%|▌ | 365/7045 [1:11:31<21:55:58, 11.82s/it] {'loss': 1.1484, 'learning_rate': 4.993817121892017e-06, 'epoch': 0.05} + 5%|▌ | 365/7045 [1:11:31<21:55:58, 11.82s/it] 5%|▌ | 366/7045 [1:11:43<22:14:06, 11.98s/it] {'loss': 1.1152, 'learning_rate': 4.993736069710023e-06, 'epoch': 0.05} + 5%|▌ | 366/7045 [1:11:43<22:14:06, 11.98s/it] 5%|▌ | 367/7045 [1:11:54<21:38:57, 11.67s/it] {'loss': 1.1758, 'learning_rate': 4.993654490386317e-06, 'epoch': 0.05} + 5%|▌ | 367/7045 [1:11:54<21:38:57, 11.67s/it] 5%|▌ | 368/7045 [1:12:05<21:24:32, 11.54s/it] {'loss': 1.125, 'learning_rate': 4.993572383938143e-06, 'epoch': 0.05} + 5%|▌ | 368/7045 [1:12:05<21:24:32, 11.54s/it] 5%|▌ | 369/7045 [1:12:16<21:05:44, 11.38s/it] {'loss': 1.1855, 'learning_rate': 4.993489750382856e-06, 'epoch': 0.05} + 5%|▌ | 369/7045 [1:12:16<21:05:44, 11.38s/it] 5%|▌ | 370/7045 [1:12:28<20:58:16, 11.31s/it] {'loss': 1.1328, 'learning_rate': 4.993406589737926e-06, 'epoch': 0.05} + 5%|▌ | 370/7045 [1:12:28<20:58:16, 11.31s/it] 5%|▌ | 371/7045 [1:12:39<20:54:02, 11.27s/it] {'loss': 1.1943, 'learning_rate': 4.99332290202093e-06, 'epoch': 0.05} + 5%|▌ | 371/7045 [1:12:39<20:54:02, 11.27s/it] 5%|▌ | 372/7045 [1:12:52<21:43:38, 11.72s/it] {'loss': 1.1621, 'learning_rate': 4.99323868724956e-06, 'epoch': 0.05} + 5%|▌ | 372/7045 [1:12:52<21:43:38, 11.72s/it] 5%|▌ | 373/7045 [1:13:03<21:26:19, 11.57s/it] {'loss': 1.1592, 'learning_rate': 4.993153945441617e-06, 'epoch': 0.05} + 5%|▌ | 373/7045 [1:13:03<21:26:19, 11.57s/it] 5%|▌ | 374/7045 [1:13:14<21:11:00, 11.43s/it] {'loss': 1.1797, 'learning_rate': 4.993068676615014e-06, 'epoch': 0.05} + 5%|▌ | 374/7045 [1:13:14<21:11:00, 11.43s/it] 5%|▌ | 375/7045 [1:13:26<21:35:08, 11.65s/it] {'loss': 1.1523, 'learning_rate': 4.992982880787775e-06, 'epoch': 0.05} + 5%|▌ | 375/7045 [1:13:26<21:35:08, 11.65s/it] 5%|▌ | 376/7045 [1:13:37<21:23:01, 11.54s/it] {'loss': 1.1562, 'learning_rate': 4.992896557978039e-06, 'epoch': 0.05} + 5%|▌ | 376/7045 [1:13:37<21:23:01, 11.54s/it] 5%|▌ | 377/7045 [1:13:49<21:11:40, 11.44s/it] {'loss': 1.123, 'learning_rate': 4.99280970820405e-06, 'epoch': 0.05} + 5%|▌ | 377/7045 [1:13:49<21:11:40, 11.44s/it] 5%|▌ | 378/7045 [1:14:00<21:25:06, 11.57s/it] {'loss': 1.1885, 'learning_rate': 4.99272233148417e-06, 'epoch': 0.05} + 5%|▌ | 378/7045 [1:14:00<21:25:06, 11.57s/it] 5%|▌ | 379/7045 [1:14:12<21:16:23, 11.49s/it] {'loss': 1.1758, 'learning_rate': 4.992634427836867e-06, 'epoch': 0.05} + 5%|▌ | 379/7045 [1:14:12<21:16:23, 11.49s/it] 5%|▌ | 380/7045 [1:14:24<21:25:50, 11.58s/it] {'loss': 1.1885, 'learning_rate': 4.992545997280723e-06, 'epoch': 0.05} + 5%|▌ | 380/7045 [1:14:24<21:25:50, 11.58s/it] 5%|▌ | 381/7045 [1:14:35<21:16:44, 11.50s/it] {'loss': 1.1523, 'learning_rate': 4.992457039834431e-06, 'epoch': 0.05} + 5%|▌ | 381/7045 [1:14:35<21:16:44, 11.50s/it] 5%|▌ | 382/7045 [1:14:46<21:05:44, 11.40s/it] {'loss': 1.1699, 'learning_rate': 4.992367555516796e-06, 'epoch': 0.05} + 5%|▌ | 382/7045 [1:14:46<21:05:44, 11.40s/it] 5%|▌ | 383/7045 [1:14:57<20:58:04, 11.33s/it] {'loss': 1.1709, 'learning_rate': 4.992277544346734e-06, 'epoch': 0.05} + 5%|▌ | 383/7045 [1:14:57<20:58:04, 11.33s/it] 5%|▌ | 384/7045 [1:15:08<20:44:49, 11.21s/it] {'loss': 1.1387, 'learning_rate': 4.992187006343271e-06, 'epoch': 0.05} + 5%|▌ | 384/7045 [1:15:08<20:44:49, 11.21s/it] 5%|▌ | 385/7045 [1:15:19<20:37:59, 11.15s/it] {'loss': 1.1504, 'learning_rate': 4.992095941525546e-06, 'epoch': 0.05} + 5%|▌ | 385/7045 [1:15:19<20:37:59, 11.15s/it] 5%|▌ | 386/7045 [1:15:30<20:41:44, 11.19s/it] {'loss': 1.1465, 'learning_rate': 4.9920043499128095e-06, 'epoch': 0.05} + 5%|▌ | 386/7045 [1:15:30<20:41:44, 11.19s/it] 5%|▌ | 387/7045 [1:15:41<20:33:54, 11.12s/it] {'loss': 1.1631, 'learning_rate': 4.991912231524421e-06, 'epoch': 0.05} + 5%|▌ | 387/7045 [1:15:41<20:33:54, 11.12s/it] 6%|▌ | 388/7045 [1:15:53<21:06:32, 11.42s/it] {'loss': 1.1416, 'learning_rate': 4.991819586379856e-06, 'epoch': 0.06} + 6%|▌ | 388/7045 [1:15:53<21:06:32, 11.42s/it] 6%|▌ | 389/7045 [1:16:04<20:53:04, 11.30s/it] {'loss': 1.1816, 'learning_rate': 4.991726414498695e-06, 'epoch': 0.06} + 6%|▌ | 389/7045 [1:16:04<20:53:04, 11.30s/it] 6%|▌ | 390/7045 [1:16:18<21:57:26, 11.88s/it] {'loss': 1.1719, 'learning_rate': 4.9916327159006366e-06, 'epoch': 0.06} + 6%|▌ | 390/7045 [1:16:18<21:57:26, 11.88s/it] 6%|▌ | 391/7045 [1:16:29<21:34:05, 11.67s/it] {'loss': 1.1826, 'learning_rate': 4.991538490605485e-06, 'epoch': 0.06} + 6%|▌ | 391/7045 [1:16:29<21:34:05, 11.67s/it] 6%|▌ | 392/7045 [1:16:40<21:20:50, 11.55s/it] {'loss': 1.1992, 'learning_rate': 4.991443738633157e-06, 'epoch': 0.06} + 6%|▌ | 392/7045 [1:16:40<21:20:50, 11.55s/it] 6%|▌ | 393/7045 [1:16:52<21:22:45, 11.57s/it] {'loss': 1.1836, 'learning_rate': 4.9913484600036854e-06, 'epoch': 0.06} + 6%|▌ | 393/7045 [1:16:52<21:22:45, 11.57s/it] 6%|▌ | 394/7045 [1:17:05<22:00:56, 11.92s/it] {'loss': 1.1211, 'learning_rate': 4.9912526547372096e-06, 'epoch': 0.06} + 6%|▌ | 394/7045 [1:17:05<22:00:56, 11.92s/it] 6%|▌ | 395/7045 [1:17:16<21:36:08, 11.69s/it] {'loss': 1.1768, 'learning_rate': 4.99115632285398e-06, 'epoch': 0.06} + 6%|▌ | 395/7045 [1:17:16<21:36:08, 11.69s/it] 6%|▌ | 396/7045 [1:17:29<22:32:44, 12.21s/it] {'loss': 1.127, 'learning_rate': 4.991059464374361e-06, 'epoch': 0.06} + 6%|▌ | 396/7045 [1:17:29<22:32:44, 12.21s/it] 6%|▌ | 397/7045 [1:17:41<22:21:48, 12.11s/it] {'loss': 1.1484, 'learning_rate': 4.990962079318828e-06, 'epoch': 0.06} + 6%|▌ | 397/7045 [1:17:41<22:21:48, 12.11s/it] 6%|▌ | 398/7045 [1:17:52<21:51:28, 11.84s/it] {'loss': 1.209, 'learning_rate': 4.990864167707965e-06, 'epoch': 0.06} + 6%|▌ | 398/7045 [1:17:52<21:51:28, 11.84s/it] 6%|▌ | 399/7045 [1:18:03<21:24:34, 11.60s/it] {'loss': 1.1475, 'learning_rate': 4.99076572956247e-06, 'epoch': 0.06} + 6%|▌ | 399/7045 [1:18:03<21:24:34, 11.60s/it] 6%|▌ | 400/7045 [1:18:14<21:04:30, 11.42s/it] {'loss': 1.1729, 'learning_rate': 4.990666764903152e-06, 'epoch': 0.06} + 6%|▌ | 400/7045 [1:18:14<21:04:30, 11.42s/it] 6%|▌ | 401/7045 [1:18:26<21:07:18, 11.44s/it] {'loss': 1.208, 'learning_rate': 4.99056727375093e-06, 'epoch': 0.06} + 6%|▌ | 401/7045 [1:18:26<21:07:18, 11.44s/it] 6%|▌ | 402/7045 [1:18:38<21:50:48, 11.84s/it] {'loss': 1.1943, 'learning_rate': 4.990467256126835e-06, 'epoch': 0.06} + 6%|▌ | 402/7045 [1:18:38<21:50:48, 11.84s/it] 6%|▌ | 403/7045 [1:18:49<21:19:49, 11.56s/it] {'loss': 1.1768, 'learning_rate': 4.9903667120520104e-06, 'epoch': 0.06} + 6%|▌ | 403/7045 [1:18:49<21:19:49, 11.56s/it] 6%|▌ | 404/7045 [1:19:01<21:10:15, 11.48s/it] {'loss': 1.1436, 'learning_rate': 4.990265641547709e-06, 'epoch': 0.06} + 6%|▌ | 404/7045 [1:19:01<21:10:15, 11.48s/it] 6%|▌ | 405/7045 [1:19:13<21:38:45, 11.74s/it] {'loss': 1.1611, 'learning_rate': 4.990164044635296e-06, 'epoch': 0.06} + 6%|▌ | 405/7045 [1:19:13<21:38:45, 11.74s/it] 6%|▌ | 406/7045 [1:19:24<21:21:07, 11.58s/it] {'loss': 1.1963, 'learning_rate': 4.990061921336248e-06, 'epoch': 0.06} + 6%|▌ | 406/7045 [1:19:24<21:21:07, 11.58s/it] 6%|▌ | 407/7045 [1:19:35<21:08:31, 11.47s/it] {'loss': 1.165, 'learning_rate': 4.989959271672151e-06, 'epoch': 0.06} + 6%|▌ | 407/7045 [1:19:35<21:08:31, 11.47s/it] 6%|▌ | 408/7045 [1:19:47<21:02:44, 11.42s/it] {'loss': 1.1797, 'learning_rate': 4.989856095664706e-06, 'epoch': 0.06} + 6%|▌ | 408/7045 [1:19:47<21:02:44, 11.42s/it] 6%|▌ | 409/7045 [1:19:58<20:41:55, 11.23s/it] {'loss': 1.1553, 'learning_rate': 4.98975239333572e-06, 'epoch': 0.06} + 6%|▌ | 409/7045 [1:19:58<20:41:55, 11.23s/it] 6%|▌ | 410/7045 [1:20:09<20:55:59, 11.36s/it] {'loss': 1.1885, 'learning_rate': 4.989648164707118e-06, 'epoch': 0.06} + 6%|▌ | 410/7045 [1:20:09<20:55:59, 11.36s/it] 6%|▌ | 411/7045 [1:20:22<21:48:34, 11.84s/it] {'loss': 1.1353, 'learning_rate': 4.989543409800929e-06, 'epoch': 0.06} + 6%|▌ | 411/7045 [1:20:22<21:48:34, 11.84s/it] 6%|▌ | 412/7045 [1:20:34<21:40:03, 11.76s/it] {'loss': 1.1748, 'learning_rate': 4.989438128639299e-06, 'epoch': 0.06} + 6%|▌ | 412/7045 [1:20:34<21:40:03, 11.76s/it] 6%|▌ | 413/7045 [1:20:45<21:25:50, 11.63s/it] {'loss': 1.2236, 'learning_rate': 4.989332321244482e-06, 'epoch': 0.06} + 6%|▌ | 413/7045 [1:20:45<21:25:50, 11.63s/it] 6%|▌ | 414/7045 [1:20:56<21:15:42, 11.54s/it] {'loss': 1.1406, 'learning_rate': 4.989225987638844e-06, 'epoch': 0.06} + 6%|▌ | 414/7045 [1:20:56<21:15:42, 11.54s/it] 6%|▌ | 415/7045 [1:21:08<21:05:45, 11.45s/it] {'loss': 1.1729, 'learning_rate': 4.9891191278448635e-06, 'epoch': 0.06} + 6%|▌ | 415/7045 [1:21:08<21:05:45, 11.45s/it] 6%|▌ | 416/7045 [1:21:19<20:52:23, 11.34s/it] {'loss': 1.1895, 'learning_rate': 4.989011741885128e-06, 'epoch': 0.06} + 6%|▌ | 416/7045 [1:21:19<20:52:23, 11.34s/it] 6%|▌ | 417/7045 [1:21:30<20:39:18, 11.22s/it] {'loss': 1.165, 'learning_rate': 4.988903829782339e-06, 'epoch': 0.06} + 6%|▌ | 417/7045 [1:21:30<20:39:18, 11.22s/it] 6%|▌ | 418/7045 [1:21:42<21:05:00, 11.45s/it] {'loss': 1.1611, 'learning_rate': 4.988795391559307e-06, 'epoch': 0.06} + 6%|▌ | 418/7045 [1:21:42<21:05:00, 11.45s/it] 6%|▌ | 419/7045 [1:21:53<20:55:17, 11.37s/it] {'loss': 1.1611, 'learning_rate': 4.988686427238953e-06, 'epoch': 0.06} + 6%|▌ | 419/7045 [1:21:53<20:55:17, 11.37s/it] 6%|▌ | 420/7045 [1:22:06<21:46:56, 11.84s/it] {'loss': 1.1113, 'learning_rate': 4.9885769368443124e-06, 'epoch': 0.06} + 6%|▌ | 420/7045 [1:22:06<21:46:56, 11.84s/it] 6%|▌ | 421/7045 [1:22:17<21:22:31, 11.62s/it] {'loss': 1.1641, 'learning_rate': 4.988466920398528e-06, 'epoch': 0.06} + 6%|▌ | 421/7045 [1:22:17<21:22:31, 11.62s/it] 6%|▌ | 422/7045 [1:22:28<21:04:29, 11.46s/it] {'loss': 1.125, 'learning_rate': 4.9883563779248575e-06, 'epoch': 0.06} + 6%|▌ | 422/7045 [1:22:28<21:04:29, 11.46s/it] 6%|▌ | 423/7045 [1:22:41<21:47:09, 11.84s/it] {'loss': 1.1533, 'learning_rate': 4.988245309446669e-06, 'epoch': 0.06} + 6%|▌ | 423/7045 [1:22:41<21:47:09, 11.84s/it] 6%|▌ | 424/7045 [1:22:54<22:39:01, 12.32s/it] {'loss': 1.1162, 'learning_rate': 4.988133714987438e-06, 'epoch': 0.06} + 6%|▌ | 424/7045 [1:22:54<22:39:01, 12.32s/it] 6%|▌ | 425/7045 [1:23:05<21:57:52, 11.94s/it] {'loss': 1.2109, 'learning_rate': 4.988021594570756e-06, 'epoch': 0.06} + 6%|▌ | 425/7045 [1:23:05<21:57:52, 11.94s/it] 6%|▌ | 426/7045 [1:23:17<21:39:53, 11.78s/it] {'loss': 1.1387, 'learning_rate': 4.9879089482203226e-06, 'epoch': 0.06} + 6%|▌ | 426/7045 [1:23:17<21:39:53, 11.78s/it] 6%|▌ | 427/7045 [1:23:29<21:50:13, 11.88s/it] {'loss': 1.1738, 'learning_rate': 4.9877957759599504e-06, 'epoch': 0.06} + 6%|▌ | 427/7045 [1:23:29<21:50:13, 11.88s/it] 6%|▌ | 428/7045 [1:23:40<21:25:19, 11.65s/it] {'loss': 1.1299, 'learning_rate': 4.987682077813562e-06, 'epoch': 0.06} + 6%|▌ | 428/7045 [1:23:40<21:25:19, 11.65s/it] 6%|▌ | 429/7045 [1:23:55<23:24:38, 12.74s/it] {'loss': 1.1768, 'learning_rate': 4.987567853805193e-06, 'epoch': 0.06} + 6%|▌ | 429/7045 [1:23:55<23:24:38, 12.74s/it] 6%|▌ | 430/7045 [1:24:08<23:21:50, 12.72s/it] {'loss': 1.1514, 'learning_rate': 4.987453103958988e-06, 'epoch': 0.06} + 6%|▌ | 430/7045 [1:24:08<23:21:50, 12.72s/it] 6%|▌ | 431/7045 [1:24:19<22:33:46, 12.28s/it] {'loss': 1.1953, 'learning_rate': 4.987337828299203e-06, 'epoch': 0.06} + 6%|▌ | 431/7045 [1:24:19<22:33:46, 12.28s/it] 6%|▌ | 432/7045 [1:24:30<21:56:03, 11.94s/it] {'loss': 1.1543, 'learning_rate': 4.987222026850205e-06, 'epoch': 0.06} + 6%|▌ | 432/7045 [1:24:30<21:56:03, 11.94s/it] 6%|▌ | 433/7045 [1:24:41<21:25:24, 11.66s/it] {'loss': 1.1787, 'learning_rate': 4.987105699636474e-06, 'epoch': 0.06} + 6%|▌ | 433/7045 [1:24:41<21:25:24, 11.66s/it] 6%|▌ | 434/7045 [1:24:54<22:09:27, 12.07s/it] {'loss': 1.1611, 'learning_rate': 4.986988846682601e-06, 'epoch': 0.06} + 6%|▌ | 434/7045 [1:24:54<22:09:27, 12.07s/it] 6%|▌ | 435/7045 [1:25:05<21:30:06, 11.71s/it] {'loss': 1.1826, 'learning_rate': 4.986871468013285e-06, 'epoch': 0.06} + 6%|▌ | 435/7045 [1:25:05<21:30:06, 11.71s/it] 6%|▌ | 436/7045 [1:25:16<21:06:31, 11.50s/it] {'loss': 1.1855, 'learning_rate': 4.986753563653339e-06, 'epoch': 0.06} + 6%|▌ | 436/7045 [1:25:16<21:06:31, 11.50s/it] 6%|▌ | 437/7045 [1:25:28<21:09:09, 11.52s/it] {'loss': 1.1387, 'learning_rate': 4.986635133627687e-06, 'epoch': 0.06} + 6%|▌ | 437/7045 [1:25:28<21:09:09, 11.52s/it] 6%|▌ | 438/7045 [1:25:39<21:10:24, 11.54s/it] {'loss': 1.1914, 'learning_rate': 4.986516177961364e-06, 'epoch': 0.06} + 6%|▌ | 438/7045 [1:25:39<21:10:24, 11.54s/it] 6%|▌ | 439/7045 [1:25:51<21:29:21, 11.71s/it] {'loss': 1.1836, 'learning_rate': 4.986396696679514e-06, 'epoch': 0.06} + 6%|▌ | 439/7045 [1:25:51<21:29:21, 11.71s/it] 6%|▌ | 440/7045 [1:26:03<21:22:36, 11.65s/it] {'loss': 1.1895, 'learning_rate': 4.986276689807394e-06, 'epoch': 0.06} + 6%|▌ | 440/7045 [1:26:03<21:22:36, 11.65s/it] 6%|▋ | 441/7045 [1:26:14<21:16:03, 11.59s/it] {'loss': 1.1504, 'learning_rate': 4.986156157370372e-06, 'epoch': 0.06} + 6%|▋ | 441/7045 [1:26:14<21:16:03, 11.59s/it] 6%|▋ | 442/7045 [1:26:27<21:56:06, 11.96s/it] {'loss': 1.127, 'learning_rate': 4.986035099393928e-06, 'epoch': 0.06} + 6%|▋ | 442/7045 [1:26:27<21:56:06, 11.96s/it] 6%|▋ | 443/7045 [1:26:38<21:27:13, 11.70s/it] {'loss': 1.1592, 'learning_rate': 4.98591351590365e-06, 'epoch': 0.06} + 6%|▋ | 443/7045 [1:26:38<21:27:13, 11.70s/it] 6%|▋ | 444/7045 [1:26:49<20:57:29, 11.43s/it] {'loss': 1.1523, 'learning_rate': 4.985791406925241e-06, 'epoch': 0.06} + 6%|▋ | 444/7045 [1:26:49<20:57:29, 11.43s/it] 6%|▋ | 445/7045 [1:27:02<21:49:00, 11.90s/it] {'loss': 1.1172, 'learning_rate': 4.985668772484512e-06, 'epoch': 0.06} + 6%|▋ | 445/7045 [1:27:02<21:49:00, 11.90s/it] 6%|▋ | 446/7045 [1:27:13<21:29:11, 11.72s/it] {'loss': 1.1396, 'learning_rate': 4.985545612607387e-06, 'epoch': 0.06} + 6%|▋ | 446/7045 [1:27:13<21:29:11, 11.72s/it] 6%|▋ | 447/7045 [1:27:25<21:21:32, 11.65s/it] {'loss': 1.1719, 'learning_rate': 4.9854219273199e-06, 'epoch': 0.06} + 6%|▋ | 447/7045 [1:27:25<21:21:32, 11.65s/it] 6%|▋ | 448/7045 [1:27:37<21:31:23, 11.75s/it] {'loss': 1.1406, 'learning_rate': 4.9852977166481955e-06, 'epoch': 0.06} + 6%|▋ | 448/7045 [1:27:37<21:31:23, 11.75s/it] 6%|▋ | 449/7045 [1:27:48<21:09:40, 11.55s/it] {'loss': 1.209, 'learning_rate': 4.985172980618531e-06, 'epoch': 0.06} + 6%|▋ | 449/7045 [1:27:48<21:09:40, 11.55s/it] 6%|▋ | 450/7045 [1:28:00<21:28:17, 11.72s/it] {'loss': 1.1445, 'learning_rate': 4.985047719257274e-06, 'epoch': 0.06} + 6%|▋ | 450/7045 [1:28:00<21:28:17, 11.72s/it] 6%|▋ | 451/7045 [1:28:11<21:14:35, 11.60s/it] {'loss': 1.0962, 'learning_rate': 4.984921932590903e-06, 'epoch': 0.06} + 6%|▋ | 451/7045 [1:28:11<21:14:35, 11.60s/it] 6%|▋ | 452/7045 [1:28:22<20:54:20, 11.42s/it] {'loss': 1.1895, 'learning_rate': 4.9847956206460065e-06, 'epoch': 0.06} + 6%|▋ | 452/7045 [1:28:22<20:54:20, 11.42s/it] 6%|▋ | 453/7045 [1:28:34<20:51:52, 11.39s/it] {'loss': 1.1611, 'learning_rate': 4.9846687834492864e-06, 'epoch': 0.06} + 6%|▋ | 453/7045 [1:28:34<20:51:52, 11.39s/it] 6%|▋ | 454/7045 [1:28:45<20:48:00, 11.36s/it] {'loss': 1.1099, 'learning_rate': 4.9845414210275545e-06, 'epoch': 0.06} + 6%|▋ | 454/7045 [1:28:45<20:48:00, 11.36s/it] 6%|▋ | 455/7045 [1:28:56<20:47:59, 11.36s/it] {'loss': 1.168, 'learning_rate': 4.984413533407733e-06, 'epoch': 0.06} + 6%|▋ | 455/7045 [1:28:56<20:47:59, 11.36s/it] 6%|▋ | 456/7045 [1:29:08<20:50:13, 11.38s/it] {'loss': 1.1455, 'learning_rate': 4.984285120616854e-06, 'epoch': 0.06} + 6%|▋ | 456/7045 [1:29:08<20:50:13, 11.38s/it] 6%|▋ | 457/7045 [1:29:19<20:38:22, 11.28s/it] {'loss': 1.1787, 'learning_rate': 4.984156182682065e-06, 'epoch': 0.06} + 6%|▋ | 457/7045 [1:29:19<20:38:22, 11.28s/it] 7%|▋ | 458/7045 [1:29:30<20:31:23, 11.22s/it] {'loss': 1.1699, 'learning_rate': 4.98402671963062e-06, 'epoch': 0.07} + 7%|▋ | 458/7045 [1:29:30<20:31:23, 11.22s/it] 7%|▋ | 459/7045 [1:29:41<20:24:34, 11.16s/it] {'loss': 1.1729, 'learning_rate': 4.9838967314898865e-06, 'epoch': 0.07} + 7%|▋ | 459/7045 [1:29:41<20:24:34, 11.16s/it] 7%|▋ | 460/7045 [1:29:52<20:26:46, 11.18s/it] {'loss': 1.1855, 'learning_rate': 4.983766218287341e-06, 'epoch': 0.07} + 7%|▋ | 460/7045 [1:29:52<20:26:46, 11.18s/it] 7%|▋ | 461/7045 [1:30:03<20:24:20, 11.16s/it] {'loss': 1.1836, 'learning_rate': 4.983635180050573e-06, 'epoch': 0.07} + 7%|▋ | 461/7045 [1:30:03<20:24:20, 11.16s/it] 7%|▋ | 462/7045 [1:30:16<21:17:04, 11.64s/it] {'loss': 1.1333, 'learning_rate': 4.983503616807283e-06, 'epoch': 0.07} + 7%|▋ | 462/7045 [1:30:16<21:17:04, 11.64s/it] 7%|▋ | 463/7045 [1:30:28<21:17:56, 11.65s/it] {'loss': 1.1787, 'learning_rate': 4.983371528585281e-06, 'epoch': 0.07} + 7%|▋ | 463/7045 [1:30:28<21:17:56, 11.65s/it] 7%|▋ | 464/7045 [1:30:39<21:07:53, 11.56s/it] {'loss': 1.1606, 'learning_rate': 4.983238915412488e-06, 'epoch': 0.07} + 7%|▋ | 464/7045 [1:30:39<21:07:53, 11.56s/it] 7%|▋ | 465/7045 [1:30:50<20:53:33, 11.43s/it] {'loss': 1.1592, 'learning_rate': 4.983105777316938e-06, 'epoch': 0.07} + 7%|▋ | 465/7045 [1:30:50<20:53:33, 11.43s/it] 7%|▋ | 466/7045 [1:31:01<20:39:17, 11.30s/it] {'loss': 1.2002, 'learning_rate': 4.9829721143267724e-06, 'epoch': 0.07} + 7%|▋ | 466/7045 [1:31:01<20:39:17, 11.30s/it] 7%|▋ | 467/7045 [1:31:12<20:40:29, 11.31s/it] {'loss': 1.1465, 'learning_rate': 4.982837926470248e-06, 'epoch': 0.07} + 7%|▋ | 467/7045 [1:31:12<20:40:29, 11.31s/it] 7%|▋ | 468/7045 [1:31:23<20:25:53, 11.18s/it] {'loss': 1.1162, 'learning_rate': 4.982703213775729e-06, 'epoch': 0.07} + 7%|▋ | 468/7045 [1:31:23<20:25:53, 11.18s/it] 7%|▋ | 469/7045 [1:31:34<20:22:15, 11.15s/it] {'loss': 1.1582, 'learning_rate': 4.982567976271692e-06, 'epoch': 0.07} + 7%|▋ | 469/7045 [1:31:34<20:22:15, 11.15s/it] 7%|▋ | 470/7045 [1:31:48<21:28:57, 11.76s/it] {'loss': 1.1641, 'learning_rate': 4.982432213986725e-06, 'epoch': 0.07} + 7%|▋ | 470/7045 [1:31:48<21:28:57, 11.76s/it] 7%|▋ | 471/7045 [1:31:59<21:18:54, 11.67s/it] {'loss': 1.1855, 'learning_rate': 4.982295926949526e-06, 'epoch': 0.07} + 7%|▋ | 471/7045 [1:31:59<21:18:54, 11.67s/it] 7%|▋ | 472/7045 [1:32:11<21:35:28, 11.83s/it] {'loss': 1.1602, 'learning_rate': 4.982159115188904e-06, 'epoch': 0.07} + 7%|▋ | 472/7045 [1:32:11<21:35:28, 11.83s/it] 7%|▋ | 473/7045 [1:32:22<21:15:06, 11.64s/it] {'loss': 1.1787, 'learning_rate': 4.982021778733779e-06, 'epoch': 0.07} + 7%|▋ | 473/7045 [1:32:22<21:15:06, 11.64s/it] 7%|▋ | 474/7045 [1:32:35<21:58:15, 12.04s/it] {'loss': 1.1338, 'learning_rate': 4.981883917613182e-06, 'epoch': 0.07} + 7%|▋ | 474/7045 [1:32:35<21:58:15, 12.04s/it] 7%|▋ | 475/7045 [1:32:47<21:33:24, 11.81s/it] {'loss': 1.1465, 'learning_rate': 4.981745531856255e-06, 'epoch': 0.07} + 7%|▋ | 475/7045 [1:32:47<21:33:24, 11.81s/it] 7%|▋ | 476/7045 [1:32:58<21:22:33, 11.71s/it] {'loss': 1.1514, 'learning_rate': 4.981606621492251e-06, 'epoch': 0.07} + 7%|▋ | 476/7045 [1:32:58<21:22:33, 11.71s/it] 7%|▋ | 477/7045 [1:33:09<21:01:20, 11.52s/it] {'loss': 1.1494, 'learning_rate': 4.981467186550534e-06, 'epoch': 0.07} + 7%|▋ | 477/7045 [1:33:09<21:01:20, 11.52s/it] 7%|▋ | 478/7045 [1:33:20<20:42:31, 11.35s/it] {'loss': 1.1641, 'learning_rate': 4.981327227060579e-06, 'epoch': 0.07} + 7%|▋ | 478/7045 [1:33:20<20:42:31, 11.35s/it] 7%|▋ | 479/7045 [1:33:32<20:45:19, 11.38s/it] {'loss': 1.1914, 'learning_rate': 4.98118674305197e-06, 'epoch': 0.07} + 7%|▋ | 479/7045 [1:33:32<20:45:19, 11.38s/it] 7%|▋ | 480/7045 [1:33:43<20:34:06, 11.28s/it] {'loss': 1.1177, 'learning_rate': 4.981045734554405e-06, 'epoch': 0.07} + 7%|▋ | 480/7045 [1:33:43<20:34:06, 11.28s/it] 7%|▋ | 481/7045 [1:33:54<20:39:47, 11.33s/it] {'loss': 1.1572, 'learning_rate': 4.98090420159769e-06, 'epoch': 0.07} + 7%|▋ | 481/7045 [1:33:54<20:39:47, 11.33s/it] 7%|▋ | 482/7045 [1:34:05<20:39:21, 11.33s/it] {'loss': 1.1807, 'learning_rate': 4.980762144211744e-06, 'epoch': 0.07} + 7%|▋ | 482/7045 [1:34:05<20:39:21, 11.33s/it]/usr/local/lib/python3.9/dist-packages/PIL/Image.py:3074: DecompressionBombWarning: Image size (159222407 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. + warnings.warn( + 7%|▋ | 483/7045 [1:34:17<20:30:25, 11.25s/it] {'loss': 1.1572, 'learning_rate': 4.980619562426596e-06, 'epoch': 0.07} + 7%|▋ | 483/7045 [1:34:17<20:30:25, 11.25s/it] 7%|▋ | 484/7045 [1:34:29<20:59:39, 11.52s/it] {'loss': 1.1338, 'learning_rate': 4.980476456272386e-06, 'epoch': 0.07} + 7%|▋ | 484/7045 [1:34:29<20:59:39, 11.52s/it] 7%|▋ | 485/7045 [1:34:42<21:59:09, 12.07s/it] {'loss': 1.1484, 'learning_rate': 4.980332825779364e-06, 'epoch': 0.07} + 7%|▋ | 485/7045 [1:34:42<21:59:09, 12.07s/it] 7%|▋ | 486/7045 [1:34:55<22:14:24, 12.21s/it] {'loss': 1.1553, 'learning_rate': 4.980188670977891e-06, 'epoch': 0.07} + 7%|▋ | 486/7045 [1:34:55<22:14:24, 12.21s/it] 7%|▋ | 487/7045 [1:35:06<21:52:54, 12.01s/it] {'loss': 1.165, 'learning_rate': 4.980043991898441e-06, 'epoch': 0.07} + 7%|▋ | 487/7045 [1:35:06<21:52:54, 12.01s/it] 7%|▋ | 488/7045 [1:35:17<21:20:39, 11.72s/it] {'loss': 1.1758, 'learning_rate': 4.979898788571595e-06, 'epoch': 0.07} + 7%|▋ | 488/7045 [1:35:17<21:20:39, 11.72s/it] 7%|▋ | 489/7045 [1:35:28<21:00:54, 11.54s/it] {'loss': 1.1582, 'learning_rate': 4.9797530610280495e-06, 'epoch': 0.07} + 7%|▋ | 489/7045 [1:35:28<21:00:54, 11.54s/it] 7%|▋ | 490/7045 [1:35:40<21:22:53, 11.74s/it] {'loss': 1.1064, 'learning_rate': 4.979606809298608e-06, 'epoch': 0.07} + 7%|▋ | 490/7045 [1:35:40<21:22:53, 11.74s/it] 7%|▋ | 491/7045 [1:35:51<20:58:28, 11.52s/it] {'loss': 1.1934, 'learning_rate': 4.979460033414186e-06, 'epoch': 0.07} + 7%|▋ | 491/7045 [1:35:51<20:58:28, 11.52s/it] 7%|▋ | 492/7045 [1:36:03<20:43:20, 11.38s/it] {'loss': 1.1426, 'learning_rate': 4.979312733405811e-06, 'epoch': 0.07} + 7%|▋ | 492/7045 [1:36:03<20:43:20, 11.38s/it] 7%|▋ | 493/7045 [1:36:16<21:37:19, 11.88s/it] {'loss': 1.1816, 'learning_rate': 4.979164909304619e-06, 'epoch': 0.07} + 7%|▋ | 493/7045 [1:36:16<21:37:19, 11.88s/it] 7%|▋ | 494/7045 [1:36:29<22:28:31, 12.35s/it] {'loss': 1.1465, 'learning_rate': 4.979016561141857e-06, 'epoch': 0.07} + 7%|▋ | 494/7045 [1:36:29<22:28:31, 12.35s/it] 7%|▋ | 495/7045 [1:36:41<22:32:27, 12.39s/it] {'loss': 1.1514, 'learning_rate': 4.9788676889488865e-06, 'epoch': 0.07} + 7%|▋ | 495/7045 [1:36:41<22:32:27, 12.39s/it] 7%|▋ | 496/7045 [1:36:53<21:50:18, 12.00s/it] {'loss': 1.167, 'learning_rate': 4.978718292757176e-06, 'epoch': 0.07} + 7%|▋ | 496/7045 [1:36:53<21:50:18, 12.00s/it] 7%|▋ | 497/7045 [1:37:04<21:15:42, 11.69s/it] {'loss': 1.1875, 'learning_rate': 4.978568372598304e-06, 'epoch': 0.07} + 7%|▋ | 497/7045 [1:37:04<21:15:42, 11.69s/it] 7%|▋ | 498/7045 [1:37:15<20:51:57, 11.47s/it] {'loss': 1.1504, 'learning_rate': 4.9784179285039646e-06, 'epoch': 0.07} + 7%|▋ | 498/7045 [1:37:15<20:51:57, 11.47s/it] 7%|▋ | 499/7045 [1:37:26<21:05:08, 11.60s/it] {'loss': 1.209, 'learning_rate': 4.978266960505957e-06, 'epoch': 0.07} + 7%|▋ | 499/7045 [1:37:26<21:05:08, 11.60s/it] 7%|▋ | 500/7045 [1:37:37<20:48:20, 11.44s/it] {'loss': 1.1562, 'learning_rate': 4.978115468636195e-06, 'epoch': 0.07} + 7%|▋ | 500/7045 [1:37:37<20:48:20, 11.44s/it] 7%|▋ | 501/7045 [1:37:48<20:31:28, 11.29s/it] {'loss': 1.1729, 'learning_rate': 4.977963452926703e-06, 'epoch': 0.07} + 7%|▋ | 501/7045 [1:37:48<20:31:28, 11.29s/it] 7%|▋ | 502/7045 [1:38:00<20:44:14, 11.41s/it] {'loss': 1.1201, 'learning_rate': 4.9778109134096125e-06, 'epoch': 0.07} + 7%|▋ | 502/7045 [1:38:00<20:44:14, 11.41s/it] 7%|▋ | 503/7045 [1:38:14<21:51:12, 12.03s/it] {'loss': 1.1426, 'learning_rate': 4.97765785011717e-06, 'epoch': 0.07} + 7%|▋ | 503/7045 [1:38:14<21:51:12, 12.03s/it] 7%|▋ | 504/7045 [1:38:26<21:48:00, 12.00s/it] {'loss': 1.1992, 'learning_rate': 4.97750426308173e-06, 'epoch': 0.07} + 7%|▋ | 504/7045 [1:38:26<21:48:00, 12.00s/it] 7%|▋ | 505/7045 [1:38:38<22:11:57, 12.22s/it] {'loss': 1.1206, 'learning_rate': 4.97735015233576e-06, 'epoch': 0.07} + 7%|▋ | 505/7045 [1:38:38<22:11:57, 12.22s/it] 7%|▋ | 506/7045 [1:38:51<22:31:24, 12.40s/it] {'loss': 1.1494, 'learning_rate': 4.977195517911836e-06, 'epoch': 0.07} + 7%|▋ | 506/7045 [1:38:51<22:31:24, 12.40s/it] 7%|▋ | 507/7045 [1:39:02<21:50:36, 12.03s/it] {'loss': 1.168, 'learning_rate': 4.9770403598426465e-06, 'epoch': 0.07} + 7%|▋ | 507/7045 [1:39:02<21:50:36, 12.03s/it] 7%|▋ | 508/7045 [1:39:13<21:19:36, 11.74s/it] {'loss': 1.1768, 'learning_rate': 4.976884678160988e-06, 'epoch': 0.07} + 7%|▋ | 508/7045 [1:39:13<21:19:36, 11.74s/it] 7%|▋ | 509/7045 [1:39:26<22:04:26, 12.16s/it] {'loss': 1.1113, 'learning_rate': 4.976728472899771e-06, 'epoch': 0.07} + 7%|▋ | 509/7045 [1:39:26<22:04:26, 12.16s/it] 7%|▋ | 510/7045 [1:39:38<21:40:15, 11.94s/it] {'loss': 1.1582, 'learning_rate': 4.976571744092015e-06, 'epoch': 0.07} + 7%|▋ | 510/7045 [1:39:38<21:40:15, 11.94s/it] 7%|▋ | 511/7045 [1:39:49<21:19:19, 11.75s/it] {'loss': 1.1572, 'learning_rate': 4.976414491770849e-06, 'epoch': 0.07} + 7%|▋ | 511/7045 [1:39:49<21:19:19, 11.75s/it] 7%|▋ | 512/7045 [1:40:01<21:32:11, 11.87s/it] {'loss': 1.1699, 'learning_rate': 4.9762567159695155e-06, 'epoch': 0.07} + 7%|▋ | 512/7045 [1:40:01<21:32:11, 11.87s/it] 7%|▋ | 513/7045 [1:40:14<21:56:36, 12.09s/it] {'loss': 1.1553, 'learning_rate': 4.976098416721366e-06, 'epoch': 0.07} + 7%|▋ | 513/7045 [1:40:14<21:56:36, 12.09s/it] 7%|▋ | 514/7045 [1:40:26<21:40:04, 11.94s/it] {'loss': 1.1553, 'learning_rate': 4.975939594059862e-06, 'epoch': 0.07} + 7%|▋ | 514/7045 [1:40:26<21:40:04, 11.94s/it] 7%|▋ | 515/7045 [1:40:37<21:11:10, 11.68s/it] {'loss': 1.1387, 'learning_rate': 4.975780248018578e-06, 'epoch': 0.07} + 7%|▋ | 515/7045 [1:40:37<21:11:10, 11.68s/it] 7%|▋ | 516/7045 [1:40:48<20:46:51, 11.46s/it] {'loss': 1.1611, 'learning_rate': 4.975620378631195e-06, 'epoch': 0.07} + 7%|▋ | 516/7045 [1:40:48<20:46:51, 11.46s/it] 7%|▋ | 517/7045 [1:40:59<20:48:31, 11.48s/it] {'loss': 1.1875, 'learning_rate': 4.97545998593151e-06, 'epoch': 0.07} + 7%|▋ | 517/7045 [1:40:59<20:48:31, 11.48s/it] 7%|▋ | 518/7045 [1:41:10<20:47:19, 11.47s/it] {'loss': 1.1709, 'learning_rate': 4.975299069953426e-06, 'epoch': 0.07} + 7%|▋ | 518/7045 [1:41:10<20:47:19, 11.47s/it] 7%|▋ | 519/7045 [1:41:22<20:46:03, 11.46s/it] {'loss': 1.1621, 'learning_rate': 4.9751376307309585e-06, 'epoch': 0.07} + 7%|▋ | 519/7045 [1:41:22<20:46:03, 11.46s/it] 7%|▋ | 520/7045 [1:41:33<20:26:05, 11.27s/it] {'loss': 1.1797, 'learning_rate': 4.9749756682982344e-06, 'epoch': 0.07} + 7%|▋ | 520/7045 [1:41:33<20:26:05, 11.27s/it] 7%|▋ | 521/7045 [1:41:44<20:32:37, 11.34s/it] {'loss': 1.1475, 'learning_rate': 4.97481318268949e-06, 'epoch': 0.07} + 7%|▋ | 521/7045 [1:41:44<20:32:37, 11.34s/it] 7%|▋ | 522/7045 [1:41:57<21:06:48, 11.65s/it] {'loss': 1.1719, 'learning_rate': 4.974650173939072e-06, 'epoch': 0.07} + 7%|▋ | 522/7045 [1:41:57<21:06:48, 11.65s/it] 7%|▋ | 523/7045 [1:42:08<20:47:11, 11.47s/it] {'loss': 1.1377, 'learning_rate': 4.974486642081439e-06, 'epoch': 0.07} + 7%|▋ | 523/7045 [1:42:08<20:47:11, 11.47s/it] 7%|▋ | 524/7045 [1:42:19<20:51:27, 11.51s/it] {'loss': 1.1523, 'learning_rate': 4.974322587151159e-06, 'epoch': 0.07} + 7%|▋ | 524/7045 [1:42:19<20:51:27, 11.51s/it] 7%|▋ | 525/7045 [1:42:31<21:11:56, 11.70s/it] {'loss': 1.1738, 'learning_rate': 4.974158009182912e-06, 'epoch': 0.07} + 7%|▋ | 525/7045 [1:42:31<21:11:56, 11.70s/it] 7%|▋ | 526/7045 [1:42:42<20:47:01, 11.48s/it] {'loss': 1.1689, 'learning_rate': 4.973992908211486e-06, 'epoch': 0.07} + 7%|▋ | 526/7045 [1:42:42<20:47:01, 11.48s/it] 7%|▋ | 527/7045 [1:42:54<20:50:07, 11.51s/it] {'loss': 1.1416, 'learning_rate': 4.973827284271781e-06, 'epoch': 0.07} + 7%|▋ | 527/7045 [1:42:54<20:50:07, 11.51s/it] 7%|▋ | 528/7045 [1:43:05<20:38:30, 11.40s/it] {'loss': 1.1416, 'learning_rate': 4.973661137398809e-06, 'epoch': 0.07} + 7%|▋ | 528/7045 [1:43:05<20:38:30, 11.40s/it] 8%|▊ | 529/7045 [1:43:16<20:28:54, 11.32s/it] {'loss': 1.1875, 'learning_rate': 4.97349446762769e-06, 'epoch': 0.08} + 8%|▊ | 529/7045 [1:43:16<20:28:54, 11.32s/it] 8%|▊ | 530/7045 [1:43:29<21:04:57, 11.65s/it] {'loss': 1.1475, 'learning_rate': 4.973327274993657e-06, 'epoch': 0.08} + 8%|▊ | 530/7045 [1:43:29<21:04:57, 11.65s/it] 8%|▊ | 531/7045 [1:43:41<21:10:15, 11.70s/it] {'loss': 1.1875, 'learning_rate': 4.973159559532051e-06, 'epoch': 0.08} + 8%|▊ | 531/7045 [1:43:41<21:10:15, 11.70s/it] 8%|▊ | 532/7045 [1:43:52<20:54:32, 11.56s/it] {'loss': 1.1602, 'learning_rate': 4.972991321278325e-06, 'epoch': 0.08} + 8%|▊ | 532/7045 [1:43:52<20:54:32, 11.56s/it] 8%|▊ | 533/7045 [1:44:03<20:44:07, 11.46s/it] {'loss': 1.1699, 'learning_rate': 4.9728225602680425e-06, 'epoch': 0.08} + 8%|▊ | 533/7045 [1:44:03<20:44:07, 11.46s/it] 8%|▊ | 534/7045 [1:44:14<20:30:38, 11.34s/it] {'loss': 1.1787, 'learning_rate': 4.972653276536878e-06, 'epoch': 0.08} + 8%|▊ | 534/7045 [1:44:14<20:30:38, 11.34s/it] 8%|▊ | 535/7045 [1:44:26<21:02:03, 11.63s/it] {'loss': 1.166, 'learning_rate': 4.972483470120614e-06, 'epoch': 0.08} + 8%|▊ | 535/7045 [1:44:26<21:02:03, 11.63s/it] 8%|▊ | 536/7045 [1:44:39<21:31:02, 11.90s/it] {'loss': 1.1177, 'learning_rate': 4.972313141055146e-06, 'epoch': 0.08} + 8%|▊ | 536/7045 [1:44:39<21:31:02, 11.90s/it]Token indices sequence length is longer than the specified maximum sequence length for this model (2541 > 2048). Running this sequence through the model will result in indexing errors + 8%|▊ | 537/7045 [1:44:50<21:15:28, 11.76s/it] {'loss': 1.1895, 'learning_rate': 4.9721422893764805e-06, 'epoch': 0.08} + 8%|▊ | 537/7045 [1:44:50<21:15:28, 11.76s/it] 8%|▊ | 538/7045 [1:45:01<20:54:10, 11.56s/it] {'loss': 1.1514, 'learning_rate': 4.9719709151207315e-06, 'epoch': 0.08} + 8%|▊ | 538/7045 [1:45:01<20:54:10, 11.56s/it] 8%|▊ | 539/7045 [1:45:12<20:34:34, 11.39s/it] {'loss': 1.168, 'learning_rate': 4.9717990183241265e-06, 'epoch': 0.08} + 8%|▊ | 539/7045 [1:45:12<20:34:34, 11.39s/it] 8%|▊ | 540/7045 [1:45:25<21:25:33, 11.86s/it] {'loss': 1.1475, 'learning_rate': 4.971626599023002e-06, 'epoch': 0.08} + 8%|▊ | 540/7045 [1:45:25<21:25:33, 11.86s/it] 8%|▊ | 541/7045 [1:45:37<21:09:47, 11.71s/it] {'loss': 1.1641, 'learning_rate': 4.971453657253803e-06, 'epoch': 0.08} + 8%|▊ | 541/7045 [1:45:37<21:09:47, 11.71s/it] 8%|▊ | 542/7045 [1:45:49<21:39:04, 11.99s/it] {'loss': 1.1543, 'learning_rate': 4.971280193053089e-06, 'epoch': 0.08} + 8%|▊ | 542/7045 [1:45:49<21:39:04, 11.99s/it] 8%|▊ | 543/7045 [1:46:01<21:20:32, 11.82s/it] {'loss': 1.1992, 'learning_rate': 4.971106206457529e-06, 'epoch': 0.08} + 8%|▊ | 543/7045 [1:46:01<21:20:32, 11.82s/it] 8%|▊ | 544/7045 [1:46:12<21:06:29, 11.69s/it] {'loss': 1.1455, 'learning_rate': 4.970931697503899e-06, 'epoch': 0.08} + 8%|▊ | 544/7045 [1:46:12<21:06:29, 11.69s/it] 8%|▊ | 545/7045 [1:46:24<20:56:08, 11.60s/it] {'loss': 1.1445, 'learning_rate': 4.970756666229089e-06, 'epoch': 0.08} + 8%|▊ | 545/7045 [1:46:24<20:56:08, 11.60s/it] 8%|▊ | 546/7045 [1:46:36<21:20:51, 11.83s/it] {'loss': 1.1514, 'learning_rate': 4.9705811126700975e-06, 'epoch': 0.08} + 8%|▊ | 546/7045 [1:46:36<21:20:51, 11.83s/it] 8%|▊ | 547/7045 [1:46:47<20:57:09, 11.61s/it] {'loss': 1.165, 'learning_rate': 4.970405036864035e-06, 'epoch': 0.08} + 8%|▊ | 547/7045 [1:46:47<20:57:09, 11.61s/it] 8%|▊ | 548/7045 [1:46:58<20:47:41, 11.52s/it] {'loss': 1.1943, 'learning_rate': 4.970228438848122e-06, 'epoch': 0.08} + 8%|▊ | 548/7045 [1:46:58<20:47:41, 11.52s/it] 8%|▊ | 549/7045 [1:47:09<20:25:42, 11.32s/it] {'loss': 1.1152, 'learning_rate': 4.970051318659687e-06, 'epoch': 0.08} + 8%|▊ | 549/7045 [1:47:09<20:25:42, 11.32s/it] 8%|▊ | 550/7045 [1:47:20<20:21:11, 11.28s/it] {'loss': 1.1162, 'learning_rate': 4.969873676336171e-06, 'epoch': 0.08} + 8%|▊ | 550/7045 [1:47:20<20:21:11, 11.28s/it] 8%|▊ | 551/7045 [1:47:32<20:44:26, 11.50s/it] {'loss': 1.1826, 'learning_rate': 4.969695511915127e-06, 'epoch': 0.08} + 8%|▊ | 551/7045 [1:47:32<20:44:26, 11.50s/it] 8%|▊ | 552/7045 [1:47:44<20:52:02, 11.57s/it] {'loss': 1.1543, 'learning_rate': 4.969516825434215e-06, 'epoch': 0.08} + 8%|▊ | 552/7045 [1:47:44<20:52:02, 11.57s/it] 8%|▊ | 553/7045 [1:47:56<21:14:43, 11.78s/it] {'loss': 1.1943, 'learning_rate': 4.969337616931208e-06, 'epoch': 0.08} + 8%|▊ | 553/7045 [1:47:56<21:14:43, 11.78s/it] 8%|▊ | 554/7045 [1:48:14<24:11:45, 13.42s/it] {'loss': 1.0889, 'learning_rate': 4.969157886443988e-06, 'epoch': 0.08} + 8%|▊ | 554/7045 [1:48:14<24:11:45, 13.42s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 4 bytes but only got 0. + warnings.warn(str(msg)) + 8%|▊ | 555/7045 [1:48:25<23:08:58, 12.84s/it] {'loss': 1.1572, 'learning_rate': 4.9689776340105466e-06, 'epoch': 0.08} + 8%|▊ | 555/7045 [1:48:25<23:08:58, 12.84s/it] 8%|▊ | 556/7045 [1:48:39<23:29:42, 13.03s/it] {'loss': 1.1294, 'learning_rate': 4.968796859668987e-06, 'epoch': 0.08} + 8%|▊ | 556/7045 [1:48:39<23:29:42, 13.03s/it] 8%|▊ | 557/7045 [1:48:51<23:22:57, 12.97s/it] {'loss': 1.1416, 'learning_rate': 4.968615563457523e-06, 'epoch': 0.08} + 8%|▊ | 557/7045 [1:48:51<23:22:57, 12.97s/it] 8%|▊ | 558/7045 [1:49:05<23:27:38, 13.02s/it] {'loss': 1.1387, 'learning_rate': 4.968433745414478e-06, 'epoch': 0.08} + 8%|▊ | 558/7045 [1:49:05<23:27:38, 13.02s/it] 8%|▊ | 559/7045 [1:49:16<22:24:32, 12.44s/it] {'loss': 1.1709, 'learning_rate': 4.968251405578286e-06, 'epoch': 0.08} + 8%|▊ | 559/7045 [1:49:16<22:24:32, 12.44s/it] 8%|▊ | 560/7045 [1:49:27<21:43:07, 12.06s/it] {'loss': 1.1465, 'learning_rate': 4.9680685439874895e-06, 'epoch': 0.08} + 8%|▊ | 560/7045 [1:49:27<21:43:07, 12.06s/it] 8%|▊ | 561/7045 [1:49:38<21:08:04, 11.73s/it] {'loss': 1.1631, 'learning_rate': 4.967885160680746e-06, 'epoch': 0.08} + 8%|▊ | 561/7045 [1:49:38<21:08:04, 11.73s/it] 8%|▊ | 562/7045 [1:49:52<22:30:10, 12.50s/it] {'loss': 1.1772, 'learning_rate': 4.9677012556968175e-06, 'epoch': 0.08} + 8%|▊ | 562/7045 [1:49:52<22:30:10, 12.50s/it] 8%|▊ | 563/7045 [1:50:03<21:54:11, 12.16s/it] {'loss': 1.1436, 'learning_rate': 4.96751682907458e-06, 'epoch': 0.08} + 8%|▊ | 563/7045 [1:50:03<21:54:11, 12.16s/it] 8%|▊ | 564/7045 [1:50:15<21:22:27, 11.87s/it] {'loss': 1.1582, 'learning_rate': 4.967331880853019e-06, 'epoch': 0.08} + 8%|▊ | 564/7045 [1:50:15<21:22:27, 11.87s/it] 8%|▊ | 565/7045 [1:50:26<21:17:57, 11.83s/it] {'loss': 1.1514, 'learning_rate': 4.9671464110712306e-06, 'epoch': 0.08} + 8%|▊ | 565/7045 [1:50:26<21:17:57, 11.83s/it] 8%|▊ | 566/7045 [1:50:38<20:58:20, 11.65s/it] {'loss': 1.1152, 'learning_rate': 4.966960419768419e-06, 'epoch': 0.08} + 8%|▊ | 566/7045 [1:50:38<20:58:20, 11.65s/it] 8%|▊ | 567/7045 [1:50:49<20:51:04, 11.59s/it] {'loss': 1.168, 'learning_rate': 4.9667739069839e-06, 'epoch': 0.08} + 8%|▊ | 567/7045 [1:50:49<20:51:04, 11.59s/it] 8%|▊ | 568/7045 [1:51:03<21:55:54, 12.19s/it] {'loss': 1.1025, 'learning_rate': 4.9665868727571024e-06, 'epoch': 0.08} + 8%|▊ | 568/7045 [1:51:03<21:55:54, 12.19s/it] 8%|▊ | 569/7045 [1:51:14<21:17:16, 11.83s/it] {'loss': 1.168, 'learning_rate': 4.966399317127561e-06, 'epoch': 0.08} + 8%|▊ | 569/7045 [1:51:14<21:17:16, 11.83s/it] 8%|▊ | 570/7045 [1:51:26<21:37:00, 12.02s/it] {'loss': 1.1758, 'learning_rate': 4.966211240134922e-06, 'epoch': 0.08} + 8%|▊ | 570/7045 [1:51:26<21:37:00, 12.02s/it] 8%|▊ | 571/7045 [1:51:37<21:03:39, 11.71s/it] {'loss': 1.165, 'learning_rate': 4.9660226418189435e-06, 'epoch': 0.08} + 8%|▊ | 571/7045 [1:51:37<21:03:39, 11.71s/it] 8%|▊ | 572/7045 [1:51:48<20:48:05, 11.57s/it] {'loss': 1.1816, 'learning_rate': 4.965833522219491e-06, 'epoch': 0.08} + 8%|▊ | 572/7045 [1:51:48<20:48:05, 11.57s/it] 8%|▊ | 573/7045 [1:52:01<21:10:13, 11.78s/it] {'loss': 1.1416, 'learning_rate': 4.965643881376544e-06, 'epoch': 0.08} + 8%|▊ | 573/7045 [1:52:01<21:10:13, 11.78s/it] 8%|▊ | 574/7045 [1:52:12<20:52:40, 11.61s/it] {'loss': 1.1787, 'learning_rate': 4.965453719330189e-06, 'epoch': 0.08} + 8%|▊ | 574/7045 [1:52:12<20:52:40, 11.61s/it] 8%|▊ | 575/7045 [1:52:23<20:34:51, 11.45s/it] {'loss': 1.1133, 'learning_rate': 4.9652630361206224e-06, 'epoch': 0.08} + 8%|▊ | 575/7045 [1:52:23<20:34:51, 11.45s/it] 8%|▊ | 576/7045 [1:52:34<20:12:51, 11.25s/it] {'loss': 1.1426, 'learning_rate': 4.965071831788153e-06, 'epoch': 0.08} + 8%|▊ | 576/7045 [1:52:34<20:12:51, 11.25s/it] 8%|▊ | 577/7045 [1:52:47<21:22:21, 11.90s/it] {'loss': 1.0801, 'learning_rate': 4.9648801063732e-06, 'epoch': 0.08} + 8%|▊ | 577/7045 [1:52:47<21:22:21, 11.90s/it] 8%|▊ | 578/7045 [1:52:59<21:15:02, 11.83s/it] {'loss': 1.168, 'learning_rate': 4.96468785991629e-06, 'epoch': 0.08} + 8%|▊ | 578/7045 [1:52:59<21:15:02, 11.83s/it] 8%|▊ | 579/7045 [1:53:10<20:52:09, 11.62s/it] {'loss': 1.1924, 'learning_rate': 4.964495092458061e-06, 'epoch': 0.08} + 8%|▊ | 579/7045 [1:53:10<20:52:09, 11.62s/it] 8%|▊ | 580/7045 [1:53:23<21:31:01, 11.98s/it] {'loss': 1.1445, 'learning_rate': 4.964301804039263e-06, 'epoch': 0.08} + 8%|▊ | 580/7045 [1:53:23<21:31:01, 11.98s/it] 8%|▊ | 581/7045 [1:53:34<21:09:10, 11.78s/it] {'loss': 1.1592, 'learning_rate': 4.964107994700753e-06, 'epoch': 0.08} + 8%|▊ | 581/7045 [1:53:34<21:09:10, 11.78s/it] 8%|▊ | 582/7045 [1:53:46<21:17:02, 11.86s/it] {'loss': 1.1318, 'learning_rate': 4.9639136644835e-06, 'epoch': 0.08} + 8%|▊ | 582/7045 [1:53:46<21:17:02, 11.86s/it] 8%|▊ | 583/7045 [1:53:57<20:49:48, 11.60s/it] {'loss': 1.1465, 'learning_rate': 4.963718813428584e-06, 'epoch': 0.08} + 8%|▊ | 583/7045 [1:53:57<20:49:48, 11.60s/it] 8%|▊ | 584/7045 [1:54:08<20:36:10, 11.48s/it] {'loss': 1.1699, 'learning_rate': 4.963523441577193e-06, 'epoch': 0.08} + 8%|▊ | 584/7045 [1:54:08<20:36:10, 11.48s/it] 8%|▊ | 585/7045 [1:54:20<20:49:42, 11.61s/it] {'loss': 1.1436, 'learning_rate': 4.963327548970624e-06, 'epoch': 0.08} + 8%|▊ | 585/7045 [1:54:20<20:49:42, 11.61s/it] 8%|▊ | 586/7045 [1:54:31<20:27:00, 11.40s/it] {'loss': 1.1631, 'learning_rate': 4.96313113565029e-06, 'epoch': 0.08} + 8%|▊ | 586/7045 [1:54:31<20:27:00, 11.40s/it] 8%|▊ | 587/7045 [1:54:42<20:19:32, 11.33s/it] {'loss': 1.1357, 'learning_rate': 4.962934201657706e-06, 'epoch': 0.08} + 8%|▊ | 587/7045 [1:54:42<20:19:32, 11.33s/it] 8%|▊ | 588/7045 [1:54:54<20:32:04, 11.45s/it] {'loss': 1.145, 'learning_rate': 4.962736747034504e-06, 'epoch': 0.08} + 8%|▊ | 588/7045 [1:54:54<20:32:04, 11.45s/it] 8%|▊ | 589/7045 [1:55:06<20:45:42, 11.58s/it] {'loss': 1.1152, 'learning_rate': 4.962538771822423e-06, 'epoch': 0.08} + 8%|▊ | 589/7045 [1:55:06<20:45:42, 11.58s/it] 8%|▊ | 590/7045 [1:55:19<21:39:26, 12.08s/it] {'loss': 1.1494, 'learning_rate': 4.96234027606331e-06, 'epoch': 0.08} + 8%|▊ | 590/7045 [1:55:19<21:39:26, 12.08s/it] 8%|▊ | 591/7045 [1:55:31<21:50:02, 12.18s/it] {'loss': 1.1353, 'learning_rate': 4.962141259799127e-06, 'epoch': 0.08} + 8%|▊ | 591/7045 [1:55:32<21:50:02, 12.18s/it] 8%|▊ | 592/7045 [1:55:43<21:28:58, 11.98s/it] {'loss': 1.1611, 'learning_rate': 4.961941723071942e-06, 'epoch': 0.08} + 8%|▊ | 592/7045 [1:55:43<21:28:58, 11.98s/it] 8%|▊ | 593/7045 [1:55:55<21:32:34, 12.02s/it] {'loss': 1.1465, 'learning_rate': 4.9617416659239335e-06, 'epoch': 0.08} + 8%|▊ | 593/7045 [1:55:55<21:32:34, 12.02s/it] 8%|▊ | 594/7045 [1:56:08<22:03:06, 12.31s/it] {'loss': 1.1562, 'learning_rate': 4.961541088397392e-06, 'epoch': 0.08} + 8%|▊ | 594/7045 [1:56:08<22:03:06, 12.31s/it] 8%|▊ | 595/7045 [1:56:20<21:55:37, 12.24s/it] {'loss': 1.1641, 'learning_rate': 4.961339990534718e-06, 'epoch': 0.08} + 8%|▊ | 595/7045 [1:56:20<21:55:37, 12.24s/it] 8%|▊ | 596/7045 [1:56:32<21:25:56, 11.96s/it] {'loss': 1.1123, 'learning_rate': 4.961138372378418e-06, 'epoch': 0.08} + 8%|▊ | 596/7045 [1:56:32<21:25:56, 11.96s/it] 8%|▊ | 597/7045 [1:56:44<21:35:16, 12.05s/it] {'loss': 1.1387, 'learning_rate': 4.960936233971115e-06, 'epoch': 0.08} + 8%|▊ | 597/7045 [1:56:44<21:35:16, 12.05s/it] 8%|▊ | 598/7045 [1:56:55<21:08:33, 11.81s/it] {'loss': 1.1904, 'learning_rate': 4.960733575355534e-06, 'epoch': 0.08} + 8%|▊ | 598/7045 [1:56:55<21:08:33, 11.81s/it] 9%|▊ | 599/7045 [1:57:07<21:11:31, 11.84s/it] {'loss': 1.1553, 'learning_rate': 4.960530396574517e-06, 'epoch': 0.09} + 9%|▊ | 599/7045 [1:57:07<21:11:31, 11.84s/it] 9%|▊ | 600/7045 [1:57:18<21:02:18, 11.75s/it] {'loss': 1.1846, 'learning_rate': 4.960326697671012e-06, 'epoch': 0.09} + 9%|▊ | 600/7045 [1:57:18<21:02:18, 11.75s/it] 9%|▊ | 601/7045 [1:57:30<20:42:17, 11.57s/it] {'loss': 1.1719, 'learning_rate': 4.960122478688079e-06, 'epoch': 0.09} + 9%|▊ | 601/7045 [1:57:30<20:42:17, 11.57s/it] 9%|▊ | 602/7045 [1:57:40<20:20:36, 11.37s/it] {'loss': 1.1416, 'learning_rate': 4.959917739668888e-06, 'epoch': 0.09} + 9%|▊ | 602/7045 [1:57:40<20:20:36, 11.37s/it] 9%|▊ | 603/7045 [1:57:53<20:56:22, 11.70s/it] {'loss': 1.127, 'learning_rate': 4.959712480656715e-06, 'epoch': 0.09} + 9%|▊ | 603/7045 [1:57:53<20:56:22, 11.70s/it] 9%|▊ | 604/7045 [1:58:06<21:35:14, 12.07s/it] {'loss': 1.1006, 'learning_rate': 4.959506701694953e-06, 'epoch': 0.09} + 9%|▊ | 604/7045 [1:58:06<21:35:14, 12.07s/it] 9%|▊ | 605/7045 [1:58:18<21:23:05, 11.95s/it] {'loss': 1.1689, 'learning_rate': 4.959300402827098e-06, 'epoch': 0.09} + 9%|▊ | 605/7045 [1:58:18<21:23:05, 11.95s/it] 9%|▊ | 606/7045 [1:58:29<20:51:00, 11.66s/it] {'loss': 1.1768, 'learning_rate': 4.959093584096758e-06, 'epoch': 0.09} + 9%|▊ | 606/7045 [1:58:29<20:51:00, 11.66s/it] 9%|▊ | 607/7045 [1:58:40<20:32:25, 11.49s/it] {'loss': 1.1436, 'learning_rate': 4.958886245547654e-06, 'epoch': 0.09} + 9%|▊ | 607/7045 [1:58:40<20:32:25, 11.49s/it] 9%|▊ | 608/7045 [1:58:50<20:10:28, 11.28s/it] {'loss': 1.1323, 'learning_rate': 4.958678387223614e-06, 'epoch': 0.09} + 9%|▊ | 608/7045 [1:58:50<20:10:28, 11.28s/it] 9%|▊ | 609/7045 [1:59:03<20:58:19, 11.73s/it] {'loss': 1.1318, 'learning_rate': 4.9584700091685765e-06, 'epoch': 0.09} + 9%|▊ | 609/7045 [1:59:03<20:58:19, 11.73s/it] 9%|▊ | 610/7045 [1:59:16<21:15:43, 11.89s/it] {'loss': 1.1416, 'learning_rate': 4.958261111426589e-06, 'epoch': 0.09} + 9%|▊ | 610/7045 [1:59:16<21:15:43, 11.89s/it] 9%|▊ | 611/7045 [1:59:28<21:21:13, 11.95s/it] {'loss': 1.1592, 'learning_rate': 4.958051694041809e-06, 'epoch': 0.09} + 9%|▊ | 611/7045 [1:59:28<21:21:13, 11.95s/it] 9%|▊ | 612/7045 [1:59:39<20:52:51, 11.69s/it] {'loss': 1.1357, 'learning_rate': 4.957841757058506e-06, 'epoch': 0.09} + 9%|▊ | 612/7045 [1:59:39<20:52:51, 11.69s/it] 9%|▊ | 613/7045 [1:59:50<20:36:40, 11.54s/it] {'loss': 1.1211, 'learning_rate': 4.957631300521058e-06, 'epoch': 0.09} + 9%|▊ | 613/7045 [1:59:50<20:36:40, 11.54s/it] 9%|▊ | 614/7045 [2:00:01<20:19:19, 11.38s/it] {'loss': 1.1318, 'learning_rate': 4.95742032447395e-06, 'epoch': 0.09} + 9%|▊ | 614/7045 [2:00:01<20:19:19, 11.38s/it] 9%|▊ | 615/7045 [2:00:14<21:07:24, 11.83s/it] {'loss': 1.1572, 'learning_rate': 4.957208828961784e-06, 'epoch': 0.09} + 9%|▊ | 615/7045 [2:00:14<21:07:24, 11.83s/it] 9%|▊ | 616/7045 [2:00:26<21:10:44, 11.86s/it] {'loss': 1.1445, 'learning_rate': 4.956996814029262e-06, 'epoch': 0.09} + 9%|▊ | 616/7045 [2:00:26<21:10:44, 11.86s/it] 9%|▉ | 617/7045 [2:00:37<21:08:41, 11.84s/it] {'loss': 1.166, 'learning_rate': 4.956784279721205e-06, 'epoch': 0.09} + 9%|▉ | 617/7045 [2:00:37<21:08:41, 11.84s/it] 9%|▉ | 618/7045 [2:00:48<20:35:37, 11.54s/it] {'loss': 1.126, 'learning_rate': 4.956571226082538e-06, 'epoch': 0.09} + 9%|▉ | 618/7045 [2:00:48<20:35:37, 11.54s/it]Token indices sequence length is longer than the specified maximum sequence length for this model (2330 > 2048). Running this sequence through the model will result in indexing errors + 9%|▉ | 619/7045 [2:01:01<20:58:41, 11.75s/it] {'loss': 1.1504, 'learning_rate': 4.956357653158299e-06, 'epoch': 0.09} + 9%|▉ | 619/7045 [2:01:01<20:58:41, 11.75s/it] 9%|▉ | 620/7045 [2:01:12<20:38:04, 11.56s/it] {'loss': 1.1797, 'learning_rate': 4.956143560993633e-06, 'epoch': 0.09} + 9%|▉ | 620/7045 [2:01:12<20:38:04, 11.56s/it] 9%|▉ | 621/7045 [2:01:25<21:20:24, 11.96s/it] {'loss': 1.1396, 'learning_rate': 4.955928949633796e-06, 'epoch': 0.09} + 9%|▉ | 621/7045 [2:01:25<21:20:24, 11.96s/it] 9%|▉ | 622/7045 [2:01:36<20:52:29, 11.70s/it] {'loss': 1.1611, 'learning_rate': 4.955713819124155e-06, 'epoch': 0.09} + 9%|▉ | 622/7045 [2:01:36<20:52:29, 11.70s/it] 9%|▉ | 623/7045 [2:01:48<21:00:39, 11.78s/it] {'loss': 1.1582, 'learning_rate': 4.955498169510186e-06, 'epoch': 0.09} + 9%|▉ | 623/7045 [2:01:48<21:00:39, 11.78s/it] 9%|▉ | 624/7045 [2:01:58<20:31:03, 11.50s/it] {'loss': 1.1631, 'learning_rate': 4.955282000837472e-06, 'epoch': 0.09} + 9%|▉ | 624/7045 [2:01:58<20:31:03, 11.50s/it] 9%|▉ | 625/7045 [2:02:12<21:36:38, 12.12s/it] {'loss': 1.1675, 'learning_rate': 4.955065313151711e-06, 'epoch': 0.09} + 9%|▉ | 625/7045 [2:02:12<21:36:38, 12.12s/it] 9%|▉ | 626/7045 [2:02:23<21:04:51, 11.82s/it] {'loss': 1.1367, 'learning_rate': 4.954848106498706e-06, 'epoch': 0.09} + 9%|▉ | 626/7045 [2:02:23<21:04:51, 11.82s/it] 9%|▉ | 627/7045 [2:02:34<20:43:25, 11.62s/it] {'loss': 1.1709, 'learning_rate': 4.954630380924373e-06, 'epoch': 0.09} + 9%|▉ | 627/7045 [2:02:34<20:43:25, 11.62s/it] 9%|▉ | 628/7045 [2:02:47<21:06:01, 11.84s/it] {'loss': 1.1216, 'learning_rate': 4.9544121364747335e-06, 'epoch': 0.09} + 9%|▉ | 628/7045 [2:02:47<21:06:01, 11.84s/it] 9%|▉ | 629/7045 [2:02:58<20:40:15, 11.60s/it] {'loss': 1.1338, 'learning_rate': 4.954193373195925e-06, 'epoch': 0.09} + 9%|▉ | 629/7045 [2:02:58<20:40:15, 11.60s/it] 9%|▉ | 630/7045 [2:03:09<20:28:54, 11.49s/it] {'loss': 1.1436, 'learning_rate': 4.9539740911341874e-06, 'epoch': 0.09} + 9%|▉ | 630/7045 [2:03:09<20:28:54, 11.49s/it] 9%|▉ | 631/7045 [2:03:22<21:15:37, 11.93s/it] {'loss': 1.1392, 'learning_rate': 4.953754290335877e-06, 'epoch': 0.09} + 9%|▉ | 631/7045 [2:03:22<21:15:37, 11.93s/it] 9%|▉ | 632/7045 [2:03:33<20:50:23, 11.70s/it] {'loss': 1.1768, 'learning_rate': 4.953533970847455e-06, 'epoch': 0.09} + 9%|▉ | 632/7045 [2:03:33<20:50:23, 11.70s/it] 9%|▉ | 633/7045 [2:03:46<21:40:45, 12.17s/it] {'loss': 1.0889, 'learning_rate': 4.953313132715494e-06, 'epoch': 0.09} + 9%|▉ | 633/7045 [2:03:46<21:40:45, 12.17s/it] 9%|▉ | 634/7045 [2:04:00<22:20:13, 12.54s/it] {'loss': 1.1582, 'learning_rate': 4.953091775986677e-06, 'epoch': 0.09} + 9%|▉ | 634/7045 [2:04:00<22:20:13, 12.54s/it] 9%|▉ | 635/7045 [2:04:11<21:39:49, 12.17s/it] {'loss': 1.168, 'learning_rate': 4.952869900707795e-06, 'epoch': 0.09} + 9%|▉ | 635/7045 [2:04:11<21:39:49, 12.17s/it] 9%|▉ | 636/7045 [2:04:29<24:33:44, 13.80s/it] {'loss': 1.1729, 'learning_rate': 4.952647506925749e-06, 'epoch': 0.09} + 9%|▉ | 636/7045 [2:04:29<24:33:44, 13.80s/it] 9%|▉ | 637/7045 [2:04:40<23:15:57, 13.07s/it] {'loss': 1.1328, 'learning_rate': 4.952424594687553e-06, 'epoch': 0.09} + 9%|▉ | 637/7045 [2:04:40<23:15:57, 13.07s/it] 9%|▉ | 638/7045 [2:04:53<23:25:30, 13.16s/it] {'loss': 1.1455, 'learning_rate': 4.952201164040323e-06, 'epoch': 0.09} + 9%|▉ | 638/7045 [2:04:53<23:25:30, 13.16s/it] 9%|▉ | 639/7045 [2:05:04<22:15:20, 12.51s/it] {'loss': 1.1245, 'learning_rate': 4.951977215031293e-06, 'epoch': 0.09} + 9%|▉ | 639/7045 [2:05:04<22:15:20, 12.51s/it] 9%|▉ | 640/7045 [2:05:15<21:28:29, 12.07s/it] {'loss': 1.1182, 'learning_rate': 4.9517527477078e-06, 'epoch': 0.09} + 9%|▉ | 640/7045 [2:05:15<21:28:29, 12.07s/it] 9%|▉ | 641/7045 [2:05:27<21:07:45, 11.88s/it] {'loss': 1.165, 'learning_rate': 4.951527762117295e-06, 'epoch': 0.09} + 9%|▉ | 641/7045 [2:05:27<21:07:45, 11.88s/it] 9%|▉ | 642/7045 [2:05:38<20:47:42, 11.69s/it] {'loss': 1.166, 'learning_rate': 4.9513022583073364e-06, 'epoch': 0.09} + 9%|▉ | 642/7045 [2:05:38<20:47:42, 11.69s/it] 9%|▉ | 643/7045 [2:05:50<20:56:08, 11.77s/it] {'loss': 1.1118, 'learning_rate': 4.951076236325593e-06, 'epoch': 0.09} + 9%|▉ | 643/7045 [2:05:50<20:56:08, 11.77s/it] 9%|▉ | 644/7045 [2:06:02<21:13:53, 11.94s/it] {'loss': 1.1787, 'learning_rate': 4.950849696219842e-06, 'epoch': 0.09} + 9%|▉ | 644/7045 [2:06:02<21:13:53, 11.94s/it] 9%|▉ | 645/7045 [2:06:13<20:39:14, 11.62s/it] {'loss': 1.1504, 'learning_rate': 4.9506226380379715e-06, 'epoch': 0.09} + 9%|▉ | 645/7045 [2:06:13<20:39:14, 11.62s/it] 9%|▉ | 646/7045 [2:06:25<20:28:52, 11.52s/it] {'loss': 1.1191, 'learning_rate': 4.95039506182798e-06, 'epoch': 0.09} + 9%|▉ | 646/7045 [2:06:25<20:28:52, 11.52s/it] 9%|▉ | 647/7045 [2:06:35<20:10:33, 11.35s/it] {'loss': 1.1523, 'learning_rate': 4.95016696763797e-06, 'epoch': 0.09} + 9%|▉ | 647/7045 [2:06:35<20:10:33, 11.35s/it] 9%|▉ | 648/7045 [2:06:47<19:59:35, 11.25s/it] {'loss': 1.1572, 'learning_rate': 4.94993835551616e-06, 'epoch': 0.09} + 9%|▉ | 648/7045 [2:06:47<19:59:35, 11.25s/it] 9%|▉ | 649/7045 [2:06:58<20:00:52, 11.27s/it] {'loss': 1.1953, 'learning_rate': 4.949709225510876e-06, 'epoch': 0.09} + 9%|▉ | 649/7045 [2:06:58<20:00:52, 11.27s/it] 9%|▉ | 650/7045 [2:07:11<20:47:44, 11.71s/it] {'loss': 1.1504, 'learning_rate': 4.949479577670552e-06, 'epoch': 0.09} + 9%|▉ | 650/7045 [2:07:11<20:47:44, 11.71s/it] 9%|▉ | 651/7045 [2:07:21<20:23:04, 11.48s/it] {'loss': 1.1436, 'learning_rate': 4.949249412043733e-06, 'epoch': 0.09} + 9%|▉ | 651/7045 [2:07:21<20:23:04, 11.48s/it] 9%|▉ | 652/7045 [2:07:33<20:32:34, 11.57s/it] {'loss': 1.1309, 'learning_rate': 4.949018728679071e-06, 'epoch': 0.09} + 9%|▉ | 652/7045 [2:07:33<20:32:34, 11.57s/it] 9%|▉ | 653/7045 [2:07:44<20:14:44, 11.40s/it] {'loss': 1.1895, 'learning_rate': 4.948787527625332e-06, 'epoch': 0.09} + 9%|▉ | 653/7045 [2:07:44<20:14:44, 11.40s/it] 9%|▉ | 654/7045 [2:07:56<20:11:15, 11.37s/it] {'loss': 1.1465, 'learning_rate': 4.948555808931388e-06, 'epoch': 0.09} + 9%|▉ | 654/7045 [2:07:56<20:11:15, 11.37s/it] 9%|▉ | 655/7045 [2:08:07<20:06:10, 11.33s/it] {'loss': 1.1377, 'learning_rate': 4.94832357264622e-06, 'epoch': 0.09} + 9%|▉ | 655/7045 [2:08:07<20:06:10, 11.33s/it] 9%|▉ | 656/7045 [2:08:20<20:51:04, 11.75s/it] {'loss': 1.0732, 'learning_rate': 4.94809081881892e-06, 'epoch': 0.09} + 9%|▉ | 656/7045 [2:08:20<20:51:04, 11.75s/it] 9%|▉ | 657/7045 [2:08:31<20:29:22, 11.55s/it] {'loss': 1.1494, 'learning_rate': 4.94785754749869e-06, 'epoch': 0.09} + 9%|▉ | 657/7045 [2:08:31<20:29:22, 11.55s/it] 9%|▉ | 658/7045 [2:08:41<20:08:05, 11.35s/it] {'loss': 1.1406, 'learning_rate': 4.94762375873484e-06, 'epoch': 0.09} + 9%|▉ | 658/7045 [2:08:41<20:08:05, 11.35s/it] 9%|▉ | 659/7045 [2:08:54<20:35:43, 11.61s/it] {'loss': 1.1416, 'learning_rate': 4.9473894525767885e-06, 'epoch': 0.09} + 9%|▉ | 659/7045 [2:08:54<20:35:43, 11.61s/it] 9%|▉ | 660/7045 [2:09:05<20:37:34, 11.63s/it] {'loss': 1.1201, 'learning_rate': 4.947154629074066e-06, 'epoch': 0.09} + 9%|▉ | 660/7045 [2:09:05<20:37:34, 11.63s/it] 9%|▉ | 661/7045 [2:09:17<20:21:20, 11.48s/it] {'loss': 1.1777, 'learning_rate': 4.94691928827631e-06, 'epoch': 0.09} + 9%|▉ | 661/7045 [2:09:17<20:21:20, 11.48s/it] 9%|▉ | 662/7045 [2:09:28<20:21:14, 11.48s/it] {'loss': 1.1201, 'learning_rate': 4.946683430233269e-06, 'epoch': 0.09} + 9%|▉ | 662/7045 [2:09:28<20:21:14, 11.48s/it] 9%|▉ | 663/7045 [2:09:39<19:59:47, 11.28s/it] {'loss': 1.1074, 'learning_rate': 4.9464470549948e-06, 'epoch': 0.09} + 9%|▉ | 663/7045 [2:09:39<19:59:47, 11.28s/it] 9%|▉ | 664/7045 [2:09:50<19:52:28, 11.21s/it] {'loss': 1.166, 'learning_rate': 4.9462101626108696e-06, 'epoch': 0.09} + 9%|▉ | 664/7045 [2:09:50<19:52:28, 11.21s/it] 9%|▉ | 665/7045 [2:10:01<19:45:29, 11.15s/it] {'loss': 1.1514, 'learning_rate': 4.945972753131554e-06, 'epoch': 0.09} + 9%|▉ | 665/7045 [2:10:01<19:45:29, 11.15s/it] 9%|▉ | 666/7045 [2:10:14<20:48:06, 11.74s/it] {'loss': 1.1162, 'learning_rate': 4.945734826607037e-06, 'epoch': 0.09} + 9%|▉ | 666/7045 [2:10:14<20:48:06, 11.74s/it] 9%|▉ | 667/7045 [2:10:25<20:32:12, 11.59s/it] {'loss': 1.1582, 'learning_rate': 4.945496383087613e-06, 'epoch': 0.09} + 9%|▉ | 667/7045 [2:10:25<20:32:12, 11.59s/it] 9%|▉ | 668/7045 [2:10:37<20:41:40, 11.68s/it] {'loss': 1.1377, 'learning_rate': 4.945257422623688e-06, 'epoch': 0.09} + 9%|▉ | 668/7045 [2:10:37<20:41:40, 11.68s/it] 9%|▉ | 669/7045 [2:10:50<21:20:45, 12.05s/it] {'loss': 1.1382, 'learning_rate': 4.945017945265773e-06, 'epoch': 0.09} + 9%|▉ | 669/7045 [2:10:50<21:20:45, 12.05s/it] 10%|▉ | 670/7045 [2:11:01<21:00:06, 11.86s/it] {'loss': 1.1748, 'learning_rate': 4.944777951064491e-06, 'epoch': 0.1} + 10%|▉ | 670/7045 [2:11:01<21:00:06, 11.86s/it] 10%|▉ | 671/7045 [2:11:13<20:55:23, 11.82s/it] {'loss': 1.1582, 'learning_rate': 4.944537440070572e-06, 'epoch': 0.1} + 10%|▉ | 671/7045 [2:11:13<20:55:23, 11.82s/it] 10%|▉ | 672/7045 [2:11:24<20:32:49, 11.61s/it] {'loss': 1.168, 'learning_rate': 4.9442964123348604e-06, 'epoch': 0.1} + 10%|▉ | 672/7045 [2:11:24<20:32:49, 11.61s/it] 10%|▉ | 673/7045 [2:11:36<20:20:48, 11.50s/it] {'loss': 1.1543, 'learning_rate': 4.944054867908302e-06, 'epoch': 0.1} + 10%|▉ | 673/7045 [2:11:36<20:20:48, 11.50s/it] 10%|▉ | 674/7045 [2:11:48<20:52:48, 11.80s/it] {'loss': 1.1699, 'learning_rate': 4.943812806841958e-06, 'epoch': 0.1} + 10%|▉ | 674/7045 [2:11:48<20:52:48, 11.80s/it] 10%|▉ | 675/7045 [2:11:59<20:35:47, 11.64s/it] {'loss': 1.1631, 'learning_rate': 4.943570229186996e-06, 'epoch': 0.1} + 10%|▉ | 675/7045 [2:11:59<20:35:47, 11.64s/it] 10%|▉ | 676/7045 [2:12:11<20:26:48, 11.56s/it] {'loss': 1.1436, 'learning_rate': 4.943327134994695e-06, 'epoch': 0.1} + 10%|▉ | 676/7045 [2:12:11<20:26:48, 11.56s/it] 10%|▉ | 677/7045 [2:12:24<21:17:43, 12.04s/it] {'loss': 1.1519, 'learning_rate': 4.94308352431644e-06, 'epoch': 0.1} + 10%|▉ | 677/7045 [2:12:24<21:17:43, 12.04s/it] 10%|▉ | 678/7045 [2:12:35<20:51:08, 11.79s/it] {'loss': 1.1152, 'learning_rate': 4.942839397203728e-06, 'epoch': 0.1} + 10%|▉ | 678/7045 [2:12:35<20:51:08, 11.79s/it] 10%|▉ | 679/7045 [2:12:46<20:29:07, 11.58s/it] {'loss': 1.1816, 'learning_rate': 4.942594753708165e-06, 'epoch': 0.1} + 10%|▉ | 679/7045 [2:12:46<20:29:07, 11.58s/it] 10%|▉ | 680/7045 [2:12:58<20:39:03, 11.68s/it] {'loss': 1.1719, 'learning_rate': 4.942349593881464e-06, 'epoch': 0.1} + 10%|▉ | 680/7045 [2:12:58<20:39:03, 11.68s/it] 10%|▉ | 681/7045 [2:13:09<20:22:47, 11.53s/it] {'loss': 1.1689, 'learning_rate': 4.942103917775448e-06, 'epoch': 0.1} + 10%|▉ | 681/7045 [2:13:09<20:22:47, 11.53s/it] 10%|▉ | 682/7045 [2:13:21<20:40:47, 11.70s/it] {'loss': 1.1348, 'learning_rate': 4.941857725442051e-06, 'epoch': 0.1} + 10%|▉ | 682/7045 [2:13:21<20:40:47, 11.70s/it] 10%|▉ | 683/7045 [2:13:34<20:59:11, 11.88s/it] {'loss': 1.1211, 'learning_rate': 4.941611016933313e-06, 'epoch': 0.1} + 10%|▉ | 683/7045 [2:13:34<20:59:11, 11.88s/it]Token indices sequence length is longer than the specified maximum sequence length for this model (2250 > 2048). Running this sequence through the model will result in indexing errors + 10%|▉ | 684/7045 [2:13:44<20:24:33, 11.55s/it] {'loss': 1.1699, 'learning_rate': 4.941363792301387e-06, 'epoch': 0.1} + 10%|▉ | 684/7045 [2:13:44<20:24:33, 11.55s/it] 10%|▉ | 685/7045 [2:13:55<20:10:04, 11.42s/it] {'loss': 1.1719, 'learning_rate': 4.941116051598531e-06, 'epoch': 0.1} + 10%|▉ | 685/7045 [2:13:56<20:10:04, 11.42s/it] 10%|▉ | 686/7045 [2:14:08<20:35:24, 11.66s/it] {'loss': 1.1504, 'learning_rate': 4.940867794877116e-06, 'epoch': 0.1} + 10%|▉ | 686/7045 [2:14:08<20:35:24, 11.66s/it] 10%|▉ | 687/7045 [2:14:19<20:21:13, 11.52s/it] {'loss': 1.1523, 'learning_rate': 4.940619022189616e-06, 'epoch': 0.1} + 10%|▉ | 687/7045 [2:14:19<20:21:13, 11.52s/it] 10%|▉ | 688/7045 [2:14:30<20:12:21, 11.44s/it] {'loss': 1.168, 'learning_rate': 4.940369733588624e-06, 'epoch': 0.1} + 10%|▉ | 688/7045 [2:14:30<20:12:21, 11.44s/it] 10%|▉ | 689/7045 [2:14:43<20:41:18, 11.72s/it] {'loss': 1.1289, 'learning_rate': 4.9401199291268314e-06, 'epoch': 0.1} + 10%|▉ | 689/7045 [2:14:43<20:41:18, 11.72s/it] 10%|▉ | 690/7045 [2:14:55<21:18:26, 12.07s/it] {'loss': 1.1641, 'learning_rate': 4.9398696088570455e-06, 'epoch': 0.1} + 10%|▉ | 690/7045 [2:14:55<21:18:26, 12.07s/it] 10%|▉ | 691/7045 [2:15:09<21:53:04, 12.40s/it] {'loss': 1.123, 'learning_rate': 4.93961877283218e-06, 'epoch': 0.1} + 10%|▉ | 691/7045 [2:15:09<21:53:04, 12.40s/it] 10%|▉ | 692/7045 [2:15:20<21:11:49, 12.01s/it] {'loss': 1.1318, 'learning_rate': 4.939367421105259e-06, 'epoch': 0.1} + 10%|▉ | 692/7045 [2:15:20<21:11:49, 12.01s/it] 10%|▉ | 693/7045 [2:15:31<20:56:43, 11.87s/it] {'loss': 1.125, 'learning_rate': 4.939115553729415e-06, 'epoch': 0.1} + 10%|▉ | 693/7045 [2:15:31<20:56:43, 11.87s/it] 10%|▉ | 694/7045 [2:15:42<20:35:30, 11.67s/it] {'loss': 1.1641, 'learning_rate': 4.938863170757887e-06, 'epoch': 0.1} + 10%|▉ | 694/7045 [2:15:42<20:35:30, 11.67s/it] 10%|▉ | 695/7045 [2:15:53<20:10:50, 11.44s/it] {'loss': 1.1836, 'learning_rate': 4.938610272244027e-06, 'epoch': 0.1} + 10%|▉ | 695/7045 [2:15:53<20:10:50, 11.44s/it] 10%|▉ | 696/7045 [2:16:05<20:05:20, 11.39s/it] {'loss': 1.165, 'learning_rate': 4.938356858241295e-06, 'epoch': 0.1} + 10%|▉ | 696/7045 [2:16:05<20:05:20, 11.39s/it] 10%|▉ | 697/7045 [2:16:17<20:44:38, 11.76s/it] {'loss': 1.1836, 'learning_rate': 4.938102928803259e-06, 'epoch': 0.1} + 10%|▉ | 697/7045 [2:16:17<20:44:38, 11.76s/it] 10%|▉ | 698/7045 [2:16:29<20:38:08, 11.70s/it] {'loss': 1.1348, 'learning_rate': 4.937848483983594e-06, 'epoch': 0.1} + 10%|▉ | 698/7045 [2:16:29<20:38:08, 11.70s/it] 10%|▉ | 699/7045 [2:16:40<20:19:15, 11.53s/it] {'loss': 1.126, 'learning_rate': 4.9375935238360896e-06, 'epoch': 0.1} + 10%|▉ | 699/7045 [2:16:40<20:19:15, 11.53s/it] 10%|▉ | 700/7045 [2:16:51<19:58:36, 11.33s/it] {'loss': 1.1211, 'learning_rate': 4.937338048414638e-06, 'epoch': 0.1} + 10%|▉ | 700/7045 [2:16:51<19:58:36, 11.33s/it] 10%|▉ | 701/7045 [2:17:08<23:03:36, 13.09s/it] {'loss': 1.1416, 'learning_rate': 4.937082057773245e-06, 'epoch': 0.1} + 10%|▉ | 701/7045 [2:17:08<23:03:36, 13.09s/it] 10%|▉ | 702/7045 [2:17:19<22:09:43, 12.58s/it] {'loss': 1.1455, 'learning_rate': 4.9368255519660215e-06, 'epoch': 0.1} + 10%|▉ | 702/7045 [2:17:19<22:09:43, 12.58s/it] 10%|▉ | 703/7045 [2:17:30<21:18:59, 12.10s/it] {'loss': 1.1328, 'learning_rate': 4.936568531047192e-06, 'epoch': 0.1} + 10%|▉ | 703/7045 [2:17:30<21:18:59, 12.10s/it] 10%|▉ | 704/7045 [2:17:42<20:51:14, 11.84s/it] {'loss': 1.1035, 'learning_rate': 4.936310995071085e-06, 'epoch': 0.1} + 10%|▉ | 704/7045 [2:17:42<20:51:14, 11.84s/it] 10%|█ | 705/7045 [2:17:52<20:19:48, 11.54s/it] {'loss': 1.1357, 'learning_rate': 4.936052944092142e-06, 'epoch': 0.1} + 10%|█ | 705/7045 [2:17:52<20:19:48, 11.54s/it] 10%|█ | 706/7045 [2:18:04<20:19:34, 11.54s/it] {'loss': 1.2178, 'learning_rate': 4.93579437816491e-06, 'epoch': 0.1} + 10%|█ | 706/7045 [2:18:04<20:19:34, 11.54s/it] 10%|█ | 707/7045 [2:18:16<20:42:59, 11.77s/it] {'loss': 1.1592, 'learning_rate': 4.935535297344047e-06, 'epoch': 0.1} + 10%|█ | 707/7045 [2:18:16<20:42:59, 11.77s/it] 10%|█ | 708/7045 [2:18:27<20:15:17, 11.51s/it] {'loss': 1.1738, 'learning_rate': 4.935275701684318e-06, 'epoch': 0.1} + 10%|█ | 708/7045 [2:18:27<20:15:17, 11.51s/it] 10%|█ | 709/7045 [2:18:38<20:06:52, 11.43s/it] {'loss': 1.1445, 'learning_rate': 4.935015591240599e-06, 'epoch': 0.1} + 10%|█ | 709/7045 [2:18:38<20:06:52, 11.43s/it] 10%|█ | 710/7045 [2:18:50<20:00:10, 11.37s/it] {'loss': 1.1338, 'learning_rate': 4.9347549660678745e-06, 'epoch': 0.1} + 10%|█ | 710/7045 [2:18:50<20:00:10, 11.37s/it] 10%|█ | 711/7045 [2:19:01<19:53:52, 11.31s/it] {'loss': 1.1338, 'learning_rate': 4.934493826221236e-06, 'epoch': 0.1} + 10%|█ | 711/7045 [2:19:01<19:53:52, 11.31s/it] 10%|█ | 712/7045 [2:19:12<19:57:03, 11.34s/it] {'loss': 1.1738, 'learning_rate': 4.934232171755885e-06, 'epoch': 0.1} + 10%|█ | 712/7045 [2:19:12<19:57:03, 11.34s/it] 10%|█ | 713/7045 [2:19:24<20:06:01, 11.43s/it] {'loss': 1.1543, 'learning_rate': 4.933970002727131e-06, 'epoch': 0.1} + 10%|█ | 713/7045 [2:19:24<20:06:01, 11.43s/it] 10%|█ | 714/7045 [2:19:37<20:48:22, 11.83s/it] {'loss': 1.1611, 'learning_rate': 4.933707319190394e-06, 'epoch': 0.1} + 10%|█ | 714/7045 [2:19:37<20:48:22, 11.83s/it] 10%|█ | 715/7045 [2:19:48<20:41:09, 11.76s/it] {'loss': 1.1167, 'learning_rate': 4.933444121201202e-06, 'epoch': 0.1} + 10%|█ | 715/7045 [2:19:48<20:41:09, 11.76s/it] 10%|█ | 716/7045 [2:19:59<20:20:23, 11.57s/it] {'loss': 1.1152, 'learning_rate': 4.933180408815191e-06, 'epoch': 0.1} + 10%|█ | 716/7045 [2:19:59<20:20:23, 11.57s/it] 10%|█ | 717/7045 [2:20:11<20:23:55, 11.60s/it] {'loss': 1.0952, 'learning_rate': 4.932916182088105e-06, 'epoch': 0.1} + 10%|█ | 717/7045 [2:20:11<20:23:55, 11.60s/it] 10%|█ | 718/7045 [2:20:24<21:14:36, 12.09s/it] {'loss': 1.1592, 'learning_rate': 4.932651441075801e-06, 'epoch': 0.1} + 10%|█ | 718/7045 [2:20:24<21:14:36, 12.09s/it] 10%|█ | 719/7045 [2:20:35<20:38:42, 11.75s/it] {'loss': 1.1338, 'learning_rate': 4.932386185834238e-06, 'epoch': 0.1} + 10%|█ | 719/7045 [2:20:35<20:38:42, 11.75s/it] 10%|█ | 720/7045 [2:20:47<20:52:45, 11.88s/it] {'loss': 1.1572, 'learning_rate': 4.932120416419489e-06, 'epoch': 0.1} + 10%|█ | 720/7045 [2:20:47<20:52:45, 11.88s/it] 10%|█ | 721/7045 [2:20:59<20:39:40, 11.76s/it] {'loss': 1.1528, 'learning_rate': 4.931854132887735e-06, 'epoch': 0.1} + 10%|█ | 721/7045 [2:20:59<20:39:40, 11.76s/it] 10%|█ | 722/7045 [2:21:10<20:19:12, 11.57s/it] {'loss': 1.1748, 'learning_rate': 4.931587335295263e-06, 'epoch': 0.1} + 10%|█ | 722/7045 [2:21:10<20:19:12, 11.57s/it] 10%|█ | 723/7045 [2:21:24<21:30:31, 12.25s/it] {'loss': 1.1738, 'learning_rate': 4.931320023698471e-06, 'epoch': 0.1} + 10%|█ | 723/7045 [2:21:24<21:30:31, 12.25s/it] 10%|█ | 724/7045 [2:21:36<21:19:01, 12.14s/it] {'loss': 1.1221, 'learning_rate': 4.931052198153865e-06, 'epoch': 0.1} + 10%|█ | 724/7045 [2:21:36<21:19:01, 12.14s/it] 10%|█ | 725/7045 [2:21:47<20:45:39, 11.83s/it] {'loss': 1.1416, 'learning_rate': 4.93078385871806e-06, 'epoch': 0.1} + 10%|█ | 725/7045 [2:21:47<20:45:39, 11.83s/it] 10%|█ | 726/7045 [2:21:58<20:19:49, 11.58s/it] {'loss': 1.1709, 'learning_rate': 4.930515005447779e-06, 'epoch': 0.1} + 10%|█ | 726/7045 [2:21:58<20:19:49, 11.58s/it] 10%|█ | 727/7045 [2:22:10<20:22:43, 11.61s/it] {'loss': 1.1738, 'learning_rate': 4.930245638399854e-06, 'epoch': 0.1} + 10%|█ | 727/7045 [2:22:10<20:22:43, 11.61s/it] 10%|█ | 728/7045 [2:22:21<20:15:54, 11.55s/it] {'loss': 1.1377, 'learning_rate': 4.929975757631225e-06, 'epoch': 0.1} + 10%|█ | 728/7045 [2:22:21<20:15:54, 11.55s/it] 10%|█ | 729/7045 [2:22:33<20:46:56, 11.85s/it] {'loss': 1.0908, 'learning_rate': 4.929705363198943e-06, 'epoch': 0.1} + 10%|█ | 729/7045 [2:22:33<20:46:56, 11.85s/it] 10%|█ | 730/7045 [2:22:45<20:51:30, 11.89s/it] {'loss': 1.1401, 'learning_rate': 4.929434455160163e-06, 'epoch': 0.1} + 10%|█ | 730/7045 [2:22:45<20:51:30, 11.89s/it] 10%|█ | 731/7045 [2:22:57<20:43:26, 11.82s/it] {'loss': 1.1865, 'learning_rate': 4.929163033572153e-06, 'epoch': 0.1} + 10%|█ | 731/7045 [2:22:57<20:43:26, 11.82s/it] 10%|█ | 732/7045 [2:23:09<20:49:28, 11.88s/it] {'loss': 1.1719, 'learning_rate': 4.928891098492286e-06, 'epoch': 0.1} + 10%|█ | 732/7045 [2:23:09<20:49:28, 11.88s/it] 10%|█ | 733/7045 [2:23:21<20:34:28, 11.73s/it] {'loss': 1.1494, 'learning_rate': 4.9286186499780485e-06, 'epoch': 0.1} + 10%|█ | 733/7045 [2:23:21<20:34:28, 11.73s/it] 10%|█ | 734/7045 [2:23:32<20:38:21, 11.77s/it] {'loss': 1.2041, 'learning_rate': 4.92834568808703e-06, 'epoch': 0.1} + 10%|█ | 734/7045 [2:23:32<20:38:21, 11.77s/it] 10%|█ | 735/7045 [2:23:43<20:15:56, 11.56s/it] {'loss': 1.2012, 'learning_rate': 4.9280722128769305e-06, 'epoch': 0.1} + 10%|█ | 735/7045 [2:23:43<20:15:56, 11.56s/it] 10%|█ | 736/7045 [2:23:56<20:57:36, 11.96s/it] {'loss': 1.1084, 'learning_rate': 4.927798224405561e-06, 'epoch': 0.1} + 10%|█ | 736/7045 [2:23:56<20:57:36, 11.96s/it] 10%|█ | 737/7045 [2:24:09<21:06:41, 12.05s/it] {'loss': 1.1182, 'learning_rate': 4.927523722730837e-06, 'epoch': 0.1} + 10%|█ | 737/7045 [2:24:09<21:06:41, 12.05s/it] 10%|█ | 738/7045 [2:24:20<20:41:36, 11.81s/it] {'loss': 1.165, 'learning_rate': 4.9272487079107855e-06, 'epoch': 0.1} + 10%|█ | 738/7045 [2:24:20<20:41:36, 11.81s/it] 10%|█ | 739/7045 [2:24:31<20:26:02, 11.67s/it] {'loss': 1.1357, 'learning_rate': 4.9269731800035416e-06, 'epoch': 0.1} + 10%|█ | 739/7045 [2:24:31<20:26:02, 11.67s/it] 11%|█ | 740/7045 [2:24:42<20:12:53, 11.54s/it] {'loss': 1.1377, 'learning_rate': 4.9266971390673456e-06, 'epoch': 0.11} + 11%|█ | 740/7045 [2:24:42<20:12:53, 11.54s/it] 11%|█ | 741/7045 [2:24:54<20:24:16, 11.65s/it] {'loss': 1.1299, 'learning_rate': 4.9264205851605504e-06, 'epoch': 0.11} + 11%|█ | 741/7045 [2:24:54<20:24:16, 11.65s/it] 11%|█ | 742/7045 [2:25:06<20:09:47, 11.52s/it] {'loss': 1.1777, 'learning_rate': 4.926143518341617e-06, 'epoch': 0.11} + 11%|█ | 742/7045 [2:25:06<20:09:47, 11.52s/it] 11%|█ | 743/7045 [2:25:18<20:46:49, 11.87s/it] {'loss': 1.1108, 'learning_rate': 4.925865938669111e-06, 'epoch': 0.11} + 11%|█ | 743/7045 [2:25:18<20:46:49, 11.87s/it] 11%|█ | 744/7045 [2:25:29<20:20:21, 11.62s/it] {'loss': 1.1631, 'learning_rate': 4.92558784620171e-06, 'epoch': 0.11} + 11%|█ | 744/7045 [2:25:29<20:20:21, 11.62s/it] 11%|█ | 745/7045 [2:25:41<20:28:03, 11.70s/it] {'loss': 1.1357, 'learning_rate': 4.925309240998199e-06, 'epoch': 0.11} + 11%|█ | 745/7045 [2:25:41<20:28:03, 11.70s/it] 11%|█ | 746/7045 [2:25:52<20:05:51, 11.49s/it] {'loss': 1.1494, 'learning_rate': 4.925030123117472e-06, 'epoch': 0.11} + 11%|█ | 746/7045 [2:25:52<20:05:51, 11.49s/it] 11%|█ | 747/7045 [2:26:05<21:00:49, 12.01s/it] {'loss': 1.1123, 'learning_rate': 4.92475049261853e-06, 'epoch': 0.11} + 11%|█ | 747/7045 [2:26:05<21:00:49, 12.01s/it] 11%|█ | 748/7045 [2:26:17<20:33:18, 11.75s/it] {'loss': 1.1406, 'learning_rate': 4.9244703495604825e-06, 'epoch': 0.11} + 11%|█ | 748/7045 [2:26:17<20:33:18, 11.75s/it] 11%|█ | 749/7045 [2:26:28<20:13:05, 11.56s/it] {'loss': 1.1631, 'learning_rate': 4.92418969400255e-06, 'epoch': 0.11} + 11%|█ | 749/7045 [2:26:28<20:13:05, 11.56s/it] 11%|█ | 750/7045 [2:26:40<20:36:34, 11.79s/it] {'loss': 1.123, 'learning_rate': 4.923908526004057e-06, 'epoch': 0.11} + 11%|█ | 750/7045 [2:26:40<20:36:34, 11.79s/it] 11%|█ | 751/7045 [2:26:51<20:15:52, 11.59s/it] {'loss': 1.166, 'learning_rate': 4.923626845624438e-06, 'epoch': 0.11} + 11%|█ | 751/7045 [2:26:51<20:15:52, 11.59s/it] 11%|█ | 752/7045 [2:27:02<20:08:20, 11.52s/it] {'loss': 1.1533, 'learning_rate': 4.92334465292324e-06, 'epoch': 0.11} + 11%|█ | 752/7045 [2:27:02<20:08:20, 11.52s/it] 11%|█ | 753/7045 [2:27:15<20:24:28, 11.68s/it] {'loss': 1.1689, 'learning_rate': 4.923061947960111e-06, 'epoch': 0.11} + 11%|█ | 753/7045 [2:27:15<20:24:28, 11.68s/it] 11%|█ | 754/7045 [2:27:26<20:08:00, 11.52s/it] {'loss': 1.1279, 'learning_rate': 4.922778730794813e-06, 'epoch': 0.11} + 11%|█ | 754/7045 [2:27:26<20:08:00, 11.52s/it] 11%|█ | 755/7045 [2:27:37<20:02:07, 11.47s/it] {'loss': 1.1367, 'learning_rate': 4.922495001487213e-06, 'epoch': 0.11} + 11%|█ | 755/7045 [2:27:37<20:02:07, 11.47s/it] 11%|█ | 756/7045 [2:27:48<19:53:44, 11.39s/it] {'loss': 1.124, 'learning_rate': 4.922210760097289e-06, 'epoch': 0.11} + 11%|█ | 756/7045 [2:27:48<19:53:44, 11.39s/it] 11%|█ | 757/7045 [2:28:00<20:21:10, 11.65s/it] {'loss': 1.1533, 'learning_rate': 4.921926006685123e-06, 'epoch': 0.11} + 11%|█ | 757/7045 [2:28:00<20:21:10, 11.65s/it] 11%|█ | 758/7045 [2:28:12<20:05:40, 11.51s/it] {'loss': 1.1621, 'learning_rate': 4.921640741310913e-06, 'epoch': 0.11} + 11%|█ | 758/7045 [2:28:12<20:05:40, 11.51s/it] 11%|█ | 759/7045 [2:28:23<20:08:18, 11.53s/it] {'loss': 1.1436, 'learning_rate': 4.921354964034954e-06, 'epoch': 0.11} + 11%|█ | 759/7045 [2:28:23<20:08:18, 11.53s/it] 11%|█ | 760/7045 [2:28:35<19:59:51, 11.45s/it] {'loss': 1.1738, 'learning_rate': 4.92106867491766e-06, 'epoch': 0.11} + 11%|█ | 760/7045 [2:28:35<19:59:51, 11.45s/it] 11%|█ | 761/7045 [2:28:47<20:44:36, 11.88s/it] {'loss': 1.1543, 'learning_rate': 4.920781874019547e-06, 'epoch': 0.11} + 11%|█ | 761/7045 [2:28:47<20:44:36, 11.88s/it] 11%|█ | 762/7045 [2:28:59<20:44:29, 11.88s/it] {'loss': 1.1787, 'learning_rate': 4.920494561401241e-06, 'epoch': 0.11} + 11%|█ | 762/7045 [2:28:59<20:44:29, 11.88s/it] 11%|█ | 763/7045 [2:29:10<20:20:27, 11.66s/it] {'loss': 1.1592, 'learning_rate': 4.9202067371234755e-06, 'epoch': 0.11} + 11%|█ | 763/7045 [2:29:10<20:20:27, 11.66s/it] 11%|█ | 764/7045 [2:29:21<20:02:03, 11.48s/it] {'loss': 1.1709, 'learning_rate': 4.9199184012470925e-06, 'epoch': 0.11} + 11%|█ | 764/7045 [2:29:22<20:02:03, 11.48s/it] 11%|█ | 765/7045 [2:29:32<19:44:29, 11.32s/it] {'loss': 1.1592, 'learning_rate': 4.919629553833043e-06, 'epoch': 0.11} + 11%|█ | 765/7045 [2:29:32<19:44:29, 11.32s/it] 11%|█ | 766/7045 [2:29:44<19:40:58, 11.29s/it] {'loss': 1.1387, 'learning_rate': 4.9193401949423855e-06, 'epoch': 0.11} + 11%|█ | 766/7045 [2:29:44<19:40:58, 11.29s/it] 11%|█ | 767/7045 [2:29:55<19:37:37, 11.25s/it] {'loss': 1.1455, 'learning_rate': 4.919050324636285e-06, 'epoch': 0.11} + 11%|█ | 767/7045 [2:29:55<19:37:37, 11.25s/it] 11%|█ | 768/7045 [2:30:06<19:28:46, 11.17s/it] {'loss': 1.1621, 'learning_rate': 4.918759942976018e-06, 'epoch': 0.11} + 11%|█ | 768/7045 [2:30:06<19:28:46, 11.17s/it] 11%|█ | 769/7045 [2:30:17<19:28:39, 11.17s/it] {'loss': 1.1274, 'learning_rate': 4.918469050022967e-06, 'epoch': 0.11} + 11%|█ | 769/7045 [2:30:17<19:28:39, 11.17s/it] 11%|█ | 770/7045 [2:30:28<19:35:29, 11.24s/it] {'loss': 1.1768, 'learning_rate': 4.918177645838622e-06, 'epoch': 0.11} + 11%|█ | 770/7045 [2:30:28<19:35:29, 11.24s/it] 11%|█ | 771/7045 [2:30:40<19:35:18, 11.24s/it] {'loss': 1.1533, 'learning_rate': 4.917885730484582e-06, 'epoch': 0.11} + 11%|█ | 771/7045 [2:30:40<19:35:18, 11.24s/it] 11%|█ | 772/7045 [2:30:52<20:05:04, 11.53s/it] {'loss': 1.167, 'learning_rate': 4.9175933040225535e-06, 'epoch': 0.11} + 11%|█ | 772/7045 [2:30:52<20:05:04, 11.53s/it] 11%|█ | 773/7045 [2:31:03<20:02:42, 11.51s/it] {'loss': 1.168, 'learning_rate': 4.917300366514352e-06, 'epoch': 0.11} + 11%|█ | 773/7045 [2:31:03<20:02:42, 11.51s/it] 11%|█ | 774/7045 [2:31:15<19:54:50, 11.43s/it] {'loss': 1.1562, 'learning_rate': 4.9170069180219005e-06, 'epoch': 0.11} + 11%|█ | 774/7045 [2:31:15<19:54:50, 11.43s/it] 11%|█ | 775/7045 [2:31:26<19:57:32, 11.46s/it] {'loss': 1.1914, 'learning_rate': 4.916712958607231e-06, 'epoch': 0.11} + 11%|█ | 775/7045 [2:31:26<19:57:32, 11.46s/it] 11%|█ | 776/7045 [2:31:37<19:40:58, 11.30s/it] {'loss': 1.1895, 'learning_rate': 4.916418488332479e-06, 'epoch': 0.11} + 11%|█ | 776/7045 [2:31:37<19:40:58, 11.30s/it] 11%|█ | 777/7045 [2:31:48<19:34:07, 11.24s/it] {'loss': 1.1562, 'learning_rate': 4.916123507259896e-06, 'epoch': 0.11} + 11%|█ | 777/7045 [2:31:48<19:34:07, 11.24s/it] 11%|█ | 778/7045 [2:32:01<20:29:35, 11.77s/it] {'loss': 1.1177, 'learning_rate': 4.915828015451835e-06, 'epoch': 0.11} + 11%|█ | 778/7045 [2:32:01<20:29:35, 11.77s/it] 11%|█ | 779/7045 [2:32:12<20:07:19, 11.56s/it] {'loss': 1.1895, 'learning_rate': 4.915532012970758e-06, 'epoch': 0.11} + 11%|█ | 779/7045 [2:32:12<20:07:19, 11.56s/it] 11%|█ | 780/7045 [2:32:23<19:58:54, 11.48s/it] {'loss': 1.1289, 'learning_rate': 4.915235499879236e-06, 'epoch': 0.11} + 11%|█ | 780/7045 [2:32:23<19:58:54, 11.48s/it] 11%|█ | 781/7045 [2:32:35<19:46:10, 11.36s/it] {'loss': 1.1553, 'learning_rate': 4.91493847623995e-06, 'epoch': 0.11} + 11%|█ | 781/7045 [2:32:35<19:46:10, 11.36s/it] 11%|█ | 782/7045 [2:32:46<19:37:46, 11.28s/it] {'loss': 1.1709, 'learning_rate': 4.914640942115685e-06, 'epoch': 0.11} + 11%|█ | 782/7045 [2:32:46<19:37:46, 11.28s/it] 11%|█ | 783/7045 [2:32:57<19:40:53, 11.31s/it] {'loss': 1.1348, 'learning_rate': 4.9143428975693355e-06, 'epoch': 0.11} + 11%|█ | 783/7045 [2:32:57<19:40:53, 11.31s/it] 11%|█ | 784/7045 [2:33:08<19:32:22, 11.23s/it] {'loss': 1.2012, 'learning_rate': 4.9140443426639044e-06, 'epoch': 0.11} + 11%|█ | 784/7045 [2:33:08<19:32:22, 11.23s/it] 11%|█ | 785/7045 [2:33:22<20:45:09, 11.93s/it] {'loss': 1.1382, 'learning_rate': 4.913745277462503e-06, 'epoch': 0.11} + 11%|█ | 785/7045 [2:33:22<20:45:09, 11.93s/it] 11%|█ | 786/7045 [2:33:33<20:29:47, 11.79s/it] {'loss': 1.1387, 'learning_rate': 4.913445702028348e-06, 'epoch': 0.11} + 11%|█ | 786/7045 [2:33:33<20:29:47, 11.79s/it] 11%|█ | 787/7045 [2:33:45<20:38:36, 11.88s/it] {'loss': 1.1719, 'learning_rate': 4.913145616424766e-06, 'epoch': 0.11} + 11%|█ | 787/7045 [2:33:45<20:38:36, 11.88s/it] 11%|█ | 788/7045 [2:33:57<20:21:26, 11.71s/it] {'loss': 1.1416, 'learning_rate': 4.9128450207151915e-06, 'epoch': 0.11} + 11%|█ | 788/7045 [2:33:57<20:21:26, 11.71s/it] 11%|█ | 789/7045 [2:34:10<21:10:07, 12.18s/it] {'loss': 1.1367, 'learning_rate': 4.912543914963167e-06, 'epoch': 0.11} + 11%|█ | 789/7045 [2:34:10<21:10:07, 12.18s/it] 11%|█ | 790/7045 [2:34:22<21:24:22, 12.32s/it] {'loss': 1.1172, 'learning_rate': 4.912242299232341e-06, 'epoch': 0.11} + 11%|█ | 790/7045 [2:34:22<21:24:22, 12.32s/it] 11%|█ | 791/7045 [2:34:36<21:55:38, 12.62s/it] {'loss': 1.1006, 'learning_rate': 4.911940173586471e-06, 'epoch': 0.11} + 11%|█ | 791/7045 [2:34:36<21:55:38, 12.62s/it] 11%|█ | 792/7045 [2:34:49<22:06:28, 12.73s/it] {'loss': 1.1211, 'learning_rate': 4.911637538089421e-06, 'epoch': 0.11} + 11%|█ | 792/7045 [2:34:49<22:06:28, 12.73s/it] 11%|█▏ | 793/7045 [2:35:01<21:53:04, 12.60s/it] {'loss': 1.127, 'learning_rate': 4.911334392805167e-06, 'epoch': 0.11} + 11%|█▏ | 793/7045 [2:35:01<21:53:04, 12.60s/it] 11%|█▏ | 794/7045 [2:35:14<21:48:54, 12.56s/it] {'loss': 1.1377, 'learning_rate': 4.911030737797788e-06, 'epoch': 0.11} + 11%|█▏ | 794/7045 [2:35:14<21:48:54, 12.56s/it] 11%|█▏ | 795/7045 [2:35:25<21:04:06, 12.14s/it] {'loss': 1.1621, 'learning_rate': 4.910726573131474e-06, 'epoch': 0.11} + 11%|█▏ | 795/7045 [2:35:25<21:04:06, 12.14s/it] 11%|█▏ | 796/7045 [2:35:36<20:38:37, 11.89s/it] {'loss': 1.1709, 'learning_rate': 4.910421898870518e-06, 'epoch': 0.11} + 11%|█▏ | 796/7045 [2:35:36<20:38:37, 11.89s/it] 11%|█▏ | 797/7045 [2:35:48<20:54:57, 12.05s/it] {'loss': 1.1768, 'learning_rate': 4.910116715079327e-06, 'epoch': 0.11} + 11%|█▏ | 797/7045 [2:35:48<20:54:57, 12.05s/it] 11%|█▏ | 798/7045 [2:36:01<21:01:23, 12.12s/it] {'loss': 1.1416, 'learning_rate': 4.909811021822411e-06, 'epoch': 0.11} + 11%|█▏ | 798/7045 [2:36:01<21:01:23, 12.12s/it] 11%|█▏ | 799/7045 [2:36:14<21:34:10, 12.43s/it] {'loss': 1.126, 'learning_rate': 4.9095048191643904e-06, 'epoch': 0.11} + 11%|█▏ | 799/7045 [2:36:14<21:34:10, 12.43s/it] 11%|█▏ | 800/7045 [2:36:27<21:46:56, 12.56s/it] {'loss': 1.1484, 'learning_rate': 4.909198107169991e-06, 'epoch': 0.11} + 11%|█▏ | 800/7045 [2:36:27<21:46:56, 12.56s/it] 11%|█▏ | 801/7045 [2:36:39<21:38:39, 12.48s/it] {'loss': 1.167, 'learning_rate': 4.90889088590405e-06, 'epoch': 0.11} + 11%|█▏ | 801/7045 [2:36:39<21:38:39, 12.48s/it] 11%|█▏ | 802/7045 [2:36:50<20:55:52, 12.07s/it] {'loss': 1.1836, 'learning_rate': 4.908583155431508e-06, 'epoch': 0.11} + 11%|█▏ | 802/7045 [2:36:50<20:55:52, 12.07s/it] 11%|█▏ | 803/7045 [2:37:01<20:18:06, 11.71s/it] {'loss': 1.125, 'learning_rate': 4.9082749158174146e-06, 'epoch': 0.11} + 11%|█▏ | 803/7045 [2:37:01<20:18:06, 11.71s/it] 11%|█▏ | 804/7045 [2:37:14<20:49:48, 12.02s/it] {'loss': 1.1641, 'learning_rate': 4.907966167126927e-06, 'epoch': 0.11} + 11%|█▏ | 804/7045 [2:37:14<20:49:48, 12.02s/it] 11%|█▏ | 805/7045 [2:37:27<21:42:01, 12.52s/it] {'loss': 1.0947, 'learning_rate': 4.907656909425314e-06, 'epoch': 0.11} + 11%|█▏ | 805/7045 [2:37:27<21:42:01, 12.52s/it] 11%|█▏ | 806/7045 [2:37:39<21:00:31, 12.12s/it] {'loss': 1.1309, 'learning_rate': 4.9073471427779436e-06, 'epoch': 0.11} + 11%|█▏ | 806/7045 [2:37:39<21:00:31, 12.12s/it] 11%|█▏ | 807/7045 [2:37:50<20:44:15, 11.97s/it] {'loss': 1.1934, 'learning_rate': 4.9070368672503e-06, 'epoch': 0.11} + 11%|█▏ | 807/7045 [2:37:50<20:44:15, 11.97s/it] 11%|█▏ | 808/7045 [2:38:01<20:18:31, 11.72s/it] {'loss': 1.1699, 'learning_rate': 4.906726082907969e-06, 'epoch': 0.11} + 11%|█▏ | 808/7045 [2:38:01<20:18:31, 11.72s/it] 11%|█▏ | 809/7045 [2:38:13<20:06:34, 11.61s/it] {'loss': 1.1406, 'learning_rate': 4.906414789816647e-06, 'epoch': 0.11} + 11%|█▏ | 809/7045 [2:38:13<20:06:34, 11.61s/it] 11%|█▏ | 810/7045 [2:38:24<19:56:52, 11.52s/it] {'loss': 1.1582, 'learning_rate': 4.906102988042137e-06, 'epoch': 0.11} + 11%|█▏ | 810/7045 [2:38:24<19:56:52, 11.52s/it] 12%|█▏ | 811/7045 [2:38:35<19:39:28, 11.35s/it] {'loss': 1.1533, 'learning_rate': 4.90579067765035e-06, 'epoch': 0.12} + 12%|█▏ | 811/7045 [2:38:35<19:39:28, 11.35s/it] 12%|█▏ | 812/7045 [2:38:47<19:54:11, 11.50s/it] {'loss': 1.1328, 'learning_rate': 4.905477858707304e-06, 'epoch': 0.12} + 12%|█▏ | 812/7045 [2:38:47<19:54:11, 11.50s/it] 12%|█▏ | 813/7045 [2:38:58<19:50:45, 11.46s/it] {'loss': 1.1709, 'learning_rate': 4.905164531279124e-06, 'epoch': 0.12} + 12%|█▏ | 813/7045 [2:38:58<19:50:45, 11.46s/it] 12%|█▏ | 814/7045 [2:39:09<19:46:21, 11.42s/it] {'loss': 1.1133, 'learning_rate': 4.904850695432043e-06, 'epoch': 0.12} + 12%|█▏ | 814/7045 [2:39:09<19:46:21, 11.42s/it] 12%|█▏ | 815/7045 [2:39:24<21:30:44, 12.43s/it] {'loss': 1.0947, 'learning_rate': 4.9045363512324034e-06, 'epoch': 0.12} + 12%|█▏ | 815/7045 [2:39:24<21:30:44, 12.43s/it] 12%|█▏ | 816/7045 [2:39:36<21:13:30, 12.27s/it] {'loss': 1.0977, 'learning_rate': 4.904221498746652e-06, 'epoch': 0.12} + 12%|█▏ | 816/7045 [2:39:36<21:13:30, 12.27s/it] 12%|█▏ | 817/7045 [2:39:48<20:55:45, 12.10s/it] {'loss': 1.1162, 'learning_rate': 4.9039061380413435e-06, 'epoch': 0.12} + 12%|█▏ | 817/7045 [2:39:48<20:55:45, 12.10s/it] 12%|█▏ | 818/7045 [2:39:59<20:16:09, 11.72s/it] {'loss': 1.1514, 'learning_rate': 4.903590269183143e-06, 'epoch': 0.12} + 12%|█▏ | 818/7045 [2:39:59<20:16:09, 11.72s/it] 12%|█▏ | 819/7045 [2:40:10<20:10:22, 11.66s/it] {'loss': 1.1523, 'learning_rate': 4.903273892238818e-06, 'epoch': 0.12} + 12%|█▏ | 819/7045 [2:40:10<20:10:22, 11.66s/it] 12%|█▏ | 820/7045 [2:40:23<20:44:05, 11.99s/it] {'loss': 1.1436, 'learning_rate': 4.902957007275249e-06, 'epoch': 0.12} + 12%|█▏ | 820/7045 [2:40:23<20:44:05, 11.99s/it] 12%|█▏ | 821/7045 [2:40:36<21:02:13, 12.17s/it] {'loss': 1.1396, 'learning_rate': 4.90263961435942e-06, 'epoch': 0.12} + 12%|█▏ | 821/7045 [2:40:36<21:02:13, 12.17s/it] 12%|█▏ | 822/7045 [2:40:47<20:39:19, 11.95s/it] {'loss': 1.168, 'learning_rate': 4.902321713558423e-06, 'epoch': 0.12} + 12%|█▏ | 822/7045 [2:40:47<20:39:19, 11.95s/it] 12%|█▏ | 823/7045 [2:41:01<21:32:10, 12.46s/it] {'loss': 1.1182, 'learning_rate': 4.902003304939459e-06, 'epoch': 0.12} + 12%|█▏ | 823/7045 [2:41:01<21:32:10, 12.46s/it] 12%|█▏ | 824/7045 [2:41:12<20:41:37, 11.98s/it] {'loss': 1.1484, 'learning_rate': 4.901684388569834e-06, 'epoch': 0.12} + 12%|█▏ | 824/7045 [2:41:12<20:41:37, 11.98s/it] 12%|█▏ | 825/7045 [2:41:23<20:23:22, 11.80s/it] {'loss': 1.1611, 'learning_rate': 4.9013649645169634e-06, 'epoch': 0.12} + 12%|█▏ | 825/7045 [2:41:23<20:23:22, 11.80s/it] 12%|█▏ | 826/7045 [2:41:35<20:35:42, 11.92s/it] {'loss': 1.1279, 'learning_rate': 4.901045032848369e-06, 'epoch': 0.12} + 12%|█▏ | 826/7045 [2:41:35<20:35:42, 11.92s/it] 12%|█▏ | 827/7045 [2:41:46<20:00:55, 11.59s/it] {'loss': 1.1191, 'learning_rate': 4.900724593631679e-06, 'epoch': 0.12} + 12%|█▏ | 827/7045 [2:41:46<20:00:55, 11.59s/it] 12%|█▏ | 828/7045 [2:41:59<20:47:34, 12.04s/it] {'loss': 1.1738, 'learning_rate': 4.900403646934632e-06, 'epoch': 0.12} + 12%|█▏ | 828/7045 [2:41:59<20:47:34, 12.04s/it] 12%|█▏ | 829/7045 [2:42:10<20:12:29, 11.70s/it] {'loss': 1.1797, 'learning_rate': 4.90008219282507e-06, 'epoch': 0.12} + 12%|█▏ | 829/7045 [2:42:10<20:12:29, 11.70s/it] 12%|█▏ | 830/7045 [2:42:21<19:58:50, 11.57s/it] {'loss': 1.1953, 'learning_rate': 4.899760231370945e-06, 'epoch': 0.12} + 12%|█▏ | 830/7045 [2:42:21<19:58:50, 11.57s/it] 12%|█▏ | 831/7045 [2:42:33<20:09:38, 11.68s/it] {'loss': 1.1465, 'learning_rate': 4.899437762640313e-06, 'epoch': 0.12} + 12%|█▏ | 831/7045 [2:42:33<20:09:38, 11.68s/it] 12%|█▏ | 832/7045 [2:42:44<19:59:03, 11.58s/it] {'loss': 1.1729, 'learning_rate': 4.899114786701343e-06, 'epoch': 0.12} + 12%|█▏ | 832/7045 [2:42:44<19:59:03, 11.58s/it] 12%|█▏ | 833/7045 [2:42:56<19:51:40, 11.51s/it] {'loss': 1.1934, 'learning_rate': 4.898791303622305e-06, 'epoch': 0.12} + 12%|█▏ | 833/7045 [2:42:56<19:51:40, 11.51s/it] 12%|█▏ | 834/7045 [2:43:07<19:43:23, 11.43s/it] {'loss': 1.1719, 'learning_rate': 4.89846731347158e-06, 'epoch': 0.12} + 12%|█▏ | 834/7045 [2:43:07<19:43:23, 11.43s/it] 12%|█▏ | 835/7045 [2:43:18<19:36:38, 11.37s/it] {'loss': 1.1484, 'learning_rate': 4.898142816317654e-06, 'epoch': 0.12} + 12%|█▏ | 835/7045 [2:43:18<19:36:38, 11.37s/it] 12%|█▏ | 836/7045 [2:43:32<20:38:27, 11.97s/it] {'loss': 1.1279, 'learning_rate': 4.897817812229124e-06, 'epoch': 0.12} + 12%|█▏ | 836/7045 [2:43:32<20:38:27, 11.97s/it] 12%|█▏ | 837/7045 [2:43:43<20:15:46, 11.75s/it] {'loss': 1.1484, 'learning_rate': 4.897492301274689e-06, 'epoch': 0.12} + 12%|█▏ | 837/7045 [2:43:43<20:15:46, 11.75s/it] 12%|█▏ | 838/7045 [2:43:55<20:22:27, 11.82s/it] {'loss': 1.125, 'learning_rate': 4.897166283523157e-06, 'epoch': 0.12} + 12%|█▏ | 838/7045 [2:43:55<20:22:27, 11.82s/it] 12%|█▏ | 839/7045 [2:44:06<19:52:45, 11.53s/it] {'loss': 1.1406, 'learning_rate': 4.896839759043445e-06, 'epoch': 0.12} + 12%|█▏ | 839/7045 [2:44:06<19:52:45, 11.53s/it] 12%|█▏ | 840/7045 [2:44:19<20:33:04, 11.92s/it] {'loss': 1.1387, 'learning_rate': 4.8965127279045776e-06, 'epoch': 0.12} + 12%|█▏ | 840/7045 [2:44:19<20:33:04, 11.92s/it] 12%|█▏ | 841/7045 [2:44:31<20:42:05, 12.01s/it] {'loss': 1.2129, 'learning_rate': 4.896185190175681e-06, 'epoch': 0.12} + 12%|█▏ | 841/7045 [2:44:31<20:42:05, 12.01s/it] 12%|█▏ | 842/7045 [2:44:44<21:11:06, 12.30s/it] {'loss': 1.1377, 'learning_rate': 4.895857145925995e-06, 'epoch': 0.12} + 12%|█▏ | 842/7045 [2:44:44<21:11:06, 12.30s/it] 12%|█▏ | 843/7045 [2:44:55<20:38:36, 11.98s/it] {'loss': 1.1846, 'learning_rate': 4.8955285952248615e-06, 'epoch': 0.12} + 12%|█▏ | 843/7045 [2:44:55<20:38:36, 11.98s/it] 12%|█▏ | 844/7045 [2:45:08<20:59:37, 12.19s/it] {'loss': 1.1587, 'learning_rate': 4.895199538141733e-06, 'epoch': 0.12} + 12%|█▏ | 844/7045 [2:45:08<20:59:37, 12.19s/it] 12%|█▏ | 845/7045 [2:45:19<20:23:57, 11.84s/it] {'loss': 1.1592, 'learning_rate': 4.894869974746168e-06, 'epoch': 0.12} + 12%|█▏ | 845/7045 [2:45:19<20:23:57, 11.84s/it] 12%|█▏ | 846/7045 [2:45:30<19:55:15, 11.57s/it] {'loss': 1.1318, 'learning_rate': 4.89453990510783e-06, 'epoch': 0.12} + 12%|█▏ | 846/7045 [2:45:30<19:55:15, 11.57s/it] 12%|█▏ | 847/7045 [2:45:42<20:19:45, 11.81s/it] {'loss': 1.1392, 'learning_rate': 4.894209329296494e-06, 'epoch': 0.12} + 12%|█▏ | 847/7045 [2:45:42<20:19:45, 11.81s/it] 12%|█▏ | 848/7045 [2:45:53<20:02:00, 11.64s/it] {'loss': 1.1533, 'learning_rate': 4.893878247382037e-06, 'epoch': 0.12} + 12%|█▏ | 848/7045 [2:45:53<20:02:00, 11.64s/it] 12%|█▏ | 849/7045 [2:46:04<19:49:08, 11.52s/it] {'loss': 1.1592, 'learning_rate': 4.893546659434445e-06, 'epoch': 0.12} + 12%|█▏ | 849/7045 [2:46:04<19:49:08, 11.52s/it] 12%|█▏ | 850/7045 [2:46:16<19:38:52, 11.42s/it] {'loss': 1.1074, 'learning_rate': 4.893214565523811e-06, 'epoch': 0.12} + 12%|█▏ | 850/7045 [2:46:16<19:38:52, 11.42s/it] 12%|█▏ | 851/7045 [2:46:28<19:52:29, 11.55s/it] {'loss': 1.1738, 'learning_rate': 4.892881965720338e-06, 'epoch': 0.12} + 12%|█▏ | 851/7045 [2:46:28<19:52:29, 11.55s/it] 12%|█▏ | 852/7045 [2:46:41<20:43:38, 12.05s/it] {'loss': 1.1455, 'learning_rate': 4.892548860094331e-06, 'epoch': 0.12} + 12%|█▏ | 852/7045 [2:46:41<20:43:38, 12.05s/it] 12%|█▏ | 853/7045 [2:46:52<20:24:19, 11.86s/it] {'loss': 1.1357, 'learning_rate': 4.8922152487162025e-06, 'epoch': 0.12} + 12%|█▏ | 853/7045 [2:46:52<20:24:19, 11.86s/it] 12%|█▏ | 854/7045 [2:47:04<20:13:39, 11.76s/it] {'loss': 1.2129, 'learning_rate': 4.891881131656476e-06, 'epoch': 0.12} + 12%|█▏ | 854/7045 [2:47:04<20:13:39, 11.76s/it] 12%|█▏ | 855/7045 [2:47:15<20:05:59, 11.69s/it] {'loss': 1.1963, 'learning_rate': 4.891546508985778e-06, 'epoch': 0.12} + 12%|█▏ | 855/7045 [2:47:15<20:05:59, 11.69s/it] 12%|█▏ | 856/7045 [2:47:26<19:52:25, 11.56s/it] {'loss': 1.1348, 'learning_rate': 4.891211380774843e-06, 'epoch': 0.12} + 12%|█▏ | 856/7045 [2:47:26<19:52:25, 11.56s/it] 12%|█▏ | 857/7045 [2:47:38<19:52:17, 11.56s/it] {'loss': 1.1738, 'learning_rate': 4.890875747094513e-06, 'epoch': 0.12} + 12%|█▏ | 857/7045 [2:47:38<19:52:17, 11.56s/it] 12%|█▏ | 858/7045 [2:47:49<19:43:32, 11.48s/it] {'loss': 1.1533, 'learning_rate': 4.890539608015737e-06, 'epoch': 0.12} + 12%|█▏ | 858/7045 [2:47:49<19:43:32, 11.48s/it] 12%|█▏ | 859/7045 [2:48:02<20:24:04, 11.87s/it] {'loss': 1.1416, 'learning_rate': 4.890202963609568e-06, 'epoch': 0.12} + 12%|█▏ | 859/7045 [2:48:02<20:24:04, 11.87s/it] 12%|█▏ | 860/7045 [2:48:13<20:06:05, 11.70s/it] {'loss': 1.1348, 'learning_rate': 4.88986581394717e-06, 'epoch': 0.12} + 12%|█▏ | 860/7045 [2:48:13<20:06:05, 11.70s/it] 12%|█▏ | 861/7045 [2:48:25<19:51:27, 11.56s/it] {'loss': 1.1494, 'learning_rate': 4.8895281590998115e-06, 'epoch': 0.12} + 12%|█▏ | 861/7045 [2:48:25<19:51:27, 11.56s/it] 12%|█▏ | 862/7045 [2:48:36<19:41:06, 11.46s/it] {'loss': 1.1523, 'learning_rate': 4.889189999138868e-06, 'epoch': 0.12} + 12%|█▏ | 862/7045 [2:48:36<19:41:06, 11.46s/it] 12%|█▏ | 863/7045 [2:48:47<19:30:44, 11.36s/it] {'loss': 1.1934, 'learning_rate': 4.8888513341358215e-06, 'epoch': 0.12} + 12%|█▏ | 863/7045 [2:48:47<19:30:44, 11.36s/it] 12%|█▏ | 864/7045 [2:48:59<19:36:14, 11.42s/it] {'loss': 1.1484, 'learning_rate': 4.888512164162262e-06, 'epoch': 0.12} + 12%|█▏ | 864/7045 [2:48:59<19:36:14, 11.42s/it] 12%|█▏ | 865/7045 [2:49:10<19:29:26, 11.35s/it] {'loss': 1.1826, 'learning_rate': 4.888172489289884e-06, 'epoch': 0.12} + 12%|█▏ | 865/7045 [2:49:10<19:29:26, 11.35s/it] 12%|█▏ | 866/7045 [2:49:23<20:25:34, 11.90s/it] {'loss': 1.144, 'learning_rate': 4.887832309590492e-06, 'epoch': 0.12} + 12%|█▏ | 866/7045 [2:49:23<20:25:34, 11.90s/it] 12%|█▏ | 867/7045 [2:49:35<20:17:01, 11.82s/it] {'loss': 1.1611, 'learning_rate': 4.8874916251359935e-06, 'epoch': 0.12} + 12%|█▏ | 867/7045 [2:49:35<20:17:01, 11.82s/it] 12%|█▏ | 868/7045 [2:49:47<20:31:35, 11.96s/it] {'loss': 1.1025, 'learning_rate': 4.887150435998406e-06, 'epoch': 0.12} + 12%|█▏ | 868/7045 [2:49:47<20:31:35, 11.96s/it] 12%|█▏ | 869/7045 [2:50:01<21:35:08, 12.58s/it] {'loss': 1.1128, 'learning_rate': 4.886808742249851e-06, 'epoch': 0.12} + 12%|█▏ | 869/7045 [2:50:01<21:35:08, 12.58s/it] 12%|█▏ | 870/7045 [2:50:13<21:24:47, 12.48s/it] {'loss': 1.1411, 'learning_rate': 4.886466543962559e-06, 'epoch': 0.12} + 12%|█▏ | 870/7045 [2:50:13<21:24:47, 12.48s/it] 12%|█▏ | 871/7045 [2:50:24<20:49:15, 12.14s/it] {'loss': 1.146, 'learning_rate': 4.886123841208865e-06, 'epoch': 0.12} + 12%|█▏ | 871/7045 [2:50:24<20:49:15, 12.14s/it] 12%|█▏ | 872/7045 [2:50:35<20:11:46, 11.78s/it] {'loss': 1.1416, 'learning_rate': 4.885780634061212e-06, 'epoch': 0.12} + 12%|█▏ | 872/7045 [2:50:35<20:11:46, 11.78s/it] 12%|█▏ | 873/7045 [2:50:47<19:50:32, 11.57s/it] {'loss': 1.1504, 'learning_rate': 4.88543692259215e-06, 'epoch': 0.12} + 12%|█▏ | 873/7045 [2:50:47<19:50:32, 11.57s/it] 12%|█▏ | 874/7045 [2:50:58<19:38:54, 11.46s/it] {'loss': 1.1621, 'learning_rate': 4.885092706874334e-06, 'epoch': 0.12} + 12%|█▏ | 874/7045 [2:50:58<19:38:54, 11.46s/it] 12%|█▏ | 875/7045 [2:51:09<19:24:47, 11.33s/it] {'loss': 1.1211, 'learning_rate': 4.884747986980527e-06, 'epoch': 0.12} + 12%|█▏ | 875/7045 [2:51:09<19:24:47, 11.33s/it] 12%|█▏ | 876/7045 [2:51:20<19:24:08, 11.32s/it] {'loss': 1.1455, 'learning_rate': 4.884402762983598e-06, 'epoch': 0.12} + 12%|█▏ | 876/7045 [2:51:20<19:24:08, 11.32s/it] 12%|█▏ | 877/7045 [2:51:32<19:39:24, 11.47s/it] {'loss': 1.1523, 'learning_rate': 4.884057034956523e-06, 'epoch': 0.12} + 12%|█▏ | 877/7045 [2:51:32<19:39:24, 11.47s/it] 12%|█▏ | 878/7045 [2:51:43<19:44:04, 11.52s/it] {'loss': 1.1621, 'learning_rate': 4.883710802972383e-06, 'epoch': 0.12} + 12%|█▏ | 878/7045 [2:51:44<19:44:04, 11.52s/it] 12%|█▏ | 879/7045 [2:51:55<19:29:48, 11.38s/it] {'loss': 1.1924, 'learning_rate': 4.883364067104368e-06, 'epoch': 0.12} + 12%|█▏ | 879/7045 [2:51:55<19:29:48, 11.38s/it] 12%|█▏ | 880/7045 [2:52:07<20:00:23, 11.68s/it] {'loss': 1.1157, 'learning_rate': 4.883016827425773e-06, 'epoch': 0.12} + 12%|█▏ | 880/7045 [2:52:07<20:00:23, 11.68s/it] 13%|█▎ | 881/7045 [2:52:20<20:40:24, 12.07s/it] {'loss': 1.1187, 'learning_rate': 4.882669084009999e-06, 'epoch': 0.13} + 13%|█▎ | 881/7045 [2:52:20<20:40:24, 12.07s/it] 13%|█▎ | 882/7045 [2:52:31<20:09:39, 11.78s/it] {'loss': 1.1553, 'learning_rate': 4.882320836930554e-06, 'epoch': 0.13} + 13%|█▎ | 882/7045 [2:52:31<20:09:39, 11.78s/it] 13%|█▎ | 883/7045 [2:52:42<19:56:09, 11.65s/it] {'loss': 1.1172, 'learning_rate': 4.8819720862610545e-06, 'epoch': 0.13} + 13%|█▎ | 883/7045 [2:52:42<19:56:09, 11.65s/it] 13%|█▎ | 884/7045 [2:52:54<19:48:34, 11.58s/it] {'loss': 1.1572, 'learning_rate': 4.881622832075219e-06, 'epoch': 0.13} + 13%|█▎ | 884/7045 [2:52:54<19:48:34, 11.58s/it] 13%|█▎ | 885/7045 [2:53:06<19:59:15, 11.68s/it] {'loss': 1.1191, 'learning_rate': 4.881273074446877e-06, 'epoch': 0.13} + 13%|█▎ | 885/7045 [2:53:06<19:59:15, 11.68s/it] 13%|█▎ | 886/7045 [2:53:18<20:21:51, 11.90s/it] {'loss': 1.1099, 'learning_rate': 4.880922813449962e-06, 'epoch': 0.13} + 13%|█▎ | 886/7045 [2:53:18<20:21:51, 11.90s/it] 13%|█▎ | 887/7045 [2:53:30<20:34:56, 12.03s/it] {'loss': 1.1426, 'learning_rate': 4.8805720491585155e-06, 'epoch': 0.13} + 13%|█▎ | 887/7045 [2:53:30<20:34:56, 12.03s/it] 13%|█▎ | 888/7045 [2:53:43<21:02:04, 12.30s/it] {'loss': 1.1172, 'learning_rate': 4.880220781646682e-06, 'epoch': 0.13} + 13%|█▎ | 888/7045 [2:53:43<21:02:04, 12.30s/it] 13%|█▎ | 889/7045 [2:53:55<20:55:04, 12.23s/it] {'loss': 1.127, 'learning_rate': 4.879869010988715e-06, 'epoch': 0.13} + 13%|█▎ | 889/7045 [2:53:55<20:55:04, 12.23s/it] 13%|█▎ | 890/7045 [2:54:08<20:52:21, 12.21s/it] {'loss': 1.1592, 'learning_rate': 4.879516737258976e-06, 'epoch': 0.13} + 13%|█▎ | 890/7045 [2:54:08<20:52:21, 12.21s/it] 13%|█▎ | 891/7045 [2:54:19<20:34:11, 12.03s/it] {'loss': 1.0967, 'learning_rate': 4.8791639605319294e-06, 'epoch': 0.13} + 13%|█▎ | 891/7045 [2:54:19<20:34:11, 12.03s/it] 13%|█▎ | 892/7045 [2:54:31<20:12:02, 11.82s/it] {'loss': 1.1279, 'learning_rate': 4.878810680882148e-06, 'epoch': 0.13} + 13%|█▎ | 892/7045 [2:54:31<20:12:02, 11.82s/it] 13%|█▎ | 893/7045 [2:54:41<19:41:43, 11.53s/it] {'loss': 1.1299, 'learning_rate': 4.878456898384309e-06, 'epoch': 0.13} + 13%|█▎ | 893/7045 [2:54:41<19:41:43, 11.53s/it] 13%|█▎ | 894/7045 [2:54:52<19:15:34, 11.27s/it] {'loss': 1.1201, 'learning_rate': 4.878102613113198e-06, 'epoch': 0.13} + 13%|█▎ | 894/7045 [2:54:52<19:15:34, 11.27s/it] 13%|█▎ | 895/7045 [2:55:03<19:10:12, 11.22s/it] {'loss': 1.1582, 'learning_rate': 4.877747825143707e-06, 'epoch': 0.13} + 13%|█▎ | 895/7045 [2:55:03<19:10:12, 11.22s/it] 13%|█▎ | 896/7045 [2:55:14<19:07:32, 11.20s/it] {'loss': 1.1689, 'learning_rate': 4.877392534550832e-06, 'epoch': 0.13} + 13%|█▎ | 896/7045 [2:55:14<19:07:32, 11.20s/it] 13%|█▎ | 897/7045 [2:55:25<18:58:17, 11.11s/it] {'loss': 1.1377, 'learning_rate': 4.877036741409676e-06, 'epoch': 0.13} + 13%|█▎ | 897/7045 [2:55:25<18:58:17, 11.11s/it] 13%|█▎ | 898/7045 [2:55:36<19:00:21, 11.13s/it] {'loss': 1.166, 'learning_rate': 4.876680445795452e-06, 'epoch': 0.13} + 13%|█▎ | 898/7045 [2:55:36<19:00:21, 11.13s/it] 13%|█▎ | 899/7045 [2:55:48<19:28:28, 11.41s/it] {'loss': 1.1309, 'learning_rate': 4.8763236477834726e-06, 'epoch': 0.13} + 13%|█▎ | 899/7045 [2:55:48<19:28:28, 11.41s/it] 13%|█▎ | 900/7045 [2:56:00<19:20:34, 11.33s/it] {'loss': 1.1543, 'learning_rate': 4.875966347449162e-06, 'epoch': 0.13} + 13%|█▎ | 900/7045 [2:56:00<19:20:34, 11.33s/it] 13%|█▎ | 901/7045 [2:56:12<19:50:56, 11.63s/it] {'loss': 1.1416, 'learning_rate': 4.875608544868047e-06, 'epoch': 0.13} + 13%|█▎ | 901/7045 [2:56:12<19:50:56, 11.63s/it] 13%|█▎ | 902/7045 [2:56:25<20:28:43, 12.00s/it] {'loss': 1.1436, 'learning_rate': 4.875250240115763e-06, 'epoch': 0.13} + 13%|█▎ | 902/7045 [2:56:25<20:28:43, 12.00s/it] 13%|█▎ | 903/7045 [2:56:38<20:53:03, 12.24s/it] {'loss': 1.1323, 'learning_rate': 4.874891433268051e-06, 'epoch': 0.13} + 13%|█▎ | 903/7045 [2:56:38<20:53:03, 12.24s/it] 13%|█▎ | 904/7045 [2:56:49<20:17:51, 11.90s/it] {'loss': 1.1768, 'learning_rate': 4.874532124400758e-06, 'epoch': 0.13} + 13%|█▎ | 904/7045 [2:56:49<20:17:51, 11.90s/it] 13%|█▎ | 905/7045 [2:57:01<20:37:05, 12.09s/it] {'loss': 1.1431, 'learning_rate': 4.874172313589837e-06, 'epoch': 0.13} + 13%|█▎ | 905/7045 [2:57:01<20:37:05, 12.09s/it] 13%|█▎ | 906/7045 [2:57:12<20:06:37, 11.79s/it] {'loss': 1.1533, 'learning_rate': 4.873812000911346e-06, 'epoch': 0.13} + 13%|█▎ | 906/7045 [2:57:12<20:06:37, 11.79s/it] 13%|█▎ | 907/7045 [2:57:23<19:44:59, 11.58s/it] {'loss': 1.1406, 'learning_rate': 4.8734511864414505e-06, 'epoch': 0.13} + 13%|█▎ | 907/7045 [2:57:23<19:44:59, 11.58s/it] 13%|█▎ | 908/7045 [2:57:35<19:38:40, 11.52s/it] {'loss': 1.1436, 'learning_rate': 4.873089870256423e-06, 'epoch': 0.13} + 13%|█▎ | 908/7045 [2:57:35<19:38:40, 11.52s/it] 13%|█▎ | 909/7045 [2:57:46<19:18:22, 11.33s/it] {'loss': 1.1436, 'learning_rate': 4.8727280524326395e-06, 'epoch': 0.13} + 13%|█▎ | 909/7045 [2:57:46<19:18:22, 11.33s/it] 13%|█▎ | 910/7045 [2:57:59<20:18:27, 11.92s/it] {'loss': 1.0693, 'learning_rate': 4.872365733046584e-06, 'epoch': 0.13} + 13%|█▎ | 910/7045 [2:57:59<20:18:27, 11.92s/it] 13%|█▎ | 911/7045 [2:58:10<20:06:11, 11.80s/it] {'loss': 1.166, 'learning_rate': 4.872002912174846e-06, 'epoch': 0.13} + 13%|█▎ | 911/7045 [2:58:10<20:06:11, 11.80s/it] 13%|█▎ | 912/7045 [2:58:23<20:28:16, 12.02s/it] {'loss': 1.1416, 'learning_rate': 4.871639589894119e-06, 'epoch': 0.13} + 13%|█▎ | 912/7045 [2:58:23<20:28:16, 12.02s/it] 13%|█▎ | 913/7045 [2:58:34<20:04:56, 11.79s/it] {'loss': 1.1875, 'learning_rate': 4.871275766281207e-06, 'epoch': 0.13} + 13%|█▎ | 913/7045 [2:58:34<20:04:56, 11.79s/it] 13%|█▎ | 914/7045 [2:58:46<20:05:26, 11.80s/it] {'loss': 1.1396, 'learning_rate': 4.870911441413016e-06, 'epoch': 0.13} + 13%|█▎ | 914/7045 [2:58:46<20:05:26, 11.80s/it] 13%|█▎ | 915/7045 [2:58:57<19:46:20, 11.61s/it] {'loss': 1.1377, 'learning_rate': 4.870546615366561e-06, 'epoch': 0.13} + 13%|█▎ | 915/7045 [2:58:57<19:46:20, 11.61s/it] 13%|█▎ | 916/7045 [2:59:09<19:59:55, 11.75s/it] {'loss': 1.165, 'learning_rate': 4.870181288218958e-06, 'epoch': 0.13} + 13%|█▎ | 916/7045 [2:59:09<19:59:55, 11.75s/it] 13%|█▎ | 917/7045 [2:59:21<19:51:58, 11.67s/it] {'loss': 1.1162, 'learning_rate': 4.869815460047435e-06, 'epoch': 0.13} + 13%|█▎ | 917/7045 [2:59:21<19:51:58, 11.67s/it] 13%|█▎ | 918/7045 [2:59:32<19:40:45, 11.56s/it] {'loss': 1.1621, 'learning_rate': 4.869449130929321e-06, 'epoch': 0.13} + 13%|█▎ | 918/7045 [2:59:32<19:40:45, 11.56s/it] 13%|█▎ | 919/7045 [2:59:43<19:20:41, 11.37s/it] {'loss': 1.1436, 'learning_rate': 4.869082300942054e-06, 'epoch': 0.13} + 13%|█▎ | 919/7045 [2:59:43<19:20:41, 11.37s/it] 13%|█▎ | 920/7045 [2:59:54<19:21:09, 11.37s/it] {'loss': 1.1357, 'learning_rate': 4.868714970163178e-06, 'epoch': 0.13} + 13%|█▎ | 920/7045 [2:59:54<19:21:09, 11.37s/it] 13%|█▎ | 921/7045 [3:00:05<19:04:40, 11.21s/it] {'loss': 1.1211, 'learning_rate': 4.868347138670339e-06, 'epoch': 0.13} + 13%|█▎ | 921/7045 [3:00:05<19:04:40, 11.21s/it] 13%|█▎ | 922/7045 [3:00:17<19:25:58, 11.43s/it] {'loss': 1.168, 'learning_rate': 4.867978806541293e-06, 'epoch': 0.13} + 13%|█▎ | 922/7045 [3:00:17<19:25:58, 11.43s/it] 13%|█▎ | 923/7045 [3:00:29<19:25:06, 11.42s/it] {'loss': 1.1309, 'learning_rate': 4.867609973853902e-06, 'epoch': 0.13} + 13%|█▎ | 923/7045 [3:00:29<19:25:06, 11.42s/it] 13%|█▎ | 924/7045 [3:00:40<19:09:20, 11.27s/it] {'loss': 1.1025, 'learning_rate': 4.8672406406861295e-06, 'epoch': 0.13} + 13%|█▎ | 924/7045 [3:00:40<19:09:20, 11.27s/it] 13%|█▎ | 925/7045 [3:00:51<19:15:41, 11.33s/it] {'loss': 1.1299, 'learning_rate': 4.866870807116049e-06, 'epoch': 0.13} + 13%|█▎ | 925/7045 [3:00:51<19:15:41, 11.33s/it] 13%|█▎ | 926/7045 [3:01:02<19:12:06, 11.30s/it] {'loss': 1.1641, 'learning_rate': 4.866500473221838e-06, 'epoch': 0.13} + 13%|█▎ | 926/7045 [3:01:02<19:12:06, 11.30s/it] 13%|█▎ | 927/7045 [3:01:13<19:06:36, 11.24s/it] {'loss': 1.1758, 'learning_rate': 4.866129639081779e-06, 'epoch': 0.13} + 13%|█▎ | 927/7045 [3:01:13<19:06:36, 11.24s/it] 13%|█▎ | 928/7045 [3:01:25<19:05:35, 11.24s/it] {'loss': 1.1533, 'learning_rate': 4.865758304774263e-06, 'epoch': 0.13} + 13%|█▎ | 928/7045 [3:01:25<19:05:35, 11.24s/it] 13%|█▎ | 929/7045 [3:01:36<19:10:16, 11.28s/it] {'loss': 1.1299, 'learning_rate': 4.865386470377785e-06, 'epoch': 0.13} + 13%|█▎ | 929/7045 [3:01:36<19:10:16, 11.28s/it] 13%|█▎ | 930/7045 [3:01:48<19:21:23, 11.40s/it] {'loss': 1.1777, 'learning_rate': 4.865014135970944e-06, 'epoch': 0.13} + 13%|█▎ | 930/7045 [3:01:48<19:21:23, 11.40s/it] 13%|█▎ | 931/7045 [3:01:59<19:13:11, 11.32s/it] {'loss': 1.1377, 'learning_rate': 4.864641301632446e-06, 'epoch': 0.13} + 13%|█▎ | 931/7045 [3:01:59<19:13:11, 11.32s/it] 13%|█▎ | 932/7045 [3:02:10<19:16:41, 11.35s/it] {'loss': 1.1338, 'learning_rate': 4.864267967441106e-06, 'epoch': 0.13} + 13%|█▎ | 932/7045 [3:02:10<19:16:41, 11.35s/it] 13%|█▎ | 933/7045 [3:02:21<19:13:39, 11.33s/it] {'loss': 1.2051, 'learning_rate': 4.863894133475839e-06, 'epoch': 0.13} + 13%|█▎ | 933/7045 [3:02:21<19:13:39, 11.33s/it] 13%|█▎ | 934/7045 [3:02:33<19:10:12, 11.29s/it] {'loss': 1.1357, 'learning_rate': 4.86351979981567e-06, 'epoch': 0.13} + 13%|█▎ | 934/7045 [3:02:33<19:10:12, 11.29s/it] 13%|█▎ | 935/7045 [3:02:45<19:33:31, 11.52s/it] {'loss': 1.166, 'learning_rate': 4.863144966539728e-06, 'epoch': 0.13} + 13%|█▎ | 935/7045 [3:02:45<19:33:31, 11.52s/it] 13%|█▎ | 936/7045 [3:02:56<19:30:49, 11.50s/it] {'loss': 1.1826, 'learning_rate': 4.862769633727247e-06, 'epoch': 0.13} + 13%|█▎ | 936/7045 [3:02:56<19:30:49, 11.50s/it] 13%|█▎ | 937/7045 [3:03:08<19:36:14, 11.55s/it] {'loss': 1.1709, 'learning_rate': 4.8623938014575664e-06, 'epoch': 0.13} + 13%|█▎ | 937/7045 [3:03:08<19:36:14, 11.55s/it] 13%|█▎ | 938/7045 [3:03:19<19:35:24, 11.55s/it] {'loss': 1.1357, 'learning_rate': 4.862017469810134e-06, 'epoch': 0.13} + 13%|█▎ | 938/7045 [3:03:19<19:35:24, 11.55s/it] 13%|█▎ | 939/7045 [3:03:30<19:20:52, 11.41s/it] {'loss': 1.1719, 'learning_rate': 4.861640638864499e-06, 'epoch': 0.13} + 13%|█▎ | 939/7045 [3:03:30<19:20:52, 11.41s/it] 13%|█▎ | 940/7045 [3:03:42<19:17:02, 11.37s/it] {'loss': 1.1445, 'learning_rate': 4.86126330870032e-06, 'epoch': 0.13} + 13%|█▎ | 940/7045 [3:03:42<19:17:02, 11.37s/it] 13%|█▎ | 941/7045 [3:03:53<19:12:32, 11.33s/it] {'loss': 1.124, 'learning_rate': 4.860885479397359e-06, 'epoch': 0.13} + 13%|█▎ | 941/7045 [3:03:53<19:12:32, 11.33s/it] 13%|█▎ | 942/7045 [3:04:04<19:04:55, 11.26s/it] {'loss': 1.1768, 'learning_rate': 4.8605071510354836e-06, 'epoch': 0.13} + 13%|█▎ | 942/7045 [3:04:04<19:04:55, 11.26s/it] 13%|█▎ | 943/7045 [3:04:17<19:47:06, 11.67s/it] {'loss': 1.1523, 'learning_rate': 4.8601283236946684e-06, 'epoch': 0.13} + 13%|█▎ | 943/7045 [3:04:17<19:47:06, 11.67s/it] 13%|█▎ | 944/7045 [3:04:28<19:28:32, 11.49s/it] {'loss': 1.166, 'learning_rate': 4.8597489974549905e-06, 'epoch': 0.13} + 13%|█▎ | 944/7045 [3:04:28<19:28:32, 11.49s/it] 13%|█▎ | 945/7045 [3:04:40<19:36:09, 11.57s/it] {'loss': 1.1582, 'learning_rate': 4.859369172396635e-06, 'epoch': 0.13} + 13%|█▎ | 945/7045 [3:04:40<19:36:09, 11.57s/it] 13%|█▎ | 946/7045 [3:04:51<19:19:10, 11.40s/it] {'loss': 1.1445, 'learning_rate': 4.858988848599892e-06, 'epoch': 0.13} + 13%|█▎ | 946/7045 [3:04:51<19:19:10, 11.40s/it] 13%|█▎ | 947/7045 [3:05:03<19:49:56, 11.71s/it] {'loss': 1.1245, 'learning_rate': 4.858608026145157e-06, 'epoch': 0.13} + 13%|█▎ | 947/7045 [3:05:03<19:49:56, 11.71s/it] 13%|█▎ | 948/7045 [3:05:16<20:26:30, 12.07s/it] {'loss': 1.1396, 'learning_rate': 4.858226705112931e-06, 'epoch': 0.13} + 13%|█▎ | 948/7045 [3:05:16<20:26:30, 12.07s/it] 13%|█▎ | 949/7045 [3:05:27<19:58:14, 11.79s/it] {'loss': 1.0967, 'learning_rate': 4.857844885583818e-06, 'epoch': 0.13} + 13%|█▎ | 949/7045 [3:05:27<19:58:14, 11.79s/it] 13%|█▎ | 950/7045 [3:05:39<20:16:34, 11.98s/it] {'loss': 1.1211, 'learning_rate': 4.8574625676385315e-06, 'epoch': 0.13} + 13%|█▎ | 950/7045 [3:05:39<20:16:34, 11.98s/it] 13%|█▎ | 951/7045 [3:05:51<19:51:19, 11.73s/it] {'loss': 1.1621, 'learning_rate': 4.857079751357887e-06, 'epoch': 0.13} + 13%|█▎ | 951/7045 [3:05:51<19:51:19, 11.73s/it] 14%|█▎ | 952/7045 [3:06:02<19:36:31, 11.59s/it] {'loss': 1.126, 'learning_rate': 4.8566964368228084e-06, 'epoch': 0.14} + 14%|█▎ | 952/7045 [3:06:02<19:36:31, 11.59s/it] 14%|█▎ | 953/7045 [3:06:13<19:17:49, 11.40s/it] {'loss': 1.1348, 'learning_rate': 4.856312624114321e-06, 'epoch': 0.14} + 14%|█▎ | 953/7045 [3:06:13<19:17:49, 11.40s/it] 14%|█▎ | 954/7045 [3:06:26<20:02:43, 11.85s/it] {'loss': 1.1641, 'learning_rate': 4.855928313313558e-06, 'epoch': 0.14} + 14%|█▎ | 954/7045 [3:06:26<20:02:43, 11.85s/it] 14%|█▎ | 955/7045 [3:06:37<19:49:33, 11.72s/it] {'loss': 1.1582, 'learning_rate': 4.855543504501759e-06, 'epoch': 0.14} + 14%|█▎ | 955/7045 [3:06:37<19:49:33, 11.72s/it] 14%|█▎ | 956/7045 [3:06:48<19:29:27, 11.52s/it] {'loss': 1.1279, 'learning_rate': 4.855158197760266e-06, 'epoch': 0.14} + 14%|█▎ | 956/7045 [3:06:48<19:29:27, 11.52s/it] 14%|█▎ | 957/7045 [3:06:59<19:17:34, 11.41s/it] {'loss': 1.1289, 'learning_rate': 4.854772393170528e-06, 'epoch': 0.14} + 14%|█▎ | 957/7045 [3:06:59<19:17:34, 11.41s/it] 14%|█▎ | 958/7045 [3:07:12<19:48:40, 11.72s/it] {'loss': 1.0913, 'learning_rate': 4.854386090814098e-06, 'epoch': 0.14} + 14%|█▎ | 958/7045 [3:07:12<19:48:40, 11.72s/it] 14%|█▎ | 959/7045 [3:07:26<21:00:07, 12.42s/it] {'loss': 1.1641, 'learning_rate': 4.8539992907726366e-06, 'epoch': 0.14} + 14%|█▎ | 959/7045 [3:07:26<21:00:07, 12.42s/it] 14%|█▎ | 960/7045 [3:07:37<20:30:06, 12.13s/it] {'loss': 1.1602, 'learning_rate': 4.853611993127906e-06, 'epoch': 0.14} + 14%|█▎ | 960/7045 [3:07:37<20:30:06, 12.13s/it] 14%|█▎ | 961/7045 [3:07:49<20:07:02, 11.90s/it] {'loss': 1.1797, 'learning_rate': 4.853224197961778e-06, 'epoch': 0.14} + 14%|█▎ | 961/7045 [3:07:49<20:07:02, 11.90s/it] 14%|█▎ | 962/7045 [3:08:00<19:42:00, 11.66s/it] {'loss': 1.1553, 'learning_rate': 4.852835905356225e-06, 'epoch': 0.14} + 14%|█▎ | 962/7045 [3:08:00<19:42:00, 11.66s/it] 14%|█▎ | 963/7045 [3:08:11<19:32:58, 11.57s/it] {'loss': 1.167, 'learning_rate': 4.8524471153933285e-06, 'epoch': 0.14} + 14%|█▎ | 963/7045 [3:08:11<19:32:58, 11.57s/it] 14%|█▎ | 964/7045 [3:08:22<19:05:33, 11.30s/it] {'loss': 1.1045, 'learning_rate': 4.852057828155272e-06, 'epoch': 0.14} + 14%|█▎ | 964/7045 [3:08:22<19:05:33, 11.30s/it] 14%|█▎ | 965/7045 [3:08:33<18:58:18, 11.23s/it] {'loss': 1.165, 'learning_rate': 4.851668043724347e-06, 'epoch': 0.14} + 14%|█▎ | 965/7045 [3:08:33<18:58:18, 11.23s/it] 14%|█▎ | 966/7045 [3:08:45<19:37:42, 11.62s/it] {'loss': 1.1045, 'learning_rate': 4.8512777621829465e-06, 'epoch': 0.14} + 14%|█▎ | 966/7045 [3:08:45<19:37:42, 11.62s/it] 14%|█▎ | 967/7045 [3:08:58<20:08:10, 11.93s/it] {'loss': 1.1084, 'learning_rate': 4.850886983613573e-06, 'epoch': 0.14} + 14%|█▎ | 967/7045 [3:08:58<20:08:10, 11.93s/it] 14%|█▎ | 968/7045 [3:09:09<19:34:47, 11.60s/it] {'loss': 1.126, 'learning_rate': 4.850495708098831e-06, 'epoch': 0.14} + 14%|█▎ | 968/7045 [3:09:09<19:34:47, 11.60s/it] 14%|█▍ | 969/7045 [3:09:21<19:37:37, 11.63s/it] {'loss': 1.1426, 'learning_rate': 4.850103935721429e-06, 'epoch': 0.14} + 14%|█▍ | 969/7045 [3:09:21<19:37:37, 11.63s/it] 14%|█▍ | 970/7045 [3:09:32<19:23:52, 11.50s/it] {'loss': 1.1729, 'learning_rate': 4.849711666564184e-06, 'epoch': 0.14} + 14%|█▍ | 970/7045 [3:09:32<19:23:52, 11.50s/it] 14%|█▍ | 971/7045 [3:09:43<19:14:30, 11.40s/it] {'loss': 1.1621, 'learning_rate': 4.849318900710016e-06, 'epoch': 0.14} + 14%|█▍ | 971/7045 [3:09:43<19:14:30, 11.40s/it] 14%|█▍ | 972/7045 [3:09:54<19:11:03, 11.37s/it] {'loss': 1.1729, 'learning_rate': 4.848925638241951e-06, 'epoch': 0.14} + 14%|█▍ | 972/7045 [3:09:54<19:11:03, 11.37s/it] 14%|█▍ | 973/7045 [3:10:05<19:01:16, 11.28s/it] {'loss': 1.1318, 'learning_rate': 4.848531879243118e-06, 'epoch': 0.14} + 14%|█▍ | 973/7045 [3:10:05<19:01:16, 11.28s/it] 14%|█▍ | 974/7045 [3:10:17<19:25:38, 11.52s/it] {'loss': 1.1484, 'learning_rate': 4.848137623796754e-06, 'epoch': 0.14} + 14%|█▍ | 974/7045 [3:10:17<19:25:38, 11.52s/it] 14%|█▍ | 975/7045 [3:10:28<19:09:28, 11.36s/it] {'loss': 1.1914, 'learning_rate': 4.847742871986198e-06, 'epoch': 0.14} + 14%|█▍ | 975/7045 [3:10:28<19:09:28, 11.36s/it] 14%|█▍ | 976/7045 [3:10:40<19:07:13, 11.34s/it] {'loss': 1.1641, 'learning_rate': 4.847347623894895e-06, 'epoch': 0.14} + 14%|█▍ | 976/7045 [3:10:40<19:07:13, 11.34s/it] 14%|█▍ | 977/7045 [3:10:51<18:55:26, 11.23s/it] {'loss': 1.1387, 'learning_rate': 4.846951879606396e-06, 'epoch': 0.14} + 14%|█▍ | 977/7045 [3:10:51<18:55:26, 11.23s/it] 14%|█▍ | 978/7045 [3:11:03<19:35:42, 11.63s/it] {'loss': 1.1514, 'learning_rate': 4.846555639204354e-06, 'epoch': 0.14} + 14%|█▍ | 978/7045 [3:11:03<19:35:42, 11.63s/it] 14%|█▍ | 979/7045 [3:11:14<19:22:40, 11.50s/it] {'loss': 1.1416, 'learning_rate': 4.846158902772532e-06, 'epoch': 0.14} + 14%|█▍ | 979/7045 [3:11:14<19:22:40, 11.50s/it] 14%|█▍ | 980/7045 [3:11:27<20:07:20, 11.94s/it] {'loss': 1.167, 'learning_rate': 4.845761670394792e-06, 'epoch': 0.14} + 14%|█▍ | 980/7045 [3:11:27<20:07:20, 11.94s/it] 14%|█▍ | 981/7045 [3:11:39<20:10:37, 11.98s/it] {'loss': 1.1611, 'learning_rate': 4.845363942155104e-06, 'epoch': 0.14} + 14%|█▍ | 981/7045 [3:11:39<20:10:37, 11.98s/it] 14%|█▍ | 982/7045 [3:11:52<20:17:11, 12.05s/it] {'loss': 1.1582, 'learning_rate': 4.844965718137542e-06, 'epoch': 0.14} + 14%|█▍ | 982/7045 [3:11:52<20:17:11, 12.05s/it] 14%|█▍ | 983/7045 [3:12:05<20:47:19, 12.35s/it] {'loss': 1.123, 'learning_rate': 4.844566998426286e-06, 'epoch': 0.14} + 14%|█▍ | 983/7045 [3:12:05<20:47:19, 12.35s/it] 14%|█▍ | 984/7045 [3:12:16<20:01:43, 11.90s/it] {'loss': 1.125, 'learning_rate': 4.844167783105621e-06, 'epoch': 0.14} + 14%|█▍ | 984/7045 [3:12:16<20:01:43, 11.90s/it] 14%|█▍ | 985/7045 [3:12:27<19:42:38, 11.71s/it] {'loss': 1.1826, 'learning_rate': 4.843768072259933e-06, 'epoch': 0.14} + 14%|█▍ | 985/7045 [3:12:27<19:42:38, 11.71s/it] 14%|█▍ | 986/7045 [3:12:37<19:10:24, 11.39s/it] {'loss': 1.1572, 'learning_rate': 4.843367865973717e-06, 'epoch': 0.14} + 14%|█▍ | 986/7045 [3:12:37<19:10:24, 11.39s/it] 14%|█▍ | 987/7045 [3:12:48<18:55:55, 11.25s/it] {'loss': 1.1309, 'learning_rate': 4.842967164331571e-06, 'epoch': 0.14} + 14%|█▍ | 987/7045 [3:12:48<18:55:55, 11.25s/it] 14%|█▍ | 988/7045 [3:13:01<19:31:15, 11.60s/it] {'loss': 1.1289, 'learning_rate': 4.842565967418197e-06, 'epoch': 0.14} + 14%|█▍ | 988/7045 [3:13:01<19:31:15, 11.60s/it] 14%|█▍ | 989/7045 [3:13:12<19:10:07, 11.39s/it] {'loss': 1.1621, 'learning_rate': 4.842164275318404e-06, 'epoch': 0.14} + 14%|█▍ | 989/7045 [3:13:12<19:10:07, 11.39s/it] 14%|█▍ | 990/7045 [3:13:26<20:25:31, 12.14s/it] {'loss': 1.1196, 'learning_rate': 4.841762088117102e-06, 'epoch': 0.14} + 14%|█▍ | 990/7045 [3:13:26<20:25:31, 12.14s/it] 14%|█▍ | 991/7045 [3:13:37<19:57:16, 11.87s/it] {'loss': 1.103, 'learning_rate': 4.841359405899311e-06, 'epoch': 0.14} + 14%|█▍ | 991/7045 [3:13:37<19:57:16, 11.87s/it] 14%|█▍ | 992/7045 [3:13:49<19:57:02, 11.87s/it] {'loss': 1.1592, 'learning_rate': 4.84095622875015e-06, 'epoch': 0.14} + 14%|█▍ | 992/7045 [3:13:49<19:57:02, 11.87s/it] 14%|█▍ | 993/7045 [3:14:00<19:44:10, 11.74s/it] {'loss': 1.1641, 'learning_rate': 4.840552556754846e-06, 'epoch': 0.14} + 14%|█▍ | 993/7045 [3:14:00<19:44:10, 11.74s/it] 14%|█▍ | 994/7045 [3:14:11<19:25:59, 11.56s/it] {'loss': 1.1318, 'learning_rate': 4.84014838999873e-06, 'epoch': 0.14} + 14%|█▍ | 994/7045 [3:14:11<19:25:59, 11.56s/it] 14%|█▍ | 995/7045 [3:14:22<19:06:17, 11.37s/it] {'loss': 1.123, 'learning_rate': 4.839743728567237e-06, 'epoch': 0.14} + 14%|█▍ | 995/7045 [3:14:22<19:06:17, 11.37s/it] 14%|█▍ | 996/7045 [3:14:34<19:26:35, 11.57s/it] {'loss': 1.167, 'learning_rate': 4.839338572545906e-06, 'epoch': 0.14} + 14%|█▍ | 996/7045 [3:14:34<19:26:35, 11.57s/it] 14%|█▍ | 997/7045 [3:14:45<19:07:33, 11.38s/it] {'loss': 1.1484, 'learning_rate': 4.838932922020384e-06, 'epoch': 0.14} + 14%|█▍ | 997/7045 [3:14:45<19:07:33, 11.38s/it] 14%|█▍ | 998/7045 [3:14:58<19:37:54, 11.69s/it] {'loss': 1.1294, 'learning_rate': 4.838526777076417e-06, 'epoch': 0.14} + 14%|█▍ | 998/7045 [3:14:58<19:37:54, 11.69s/it] 14%|█▍ | 999/7045 [3:15:09<19:15:44, 11.47s/it] {'loss': 1.1953, 'learning_rate': 4.83812013779986e-06, 'epoch': 0.14} + 14%|█▍ | 999/7045 [3:15:09<19:15:44, 11.47s/it] 14%|█▍ | 1000/7045 [3:15:20<19:04:01, 11.36s/it] {'loss': 1.1445, 'learning_rate': 4.837713004276671e-06, 'epoch': 0.14} + 14%|█▍ | 1000/7045 [3:15:20<19:04:01, 11.36s/it] 14%|█▍ | 1001/7045 [3:15:31<18:54:44, 11.26s/it] {'loss': 1.1455, 'learning_rate': 4.837305376592912e-06, 'epoch': 0.14} + 14%|█▍ | 1001/7045 [3:15:31<18:54:44, 11.26s/it] 14%|█▍ | 1002/7045 [3:15:42<18:58:43, 11.31s/it] {'loss': 1.1289, 'learning_rate': 4.836897254834752e-06, 'epoch': 0.14} + 14%|█▍ | 1002/7045 [3:15:42<18:58:43, 11.31s/it] 14%|█▍ | 1003/7045 [3:15:54<19:03:44, 11.36s/it] {'loss': 1.1055, 'learning_rate': 4.836488639088458e-06, 'epoch': 0.14} + 14%|█▍ | 1003/7045 [3:15:54<19:03:44, 11.36s/it] 14%|█▍ | 1004/7045 [3:16:05<19:13:36, 11.46s/it] {'loss': 1.1426, 'learning_rate': 4.83607952944041e-06, 'epoch': 0.14} + 14%|█▍ | 1004/7045 [3:16:05<19:13:36, 11.46s/it] 14%|█▍ | 1005/7045 [3:16:16<19:00:07, 11.33s/it] {'loss': 1.1377, 'learning_rate': 4.835669925977086e-06, 'epoch': 0.14} + 14%|█▍ | 1005/7045 [3:16:16<19:00:07, 11.33s/it] 14%|█▍ | 1006/7045 [3:16:28<18:59:42, 11.32s/it] {'loss': 1.1553, 'learning_rate': 4.8352598287850704e-06, 'epoch': 0.14} + 14%|█▍ | 1006/7045 [3:16:28<18:59:42, 11.32s/it] 14%|█▍ | 1007/7045 [3:16:40<19:27:39, 11.60s/it] {'loss': 1.1553, 'learning_rate': 4.834849237951053e-06, 'epoch': 0.14} + 14%|█▍ | 1007/7045 [3:16:40<19:27:39, 11.60s/it] 14%|█▍ | 1008/7045 [3:16:51<19:19:59, 11.53s/it] {'loss': 1.1328, 'learning_rate': 4.834438153561828e-06, 'epoch': 0.14} + 14%|█▍ | 1008/7045 [3:16:51<19:19:59, 11.53s/it] 14%|█▍ | 1009/7045 [3:17:02<19:02:59, 11.36s/it] {'loss': 1.165, 'learning_rate': 4.834026575704291e-06, 'epoch': 0.14} + 14%|█▍ | 1009/7045 [3:17:02<19:02:59, 11.36s/it] 14%|█▍ | 1010/7045 [3:17:14<19:09:51, 11.43s/it] {'loss': 1.1504, 'learning_rate': 4.8336145044654445e-06, 'epoch': 0.14} + 14%|█▍ | 1010/7045 [3:17:14<19:09:51, 11.43s/it] 14%|█▍ | 1011/7045 [3:17:25<19:11:44, 11.45s/it] {'loss': 1.1807, 'learning_rate': 4.833201939932396e-06, 'epoch': 0.14} + 14%|█▍ | 1011/7045 [3:17:25<19:11:44, 11.45s/it] 14%|█▍ | 1012/7045 [3:17:37<19:06:34, 11.40s/it] {'loss': 1.1475, 'learning_rate': 4.832788882192355e-06, 'epoch': 0.14} + 14%|█▍ | 1012/7045 [3:17:37<19:06:34, 11.40s/it] 14%|█▍ | 1013/7045 [3:17:49<19:35:46, 11.70s/it] {'loss': 1.1484, 'learning_rate': 4.832375331332636e-06, 'epoch': 0.14} + 14%|█▍ | 1013/7045 [3:17:49<19:35:46, 11.70s/it] 14%|█▍ | 1014/7045 [3:18:00<19:22:24, 11.56s/it] {'loss': 1.1309, 'learning_rate': 4.831961287440658e-06, 'epoch': 0.14} + 14%|█▍ | 1014/7045 [3:18:00<19:22:24, 11.56s/it] 14%|█▍ | 1015/7045 [3:18:12<19:24:54, 11.59s/it] {'loss': 1.1367, 'learning_rate': 4.831546750603946e-06, 'epoch': 0.14} + 14%|█▍ | 1015/7045 [3:18:12<19:24:54, 11.59s/it] 14%|█▍ | 1016/7045 [3:18:24<19:28:45, 11.63s/it] {'loss': 1.1172, 'learning_rate': 4.831131720910125e-06, 'epoch': 0.14} + 14%|█▍ | 1016/7045 [3:18:24<19:28:45, 11.63s/it] 14%|█▍ | 1017/7045 [3:18:35<19:23:06, 11.58s/it] {'loss': 1.1816, 'learning_rate': 4.8307161984469275e-06, 'epoch': 0.14} + 14%|█▍ | 1017/7045 [3:18:35<19:23:06, 11.58s/it] 14%|█▍ | 1018/7045 [3:18:46<19:20:30, 11.55s/it] {'loss': 1.1641, 'learning_rate': 4.83030018330219e-06, 'epoch': 0.14} + 14%|█▍ | 1018/7045 [3:18:47<19:20:30, 11.55s/it] 14%|█▍ | 1019/7045 [3:18:58<19:33:44, 11.69s/it] {'loss': 1.127, 'learning_rate': 4.8298836755638515e-06, 'epoch': 0.14} + 14%|█▍ | 1019/7045 [3:18:58<19:33:44, 11.69s/it] 14%|█▍ | 1020/7045 [3:19:10<19:17:25, 11.53s/it] {'loss': 1.1465, 'learning_rate': 4.829466675319956e-06, 'epoch': 0.14} + 14%|█▍ | 1020/7045 [3:19:10<19:17:25, 11.53s/it] 14%|█▍ | 1021/7045 [3:19:22<19:32:26, 11.68s/it] {'loss': 1.1377, 'learning_rate': 4.829049182658652e-06, 'epoch': 0.14} + 14%|█▍ | 1021/7045 [3:19:22<19:32:26, 11.68s/it] 15%|█▍ | 1022/7045 [3:19:33<19:20:04, 11.56s/it] {'loss': 1.1289, 'learning_rate': 4.8286311976681915e-06, 'epoch': 0.15} + 15%|█▍ | 1022/7045 [3:19:33<19:20:04, 11.56s/it] 15%|█▍ | 1023/7045 [3:19:45<19:34:05, 11.70s/it] {'loss': 1.1182, 'learning_rate': 4.828212720436931e-06, 'epoch': 0.15} + 15%|█▍ | 1023/7045 [3:19:45<19:34:05, 11.70s/it] 15%|█▍ | 1024/7045 [3:19:56<19:14:09, 11.50s/it] {'loss': 1.1846, 'learning_rate': 4.827793751053331e-06, 'epoch': 0.15} + 15%|█▍ | 1024/7045 [3:19:56<19:14:09, 11.50s/it] 15%|█▍ | 1025/7045 [3:20:08<19:14:41, 11.51s/it] {'loss': 1.1484, 'learning_rate': 4.827374289605956e-06, 'epoch': 0.15} + 15%|█▍ | 1025/7045 [3:20:08<19:14:41, 11.51s/it] 15%|█▍ | 1026/7045 [3:20:19<19:25:17, 11.62s/it] {'loss': 1.1553, 'learning_rate': 4.826954336183475e-06, 'epoch': 0.15} + 15%|█▍ | 1026/7045 [3:20:19<19:25:17, 11.62s/it] 15%|█▍ | 1027/7045 [3:20:31<19:17:29, 11.54s/it] {'loss': 1.1338, 'learning_rate': 4.826533890874658e-06, 'epoch': 0.15} + 15%|█▍ | 1027/7045 [3:20:31<19:17:29, 11.54s/it] 15%|█▍ | 1028/7045 [3:20:44<19:59:34, 11.96s/it] {'loss': 1.1787, 'learning_rate': 4.826112953768385e-06, 'epoch': 0.15} + 15%|█▍ | 1028/7045 [3:20:44<19:59:34, 11.96s/it] 15%|█▍ | 1029/7045 [3:20:56<20:09:14, 12.06s/it] {'loss': 1.167, 'learning_rate': 4.825691524953633e-06, 'epoch': 0.15} + 15%|█▍ | 1029/7045 [3:20:56<20:09:14, 12.06s/it] 15%|█▍ | 1030/7045 [3:21:09<20:31:36, 12.29s/it] {'loss': 1.0918, 'learning_rate': 4.8252696045194875e-06, 'epoch': 0.15} + 15%|█▍ | 1030/7045 [3:21:09<20:31:36, 12.29s/it] 15%|█▍ | 1031/7045 [3:21:22<20:48:14, 12.45s/it] {'loss': 1.165, 'learning_rate': 4.824847192555137e-06, 'epoch': 0.15} + 15%|█▍ | 1031/7045 [3:21:22<20:48:14, 12.45s/it] 15%|█▍ | 1032/7045 [3:21:33<20:15:04, 12.12s/it] {'loss': 1.1807, 'learning_rate': 4.824424289149873e-06, 'epoch': 0.15} + 15%|█▍ | 1032/7045 [3:21:33<20:15:04, 12.12s/it] 15%|█▍ | 1033/7045 [3:21:44<19:52:14, 11.90s/it] {'loss': 1.1592, 'learning_rate': 4.8240008943930935e-06, 'epoch': 0.15} + 15%|█▍ | 1033/7045 [3:21:44<19:52:14, 11.90s/it] 15%|█▍ | 1034/7045 [3:21:56<19:43:42, 11.82s/it] {'loss': 1.1934, 'learning_rate': 4.823577008374296e-06, 'epoch': 0.15} + 15%|█▍ | 1034/7045 [3:21:56<19:43:42, 11.82s/it] 15%|█▍ | 1035/7045 [3:22:07<19:32:00, 11.70s/it] {'loss': 1.1699, 'learning_rate': 4.823152631183085e-06, 'epoch': 0.15} + 15%|█▍ | 1035/7045 [3:22:07<19:32:00, 11.70s/it] 15%|█▍ | 1036/7045 [3:22:19<19:28:24, 11.67s/it] {'loss': 1.1445, 'learning_rate': 4.822727762909168e-06, 'epoch': 0.15} + 15%|█▍ | 1036/7045 [3:22:19<19:28:24, 11.67s/it] 15%|█▍ | 1037/7045 [3:22:30<19:18:02, 11.56s/it] {'loss': 1.1367, 'learning_rate': 4.822302403642357e-06, 'epoch': 0.15} + 15%|█▍ | 1037/7045 [3:22:30<19:18:02, 11.56s/it] 15%|█▍ | 1038/7045 [3:22:41<19:04:44, 11.43s/it] {'loss': 1.1201, 'learning_rate': 4.821876553472566e-06, 'epoch': 0.15} + 15%|█▍ | 1038/7045 [3:22:41<19:04:44, 11.43s/it] 15%|█▍ | 1039/7045 [3:22:53<18:53:33, 11.32s/it] {'loss': 1.1367, 'learning_rate': 4.821450212489814e-06, 'epoch': 0.15} + 15%|█▍ | 1039/7045 [3:22:53<18:53:33, 11.32s/it] 15%|█▍ | 1040/7045 [3:23:04<18:52:26, 11.31s/it] {'loss': 1.1348, 'learning_rate': 4.821023380784225e-06, 'epoch': 0.15} + 15%|█▍ | 1040/7045 [3:23:04<18:52:26, 11.31s/it] 15%|█▍ | 1041/7045 [3:23:17<19:38:58, 11.78s/it] {'loss': 1.1318, 'learning_rate': 4.820596058446025e-06, 'epoch': 0.15} + 15%|█▍ | 1041/7045 [3:23:17<19:38:58, 11.78s/it] 15%|█▍ | 1042/7045 [3:23:28<19:27:42, 11.67s/it] {'loss': 1.1992, 'learning_rate': 4.820168245565542e-06, 'epoch': 0.15} + 15%|█▍ | 1042/7045 [3:23:28<19:27:42, 11.67s/it] 15%|█▍ | 1043/7045 [3:23:39<19:06:10, 11.46s/it] {'loss': 1.1113, 'learning_rate': 4.819739942233213e-06, 'epoch': 0.15} + 15%|█▍ | 1043/7045 [3:23:39<19:06:10, 11.46s/it] 15%|█▍ | 1044/7045 [3:23:50<18:59:28, 11.39s/it] {'loss': 1.1504, 'learning_rate': 4.819311148539572e-06, 'epoch': 0.15} + 15%|█▍ | 1044/7045 [3:23:50<18:59:28, 11.39s/it] 15%|█▍ | 1045/7045 [3:24:02<18:55:17, 11.35s/it] {'loss': 1.1348, 'learning_rate': 4.818881864575263e-06, 'epoch': 0.15} + 15%|█▍ | 1045/7045 [3:24:02<18:55:17, 11.35s/it] 15%|█▍ | 1046/7045 [3:24:13<18:46:34, 11.27s/it] {'loss': 1.1348, 'learning_rate': 4.81845209043103e-06, 'epoch': 0.15} + 15%|█▍ | 1046/7045 [3:24:13<18:46:34, 11.27s/it] 15%|█▍ | 1047/7045 [3:24:24<18:51:54, 11.32s/it] {'loss': 1.1436, 'learning_rate': 4.818021826197721e-06, 'epoch': 0.15} + 15%|█▍ | 1047/7045 [3:24:24<18:51:54, 11.32s/it] 15%|█▍ | 1048/7045 [3:24:38<20:00:10, 12.01s/it] {'loss': 1.1699, 'learning_rate': 4.817591071966288e-06, 'epoch': 0.15} + 15%|█▍ | 1048/7045 [3:24:38<20:00:10, 12.01s/it] 15%|█▍ | 1049/7045 [3:24:49<19:51:33, 11.92s/it] {'loss': 1.1543, 'learning_rate': 4.817159827827786e-06, 'epoch': 0.15} + 15%|█▍ | 1049/7045 [3:24:49<19:51:33, 11.92s/it] 15%|█▍ | 1050/7045 [3:25:03<20:25:10, 12.26s/it] {'loss': 1.085, 'learning_rate': 4.816728093873377e-06, 'epoch': 0.15} + 15%|█▍ | 1050/7045 [3:25:03<20:25:10, 12.26s/it] 15%|█▍ | 1051/7045 [3:25:15<20:19:48, 12.21s/it] {'loss': 1.1846, 'learning_rate': 4.816295870194319e-06, 'epoch': 0.15} + 15%|█▍ | 1051/7045 [3:25:15<20:19:48, 12.21s/it] 15%|█▍ | 1052/7045 [3:25:27<20:13:32, 12.15s/it] {'loss': 1.1406, 'learning_rate': 4.815863156881981e-06, 'epoch': 0.15} + 15%|█▍ | 1052/7045 [3:25:27<20:13:32, 12.15s/it] 15%|█▍ | 1053/7045 [3:25:39<20:27:23, 12.29s/it] {'loss': 1.1533, 'learning_rate': 4.815429954027834e-06, 'epoch': 0.15} + 15%|█▍ | 1053/7045 [3:25:39<20:27:23, 12.29s/it] 15%|█▍ | 1054/7045 [3:25:50<19:51:43, 11.94s/it] {'loss': 1.1221, 'learning_rate': 4.814996261723448e-06, 'epoch': 0.15} + 15%|█▍ | 1054/7045 [3:25:50<19:51:43, 11.94s/it] 15%|█▍ | 1055/7045 [3:26:03<20:25:26, 12.27s/it] {'loss': 1.1504, 'learning_rate': 4.814562080060501e-06, 'epoch': 0.15} + 15%|█▍ | 1055/7045 [3:26:03<20:25:26, 12.27s/it] 15%|█▍ | 1056/7045 [3:26:15<20:18:42, 12.21s/it] {'loss': 1.1416, 'learning_rate': 4.814127409130773e-06, 'epoch': 0.15} + 15%|█▍ | 1056/7045 [3:26:15<20:18:42, 12.21s/it] 15%|█▌ | 1057/7045 [3:26:27<19:44:30, 11.87s/it] {'loss': 1.1157, 'learning_rate': 4.813692249026148e-06, 'epoch': 0.15} + 15%|█▌ | 1057/7045 [3:26:27<19:44:30, 11.87s/it] 15%|█▌ | 1058/7045 [3:26:38<19:28:14, 11.71s/it] {'loss': 1.1309, 'learning_rate': 4.813256599838614e-06, 'epoch': 0.15} + 15%|█▌ | 1058/7045 [3:26:38<19:28:14, 11.71s/it] 15%|█▌ | 1059/7045 [3:26:50<19:26:31, 11.69s/it] {'loss': 1.1455, 'learning_rate': 4.812820461660258e-06, 'epoch': 0.15} + 15%|█▌ | 1059/7045 [3:26:50<19:26:31, 11.69s/it] 15%|█▌ | 1060/7045 [3:27:01<19:09:23, 11.52s/it] {'loss': 1.1396, 'learning_rate': 4.812383834583276e-06, 'epoch': 0.15} + 15%|█▌ | 1060/7045 [3:27:01<19:09:23, 11.52s/it] 15%|█▌ | 1061/7045 [3:27:12<18:50:40, 11.34s/it] {'loss': 1.1729, 'learning_rate': 4.811946718699964e-06, 'epoch': 0.15} + 15%|█▌ | 1061/7045 [3:27:12<18:50:40, 11.34s/it] 15%|█▌ | 1062/7045 [3:27:22<18:38:35, 11.22s/it] {'loss': 1.0669, 'learning_rate': 4.811509114102723e-06, 'epoch': 0.15} + 15%|█▌ | 1062/7045 [3:27:22<18:38:35, 11.22s/it] 15%|█▌ | 1063/7045 [3:27:34<18:47:01, 11.30s/it] {'loss': 1.1621, 'learning_rate': 4.8110710208840575e-06, 'epoch': 0.15} + 15%|█▌ | 1063/7045 [3:27:34<18:47:01, 11.30s/it] 15%|█▌ | 1064/7045 [3:27:45<18:47:46, 11.31s/it] {'loss': 1.1406, 'learning_rate': 4.810632439136572e-06, 'epoch': 0.15} + 15%|█▌ | 1064/7045 [3:27:45<18:47:46, 11.31s/it] 15%|█▌ | 1065/7045 [3:27:58<19:38:37, 11.83s/it] {'loss': 1.105, 'learning_rate': 4.810193368952979e-06, 'epoch': 0.15} + 15%|█▌ | 1065/7045 [3:27:58<19:38:37, 11.83s/it] 15%|█▌ | 1066/7045 [3:28:11<19:49:02, 11.93s/it] {'loss': 1.1426, 'learning_rate': 4.80975381042609e-06, 'epoch': 0.15} + 15%|█▌ | 1066/7045 [3:28:11<19:49:02, 11.93s/it] 15%|█▌ | 1067/7045 [3:28:22<19:32:31, 11.77s/it] {'loss': 1.127, 'learning_rate': 4.809313763648823e-06, 'epoch': 0.15} + 15%|█▌ | 1067/7045 [3:28:22<19:32:31, 11.77s/it] 15%|█▌ | 1068/7045 [3:28:34<19:42:32, 11.87s/it] {'loss': 1.1128, 'learning_rate': 4.808873228714197e-06, 'epoch': 0.15} + 15%|█▌ | 1068/7045 [3:28:34<19:42:32, 11.87s/it] 15%|█▌ | 1069/7045 [3:28:45<19:21:26, 11.66s/it] {'loss': 1.1318, 'learning_rate': 4.8084322057153365e-06, 'epoch': 0.15} + 15%|█▌ | 1069/7045 [3:28:45<19:21:26, 11.66s/it] 15%|█▌ | 1070/7045 [3:28:57<19:15:18, 11.60s/it] {'loss': 1.1284, 'learning_rate': 4.807990694745466e-06, 'epoch': 0.15} + 15%|█▌ | 1070/7045 [3:28:57<19:15:18, 11.60s/it] 15%|█▌ | 1071/7045 [3:29:08<19:02:01, 11.47s/it] {'loss': 1.1602, 'learning_rate': 4.807548695897915e-06, 'epoch': 0.15} + 15%|█▌ | 1071/7045 [3:29:08<19:02:01, 11.47s/it] 15%|█▌ | 1072/7045 [3:29:19<19:00:05, 11.45s/it] {'loss': 1.124, 'learning_rate': 4.807106209266118e-06, 'epoch': 0.15} + 15%|█▌ | 1072/7045 [3:29:19<19:00:05, 11.45s/it] 15%|█▌ | 1073/7045 [3:29:31<19:00:10, 11.46s/it] {'loss': 1.1602, 'learning_rate': 4.8066632349436094e-06, 'epoch': 0.15} + 15%|█▌ | 1073/7045 [3:29:31<19:00:10, 11.46s/it] 15%|█▌ | 1074/7045 [3:29:43<19:16:09, 11.62s/it] {'loss': 1.1299, 'learning_rate': 4.806219773024028e-06, 'epoch': 0.15} + 15%|█▌ | 1074/7045 [3:29:43<19:16:09, 11.62s/it] 15%|█▌ | 1075/7045 [3:29:55<19:48:49, 11.95s/it] {'loss': 1.1064, 'learning_rate': 4.805775823601115e-06, 'epoch': 0.15} + 15%|█▌ | 1075/7045 [3:29:55<19:48:49, 11.95s/it] 15%|█▌ | 1076/7045 [3:30:07<19:28:40, 11.75s/it] {'loss': 1.1514, 'learning_rate': 4.805331386768714e-06, 'epoch': 0.15} + 15%|█▌ | 1076/7045 [3:30:07<19:28:40, 11.75s/it] 15%|█▌ | 1077/7045 [3:30:18<19:15:17, 11.61s/it] {'loss': 1.1621, 'learning_rate': 4.804886462620777e-06, 'epoch': 0.15} + 15%|█▌ | 1077/7045 [3:30:18<19:15:17, 11.61s/it] 15%|█▌ | 1078/7045 [3:30:32<20:14:08, 12.21s/it] {'loss': 1.0977, 'learning_rate': 4.80444105125135e-06, 'epoch': 0.15} + 15%|█▌ | 1078/7045 [3:30:32<20:14:08, 12.21s/it] 15%|█▌ | 1079/7045 [3:30:43<19:46:37, 11.93s/it] {'loss': 1.1543, 'learning_rate': 4.8039951527545914e-06, 'epoch': 0.15} + 15%|█▌ | 1079/7045 [3:30:43<19:46:37, 11.93s/it] 15%|█▌ | 1080/7045 [3:30:55<20:01:54, 12.09s/it] {'loss': 1.1484, 'learning_rate': 4.803548767224755e-06, 'epoch': 0.15} + 15%|█▌ | 1080/7045 [3:30:55<20:01:54, 12.09s/it] 15%|█▌ | 1081/7045 [3:31:07<19:39:47, 11.87s/it] {'loss': 1.127, 'learning_rate': 4.8031018947562015e-06, 'epoch': 0.15} + 15%|█▌ | 1081/7045 [3:31:07<19:39:47, 11.87s/it] 15%|█▌ | 1082/7045 [3:31:18<19:20:33, 11.68s/it] {'loss': 1.1514, 'learning_rate': 4.802654535443394e-06, 'epoch': 0.15} + 15%|█▌ | 1082/7045 [3:31:18<19:20:33, 11.68s/it] 15%|█▌ | 1083/7045 [3:31:31<20:05:59, 12.14s/it] {'loss': 1.0918, 'learning_rate': 4.8022066893808976e-06, 'epoch': 0.15} + 15%|█▌ | 1083/7045 [3:31:31<20:05:59, 12.14s/it] 15%|█▌ | 1084/7045 [3:31:44<20:19:40, 12.28s/it] {'loss': 1.1074, 'learning_rate': 4.801758356663381e-06, 'epoch': 0.15} + 15%|█▌ | 1084/7045 [3:31:44<20:19:40, 12.28s/it] 15%|█▌ | 1085/7045 [3:31:58<21:19:50, 12.88s/it] {'loss': 1.1201, 'learning_rate': 4.801309537385615e-06, 'epoch': 0.15} + 15%|█▌ | 1085/7045 [3:31:58<21:19:50, 12.88s/it] 15%|█▌ | 1086/7045 [3:32:09<20:28:49, 12.37s/it] {'loss': 1.166, 'learning_rate': 4.800860231642476e-06, 'epoch': 0.15} + 15%|█▌ | 1086/7045 [3:32:09<20:28:49, 12.37s/it] 15%|█▌ | 1087/7045 [3:32:20<19:51:36, 12.00s/it] {'loss': 1.1729, 'learning_rate': 4.800410439528938e-06, 'epoch': 0.15} + 15%|█▌ | 1087/7045 [3:32:20<19:51:36, 12.00s/it] 15%|█▌ | 1088/7045 [3:32:33<20:01:15, 12.10s/it] {'loss': 1.1348, 'learning_rate': 4.799960161140083e-06, 'epoch': 0.15} + 15%|█▌ | 1088/7045 [3:32:33<20:01:15, 12.10s/it] 15%|█▌ | 1089/7045 [3:32:44<19:38:01, 11.87s/it] {'loss': 1.1455, 'learning_rate': 4.7995093965710935e-06, 'epoch': 0.15} + 15%|█▌ | 1089/7045 [3:32:44<19:38:01, 11.87s/it] 15%|█▌ | 1090/7045 [3:32:55<19:23:36, 11.72s/it] {'loss': 1.1699, 'learning_rate': 4.799058145917254e-06, 'epoch': 0.15} + 15%|█▌ | 1090/7045 [3:32:55<19:23:36, 11.72s/it] 15%|█▌ | 1091/7045 [3:33:07<19:29:50, 11.79s/it] {'loss': 1.0811, 'learning_rate': 4.798606409273954e-06, 'epoch': 0.15} + 15%|█▌ | 1091/7045 [3:33:07<19:29:50, 11.79s/it] 16%|█▌ | 1092/7045 [3:33:18<19:03:06, 11.52s/it] {'loss': 1.1523, 'learning_rate': 4.798154186736684e-06, 'epoch': 0.16} + 16%|█▌ | 1092/7045 [3:33:18<19:03:06, 11.52s/it] 16%|█▌ | 1093/7045 [3:33:29<18:54:00, 11.43s/it] {'loss': 1.1641, 'learning_rate': 4.7977014784010365e-06, 'epoch': 0.16} + 16%|█▌ | 1093/7045 [3:33:29<18:54:00, 11.43s/it] 16%|█▌ | 1094/7045 [3:33:41<18:48:32, 11.38s/it] {'loss': 1.1494, 'learning_rate': 4.79724828436271e-06, 'epoch': 0.16} + 16%|█▌ | 1094/7045 [3:33:41<18:48:32, 11.38s/it] 16%|█▌ | 1095/7045 [3:33:53<19:27:31, 11.77s/it] {'loss': 1.1226, 'learning_rate': 4.796794604717502e-06, 'epoch': 0.16} + 16%|█▌ | 1095/7045 [3:33:53<19:27:31, 11.77s/it] 16%|█▌ | 1096/7045 [3:34:05<19:25:21, 11.75s/it] {'loss': 1.0659, 'learning_rate': 4.796340439561314e-06, 'epoch': 0.16} + 16%|█▌ | 1096/7045 [3:34:05<19:25:21, 11.75s/it] 16%|█▌ | 1097/7045 [3:34:16<19:02:21, 11.52s/it] {'loss': 1.1338, 'learning_rate': 4.795885788990151e-06, 'epoch': 0.16} + 16%|█▌ | 1097/7045 [3:34:16<19:02:21, 11.52s/it] 16%|█▌ | 1098/7045 [3:34:27<18:45:45, 11.36s/it] {'loss': 1.1582, 'learning_rate': 4.79543065310012e-06, 'epoch': 0.16} + 16%|█▌ | 1098/7045 [3:34:27<18:45:45, 11.36s/it] 16%|█▌ | 1099/7045 [3:34:39<18:48:02, 11.38s/it] {'loss': 1.1787, 'learning_rate': 4.79497503198743e-06, 'epoch': 0.16} + 16%|█▌ | 1099/7045 [3:34:39<18:48:02, 11.38s/it] 16%|█▌ | 1100/7045 [3:34:50<18:38:58, 11.29s/it] {'loss': 1.124, 'learning_rate': 4.794518925748394e-06, 'epoch': 0.16} + 16%|█▌ | 1100/7045 [3:34:50<18:38:58, 11.29s/it] 16%|█▌ | 1101/7045 [3:35:01<18:38:58, 11.30s/it] {'loss': 1.1436, 'learning_rate': 4.794062334479424e-06, 'epoch': 0.16} + 16%|█▌ | 1101/7045 [3:35:01<18:38:58, 11.30s/it] 16%|█▌ | 1102/7045 [3:35:12<18:34:41, 11.25s/it] {'loss': 1.1768, 'learning_rate': 4.793605258277041e-06, 'epoch': 0.16} + 16%|█▌ | 1102/7045 [3:35:12<18:34:41, 11.25s/it] 16%|█▌ | 1103/7045 [3:35:23<18:28:54, 11.20s/it] {'loss': 1.1523, 'learning_rate': 4.7931476972378625e-06, 'epoch': 0.16} + 16%|█▌ | 1103/7045 [3:35:23<18:28:54, 11.20s/it] 16%|█▌ | 1104/7045 [3:35:35<18:36:40, 11.28s/it] {'loss': 1.1475, 'learning_rate': 4.7926896514586104e-06, 'epoch': 0.16} + 16%|█▌ | 1104/7045 [3:35:35<18:36:40, 11.28s/it] 16%|█▌ | 1105/7045 [3:35:46<18:48:51, 11.40s/it] {'loss': 1.127, 'learning_rate': 4.792231121036111e-06, 'epoch': 0.16} + 16%|█▌ | 1105/7045 [3:35:46<18:48:51, 11.40s/it] 16%|█▌ | 1106/7045 [3:35:57<18:38:51, 11.30s/it] {'loss': 1.167, 'learning_rate': 4.791772106067289e-06, 'epoch': 0.16} + 16%|█▌ | 1106/7045 [3:35:57<18:38:51, 11.30s/it] 16%|█▌ | 1107/7045 [3:36:08<18:25:32, 11.17s/it] {'loss': 1.1562, 'learning_rate': 4.791312606649176e-06, 'epoch': 0.16} + 16%|█▌ | 1107/7045 [3:36:08<18:25:32, 11.17s/it] 16%|█▌ | 1108/7045 [3:36:19<18:27:12, 11.19s/it] {'loss': 1.1533, 'learning_rate': 4.790852622878901e-06, 'epoch': 0.16} + 16%|█▌ | 1108/7045 [3:36:19<18:27:12, 11.19s/it] 16%|█▌ | 1109/7045 [3:36:31<18:30:06, 11.22s/it] {'loss': 1.1318, 'learning_rate': 4.7903921548537025e-06, 'epoch': 0.16} + 16%|█▌ | 1109/7045 [3:36:31<18:30:06, 11.22s/it] 16%|█▌ | 1110/7045 [3:36:42<18:25:02, 11.17s/it] {'loss': 1.0967, 'learning_rate': 4.789931202670914e-06, 'epoch': 0.16} + 16%|█▌ | 1110/7045 [3:36:42<18:25:02, 11.17s/it] 16%|█▌ | 1111/7045 [3:36:53<18:34:01, 11.26s/it] {'loss': 1.1562, 'learning_rate': 4.7894697664279755e-06, 'epoch': 0.16} + 16%|█▌ | 1111/7045 [3:36:53<18:34:01, 11.26s/it] 16%|█▌ | 1112/7045 [3:37:05<18:37:24, 11.30s/it] {'loss': 1.1328, 'learning_rate': 4.7890078462224276e-06, 'epoch': 0.16} + 16%|█▌ | 1112/7045 [3:37:05<18:37:24, 11.30s/it] 16%|█▌ | 1113/7045 [3:37:16<18:30:01, 11.23s/it] {'loss': 1.167, 'learning_rate': 4.788545442151915e-06, 'epoch': 0.16} + 16%|█▌ | 1113/7045 [3:37:16<18:30:01, 11.23s/it] 16%|█▌ | 1114/7045 [3:37:27<18:44:30, 11.38s/it] {'loss': 1.1729, 'learning_rate': 4.788082554314183e-06, 'epoch': 0.16} + 16%|█▌ | 1114/7045 [3:37:27<18:44:30, 11.38s/it] 16%|█▌ | 1115/7045 [3:37:39<18:42:19, 11.36s/it] {'loss': 1.1475, 'learning_rate': 4.787619182807081e-06, 'epoch': 0.16} + 16%|█▌ | 1115/7045 [3:37:39<18:42:19, 11.36s/it] 16%|█▌ | 1116/7045 [3:37:50<18:40:55, 11.34s/it] {'loss': 1.1592, 'learning_rate': 4.787155327728557e-06, 'epoch': 0.16} + 16%|█▌ | 1116/7045 [3:37:50<18:40:55, 11.34s/it] 16%|█▌ | 1117/7045 [3:38:01<18:37:01, 11.31s/it] {'loss': 1.1309, 'learning_rate': 4.786690989176665e-06, 'epoch': 0.16} + 16%|█▌ | 1117/7045 [3:38:01<18:37:01, 11.31s/it] 16%|█▌ | 1118/7045 [3:38:12<18:32:37, 11.26s/it] {'loss': 1.1216, 'learning_rate': 4.78622616724956e-06, 'epoch': 0.16} + 16%|█▌ | 1118/7045 [3:38:12<18:32:37, 11.26s/it] 16%|█▌ | 1119/7045 [3:38:24<18:40:57, 11.35s/it] {'loss': 1.1411, 'learning_rate': 4.785760862045499e-06, 'epoch': 0.16} + 16%|█▌ | 1119/7045 [3:38:24<18:40:57, 11.35s/it] 16%|█▌ | 1120/7045 [3:38:35<18:40:19, 11.35s/it] {'loss': 1.1689, 'learning_rate': 4.78529507366284e-06, 'epoch': 0.16} + 16%|█▌ | 1120/7045 [3:38:35<18:40:19, 11.35s/it] 16%|█▌ | 1121/7045 [3:38:47<18:37:30, 11.32s/it] {'loss': 1.1318, 'learning_rate': 4.784828802200046e-06, 'epoch': 0.16} + 16%|█▌ | 1121/7045 [3:38:47<18:37:30, 11.32s/it] 16%|█▌ | 1122/7045 [3:38:58<18:42:19, 11.37s/it] {'loss': 1.1191, 'learning_rate': 4.78436204775568e-06, 'epoch': 0.16} + 16%|█▌ | 1122/7045 [3:38:58<18:42:19, 11.37s/it] 16%|█▌ | 1123/7045 [3:39:09<18:30:38, 11.25s/it] {'loss': 1.168, 'learning_rate': 4.783894810428405e-06, 'epoch': 0.16} + 16%|█▌ | 1123/7045 [3:39:09<18:30:38, 11.25s/it] 16%|█▌ | 1124/7045 [3:39:22<19:11:32, 11.67s/it] {'loss': 1.1553, 'learning_rate': 4.783427090316992e-06, 'epoch': 0.16} + 16%|█▌ | 1124/7045 [3:39:22<19:11:32, 11.67s/it] 16%|█▌ | 1125/7045 [3:39:34<19:34:03, 11.90s/it] {'loss': 1.1191, 'learning_rate': 4.7829588875203106e-06, 'epoch': 0.16} + 16%|█▌ | 1125/7045 [3:39:34<19:34:03, 11.90s/it] 16%|█▌ | 1126/7045 [3:39:46<19:43:42, 12.00s/it] {'loss': 1.1201, 'learning_rate': 4.78249020213733e-06, 'epoch': 0.16} + 16%|█▌ | 1126/7045 [3:39:46<19:43:42, 12.00s/it] 16%|█▌ | 1127/7045 [3:39:57<19:16:55, 11.73s/it] {'loss': 1.1611, 'learning_rate': 4.7820210342671245e-06, 'epoch': 0.16} + 16%|█▌ | 1127/7045 [3:39:57<19:16:55, 11.73s/it] 16%|█▌ | 1128/7045 [3:40:09<19:05:03, 11.61s/it] {'loss': 1.166, 'learning_rate': 4.781551384008871e-06, 'epoch': 0.16} + 16%|█▌ | 1128/7045 [3:40:09<19:05:03, 11.61s/it] 16%|█▌ | 1129/7045 [3:40:20<19:02:03, 11.58s/it] {'loss': 1.1162, 'learning_rate': 4.781081251461847e-06, 'epoch': 0.16} + 16%|█▌ | 1129/7045 [3:40:20<19:02:03, 11.58s/it] 16%|█▌ | 1130/7045 [3:40:31<18:46:02, 11.42s/it] {'loss': 1.1104, 'learning_rate': 4.780610636725431e-06, 'epoch': 0.16} + 16%|█▌ | 1130/7045 [3:40:31<18:46:02, 11.42s/it] 16%|█▌ | 1131/7045 [3:40:42<18:31:31, 11.28s/it] {'loss': 1.1289, 'learning_rate': 4.780139539899105e-06, 'epoch': 0.16} + 16%|█▌ | 1131/7045 [3:40:42<18:31:31, 11.28s/it] 16%|█▌ | 1132/7045 [3:40:53<18:20:07, 11.16s/it] {'loss': 1.1377, 'learning_rate': 4.779667961082452e-06, 'epoch': 0.16} + 16%|█▌ | 1132/7045 [3:40:53<18:20:07, 11.16s/it] 16%|█▌ | 1133/7045 [3:41:05<18:48:53, 11.46s/it] {'loss': 1.1396, 'learning_rate': 4.779195900375158e-06, 'epoch': 0.16} + 16%|█▌ | 1133/7045 [3:41:05<18:48:53, 11.46s/it] 16%|█▌ | 1134/7045 [3:41:17<19:01:13, 11.58s/it] {'loss': 1.126, 'learning_rate': 4.778723357877012e-06, 'epoch': 0.16} + 16%|█▌ | 1134/7045 [3:41:17<19:01:13, 11.58s/it] 16%|█▌ | 1135/7045 [3:41:30<19:25:19, 11.83s/it] {'loss': 1.1201, 'learning_rate': 4.778250333687899e-06, 'epoch': 0.16} + 16%|█▌ | 1135/7045 [3:41:30<19:25:19, 11.83s/it] 16%|█▌ | 1136/7045 [3:41:41<19:14:15, 11.72s/it] {'loss': 1.1328, 'learning_rate': 4.777776827907813e-06, 'epoch': 0.16} + 16%|█▌ | 1136/7045 [3:41:41<19:14:15, 11.72s/it] 16%|█▌ | 1137/7045 [3:41:52<18:59:55, 11.58s/it] {'loss': 1.1758, 'learning_rate': 4.777302840636845e-06, 'epoch': 0.16} + 16%|█▌ | 1137/7045 [3:41:52<18:59:55, 11.58s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 4 bytes but only got 0. + warnings.warn(str(msg)) + 16%|█▌ | 1138/7045 [3:42:04<18:49:52, 11.48s/it] {'loss': 1.1201, 'learning_rate': 4.776828371975189e-06, 'epoch': 0.16} + 16%|█▌ | 1138/7045 [3:42:04<18:49:52, 11.48s/it] 16%|█▌ | 1139/7045 [3:42:16<19:10:44, 11.69s/it] {'loss': 1.1592, 'learning_rate': 4.776353422023145e-06, 'epoch': 0.16} + 16%|█▌ | 1139/7045 [3:42:16<19:10:44, 11.69s/it] 16%|█▌ | 1140/7045 [3:42:27<19:10:06, 11.69s/it] {'loss': 1.1699, 'learning_rate': 4.775877990881105e-06, 'epoch': 0.16} + 16%|█▌ | 1140/7045 [3:42:27<19:10:06, 11.69s/it] 16%|█▌ | 1141/7045 [3:42:38<18:49:23, 11.48s/it] {'loss': 1.1943, 'learning_rate': 4.775402078649574e-06, 'epoch': 0.16} + 16%|█▌ | 1141/7045 [3:42:38<18:49:23, 11.48s/it] 16%|█▌ | 1142/7045 [3:42:50<18:43:18, 11.42s/it] {'loss': 1.1719, 'learning_rate': 4.774925685429149e-06, 'epoch': 0.16} + 16%|█▌ | 1142/7045 [3:42:50<18:43:18, 11.42s/it] 16%|█▌ | 1143/7045 [3:43:01<18:35:41, 11.34s/it] {'loss': 1.1562, 'learning_rate': 4.7744488113205364e-06, 'epoch': 0.16} + 16%|█▌ | 1143/7045 [3:43:01<18:35:41, 11.34s/it] 16%|█▌ | 1144/7045 [3:43:12<18:24:07, 11.23s/it] {'loss': 1.1592, 'learning_rate': 4.77397145642454e-06, 'epoch': 0.16} + 16%|█▌ | 1144/7045 [3:43:12<18:24:07, 11.23s/it] 16%|█▋ | 1145/7045 [3:43:24<18:50:17, 11.49s/it] {'loss': 1.1523, 'learning_rate': 4.7734936208420645e-06, 'epoch': 0.16} + 16%|█▋ | 1145/7045 [3:43:24<18:50:17, 11.49s/it] 16%|█▋ | 1146/7045 [3:43:35<18:37:35, 11.37s/it] {'loss': 1.124, 'learning_rate': 4.77301530467412e-06, 'epoch': 0.16} + 16%|█▋ | 1146/7045 [3:43:35<18:37:35, 11.37s/it] 16%|█▋ | 1147/7045 [3:43:47<18:54:24, 11.54s/it] {'loss': 1.1455, 'learning_rate': 4.772536508021815e-06, 'epoch': 0.16} + 16%|█▋ | 1147/7045 [3:43:47<18:54:24, 11.54s/it] 16%|█▋ | 1148/7045 [3:43:58<18:43:04, 11.43s/it] {'loss': 1.1592, 'learning_rate': 4.772057230986359e-06, 'epoch': 0.16} + 16%|█▋ | 1148/7045 [3:43:58<18:43:04, 11.43s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Truncated File Read + warnings.warn(str(msg)) + 16%|█▋ | 1149/7045 [3:44:11<19:35:57, 11.97s/it] {'loss': 1.123, 'learning_rate': 4.771577473669067e-06, 'epoch': 0.16} + 16%|█▋ | 1149/7045 [3:44:11<19:35:57, 11.97s/it] 16%|█▋ | 1150/7045 [3:44:24<19:45:30, 12.07s/it] {'loss': 1.1353, 'learning_rate': 4.771097236171353e-06, 'epoch': 0.16} + 16%|█▋ | 1150/7045 [3:44:24<19:45:30, 12.07s/it] 16%|█▋ | 1151/7045 [3:44:35<19:24:12, 11.85s/it] {'loss': 1.1504, 'learning_rate': 4.770616518594731e-06, 'epoch': 0.16} + 16%|█▋ | 1151/7045 [3:44:35<19:24:12, 11.85s/it] 16%|█▋ | 1152/7045 [3:44:47<19:27:47, 11.89s/it] {'loss': 1.1553, 'learning_rate': 4.770135321040819e-06, 'epoch': 0.16} + 16%|█▋ | 1152/7045 [3:44:47<19:27:47, 11.89s/it] 16%|█▋ | 1153/7045 [3:44:58<19:04:53, 11.66s/it] {'loss': 1.0986, 'learning_rate': 4.769653643611337e-06, 'epoch': 0.16} + 16%|█▋ | 1153/7045 [3:44:58<19:04:53, 11.66s/it] 16%|█▋ | 1154/7045 [3:45:11<19:53:37, 12.16s/it] {'loss': 1.1655, 'learning_rate': 4.769171486408102e-06, 'epoch': 0.16} + 16%|█▋ | 1154/7045 [3:45:11<19:53:37, 12.16s/it] 16%|█▋ | 1155/7045 [3:45:22<19:18:24, 11.80s/it] {'loss': 1.1338, 'learning_rate': 4.768688849533039e-06, 'epoch': 0.16} + 16%|█▋ | 1155/7045 [3:45:22<19:18:24, 11.80s/it] 16%|█▋ | 1156/7045 [3:45:33<18:55:05, 11.56s/it] {'loss': 1.1719, 'learning_rate': 4.768205733088167e-06, 'epoch': 0.16} + 16%|█▋ | 1156/7045 [3:45:33<18:55:05, 11.56s/it] 16%|█▋ | 1157/7045 [3:45:45<18:43:39, 11.45s/it] {'loss': 1.1582, 'learning_rate': 4.767722137175613e-06, 'epoch': 0.16} + 16%|█▋ | 1157/7045 [3:45:45<18:43:39, 11.45s/it] 16%|█▋ | 1158/7045 [3:46:02<21:44:26, 13.29s/it] {'loss': 1.1333, 'learning_rate': 4.767238061897601e-06, 'epoch': 0.16} + 16%|█▋ | 1158/7045 [3:46:02<21:44:26, 13.29s/it] 16%|█▋ | 1159/7045 [3:46:13<20:45:40, 12.70s/it] {'loss': 1.1592, 'learning_rate': 4.76675350735646e-06, 'epoch': 0.16} + 16%|█▋ | 1159/7045 [3:46:13<20:45:40, 12.70s/it] 16%|█▋ | 1160/7045 [3:46:25<19:57:49, 12.21s/it] {'loss': 1.167, 'learning_rate': 4.766268473654616e-06, 'epoch': 0.16} + 16%|█▋ | 1160/7045 [3:46:25<19:57:49, 12.21s/it] 16%|█▋ | 1161/7045 [3:46:35<19:17:39, 11.80s/it] {'loss': 1.1484, 'learning_rate': 4.765782960894599e-06, 'epoch': 0.16} + 16%|█▋ | 1161/7045 [3:46:35<19:17:39, 11.80s/it] 16%|█▋ | 1162/7045 [3:46:46<18:54:03, 11.57s/it] {'loss': 1.1367, 'learning_rate': 4.76529696917904e-06, 'epoch': 0.16} + 16%|█▋ | 1162/7045 [3:46:46<18:54:03, 11.57s/it] 17%|█▋ | 1163/7045 [3:46:59<19:36:23, 12.00s/it] {'loss': 1.1104, 'learning_rate': 4.764810498610671e-06, 'epoch': 0.17} + 17%|█▋ | 1163/7045 [3:46:59<19:36:23, 12.00s/it] 17%|█▋ | 1164/7045 [3:47:13<20:15:33, 12.40s/it] {'loss': 1.1396, 'learning_rate': 4.764323549292326e-06, 'epoch': 0.17} + 17%|█▋ | 1164/7045 [3:47:13<20:15:33, 12.40s/it] 17%|█▋ | 1165/7045 [3:47:24<19:29:13, 11.93s/it] {'loss': 1.1299, 'learning_rate': 4.763836121326939e-06, 'epoch': 0.17} + 17%|█▋ | 1165/7045 [3:47:24<19:29:13, 11.93s/it] 17%|█▋ | 1166/7045 [3:47:35<19:01:59, 11.65s/it] {'loss': 1.1631, 'learning_rate': 4.763348214817545e-06, 'epoch': 0.17} + 17%|█▋ | 1166/7045 [3:47:35<19:01:59, 11.65s/it] 17%|█▋ | 1167/7045 [3:47:46<18:47:43, 11.51s/it] {'loss': 1.1592, 'learning_rate': 4.762859829867281e-06, 'epoch': 0.17} + 17%|█▋ | 1167/7045 [3:47:46<18:47:43, 11.51s/it] 17%|█▋ | 1168/7045 [3:47:58<18:54:00, 11.58s/it] {'loss': 1.167, 'learning_rate': 4.762370966579385e-06, 'epoch': 0.17} + 17%|█▋ | 1168/7045 [3:47:58<18:54:00, 11.58s/it] 17%|█▋ | 1169/7045 [3:48:10<19:27:50, 11.92s/it] {'loss': 1.1172, 'learning_rate': 4.761881625057196e-06, 'epoch': 0.17} + 17%|█▋ | 1169/7045 [3:48:10<19:27:50, 11.92s/it] 17%|█▋ | 1170/7045 [3:48:21<19:03:23, 11.68s/it] {'loss': 1.1152, 'learning_rate': 4.761391805404154e-06, 'epoch': 0.17} + 17%|█▋ | 1170/7045 [3:48:21<19:03:23, 11.68s/it] 17%|█▋ | 1171/7045 [3:48:33<18:50:19, 11.55s/it] {'loss': 1.1475, 'learning_rate': 4.7609015077238e-06, 'epoch': 0.17} + 17%|█▋ | 1171/7045 [3:48:33<18:50:19, 11.55s/it] 17%|█▋ | 1172/7045 [3:48:44<18:49:36, 11.54s/it] {'loss': 1.1631, 'learning_rate': 4.760410732119777e-06, 'epoch': 0.17} + 17%|█▋ | 1172/7045 [3:48:44<18:49:36, 11.54s/it] 17%|█▋ | 1173/7045 [3:48:55<18:29:13, 11.33s/it] {'loss': 1.1006, 'learning_rate': 4.759919478695828e-06, 'epoch': 0.17} + 17%|█▋ | 1173/7045 [3:48:55<18:29:13, 11.33s/it] 17%|█▋ | 1174/7045 [3:49:07<18:51:31, 11.56s/it] {'loss': 1.1309, 'learning_rate': 4.7594277475557956e-06, 'epoch': 0.17} + 17%|█▋ | 1174/7045 [3:49:07<18:51:31, 11.56s/it] 17%|█▋ | 1175/7045 [3:49:19<19:06:54, 11.72s/it] {'loss': 1.1177, 'learning_rate': 4.758935538803629e-06, 'epoch': 0.17} + 17%|█▋ | 1175/7045 [3:49:19<19:06:54, 11.72s/it] 17%|█▋ | 1176/7045 [3:49:31<19:22:36, 11.89s/it] {'loss': 1.1133, 'learning_rate': 4.758442852543369e-06, 'epoch': 0.17} + 17%|█▋ | 1176/7045 [3:49:31<19:22:36, 11.89s/it] 17%|█▋ | 1177/7045 [3:49:43<19:05:13, 11.71s/it] {'loss': 1.1523, 'learning_rate': 4.757949688879167e-06, 'epoch': 0.17} + 17%|█▋ | 1177/7045 [3:49:43<19:05:13, 11.71s/it] 17%|█▋ | 1178/7045 [3:49:54<18:54:21, 11.60s/it] {'loss': 1.1572, 'learning_rate': 4.757456047915271e-06, 'epoch': 0.17} + 17%|█▋ | 1178/7045 [3:49:54<18:54:21, 11.60s/it] 17%|█▋ | 1179/7045 [3:50:05<18:39:16, 11.45s/it] {'loss': 1.1172, 'learning_rate': 4.756961929756027e-06, 'epoch': 0.17} + 17%|█▋ | 1179/7045 [3:50:05<18:39:16, 11.45s/it] 17%|█▋ | 1180/7045 [3:50:16<18:27:56, 11.33s/it] {'loss': 1.1104, 'learning_rate': 4.756467334505887e-06, 'epoch': 0.17} + 17%|█▋ | 1180/7045 [3:50:16<18:27:56, 11.33s/it] 17%|█▋ | 1181/7045 [3:50:27<18:19:24, 11.25s/it] {'loss': 1.1396, 'learning_rate': 4.755972262269401e-06, 'epoch': 0.17} + 17%|█▋ | 1181/7045 [3:50:27<18:19:24, 11.25s/it] 17%|█▋ | 1182/7045 [3:50:38<18:14:50, 11.20s/it] {'loss': 1.1279, 'learning_rate': 4.75547671315122e-06, 'epoch': 0.17} + 17%|█▋ | 1182/7045 [3:50:38<18:14:50, 11.20s/it] 17%|█▋ | 1183/7045 [3:50:50<18:28:06, 11.34s/it] {'loss': 1.1123, 'learning_rate': 4.754980687256098e-06, 'epoch': 0.17} + 17%|█▋ | 1183/7045 [3:50:50<18:28:06, 11.34s/it] 17%|█▋ | 1184/7045 [3:51:02<18:43:45, 11.50s/it] {'loss': 1.124, 'learning_rate': 4.754484184688887e-06, 'epoch': 0.17} + 17%|█▋ | 1184/7045 [3:51:02<18:43:45, 11.50s/it] 17%|█▋ | 1185/7045 [3:51:13<18:35:20, 11.42s/it] {'loss': 1.1406, 'learning_rate': 4.7539872055545395e-06, 'epoch': 0.17} + 17%|█▋ | 1185/7045 [3:51:13<18:35:20, 11.42s/it] 17%|█▋ | 1186/7045 [3:51:24<18:28:55, 11.36s/it] {'loss': 1.1797, 'learning_rate': 4.753489749958113e-06, 'epoch': 0.17} + 17%|█▋ | 1186/7045 [3:51:24<18:28:55, 11.36s/it] 17%|█▋ | 1187/7045 [3:51:36<18:30:59, 11.38s/it] {'loss': 1.1133, 'learning_rate': 4.752991818004761e-06, 'epoch': 0.17} + 17%|█▋ | 1187/7045 [3:51:36<18:30:59, 11.38s/it] 17%|█▋ | 1188/7045 [3:51:47<18:22:04, 11.29s/it] {'loss': 1.1318, 'learning_rate': 4.75249340979974e-06, 'epoch': 0.17} + 17%|█▋ | 1188/7045 [3:51:47<18:22:04, 11.29s/it] 17%|█▋ | 1189/7045 [3:51:58<18:21:54, 11.29s/it] {'loss': 1.167, 'learning_rate': 4.751994525448406e-06, 'epoch': 0.17} + 17%|█▋ | 1189/7045 [3:51:58<18:21:54, 11.29s/it] 17%|█▋ | 1190/7045 [3:52:10<18:25:08, 11.33s/it] {'loss': 1.1113, 'learning_rate': 4.751495165056216e-06, 'epoch': 0.17} + 17%|█▋ | 1190/7045 [3:52:10<18:25:08, 11.33s/it] 17%|█▋ | 1191/7045 [3:52:22<19:05:13, 11.74s/it] {'loss': 1.1562, 'learning_rate': 4.75099532872873e-06, 'epoch': 0.17} + 17%|█▋ | 1191/7045 [3:52:22<19:05:13, 11.74s/it] 17%|█▋ | 1192/7045 [3:52:33<18:40:48, 11.49s/it] {'loss': 1.1191, 'learning_rate': 4.750495016571604e-06, 'epoch': 0.17} + 17%|█▋ | 1192/7045 [3:52:33<18:40:48, 11.49s/it] 17%|█▋ | 1193/7045 [3:52:45<18:41:41, 11.50s/it] {'loss': 1.1338, 'learning_rate': 4.7499942286906004e-06, 'epoch': 0.17} + 17%|█▋ | 1193/7045 [3:52:45<18:41:41, 11.50s/it] 17%|█▋ | 1194/7045 [3:52:56<18:23:46, 11.32s/it] {'loss': 1.1455, 'learning_rate': 4.749492965191576e-06, 'epoch': 0.17} + 17%|█▋ | 1194/7045 [3:52:56<18:23:46, 11.32s/it] 17%|█▋ | 1195/7045 [3:53:07<18:14:28, 11.23s/it] {'loss': 1.1309, 'learning_rate': 4.7489912261804925e-06, 'epoch': 0.17} + 17%|█▋ | 1195/7045 [3:53:07<18:14:28, 11.23s/it] 17%|█▋ | 1196/7045 [3:53:19<18:49:14, 11.58s/it] {'loss': 1.1523, 'learning_rate': 4.74848901176341e-06, 'epoch': 0.17} + 17%|█▋ | 1196/7045 [3:53:19<18:49:14, 11.58s/it] 17%|█▋ | 1197/7045 [3:53:30<18:32:23, 11.41s/it] {'loss': 1.1396, 'learning_rate': 4.74798632204649e-06, 'epoch': 0.17} + 17%|█▋ | 1197/7045 [3:53:30<18:32:23, 11.41s/it] 17%|█▋ | 1198/7045 [3:53:42<18:33:17, 11.42s/it] {'loss': 1.1592, 'learning_rate': 4.747483157135993e-06, 'epoch': 0.17} + 17%|█▋ | 1198/7045 [3:53:42<18:33:17, 11.42s/it] 17%|█▋ | 1199/7045 [3:53:53<18:24:16, 11.33s/it] {'loss': 1.1914, 'learning_rate': 4.746979517138284e-06, 'epoch': 0.17} + 17%|█▋ | 1199/7045 [3:53:53<18:24:16, 11.33s/it] 17%|█▋ | 1200/7045 [3:54:03<18:09:58, 11.19s/it] {'loss': 1.1338, 'learning_rate': 4.746475402159824e-06, 'epoch': 0.17} + 17%|█▋ | 1200/7045 [3:54:03<18:09:58, 11.19s/it] 17%|█▋ | 1201/7045 [3:54:14<17:56:16, 11.05s/it] {'loss': 1.124, 'learning_rate': 4.745970812307174e-06, 'epoch': 0.17} + 17%|█▋ | 1201/7045 [3:54:14<17:56:16, 11.05s/it] 17%|█▋ | 1202/7045 [3:54:26<18:11:22, 11.21s/it] {'loss': 1.1631, 'learning_rate': 4.745465747687002e-06, 'epoch': 0.17} + 17%|█▋ | 1202/7045 [3:54:26<18:11:22, 11.21s/it] 17%|█▋ | 1203/7045 [3:54:38<18:36:43, 11.47s/it] {'loss': 1.1143, 'learning_rate': 4.744960208406068e-06, 'epoch': 0.17} + 17%|█▋ | 1203/7045 [3:54:38<18:36:43, 11.47s/it] 17%|█▋ | 1204/7045 [3:54:49<18:32:03, 11.42s/it] {'loss': 1.1396, 'learning_rate': 4.744454194571238e-06, 'epoch': 0.17} + 17%|█▋ | 1204/7045 [3:54:49<18:32:03, 11.42s/it] 17%|█▋ | 1205/7045 [3:55:01<18:36:48, 11.47s/it] {'loss': 1.1689, 'learning_rate': 4.743947706289475e-06, 'epoch': 0.17} + 17%|█▋ | 1205/7045 [3:55:01<18:36:48, 11.47s/it] 17%|█▋ | 1206/7045 [3:55:13<18:51:40, 11.63s/it] {'loss': 1.1548, 'learning_rate': 4.743440743667843e-06, 'epoch': 0.17} + 17%|█▋ | 1206/7045 [3:55:13<18:51:40, 11.63s/it] 17%|█▋ | 1207/7045 [3:55:24<18:45:43, 11.57s/it] {'loss': 1.106, 'learning_rate': 4.742933306813511e-06, 'epoch': 0.17} + 17%|█▋ | 1207/7045 [3:55:24<18:45:43, 11.57s/it] 17%|█▋ | 1208/7045 [3:55:35<18:33:40, 11.45s/it] {'loss': 1.1543, 'learning_rate': 4.742425395833739e-06, 'epoch': 0.17} + 17%|█▋ | 1208/7045 [3:55:35<18:33:40, 11.45s/it] 17%|█▋ | 1209/7045 [3:55:47<18:35:29, 11.47s/it] {'loss': 1.1758, 'learning_rate': 4.741917010835897e-06, 'epoch': 0.17} + 17%|█▋ | 1209/7045 [3:55:47<18:35:29, 11.47s/it] 17%|█▋ | 1210/7045 [3:55:58<18:33:16, 11.45s/it] {'loss': 1.1494, 'learning_rate': 4.741408151927448e-06, 'epoch': 0.17} + 17%|█▋ | 1210/7045 [3:55:58<18:33:16, 11.45s/it] 17%|█▋ | 1211/7045 [3:56:11<19:07:20, 11.80s/it] {'loss': 1.1309, 'learning_rate': 4.7408988192159585e-06, 'epoch': 0.17} + 17%|█▋ | 1211/7045 [3:56:11<19:07:20, 11.80s/it] 17%|█▋ | 1212/7045 [3:56:22<18:45:40, 11.58s/it] {'loss': 1.165, 'learning_rate': 4.740389012809094e-06, 'epoch': 0.17} + 17%|█▋ | 1212/7045 [3:56:22<18:45:40, 11.58s/it] 17%|█▋ | 1213/7045 [3:56:35<19:15:27, 11.89s/it] {'loss': 1.1299, 'learning_rate': 4.739878732814621e-06, 'epoch': 0.17} + 17%|█▋ | 1213/7045 [3:56:35<19:15:27, 11.89s/it] 17%|█▋ | 1214/7045 [3:56:47<19:23:00, 11.97s/it] {'loss': 1.1562, 'learning_rate': 4.739367979340407e-06, 'epoch': 0.17} + 17%|█▋ | 1214/7045 [3:56:47<19:23:00, 11.97s/it] 17%|█▋ | 1215/7045 [3:56:57<18:47:05, 11.60s/it] {'loss': 1.1367, 'learning_rate': 4.738856752494416e-06, 'epoch': 0.17} + 17%|█▋ | 1215/7045 [3:56:57<18:47:05, 11.60s/it] 17%|█▋ | 1216/7045 [3:57:10<19:19:00, 11.93s/it] {'loss': 1.126, 'learning_rate': 4.738345052384715e-06, 'epoch': 0.17} + 17%|█▋ | 1216/7045 [3:57:10<19:19:00, 11.93s/it] 17%|█▋ | 1217/7045 [3:57:22<19:06:14, 11.80s/it] {'loss': 1.1602, 'learning_rate': 4.737832879119472e-06, 'epoch': 0.17} + 17%|█▋ | 1217/7045 [3:57:22<19:06:14, 11.80s/it] 17%|█▋ | 1218/7045 [3:57:33<18:45:11, 11.59s/it] {'loss': 1.1494, 'learning_rate': 4.737320232806951e-06, 'epoch': 0.17} + 17%|█▋ | 1218/7045 [3:57:33<18:45:11, 11.59s/it] 17%|█▋ | 1219/7045 [3:57:44<18:32:00, 11.45s/it] {'loss': 1.124, 'learning_rate': 4.736807113555521e-06, 'epoch': 0.17} + 17%|█▋ | 1219/7045 [3:57:44<18:32:00, 11.45s/it] 17%|█▋ | 1220/7045 [3:57:55<18:26:39, 11.40s/it] {'loss': 1.1582, 'learning_rate': 4.736293521473645e-06, 'epoch': 0.17} + 17%|█▋ | 1220/7045 [3:57:55<18:26:39, 11.40s/it] 17%|█▋ | 1221/7045 [3:58:08<19:12:23, 11.87s/it] {'loss': 1.1484, 'learning_rate': 4.7357794566698934e-06, 'epoch': 0.17} + 17%|█▋ | 1221/7045 [3:58:08<19:12:23, 11.87s/it] 17%|█▋ | 1222/7045 [3:58:21<19:53:25, 12.30s/it] {'loss': 1.1069, 'learning_rate': 4.73526491925293e-06, 'epoch': 0.17} + 17%|█▋ | 1222/7045 [3:58:21<19:53:25, 12.30s/it] 17%|█▋ | 1223/7045 [3:58:35<20:16:41, 12.54s/it] {'loss': 1.1304, 'learning_rate': 4.7347499093315205e-06, 'epoch': 0.17} + 17%|█▋ | 1223/7045 [3:58:35<20:16:41, 12.54s/it] 17%|█▋ | 1224/7045 [3:58:46<19:31:44, 12.08s/it] {'loss': 1.1084, 'learning_rate': 4.734234427014533e-06, 'epoch': 0.17} + 17%|█▋ | 1224/7045 [3:58:46<19:31:44, 12.08s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 4 bytes but only got 2. + warnings.warn(str(msg)) + 17%|█▋ | 1225/7045 [3:58:57<19:00:19, 11.76s/it] {'loss': 1.1211, 'learning_rate': 4.733718472410932e-06, 'epoch': 0.17} + 17%|█▋ | 1225/7045 [3:58:57<19:00:19, 11.76s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 12 bytes but only got 10. + warnings.warn(str(msg)) + 17%|█▋ | 1226/7045 [3:59:07<18:32:49, 11.47s/it] {'loss': 1.1504, 'learning_rate': 4.733202045629784e-06, 'epoch': 0.17} + 17%|█▋ | 1226/7045 [3:59:07<18:32:49, 11.47s/it] 17%|█▋ | 1227/7045 [3:59:18<18:21:57, 11.36s/it] {'loss': 1.127, 'learning_rate': 4.7326851467802534e-06, 'epoch': 0.17} + 17%|█▋ | 1227/7045 [3:59:18<18:21:57, 11.36s/it] 17%|█▋ | 1228/7045 [3:59:30<18:13:55, 11.28s/it] {'loss': 1.167, 'learning_rate': 4.732167775971607e-06, 'epoch': 0.17} + 17%|█▋ | 1228/7045 [3:59:30<18:13:55, 11.28s/it] 17%|█▋ | 1229/7045 [3:59:41<18:11:09, 11.26s/it] {'loss': 1.1406, 'learning_rate': 4.7316499333132086e-06, 'epoch': 0.17} + 17%|█▋ | 1229/7045 [3:59:41<18:11:09, 11.26s/it] 17%|█▋ | 1230/7045 [3:59:52<18:07:10, 11.22s/it] {'loss': 1.1436, 'learning_rate': 4.731131618914523e-06, 'epoch': 0.17} + 17%|█▋ | 1230/7045 [3:59:52<18:07:10, 11.22s/it] 17%|█▋ | 1231/7045 [4:00:04<18:47:11, 11.63s/it] {'loss': 1.1362, 'learning_rate': 4.7306128328851164e-06, 'epoch': 0.17} + 17%|█▋ | 1231/7045 [4:00:04<18:47:11, 11.63s/it] 17%|█▋ | 1232/7045 [4:00:16<18:53:56, 11.70s/it] {'loss': 1.1226, 'learning_rate': 4.7300935753346525e-06, 'epoch': 0.17} + 17%|█▋ | 1232/7045 [4:00:16<18:53:56, 11.70s/it] 18%|█▊ | 1233/7045 [4:00:28<18:57:55, 11.75s/it] {'loss': 1.1025, 'learning_rate': 4.729573846372893e-06, 'epoch': 0.18} + 18%|█▊ | 1233/7045 [4:00:28<18:57:55, 11.75s/it] 18%|█▊ | 1234/7045 [4:00:39<18:35:49, 11.52s/it] {'loss': 1.1348, 'learning_rate': 4.729053646109706e-06, 'epoch': 0.18} + 18%|█▊ | 1234/7045 [4:00:39<18:35:49, 11.52s/it] 18%|█▊ | 1235/7045 [4:00:50<18:16:36, 11.32s/it] {'loss': 1.0957, 'learning_rate': 4.7285329746550505e-06, 'epoch': 0.18} + 18%|█▊ | 1235/7045 [4:00:50<18:16:36, 11.32s/it] 18%|█▊ | 1236/7045 [4:01:01<18:10:16, 11.26s/it] {'loss': 1.1445, 'learning_rate': 4.72801183211899e-06, 'epoch': 0.18} + 18%|█▊ | 1236/7045 [4:01:01<18:10:16, 11.26s/it] 18%|█▊ | 1237/7045 [4:01:14<19:00:14, 11.78s/it] {'loss': 1.1216, 'learning_rate': 4.72749021861169e-06, 'epoch': 0.18} + 18%|█▊ | 1237/7045 [4:01:14<19:00:14, 11.78s/it] 18%|█▊ | 1238/7045 [4:01:25<18:45:15, 11.63s/it] {'loss': 1.1387, 'learning_rate': 4.726968134243409e-06, 'epoch': 0.18} + 18%|█▊ | 1238/7045 [4:01:25<18:45:15, 11.63s/it] 18%|█▊ | 1239/7045 [4:01:36<18:26:44, 11.44s/it] {'loss': 1.1592, 'learning_rate': 4.72644557912451e-06, 'epoch': 0.18} + 18%|█▊ | 1239/7045 [4:01:36<18:26:44, 11.44s/it] 18%|█▊ | 1240/7045 [4:01:49<18:56:28, 11.75s/it] {'loss': 1.125, 'learning_rate': 4.725922553365454e-06, 'epoch': 0.18} + 18%|█▊ | 1240/7045 [4:01:49<18:56:28, 11.75s/it] 18%|█▊ | 1241/7045 [4:02:01<19:03:50, 11.82s/it] {'loss': 1.165, 'learning_rate': 4.7253990570768004e-06, 'epoch': 0.18} + 18%|█▊ | 1241/7045 [4:02:01<19:03:50, 11.82s/it] 18%|█▊ | 1242/7045 [4:02:12<18:41:00, 11.59s/it] {'loss': 1.1211, 'learning_rate': 4.724875090369211e-06, 'epoch': 0.18} + 18%|█▊ | 1242/7045 [4:02:12<18:41:00, 11.59s/it] 18%|█▊ | 1243/7045 [4:02:25<19:21:28, 12.01s/it] {'loss': 1.0571, 'learning_rate': 4.724350653353444e-06, 'epoch': 0.18} + 18%|█▊ | 1243/7045 [4:02:25<19:21:28, 12.01s/it] 18%|█▊ | 1244/7045 [4:02:36<18:58:46, 11.78s/it] {'loss': 1.167, 'learning_rate': 4.723825746140358e-06, 'epoch': 0.18} + 18%|█▊ | 1244/7045 [4:02:36<18:58:46, 11.78s/it] 18%|█▊ | 1245/7045 [4:02:48<19:10:45, 11.90s/it] {'loss': 1.1299, 'learning_rate': 4.723300368840912e-06, 'epoch': 0.18} + 18%|█▊ | 1245/7045 [4:02:48<19:10:45, 11.90s/it] 18%|█▊ | 1246/7045 [4:03:00<18:54:33, 11.74s/it] {'loss': 1.168, 'learning_rate': 4.722774521566164e-06, 'epoch': 0.18} + 18%|█▊ | 1246/7045 [4:03:00<18:54:33, 11.74s/it] 18%|█▊ | 1247/7045 [4:03:11<18:44:05, 11.63s/it] {'loss': 1.1152, 'learning_rate': 4.722248204427269e-06, 'epoch': 0.18} + 18%|█▊ | 1247/7045 [4:03:11<18:44:05, 11.63s/it] 18%|█▊ | 1248/7045 [4:03:22<18:30:45, 11.50s/it] {'loss': 1.1084, 'learning_rate': 4.721721417535485e-06, 'epoch': 0.18} + 18%|█▊ | 1248/7045 [4:03:22<18:30:45, 11.50s/it] 18%|█▊ | 1249/7045 [4:03:34<18:22:50, 11.42s/it] {'loss': 1.1309, 'learning_rate': 4.721194161002167e-06, 'epoch': 0.18} + 18%|█▊ | 1249/7045 [4:03:34<18:22:50, 11.42s/it] 18%|█▊ | 1250/7045 [4:03:46<18:50:55, 11.71s/it] {'loss': 1.1191, 'learning_rate': 4.7206664349387695e-06, 'epoch': 0.18} + 18%|█▊ | 1250/7045 [4:03:46<18:50:55, 11.71s/it] 18%|█▊ | 1251/7045 [4:03:59<19:43:04, 12.25s/it] {'loss': 1.1182, 'learning_rate': 4.720138239456847e-06, 'epoch': 0.18} + 18%|█▊ | 1251/7045 [4:03:59<19:43:04, 12.25s/it] 18%|█▊ | 1252/7045 [4:04:13<20:08:51, 12.52s/it] {'loss': 1.0986, 'learning_rate': 4.719609574668052e-06, 'epoch': 0.18} + 18%|█▊ | 1252/7045 [4:04:13<20:08:51, 12.52s/it] 18%|█▊ | 1253/7045 [4:04:24<19:25:22, 12.07s/it] {'loss': 1.1533, 'learning_rate': 4.719080440684139e-06, 'epoch': 0.18} + 18%|█▊ | 1253/7045 [4:04:24<19:25:22, 12.07s/it] 18%|█▊ | 1254/7045 [4:04:35<19:02:44, 11.84s/it] {'loss': 1.1768, 'learning_rate': 4.718550837616958e-06, 'epoch': 0.18} + 18%|█▊ | 1254/7045 [4:04:35<19:02:44, 11.84s/it] 18%|█▊ | 1255/7045 [4:04:47<19:02:49, 11.84s/it] {'loss': 1.1426, 'learning_rate': 4.718020765578459e-06, 'epoch': 0.18} + 18%|█▊ | 1255/7045 [4:04:47<19:02:49, 11.84s/it] 18%|█▊ | 1256/7045 [4:04:59<19:27:20, 12.10s/it] {'loss': 1.1035, 'learning_rate': 4.7174902246806946e-06, 'epoch': 0.18} + 18%|█▊ | 1256/7045 [4:04:59<19:27:20, 12.10s/it] 18%|█▊ | 1257/7045 [4:05:10<18:51:38, 11.73s/it] {'loss': 1.1504, 'learning_rate': 4.7169592150358116e-06, 'epoch': 0.18} + 18%|█▊ | 1257/7045 [4:05:10<18:51:38, 11.73s/it] 18%|█▊ | 1258/7045 [4:05:24<19:33:37, 12.17s/it] {'loss': 1.1694, 'learning_rate': 4.716427736756059e-06, 'epoch': 0.18} + 18%|█▊ | 1258/7045 [4:05:24<19:33:37, 12.17s/it] 18%|█▊ | 1259/7045 [4:05:36<19:46:05, 12.30s/it] {'loss': 1.1216, 'learning_rate': 4.715895789953785e-06, 'epoch': 0.18} + 18%|█▊ | 1259/7045 [4:05:36<19:46:05, 12.30s/it] 18%|█▊ | 1260/7045 [4:05:47<19:10:53, 11.94s/it] {'loss': 1.1191, 'learning_rate': 4.715363374741434e-06, 'epoch': 0.18} + 18%|█▊ | 1260/7045 [4:05:47<19:10:53, 11.94s/it] 18%|█▊ | 1261/7045 [4:05:58<18:50:06, 11.72s/it] {'loss': 1.1416, 'learning_rate': 4.714830491231553e-06, 'epoch': 0.18} + 18%|█▊ | 1261/7045 [4:05:58<18:50:06, 11.72s/it] 18%|█▊ | 1262/7045 [4:06:12<19:50:26, 12.35s/it] {'loss': 1.1172, 'learning_rate': 4.714297139536786e-06, 'epoch': 0.18} + 18%|█▊ | 1262/7045 [4:06:12<19:50:26, 12.35s/it] 18%|█▊ | 1263/7045 [4:06:24<19:46:08, 12.31s/it] {'loss': 1.1201, 'learning_rate': 4.713763319769874e-06, 'epoch': 0.18} + 18%|█▊ | 1263/7045 [4:06:24<19:46:08, 12.31s/it] 18%|█▊ | 1264/7045 [4:06:36<19:17:13, 12.01s/it] {'loss': 1.166, 'learning_rate': 4.713229032043662e-06, 'epoch': 0.18} + 18%|█▊ | 1264/7045 [4:06:36<19:17:13, 12.01s/it] 18%|█▊ | 1265/7045 [4:06:47<18:52:42, 11.76s/it] {'loss': 1.125, 'learning_rate': 4.712694276471091e-06, 'epoch': 0.18} + 18%|█▊ | 1265/7045 [4:06:47<18:52:42, 11.76s/it] 18%|█▊ | 1266/7045 [4:06:58<18:42:42, 11.66s/it] {'loss': 1.1387, 'learning_rate': 4.712159053165199e-06, 'epoch': 0.18} + 18%|█▊ | 1266/7045 [4:06:58<18:42:42, 11.66s/it] 18%|█▊ | 1267/7045 [4:07:10<18:34:57, 11.58s/it] {'loss': 1.1133, 'learning_rate': 4.7116233622391265e-06, 'epoch': 0.18} + 18%|█▊ | 1267/7045 [4:07:10<18:34:57, 11.58s/it] 18%|█▊ | 1268/7045 [4:07:22<18:47:39, 11.71s/it] {'loss': 1.1475, 'learning_rate': 4.711087203806111e-06, 'epoch': 0.18} + 18%|█▊ | 1268/7045 [4:07:22<18:47:39, 11.71s/it] 18%|█▊ | 1269/7045 [4:07:36<19:45:53, 12.32s/it] {'loss': 1.0767, 'learning_rate': 4.7105505779794894e-06, 'epoch': 0.18} + 18%|█▊ | 1269/7045 [4:07:36<19:45:53, 12.32s/it] 18%|█▊ | 1270/7045 [4:07:47<19:11:01, 11.96s/it] {'loss': 1.1562, 'learning_rate': 4.7100134848726955e-06, 'epoch': 0.18} + 18%|█▊ | 1270/7045 [4:07:47<19:11:01, 11.96s/it] 18%|█▊ | 1271/7045 [4:07:58<18:48:23, 11.73s/it] {'loss': 1.1094, 'learning_rate': 4.709475924599265e-06, 'epoch': 0.18} + 18%|█▊ | 1271/7045 [4:07:58<18:48:23, 11.73s/it] 18%|█▊ | 1272/7045 [4:08:09<18:33:58, 11.58s/it] {'loss': 1.1357, 'learning_rate': 4.70893789727283e-06, 'epoch': 0.18} + 18%|█▊ | 1272/7045 [4:08:09<18:33:58, 11.58s/it] 18%|█▊ | 1273/7045 [4:08:20<18:25:34, 11.49s/it] {'loss': 1.1494, 'learning_rate': 4.708399403007122e-06, 'epoch': 0.18} + 18%|█▊ | 1273/7045 [4:08:20<18:25:34, 11.49s/it] 18%|█▊ | 1274/7045 [4:08:31<18:10:58, 11.34s/it] {'loss': 1.1309, 'learning_rate': 4.7078604419159715e-06, 'epoch': 0.18} + 18%|█▊ | 1274/7045 [4:08:31<18:10:58, 11.34s/it] 18%|█▊ | 1275/7045 [4:08:42<18:05:26, 11.29s/it] {'loss': 1.1523, 'learning_rate': 4.7073210141133085e-06, 'epoch': 0.18} + 18%|█▊ | 1275/7045 [4:08:42<18:05:26, 11.29s/it] 18%|█▊ | 1276/7045 [4:08:54<18:17:34, 11.42s/it] {'loss': 1.1436, 'learning_rate': 4.70678111971316e-06, 'epoch': 0.18} + 18%|█▊ | 1276/7045 [4:08:54<18:17:34, 11.42s/it] 18%|█▊ | 1277/7045 [4:09:05<18:08:48, 11.33s/it] {'loss': 1.1641, 'learning_rate': 4.706240758829652e-06, 'epoch': 0.18} + 18%|█▊ | 1277/7045 [4:09:05<18:08:48, 11.33s/it] 18%|█▊ | 1278/7045 [4:09:17<18:22:26, 11.47s/it] {'loss': 1.0986, 'learning_rate': 4.705699931577009e-06, 'epoch': 0.18} + 18%|█▊ | 1278/7045 [4:09:17<18:22:26, 11.47s/it] 18%|█▊ | 1279/7045 [4:09:29<18:46:59, 11.73s/it] {'loss': 1.1016, 'learning_rate': 4.705158638069555e-06, 'epoch': 0.18} + 18%|█▊ | 1279/7045 [4:09:29<18:46:59, 11.73s/it] 18%|█▊ | 1280/7045 [4:09:42<19:04:58, 11.92s/it] {'loss': 1.1191, 'learning_rate': 4.7046168784217126e-06, 'epoch': 0.18} + 18%|█▊ | 1280/7045 [4:09:42<19:04:58, 11.92s/it] 18%|█▊ | 1281/7045 [4:09:53<18:44:58, 11.71s/it] {'loss': 1.1338, 'learning_rate': 4.7040746527480025e-06, 'epoch': 0.18} + 18%|█▊ | 1281/7045 [4:09:53<18:44:58, 11.71s/it] 18%|█▊ | 1282/7045 [4:10:05<18:43:12, 11.69s/it] {'loss': 1.1191, 'learning_rate': 4.703531961163042e-06, 'epoch': 0.18} + 18%|█▊ | 1282/7045 [4:10:05<18:43:12, 11.69s/it] 18%|█▊ | 1283/7045 [4:10:17<18:54:48, 11.82s/it] {'loss': 1.1289, 'learning_rate': 4.70298880378155e-06, 'epoch': 0.18} + 18%|█▊ | 1283/7045 [4:10:17<18:54:48, 11.82s/it] 18%|█▊ | 1284/7045 [4:10:28<18:30:32, 11.57s/it] {'loss': 1.1758, 'learning_rate': 4.702445180718343e-06, 'epoch': 0.18} + 18%|█▊ | 1284/7045 [4:10:28<18:30:32, 11.57s/it] 18%|█▊ | 1285/7045 [4:10:41<19:15:50, 12.04s/it] {'loss': 1.1162, 'learning_rate': 4.701901092088335e-06, 'epoch': 0.18} + 18%|█▊ | 1285/7045 [4:10:41<19:15:50, 12.04s/it] 18%|█▊ | 1286/7045 [4:10:52<18:48:44, 11.76s/it] {'loss': 1.1133, 'learning_rate': 4.701356538006538e-06, 'epoch': 0.18} + 18%|█▊ | 1286/7045 [4:10:52<18:48:44, 11.76s/it] 18%|█▊ | 1287/7045 [4:11:03<18:38:51, 11.66s/it] {'loss': 1.165, 'learning_rate': 4.700811518588066e-06, 'epoch': 0.18} + 18%|█▊ | 1287/7045 [4:11:03<18:38:51, 11.66s/it] 18%|█▊ | 1288/7045 [4:11:16<18:50:51, 11.79s/it] {'loss': 1.1279, 'learning_rate': 4.700266033948124e-06, 'epoch': 0.18} + 18%|█▊ | 1288/7045 [4:11:16<18:50:51, 11.79s/it] 18%|█▊ | 1289/7045 [4:11:27<18:28:16, 11.55s/it] {'loss': 1.1338, 'learning_rate': 4.699720084202025e-06, 'epoch': 0.18} + 18%|█▊ | 1289/7045 [4:11:27<18:28:16, 11.55s/it] 18%|█▊ | 1290/7045 [4:11:39<18:46:03, 11.74s/it] {'loss': 1.1597, 'learning_rate': 4.699173669465171e-06, 'epoch': 0.18} + 18%|█▊ | 1290/7045 [4:11:39<18:46:03, 11.74s/it] 18%|█▊ | 1291/7045 [4:11:50<18:32:44, 11.60s/it] {'loss': 1.1328, 'learning_rate': 4.6986267898530705e-06, 'epoch': 0.18} + 18%|█▊ | 1291/7045 [4:11:50<18:32:44, 11.60s/it] 18%|█▊ | 1292/7045 [4:12:02<18:37:57, 11.66s/it] {'loss': 1.1611, 'learning_rate': 4.698079445481323e-06, 'epoch': 0.18} + 18%|█▊ | 1292/7045 [4:12:02<18:37:57, 11.66s/it] 18%|█▊ | 1293/7045 [4:12:13<18:33:27, 11.61s/it] {'loss': 1.1543, 'learning_rate': 4.697531636465632e-06, 'epoch': 0.18} + 18%|█▊ | 1293/7045 [4:12:13<18:33:27, 11.61s/it] 18%|█▊ | 1294/7045 [4:12:25<18:23:30, 11.51s/it] {'loss': 1.0977, 'learning_rate': 4.696983362921797e-06, 'epoch': 0.18} + 18%|█▊ | 1294/7045 [4:12:25<18:23:30, 11.51s/it] 18%|█▊ | 1295/7045 [4:12:37<18:56:01, 11.85s/it] {'loss': 1.1309, 'learning_rate': 4.696434624965712e-06, 'epoch': 0.18} + 18%|█▊ | 1295/7045 [4:12:37<18:56:01, 11.85s/it] 18%|█▊ | 1296/7045 [4:12:48<18:32:34, 11.61s/it] {'loss': 1.1689, 'learning_rate': 4.695885422713377e-06, 'epoch': 0.18} + 18%|█▊ | 1296/7045 [4:12:48<18:32:34, 11.61s/it] 18%|█▊ | 1297/7045 [4:12:59<18:20:36, 11.49s/it] {'loss': 1.1465, 'learning_rate': 4.695335756280883e-06, 'epoch': 0.18} + 18%|█▊ | 1297/7045 [4:12:59<18:20:36, 11.49s/it] 18%|█▊ | 1298/7045 [4:13:10<17:59:54, 11.27s/it] {'loss': 1.1265, 'learning_rate': 4.694785625784425e-06, 'epoch': 0.18} + 18%|█▊ | 1298/7045 [4:13:10<17:59:54, 11.27s/it] 18%|█▊ | 1299/7045 [4:13:21<17:54:48, 11.22s/it] {'loss': 1.1338, 'learning_rate': 4.694235031340289e-06, 'epoch': 0.18} + 18%|█▊ | 1299/7045 [4:13:21<17:54:48, 11.22s/it] 18%|█▊ | 1300/7045 [4:13:33<18:19:52, 11.49s/it] {'loss': 1.168, 'learning_rate': 4.693683973064866e-06, 'epoch': 0.18} + 18%|█▊ | 1300/7045 [4:13:33<18:19:52, 11.49s/it] 18%|█▊ | 1301/7045 [4:13:45<18:17:38, 11.47s/it] {'loss': 1.1738, 'learning_rate': 4.6931324510746415e-06, 'epoch': 0.18} + 18%|█▊ | 1301/7045 [4:13:45<18:17:38, 11.47s/it] 18%|█▊ | 1302/7045 [4:13:57<18:26:20, 11.56s/it] {'loss': 1.1465, 'learning_rate': 4.6925804654862e-06, 'epoch': 0.18} + 18%|█▊ | 1302/7045 [4:13:57<18:26:20, 11.56s/it] 18%|█▊ | 1303/7045 [4:14:08<18:15:12, 11.44s/it] {'loss': 1.1094, 'learning_rate': 4.692028016416222e-06, 'epoch': 0.18} + 18%|█▊ | 1303/7045 [4:14:08<18:15:12, 11.44s/it] 19%|█▊ | 1304/7045 [4:14:19<18:05:01, 11.34s/it] {'loss': 1.1484, 'learning_rate': 4.69147510398149e-06, 'epoch': 0.19} + 19%|█▊ | 1304/7045 [4:14:19<18:05:01, 11.34s/it] 19%|█▊ | 1305/7045 [4:14:30<18:00:14, 11.29s/it] {'loss': 1.1846, 'learning_rate': 4.690921728298881e-06, 'epoch': 0.19} + 19%|█▊ | 1305/7045 [4:14:30<18:00:14, 11.29s/it] 19%|█▊ | 1306/7045 [4:14:41<17:55:45, 11.25s/it] {'loss': 1.1348, 'learning_rate': 4.690367889485371e-06, 'epoch': 0.19} + 19%|█▊ | 1306/7045 [4:14:41<17:55:45, 11.25s/it] 19%|█▊ | 1307/7045 [4:14:52<17:49:22, 11.18s/it] {'loss': 1.123, 'learning_rate': 4.689813587658034e-06, 'epoch': 0.19} + 19%|█▊ | 1307/7045 [4:14:52<17:49:22, 11.18s/it] 19%|█▊ | 1308/7045 [4:15:04<17:58:13, 11.28s/it] {'loss': 1.1797, 'learning_rate': 4.689258822934042e-06, 'epoch': 0.19} + 19%|█▊ | 1308/7045 [4:15:04<17:58:13, 11.28s/it] 19%|█▊ | 1309/7045 [4:15:17<19:04:30, 11.97s/it] {'loss': 1.125, 'learning_rate': 4.6887035954306656e-06, 'epoch': 0.19} + 19%|█▊ | 1309/7045 [4:15:17<19:04:30, 11.97s/it] 19%|█▊ | 1310/7045 [4:15:31<19:44:24, 12.39s/it] {'loss': 1.144, 'learning_rate': 4.68814790526527e-06, 'epoch': 0.19} + 19%|█▊ | 1310/7045 [4:15:31<19:44:24, 12.39s/it] 19%|█▊ | 1311/7045 [4:15:43<19:45:35, 12.41s/it] {'loss': 1.1089, 'learning_rate': 4.687591752555323e-06, 'epoch': 0.19} + 19%|█▊ | 1311/7045 [4:15:43<19:45:35, 12.41s/it] 19%|█▊ | 1312/7045 [4:15:55<19:18:48, 12.13s/it] {'loss': 1.126, 'learning_rate': 4.687035137418386e-06, 'epoch': 0.19} + 19%|█▊ | 1312/7045 [4:15:55<19:18:48, 12.13s/it] 19%|█▊ | 1313/7045 [4:16:06<19:07:26, 12.01s/it] {'loss': 1.0898, 'learning_rate': 4.68647805997212e-06, 'epoch': 0.19} + 19%|█▊ | 1313/7045 [4:16:06<19:07:26, 12.01s/it] 19%|█▊ | 1314/7045 [4:16:19<19:13:35, 12.08s/it] {'loss': 1.1475, 'learning_rate': 4.685920520334284e-06, 'epoch': 0.19} + 19%|█▊ | 1314/7045 [4:16:19<19:13:35, 12.08s/it] 19%|█▊ | 1315/7045 [4:16:30<18:43:10, 11.76s/it] {'loss': 1.1611, 'learning_rate': 4.685362518622735e-06, 'epoch': 0.19} + 19%|█▊ | 1315/7045 [4:16:30<18:43:10, 11.76s/it] 19%|█▊ | 1316/7045 [4:16:42<18:53:27, 11.87s/it] {'loss': 1.1299, 'learning_rate': 4.684804054955426e-06, 'epoch': 0.19} + 19%|█▊ | 1316/7045 [4:16:42<18:53:27, 11.87s/it] 19%|█▊ | 1317/7045 [4:16:53<18:29:35, 11.62s/it] {'loss': 1.1328, 'learning_rate': 4.684245129450409e-06, 'epoch': 0.19} + 19%|█▊ | 1317/7045 [4:16:53<18:29:35, 11.62s/it] 19%|█▊ | 1318/7045 [4:17:06<19:13:25, 12.08s/it] {'loss': 1.1201, 'learning_rate': 4.683685742225832e-06, 'epoch': 0.19} + 19%|█▊ | 1318/7045 [4:17:06<19:13:25, 12.08s/it] 19%|█▊ | 1319/7045 [4:17:17<18:44:06, 11.78s/it] {'loss': 1.1309, 'learning_rate': 4.683125893399943e-06, 'epoch': 0.19} + 19%|█▊ | 1319/7045 [4:17:17<18:44:06, 11.78s/it] 19%|█▊ | 1320/7045 [4:17:28<18:31:18, 11.65s/it] {'loss': 1.124, 'learning_rate': 4.682565583091086e-06, 'epoch': 0.19} + 19%|█▊ | 1320/7045 [4:17:28<18:31:18, 11.65s/it] 19%|█▉ | 1321/7045 [4:17:41<19:01:19, 11.96s/it] {'loss': 1.1953, 'learning_rate': 4.682004811417703e-06, 'epoch': 0.19} + 19%|█▉ | 1321/7045 [4:17:41<19:01:19, 11.96s/it] 19%|█▉ | 1322/7045 [4:17:53<18:48:53, 11.84s/it] {'loss': 1.1284, 'learning_rate': 4.681443578498333e-06, 'epoch': 0.19} + 19%|█▉ | 1322/7045 [4:17:53<18:48:53, 11.84s/it] 19%|█▉ | 1323/7045 [4:18:04<18:28:11, 11.62s/it] {'loss': 1.1045, 'learning_rate': 4.6808818844516145e-06, 'epoch': 0.19} + 19%|█▉ | 1323/7045 [4:18:04<18:28:11, 11.62s/it] 19%|█▉ | 1324/7045 [4:18:15<18:17:20, 11.51s/it] {'loss': 1.2012, 'learning_rate': 4.680319729396281e-06, 'epoch': 0.19} + 19%|█▉ | 1324/7045 [4:18:15<18:17:20, 11.51s/it] 19%|█▉ | 1325/7045 [4:18:26<18:08:13, 11.42s/it] {'loss': 1.1377, 'learning_rate': 4.679757113451163e-06, 'epoch': 0.19} + 19%|█▉ | 1325/7045 [4:18:26<18:08:13, 11.42s/it] 19%|█▉ | 1326/7045 [4:18:37<17:56:39, 11.30s/it] {'loss': 1.1582, 'learning_rate': 4.679194036735193e-06, 'epoch': 0.19} + 19%|█▉ | 1326/7045 [4:18:37<17:56:39, 11.30s/it] 19%|█▉ | 1327/7045 [4:18:50<18:45:20, 11.81s/it] {'loss': 1.1543, 'learning_rate': 4.678630499367395e-06, 'epoch': 0.19} + 19%|█▉ | 1327/7045 [4:18:50<18:45:20, 11.81s/it] 19%|█▉ | 1328/7045 [4:19:01<18:29:47, 11.65s/it] {'loss': 1.1484, 'learning_rate': 4.6780665014668935e-06, 'epoch': 0.19} + 19%|█▉ | 1328/7045 [4:19:01<18:29:47, 11.65s/it] 19%|█▉ | 1329/7045 [4:19:14<18:41:19, 11.77s/it] {'loss': 1.1582, 'learning_rate': 4.67750204315291e-06, 'epoch': 0.19} + 19%|█▉ | 1329/7045 [4:19:14<18:41:19, 11.77s/it] 19%|█▉ | 1330/7045 [4:19:26<18:47:17, 11.84s/it] {'loss': 1.1211, 'learning_rate': 4.676937124544765e-06, 'epoch': 0.19} + 19%|█▉ | 1330/7045 [4:19:26<18:47:17, 11.84s/it] 19%|█▉ | 1331/7045 [4:19:37<18:30:57, 11.67s/it] {'loss': 1.1118, 'learning_rate': 4.676371745761873e-06, 'epoch': 0.19} + 19%|█▉ | 1331/7045 [4:19:37<18:30:57, 11.67s/it] 19%|█▉ | 1332/7045 [4:19:48<18:30:29, 11.66s/it] {'loss': 1.1553, 'learning_rate': 4.675805906923747e-06, 'epoch': 0.19} + 19%|█▉ | 1332/7045 [4:19:48<18:30:29, 11.66s/it] 19%|█▉ | 1333/7045 [4:20:01<19:05:10, 12.03s/it] {'loss': 1.1484, 'learning_rate': 4.675239608149999e-06, 'epoch': 0.19} + 19%|█▉ | 1333/7045 [4:20:01<19:05:10, 12.03s/it] 19%|█▉ | 1334/7045 [4:20:14<19:34:01, 12.33s/it] {'loss': 1.1201, 'learning_rate': 4.674672849560335e-06, 'epoch': 0.19} + 19%|█▉ | 1334/7045 [4:20:14<19:34:01, 12.33s/it] 19%|█▉ | 1335/7045 [4:20:26<19:20:28, 12.19s/it] {'loss': 1.1289, 'learning_rate': 4.674105631274561e-06, 'epoch': 0.19} + 19%|█▉ | 1335/7045 [4:20:26<19:20:28, 12.19s/it] 19%|█▉ | 1336/7045 [4:20:37<18:51:22, 11.89s/it] {'loss': 1.1797, 'learning_rate': 4.67353795341258e-06, 'epoch': 0.19} + 19%|█▉ | 1336/7045 [4:20:37<18:51:22, 11.89s/it] 19%|█▉ | 1337/7045 [4:20:48<18:25:10, 11.62s/it] {'loss': 1.1201, 'learning_rate': 4.6729698160943906e-06, 'epoch': 0.19} + 19%|█▉ | 1337/7045 [4:20:48<18:25:10, 11.62s/it] 19%|█▉ | 1338/7045 [4:21:01<18:41:06, 11.79s/it] {'loss': 1.1631, 'learning_rate': 4.6724012194400885e-06, 'epoch': 0.19} + 19%|█▉ | 1338/7045 [4:21:01<18:41:06, 11.79s/it] 19%|█▉ | 1339/7045 [4:21:12<18:24:03, 11.61s/it] {'loss': 1.1299, 'learning_rate': 4.671832163569868e-06, 'epoch': 0.19} + 19%|█▉ | 1339/7045 [4:21:12<18:24:03, 11.61s/it] 19%|█▉ | 1340/7045 [4:21:24<18:35:11, 11.73s/it] {'loss': 1.1675, 'learning_rate': 4.6712626486040206e-06, 'epoch': 0.19} + 19%|█▉ | 1340/7045 [4:21:24<18:35:11, 11.73s/it] 19%|█▉ | 1341/7045 [4:21:35<18:20:34, 11.58s/it] {'loss': 1.1328, 'learning_rate': 4.6706926746629315e-06, 'epoch': 0.19} + 19%|█▉ | 1341/7045 [4:21:35<18:20:34, 11.58s/it] 19%|█▉ | 1342/7045 [4:21:47<18:22:00, 11.59s/it] {'loss': 1.1426, 'learning_rate': 4.670122241867088e-06, 'epoch': 0.19} + 19%|█▉ | 1342/7045 [4:21:47<18:22:00, 11.59s/it] 19%|█▉ | 1343/7045 [4:21:58<18:09:12, 11.46s/it] {'loss': 1.1748, 'learning_rate': 4.669551350337071e-06, 'epoch': 0.19} + 19%|█▉ | 1343/7045 [4:21:58<18:09:12, 11.46s/it] 19%|█▉ | 1344/7045 [4:22:09<18:00:15, 11.37s/it] {'loss': 1.1572, 'learning_rate': 4.668980000193558e-06, 'epoch': 0.19} + 19%|█▉ | 1344/7045 [4:22:09<18:00:15, 11.37s/it] 19%|█▉ | 1345/7045 [4:22:22<18:36:37, 11.75s/it] {'loss': 1.1152, 'learning_rate': 4.668408191557326e-06, 'epoch': 0.19} + 19%|█▉ | 1345/7045 [4:22:22<18:36:37, 11.75s/it] 19%|█▉ | 1346/7045 [4:22:33<18:17:45, 11.56s/it] {'loss': 1.1562, 'learning_rate': 4.667835924549247e-06, 'epoch': 0.19} + 19%|█▉ | 1346/7045 [4:22:33<18:17:45, 11.56s/it] 19%|█▉ | 1347/7045 [4:22:45<18:32:22, 11.71s/it] {'loss': 1.1699, 'learning_rate': 4.667263199290292e-06, 'epoch': 0.19} + 19%|█▉ | 1347/7045 [4:22:45<18:32:22, 11.71s/it] 19%|█▉ | 1348/7045 [4:22:56<18:27:54, 11.67s/it] {'loss': 1.1768, 'learning_rate': 4.666690015901524e-06, 'epoch': 0.19} + 19%|█▉ | 1348/7045 [4:22:56<18:27:54, 11.67s/it] 19%|█▉ | 1349/7045 [4:23:10<19:18:07, 12.20s/it] {'loss': 1.125, 'learning_rate': 4.666116374504108e-06, 'epoch': 0.19} + 19%|█▉ | 1349/7045 [4:23:10<19:18:07, 12.20s/it] 19%|█▉ | 1350/7045 [4:23:21<18:49:07, 11.90s/it] {'loss': 1.1172, 'learning_rate': 4.665542275219305e-06, 'epoch': 0.19} + 19%|█▉ | 1350/7045 [4:23:21<18:49:07, 11.90s/it] 19%|█▉ | 1351/7045 [4:23:34<19:16:55, 12.19s/it] {'loss': 1.1553, 'learning_rate': 4.66496771816847e-06, 'epoch': 0.19} + 19%|█▉ | 1351/7045 [4:23:34<19:16:55, 12.19s/it] 19%|█▉ | 1352/7045 [4:23:47<19:32:30, 12.36s/it] {'loss': 1.1348, 'learning_rate': 4.664392703473057e-06, 'epoch': 0.19} + 19%|█▉ | 1352/7045 [4:23:47<19:32:30, 12.36s/it] 19%|█▉ | 1353/7045 [4:23:59<19:23:11, 12.26s/it] {'loss': 1.1221, 'learning_rate': 4.663817231254616e-06, 'epoch': 0.19} + 19%|█▉ | 1353/7045 [4:23:59<19:23:11, 12.26s/it] 19%|█▉ | 1354/7045 [4:24:10<18:50:16, 11.92s/it] {'loss': 1.168, 'learning_rate': 4.663241301634795e-06, 'epoch': 0.19} + 19%|█▉ | 1354/7045 [4:24:10<18:50:16, 11.92s/it] 19%|█▉ | 1355/7045 [4:24:21<18:28:51, 11.69s/it] {'loss': 1.1426, 'learning_rate': 4.662664914735336e-06, 'epoch': 0.19} + 19%|█▉ | 1355/7045 [4:24:21<18:28:51, 11.69s/it] 19%|█▉ | 1356/7045 [4:24:32<18:20:49, 11.61s/it] {'loss': 1.1182, 'learning_rate': 4.662088070678081e-06, 'epoch': 0.19} + 19%|█▉ | 1356/7045 [4:24:32<18:20:49, 11.61s/it] 19%|█▉ | 1357/7045 [4:24:45<18:56:59, 11.99s/it] {'loss': 1.123, 'learning_rate': 4.6615107695849675e-06, 'epoch': 0.19} + 19%|█▉ | 1357/7045 [4:24:45<18:56:59, 11.99s/it] 19%|█▉ | 1358/7045 [4:24:56<18:31:34, 11.73s/it] {'loss': 1.1328, 'learning_rate': 4.6609330115780264e-06, 'epoch': 0.19} + 19%|█▉ | 1358/7045 [4:24:56<18:31:34, 11.73s/it] 19%|█▉ | 1359/7045 [4:25:08<18:18:05, 11.59s/it] {'loss': 1.1426, 'learning_rate': 4.660354796779388e-06, 'epoch': 0.19} + 19%|█▉ | 1359/7045 [4:25:08<18:18:05, 11.59s/it] 19%|█▉ | 1360/7045 [4:25:20<18:36:40, 11.79s/it] {'loss': 1.124, 'learning_rate': 4.659776125311282e-06, 'epoch': 0.19} + 19%|█▉ | 1360/7045 [4:25:20<18:36:40, 11.79s/it] 19%|█▉ | 1361/7045 [4:25:33<19:23:58, 12.29s/it] {'loss': 1.1753, 'learning_rate': 4.65919699729603e-06, 'epoch': 0.19} + 19%|█▉ | 1361/7045 [4:25:33<19:23:58, 12.29s/it] 19%|█▉ | 1362/7045 [4:25:45<18:55:13, 11.99s/it] {'loss': 1.1582, 'learning_rate': 4.65861741285605e-06, 'epoch': 0.19} + 19%|█▉ | 1362/7045 [4:25:45<18:55:13, 11.99s/it] 19%|█▉ | 1363/7045 [4:25:56<18:34:44, 11.77s/it] {'loss': 1.1255, 'learning_rate': 4.658037372113861e-06, 'epoch': 0.19} + 19%|█▉ | 1363/7045 [4:25:56<18:34:44, 11.77s/it] 19%|█▉ | 1364/7045 [4:26:07<18:15:37, 11.57s/it] {'loss': 1.127, 'learning_rate': 4.657456875192074e-06, 'epoch': 0.19} + 19%|█▉ | 1364/7045 [4:26:07<18:15:37, 11.57s/it] 19%|█▉ | 1365/7045 [4:26:20<18:56:51, 12.01s/it] {'loss': 1.1147, 'learning_rate': 4.6568759222134e-06, 'epoch': 0.19} + 19%|█▉ | 1365/7045 [4:26:20<18:56:51, 12.01s/it] 19%|█▉ | 1366/7045 [4:26:31<18:28:22, 11.71s/it] {'loss': 1.1602, 'learning_rate': 4.656294513300642e-06, 'epoch': 0.19} + 19%|█▉ | 1366/7045 [4:26:31<18:28:22, 11.71s/it] 19%|█▉ | 1367/7045 [4:26:43<18:49:54, 11.94s/it] {'loss': 1.1377, 'learning_rate': 4.655712648576703e-06, 'epoch': 0.19} + 19%|█▉ | 1367/7045 [4:26:43<18:49:54, 11.94s/it] 19%|█▉ | 1368/7045 [4:26:55<18:38:32, 11.82s/it] {'loss': 1.1436, 'learning_rate': 4.655130328164581e-06, 'epoch': 0.19} + 19%|█▉ | 1368/7045 [4:26:55<18:38:32, 11.82s/it] 19%|█▉ | 1369/7045 [4:27:06<18:20:23, 11.63s/it] {'loss': 1.1299, 'learning_rate': 4.654547552187372e-06, 'epoch': 0.19} + 19%|█▉ | 1369/7045 [4:27:06<18:20:23, 11.63s/it] 19%|█▉ | 1370/7045 [4:27:17<18:03:43, 11.46s/it] {'loss': 1.1787, 'learning_rate': 4.653964320768265e-06, 'epoch': 0.19} + 19%|█▉ | 1370/7045 [4:27:17<18:03:43, 11.46s/it] 19%|█▉ | 1371/7045 [4:27:30<18:25:43, 11.69s/it] {'loss': 1.168, 'learning_rate': 4.6533806340305474e-06, 'epoch': 0.19} + 19%|█▉ | 1371/7045 [4:27:30<18:25:43, 11.69s/it] 19%|█▉ | 1372/7045 [4:27:42<19:01:02, 12.07s/it] {'loss': 1.1377, 'learning_rate': 4.652796492097605e-06, 'epoch': 0.19} + 19%|█▉ | 1372/7045 [4:27:42<19:01:02, 12.07s/it] 19%|█▉ | 1373/7045 [4:27:55<19:23:06, 12.30s/it] {'loss': 1.1133, 'learning_rate': 4.652211895092913e-06, 'epoch': 0.19} + 19%|█▉ | 1373/7045 [4:27:55<19:23:06, 12.30s/it] 20%|█▉ | 1374/7045 [4:28:07<18:52:36, 11.98s/it] {'loss': 1.1284, 'learning_rate': 4.6516268431400515e-06, 'epoch': 0.2} + 20%|█▉ | 1374/7045 [4:28:07<18:52:36, 11.98s/it] 20%|█▉ | 1375/7045 [4:28:18<18:36:37, 11.82s/it] {'loss': 1.1582, 'learning_rate': 4.651041336362689e-06, 'epoch': 0.2} + 20%|█▉ | 1375/7045 [4:28:18<18:36:37, 11.82s/it] 20%|█▉ | 1376/7045 [4:28:31<18:57:22, 12.04s/it] {'loss': 1.1709, 'learning_rate': 4.650455374884597e-06, 'epoch': 0.2} + 20%|█▉ | 1376/7045 [4:28:31<18:57:22, 12.04s/it] 20%|█▉ | 1377/7045 [4:28:42<18:33:55, 11.79s/it] {'loss': 1.1211, 'learning_rate': 4.649868958829638e-06, 'epoch': 0.2} + 20%|█▉ | 1377/7045 [4:28:42<18:33:55, 11.79s/it] 20%|█▉ | 1378/7045 [4:28:53<18:08:32, 11.53s/it] {'loss': 1.1406, 'learning_rate': 4.649282088321772e-06, 'epoch': 0.2} + 20%|█▉ | 1378/7045 [4:28:53<18:08:32, 11.53s/it] 20%|█▉ | 1379/7045 [4:29:06<18:46:36, 11.93s/it] {'loss': 1.1548, 'learning_rate': 4.648694763485056e-06, 'epoch': 0.2} + 20%|█▉ | 1379/7045 [4:29:06<18:46:36, 11.93s/it] 20%|█▉ | 1380/7045 [4:29:17<18:44:02, 11.91s/it] {'loss': 1.1055, 'learning_rate': 4.648106984443641e-06, 'epoch': 0.2} + 20%|█▉ | 1380/7045 [4:29:17<18:44:02, 11.91s/it] 20%|█▉ | 1381/7045 [4:29:29<18:29:52, 11.76s/it] {'loss': 1.1533, 'learning_rate': 4.6475187513217776e-06, 'epoch': 0.2} + 20%|█▉ | 1381/7045 [4:29:29<18:29:52, 11.76s/it] 20%|█▉ | 1382/7045 [4:29:40<18:19:12, 11.65s/it] {'loss': 1.1416, 'learning_rate': 4.64693006424381e-06, 'epoch': 0.2} + 20%|█▉ | 1382/7045 [4:29:40<18:19:12, 11.65s/it] 20%|█▉ | 1383/7045 [4:29:52<18:19:19, 11.65s/it] {'loss': 1.209, 'learning_rate': 4.6463409233341776e-06, 'epoch': 0.2} + 20%|█▉ | 1383/7045 [4:29:52<18:19:19, 11.65s/it] 20%|█▉ | 1384/7045 [4:30:05<18:48:42, 11.96s/it] {'loss': 1.1416, 'learning_rate': 4.645751328717417e-06, 'epoch': 0.2} + 20%|█▉ | 1384/7045 [4:30:05<18:48:42, 11.96s/it] 20%|█▉ | 1385/7045 [4:30:16<18:41:50, 11.89s/it] {'loss': 1.1592, 'learning_rate': 4.64516128051816e-06, 'epoch': 0.2} + 20%|█▉ | 1385/7045 [4:30:16<18:41:50, 11.89s/it] 20%|█▉ | 1386/7045 [4:30:28<18:47:28, 11.95s/it] {'loss': 1.1699, 'learning_rate': 4.644570778861135e-06, 'epoch': 0.2} + 20%|█▉ | 1386/7045 [4:30:28<18:47:28, 11.95s/it] 20%|█▉ | 1387/7045 [4:30:40<18:30:59, 11.78s/it] {'loss': 1.084, 'learning_rate': 4.6439798238711676e-06, 'epoch': 0.2} + 20%|█▉ | 1387/7045 [4:30:40<18:30:59, 11.78s/it] 20%|█▉ | 1388/7045 [4:30:51<18:09:05, 11.55s/it] {'loss': 1.1357, 'learning_rate': 4.6433884156731755e-06, 'epoch': 0.2} + 20%|█▉ | 1388/7045 [4:30:51<18:09:05, 11.55s/it] 20%|█▉ | 1389/7045 [4:31:03<18:21:05, 11.68s/it] {'loss': 1.083, 'learning_rate': 4.642796554392175e-06, 'epoch': 0.2} + 20%|█▉ | 1389/7045 [4:31:03<18:21:05, 11.68s/it] 20%|█▉ | 1390/7045 [4:31:15<18:36:59, 11.85s/it] {'loss': 1.1221, 'learning_rate': 4.642204240153278e-06, 'epoch': 0.2} + 20%|█▉ | 1390/7045 [4:31:15<18:36:59, 11.85s/it] 20%|█▉ | 1391/7045 [4:31:26<18:24:09, 11.72s/it] {'loss': 1.1826, 'learning_rate': 4.6416114730816906e-06, 'epoch': 0.2} + 20%|█▉ | 1391/7045 [4:31:26<18:24:09, 11.72s/it] 20%|█▉ | 1392/7045 [4:31:37<17:59:44, 11.46s/it] {'loss': 1.1201, 'learning_rate': 4.641018253302716e-06, 'epoch': 0.2} + 20%|█▉ | 1392/7045 [4:31:37<17:59:44, 11.46s/it] 20%|█▉ | 1393/7045 [4:31:51<19:13:32, 12.25s/it] {'loss': 1.1348, 'learning_rate': 4.6404245809417536e-06, 'epoch': 0.2} + 20%|█▉ | 1393/7045 [4:31:51<19:13:32, 12.25s/it] 20%|█▉ | 1394/7045 [4:32:02<18:39:07, 11.88s/it] {'loss': 1.0996, 'learning_rate': 4.6398304561242965e-06, 'epoch': 0.2} + 20%|█▉ | 1394/7045 [4:32:02<18:39:07, 11.88s/it] 20%|█▉ | 1395/7045 [4:32:13<18:16:55, 11.65s/it] {'loss': 1.1562, 'learning_rate': 4.6392358789759364e-06, 'epoch': 0.2} + 20%|█▉ | 1395/7045 [4:32:13<18:16:55, 11.65s/it] 20%|█▉ | 1396/7045 [4:32:25<18:15:03, 11.63s/it] {'loss': 1.1709, 'learning_rate': 4.638640849622356e-06, 'epoch': 0.2} + 20%|█▉ | 1396/7045 [4:32:25<18:15:03, 11.63s/it] 20%|█▉ | 1397/7045 [4:32:36<18:04:38, 11.52s/it] {'loss': 1.1201, 'learning_rate': 4.638045368189338e-06, 'epoch': 0.2} + 20%|█▉ | 1397/7045 [4:32:36<18:04:38, 11.52s/it] 20%|█▉ | 1398/7045 [4:32:47<17:50:55, 11.38s/it] {'loss': 1.1709, 'learning_rate': 4.637449434802759e-06, 'epoch': 0.2} + 20%|█▉ | 1398/7045 [4:32:47<17:50:55, 11.38s/it] 20%|█▉ | 1399/7045 [4:32:59<17:51:15, 11.38s/it] {'loss': 1.0859, 'learning_rate': 4.636853049588591e-06, 'epoch': 0.2} + 20%|█▉ | 1399/7045 [4:32:59<17:51:15, 11.38s/it] 20%|█▉ | 1400/7045 [4:33:11<18:10:15, 11.59s/it] {'loss': 1.1055, 'learning_rate': 4.6362562126729016e-06, 'epoch': 0.2} + 20%|█▉ | 1400/7045 [4:33:11<18:10:15, 11.59s/it] 20%|█▉ | 1401/7045 [4:33:22<17:56:32, 11.44s/it] {'loss': 1.0571, 'learning_rate': 4.635658924181854e-06, 'epoch': 0.2} + 20%|█▉ | 1401/7045 [4:33:22<17:56:32, 11.44s/it] 20%|█▉ | 1402/7045 [4:33:35<18:52:37, 12.04s/it] {'loss': 1.1094, 'learning_rate': 4.635061184241707e-06, 'epoch': 0.2} + 20%|█▉ | 1402/7045 [4:33:35<18:52:37, 12.04s/it] 20%|█▉ | 1403/7045 [4:33:46<18:23:08, 11.73s/it] {'loss': 1.166, 'learning_rate': 4.634462992978815e-06, 'epoch': 0.2} + 20%|█▉ | 1403/7045 [4:33:46<18:23:08, 11.73s/it] 20%|█▉ | 1404/7045 [4:33:58<18:10:25, 11.60s/it] {'loss': 1.1562, 'learning_rate': 4.633864350519627e-06, 'epoch': 0.2} + 20%|█▉ | 1404/7045 [4:33:58<18:10:25, 11.60s/it] 20%|█▉ | 1405/7045 [4:34:10<18:29:48, 11.81s/it] {'loss': 1.0938, 'learning_rate': 4.633265256990687e-06, 'epoch': 0.2} + 20%|█▉ | 1405/7045 [4:34:10<18:29:48, 11.81s/it] 20%|█▉ | 1406/7045 [4:34:21<18:11:57, 11.62s/it] {'loss': 1.1055, 'learning_rate': 4.6326657125186356e-06, 'epoch': 0.2} + 20%|█▉ | 1406/7045 [4:34:21<18:11:57, 11.62s/it] 20%|█▉ | 1407/7045 [4:34:33<18:05:56, 11.56s/it] {'loss': 1.1113, 'learning_rate': 4.632065717230209e-06, 'epoch': 0.2} + 20%|█▉ | 1407/7045 [4:34:33<18:05:56, 11.56s/it] 20%|█▉ | 1408/7045 [4:34:43<17:48:43, 11.38s/it] {'loss': 1.0879, 'learning_rate': 4.631465271252239e-06, 'epoch': 0.2} + 20%|█▉ | 1408/7045 [4:34:43<17:48:43, 11.38s/it] 20%|██ | 1409/7045 [4:34:57<18:39:46, 11.92s/it] {'loss': 1.0918, 'learning_rate': 4.63086437471165e-06, 'epoch': 0.2} + 20%|██ | 1409/7045 [4:34:57<18:39:46, 11.92s/it] 20%|██ | 1410/7045 [4:35:09<18:43:08, 11.96s/it] {'loss': 1.1069, 'learning_rate': 4.630263027735463e-06, 'epoch': 0.2} + 20%|██ | 1410/7045 [4:35:09<18:43:08, 11.96s/it] 20%|██ | 1411/7045 [4:35:20<18:27:13, 11.79s/it] {'loss': 1.125, 'learning_rate': 4.629661230450795e-06, 'epoch': 0.2} + 20%|██ | 1411/7045 [4:35:20<18:27:13, 11.79s/it] 20%|██ | 1412/7045 [4:35:32<18:19:50, 11.71s/it] {'loss': 1.1494, 'learning_rate': 4.629058982984859e-06, 'epoch': 0.2} + 20%|██ | 1412/7045 [4:35:32<18:19:50, 11.71s/it] 20%|██ | 1413/7045 [4:35:44<18:28:34, 11.81s/it] {'loss': 1.1504, 'learning_rate': 4.62845628546496e-06, 'epoch': 0.2} + 20%|██ | 1413/7045 [4:35:44<18:28:34, 11.81s/it] 20%|██ | 1414/7045 [4:35:56<18:28:42, 11.81s/it] {'loss': 1.0967, 'learning_rate': 4.6278531380185e-06, 'epoch': 0.2} + 20%|██ | 1414/7045 [4:35:56<18:28:42, 11.81s/it] 20%|██ | 1415/7045 [4:36:07<18:11:16, 11.63s/it] {'loss': 1.1348, 'learning_rate': 4.627249540772979e-06, 'epoch': 0.2} + 20%|██ | 1415/7045 [4:36:07<18:11:16, 11.63s/it] 20%|██ | 1416/7045 [4:36:18<18:03:55, 11.55s/it] {'loss': 1.1475, 'learning_rate': 4.626645493855986e-06, 'epoch': 0.2} + 20%|██ | 1416/7045 [4:36:18<18:03:55, 11.55s/it] 20%|██ | 1417/7045 [4:36:31<18:27:47, 11.81s/it] {'loss': 1.1289, 'learning_rate': 4.626040997395209e-06, 'epoch': 0.2} + 20%|██ | 1417/7045 [4:36:31<18:27:47, 11.81s/it] 20%|██ | 1418/7045 [4:36:42<18:09:14, 11.61s/it] {'loss': 1.1484, 'learning_rate': 4.625436051518432e-06, 'epoch': 0.2} + 20%|██ | 1418/7045 [4:36:42<18:09:14, 11.61s/it] 20%|██ | 1419/7045 [4:36:53<18:00:01, 11.52s/it] {'loss': 1.1582, 'learning_rate': 4.624830656353529e-06, 'epoch': 0.2} + 20%|██ | 1419/7045 [4:36:53<18:00:01, 11.52s/it] 20%|██ | 1420/7045 [4:37:04<17:51:04, 11.42s/it] {'loss': 1.1504, 'learning_rate': 4.624224812028476e-06, 'epoch': 0.2} + 20%|██ | 1420/7045 [4:37:04<17:51:04, 11.42s/it] 20%|██ | 1421/7045 [4:37:18<18:46:59, 12.02s/it] {'loss': 1.1406, 'learning_rate': 4.623618518671338e-06, 'epoch': 0.2} + 20%|██ | 1421/7045 [4:37:18<18:46:59, 12.02s/it] 20%|██ | 1422/7045 [4:37:29<18:21:51, 11.76s/it] {'loss': 1.1543, 'learning_rate': 4.623011776410278e-06, 'epoch': 0.2} + 20%|██ | 1422/7045 [4:37:29<18:21:51, 11.76s/it] 20%|██ | 1423/7045 [4:37:40<18:05:39, 11.59s/it] {'loss': 1.1533, 'learning_rate': 4.622404585373551e-06, 'epoch': 0.2} + 20%|██ | 1423/7045 [4:37:40<18:05:39, 11.59s/it] 20%|██ | 1424/7045 [4:37:51<17:51:26, 11.44s/it] {'loss': 1.1406, 'learning_rate': 4.621796945689512e-06, 'epoch': 0.2} + 20%|██ | 1424/7045 [4:37:51<17:51:26, 11.44s/it] 20%|██ | 1425/7045 [4:38:02<17:39:41, 11.31s/it] {'loss': 1.147, 'learning_rate': 4.621188857486606e-06, 'epoch': 0.2} + 20%|██ | 1425/7045 [4:38:02<17:39:41, 11.31s/it] 20%|██ | 1426/7045 [4:38:13<17:31:26, 11.23s/it] {'loss': 1.167, 'learning_rate': 4.620580320893374e-06, 'epoch': 0.2} + 20%|██ | 1426/7045 [4:38:13<17:31:26, 11.23s/it] 20%|██ | 1427/7045 [4:38:25<18:01:58, 11.56s/it] {'loss': 1.1523, 'learning_rate': 4.6199713360384545e-06, 'epoch': 0.2} + 20%|██ | 1427/7045 [4:38:25<18:01:58, 11.56s/it] 20%|██ | 1428/7045 [4:38:37<18:01:50, 11.56s/it] {'loss': 1.1406, 'learning_rate': 4.619361903050576e-06, 'epoch': 0.2} + 20%|██ | 1428/7045 [4:38:37<18:01:50, 11.56s/it] 20%|██ | 1429/7045 [4:38:50<18:31:28, 11.87s/it] {'loss': 1.2109, 'learning_rate': 4.618752022058566e-06, 'epoch': 0.2} + 20%|██ | 1429/7045 [4:38:50<18:31:28, 11.87s/it] 20%|██ | 1430/7045 [4:39:02<18:49:32, 12.07s/it] {'loss': 1.1436, 'learning_rate': 4.618141693191343e-06, 'epoch': 0.2} + 20%|██ | 1430/7045 [4:39:02<18:49:32, 12.07s/it] 20%|██ | 1431/7045 [4:39:15<19:11:19, 12.30s/it] {'loss': 1.1143, 'learning_rate': 4.617530916577926e-06, 'epoch': 0.2} + 20%|██ | 1431/7045 [4:39:15<19:11:19, 12.30s/it] 20%|██ | 1432/7045 [4:39:27<19:11:05, 12.30s/it] {'loss': 1.1436, 'learning_rate': 4.61691969234742e-06, 'epoch': 0.2} + 20%|██ | 1432/7045 [4:39:27<19:11:05, 12.30s/it] 20%|██ | 1433/7045 [4:39:39<18:42:09, 12.00s/it] {'loss': 1.1504, 'learning_rate': 4.616308020629035e-06, 'epoch': 0.2} + 20%|██ | 1433/7045 [4:39:39<18:42:09, 12.00s/it] 20%|██ | 1434/7045 [4:39:50<18:19:45, 11.76s/it] {'loss': 1.1514, 'learning_rate': 4.615695901552064e-06, 'epoch': 0.2} + 20%|██ | 1434/7045 [4:39:50<18:19:45, 11.76s/it] 20%|██ | 1435/7045 [4:40:01<18:07:12, 11.63s/it] {'loss': 1.1211, 'learning_rate': 4.615083335245905e-06, 'epoch': 0.2} + 20%|██ | 1435/7045 [4:40:01<18:07:12, 11.63s/it] 20%|██ | 1436/7045 [4:40:12<18:01:09, 11.57s/it] {'loss': 1.1318, 'learning_rate': 4.614470321840045e-06, 'epoch': 0.2} + 20%|██ | 1436/7045 [4:40:12<18:01:09, 11.57s/it] 20%|██ | 1437/7045 [4:40:25<18:14:27, 11.71s/it] {'loss': 1.1494, 'learning_rate': 4.613856861464065e-06, 'epoch': 0.2} + 20%|██ | 1437/7045 [4:40:25<18:14:27, 11.71s/it] 20%|██ | 1438/7045 [4:40:35<17:51:37, 11.47s/it] {'loss': 1.165, 'learning_rate': 4.6132429542476435e-06, 'epoch': 0.2} + 20%|██ | 1438/7045 [4:40:35<17:51:37, 11.47s/it] 20%|██ | 1439/7045 [4:40:47<17:58:10, 11.54s/it] {'loss': 1.0762, 'learning_rate': 4.612628600320554e-06, 'epoch': 0.2} + 20%|██ | 1439/7045 [4:40:47<17:58:10, 11.54s/it] 20%|██ | 1440/7045 [4:40:58<17:42:28, 11.37s/it] {'loss': 1.1064, 'learning_rate': 4.612013799812658e-06, 'epoch': 0.2} + 20%|██ | 1440/7045 [4:40:58<17:42:28, 11.37s/it] 20%|██ | 1441/7045 [4:41:10<18:09:06, 11.66s/it] {'loss': 1.1279, 'learning_rate': 4.6113985528539204e-06, 'epoch': 0.2} + 20%|██ | 1441/7045 [4:41:10<18:09:06, 11.66s/it] 20%|██ | 1442/7045 [4:41:22<17:58:11, 11.55s/it] {'loss': 1.1367, 'learning_rate': 4.610782859574393e-06, 'epoch': 0.2} + 20%|██ | 1442/7045 [4:41:22<17:58:11, 11.55s/it] 20%|██ | 1443/7045 [4:41:34<18:18:12, 11.76s/it] {'loss': 1.1162, 'learning_rate': 4.6101667201042265e-06, 'epoch': 0.2} + 20%|██ | 1443/7045 [4:41:34<18:18:12, 11.76s/it] 20%|██ | 1444/7045 [4:41:45<17:52:11, 11.49s/it] {'loss': 1.165, 'learning_rate': 4.609550134573664e-06, 'epoch': 0.2} + 20%|██ | 1444/7045 [4:41:45<17:52:11, 11.49s/it] 21%|██ | 1445/7045 [4:41:56<17:53:39, 11.50s/it] {'loss': 1.168, 'learning_rate': 4.608933103113044e-06, 'epoch': 0.21} + 21%|██ | 1445/7045 [4:41:56<17:53:39, 11.50s/it] 21%|██ | 1446/7045 [4:42:08<18:10:53, 11.69s/it] {'loss': 1.1045, 'learning_rate': 4.608315625852796e-06, 'epoch': 0.21} + 21%|██ | 1446/7045 [4:42:08<18:10:53, 11.69s/it] 21%|██ | 1447/7045 [4:42:20<18:16:04, 11.75s/it] {'loss': 1.1187, 'learning_rate': 4.607697702923448e-06, 'epoch': 0.21} + 21%|██ | 1447/7045 [4:42:20<18:16:04, 11.75s/it] 21%|██ | 1448/7045 [4:42:31<17:51:50, 11.49s/it] {'loss': 1.1387, 'learning_rate': 4.607079334455622e-06, 'epoch': 0.21} + 21%|██ | 1448/7045 [4:42:31<17:51:50, 11.49s/it] 21%|██ | 1449/7045 [4:42:43<17:47:03, 11.44s/it] {'loss': 1.168, 'learning_rate': 4.606460520580029e-06, 'epoch': 0.21} + 21%|██ | 1449/7045 [4:42:43<17:47:03, 11.44s/it] 21%|██ | 1450/7045 [4:42:54<17:34:41, 11.31s/it] {'loss': 1.1357, 'learning_rate': 4.605841261427482e-06, 'epoch': 0.21} + 21%|██ | 1450/7045 [4:42:54<17:34:41, 11.31s/it] 21%|██ | 1451/7045 [4:43:06<17:53:38, 11.52s/it] {'loss': 1.1367, 'learning_rate': 4.605221557128881e-06, 'epoch': 0.21} + 21%|██ | 1451/7045 [4:43:06<17:53:38, 11.52s/it] 21%|██ | 1452/7045 [4:43:18<18:29:37, 11.90s/it] {'loss': 1.1523, 'learning_rate': 4.604601407815223e-06, 'epoch': 0.21} + 21%|██ | 1452/7045 [4:43:18<18:29:37, 11.90s/it] 21%|██ | 1453/7045 [4:43:31<18:37:52, 11.99s/it] {'loss': 1.1318, 'learning_rate': 4.6039808136176e-06, 'epoch': 0.21} + 21%|██ | 1453/7045 [4:43:31<18:37:52, 11.99s/it] 21%|██ | 1454/7045 [4:43:42<18:12:19, 11.72s/it] {'loss': 1.1621, 'learning_rate': 4.6033597746671975e-06, 'epoch': 0.21} + 21%|██ | 1454/7045 [4:43:42<18:12:19, 11.72s/it] 21%|██ | 1455/7045 [4:43:54<18:21:56, 11.83s/it] {'loss': 1.1045, 'learning_rate': 4.602738291095293e-06, 'epoch': 0.21} + 21%|██ | 1455/7045 [4:43:54<18:21:56, 11.83s/it] 21%|██ | 1456/7045 [4:44:05<18:04:08, 11.64s/it] {'loss': 1.1143, 'learning_rate': 4.602116363033262e-06, 'epoch': 0.21} + 21%|██ | 1456/7045 [4:44:05<18:04:08, 11.64s/it] 21%|██ | 1457/7045 [4:44:19<19:04:39, 12.29s/it] {'loss': 1.0913, 'learning_rate': 4.60149399061257e-06, 'epoch': 0.21} + 21%|██ | 1457/7045 [4:44:19<19:04:39, 12.29s/it] 21%|██ | 1458/7045 [4:44:30<18:31:42, 11.94s/it] {'loss': 1.1592, 'learning_rate': 4.600871173964778e-06, 'epoch': 0.21} + 21%|██ | 1458/7045 [4:44:30<18:31:42, 11.94s/it] 21%|██ | 1459/7045 [4:44:42<18:33:58, 11.97s/it] {'loss': 1.1211, 'learning_rate': 4.6002479132215415e-06, 'epoch': 0.21} + 21%|██ | 1459/7045 [4:44:42<18:33:58, 11.97s/it] 21%|██ | 1460/7045 [4:44:53<18:22:40, 11.85s/it] {'loss': 1.1216, 'learning_rate': 4.5996242085146095e-06, 'epoch': 0.21} + 21%|██ | 1460/7045 [4:44:53<18:22:40, 11.85s/it] 21%|██ | 1461/7045 [4:45:05<18:14:59, 11.77s/it] {'loss': 1.1367, 'learning_rate': 4.599000059975824e-06, 'epoch': 0.21} + 21%|██ | 1461/7045 [4:45:05<18:14:59, 11.77s/it] 21%|██ | 1462/7045 [4:45:17<18:11:55, 11.73s/it] {'loss': 1.1416, 'learning_rate': 4.598375467737121e-06, 'epoch': 0.21} + 21%|██ | 1462/7045 [4:45:17<18:11:55, 11.73s/it] 21%|██ | 1463/7045 [4:45:28<18:09:11, 11.71s/it] {'loss': 1.1953, 'learning_rate': 4.597750431930532e-06, 'epoch': 0.21} + 21%|██ | 1463/7045 [4:45:28<18:09:11, 11.71s/it] 21%|██ | 1464/7045 [4:45:39<17:48:33, 11.49s/it] {'loss': 1.1436, 'learning_rate': 4.5971249526881815e-06, 'epoch': 0.21} + 21%|██ | 1464/7045 [4:45:39<17:48:33, 11.49s/it] 21%|██ | 1465/7045 [4:45:50<17:36:15, 11.36s/it] {'loss': 1.1377, 'learning_rate': 4.596499030142285e-06, 'epoch': 0.21} + 21%|██ | 1465/7045 [4:45:50<17:36:15, 11.36s/it] 21%|██ | 1466/7045 [4:46:01<17:23:57, 11.23s/it] {'loss': 1.1089, 'learning_rate': 4.595872664425155e-06, 'epoch': 0.21} + 21%|██ | 1466/7045 [4:46:01<17:23:57, 11.23s/it] 21%|██ | 1467/7045 [4:46:14<17:56:38, 11.58s/it] {'loss': 1.127, 'learning_rate': 4.595245855669197e-06, 'epoch': 0.21} + 21%|██ | 1467/7045 [4:46:14<17:56:38, 11.58s/it] 21%|██ | 1468/7045 [4:46:25<17:53:56, 11.55s/it] {'loss': 1.1572, 'learning_rate': 4.59461860400691e-06, 'epoch': 0.21} + 21%|██ | 1468/7045 [4:46:25<17:53:56, 11.55s/it] 21%|██ | 1469/7045 [4:46:36<17:45:19, 11.46s/it] {'loss': 1.1143, 'learning_rate': 4.593990909570886e-06, 'epoch': 0.21} + 21%|██ | 1469/7045 [4:46:36<17:45:19, 11.46s/it] 21%|██ | 1470/7045 [4:46:48<17:40:09, 11.41s/it] {'loss': 1.1758, 'learning_rate': 4.593362772493811e-06, 'epoch': 0.21} + 21%|██ | 1470/7045 [4:46:48<17:40:09, 11.41s/it] 21%|██ | 1471/7045 [4:47:01<18:26:07, 11.91s/it] {'loss': 1.1294, 'learning_rate': 4.592734192908465e-06, 'epoch': 0.21} + 21%|██ | 1471/7045 [4:47:01<18:26:07, 11.91s/it] 21%|██ | 1472/7045 [4:47:12<18:13:29, 11.77s/it] {'loss': 1.1182, 'learning_rate': 4.59210517094772e-06, 'epoch': 0.21} + 21%|██ | 1472/7045 [4:47:12<18:13:29, 11.77s/it] 21%|██ | 1473/7045 [4:47:24<18:07:53, 11.71s/it] {'loss': 1.1631, 'learning_rate': 4.591475706744545e-06, 'epoch': 0.21} + 21%|██ | 1473/7045 [4:47:24<18:07:53, 11.71s/it] 21%|██ | 1474/7045 [4:47:36<18:29:26, 11.95s/it] {'loss': 1.1162, 'learning_rate': 4.590845800431998e-06, 'epoch': 0.21} + 21%|██ | 1474/7045 [4:47:36<18:29:26, 11.95s/it] 21%|██ | 1475/7045 [4:47:48<18:26:42, 11.92s/it] {'loss': 1.1372, 'learning_rate': 4.590215452143233e-06, 'epoch': 0.21} + 21%|██ | 1475/7045 [4:47:48<18:26:42, 11.92s/it] 21%|██ | 1476/7045 [4:48:00<18:29:20, 11.95s/it] {'loss': 1.1387, 'learning_rate': 4.589584662011497e-06, 'epoch': 0.21} + 21%|██ | 1476/7045 [4:48:00<18:29:20, 11.95s/it] 21%|██ | 1477/7045 [4:48:11<18:05:05, 11.69s/it] {'loss': 1.1143, 'learning_rate': 4.58895343017013e-06, 'epoch': 0.21} + 21%|██ | 1477/7045 [4:48:11<18:05:05, 11.69s/it] 21%|██ | 1478/7045 [4:48:25<18:49:48, 12.18s/it] {'loss': 1.1372, 'learning_rate': 4.588321756752567e-06, 'epoch': 0.21} + 21%|██ | 1478/7045 [4:48:25<18:49:48, 12.18s/it] 21%|██ | 1479/7045 [4:48:36<18:27:15, 11.94s/it] {'loss': 1.1123, 'learning_rate': 4.587689641892336e-06, 'epoch': 0.21} + 21%|██ | 1479/7045 [4:48:36<18:27:15, 11.94s/it] 21%|██ | 1480/7045 [4:48:47<18:05:59, 11.71s/it] {'loss': 1.166, 'learning_rate': 4.587057085723054e-06, 'epoch': 0.21} + 21%|██ | 1480/7045 [4:48:47<18:05:59, 11.71s/it] 21%|██ | 1481/7045 [4:48:59<17:59:25, 11.64s/it] {'loss': 1.1367, 'learning_rate': 4.586424088378437e-06, 'epoch': 0.21} + 21%|██ | 1481/7045 [4:48:59<17:59:25, 11.64s/it] 21%|██ | 1482/7045 [4:49:11<18:12:45, 11.79s/it] {'loss': 1.1138, 'learning_rate': 4.585790649992292e-06, 'epoch': 0.21} + 21%|██ | 1482/7045 [4:49:11<18:12:45, 11.79s/it] 21%|██ | 1483/7045 [4:49:23<18:20:09, 11.87s/it] {'loss': 1.1426, 'learning_rate': 4.585156770698519e-06, 'epoch': 0.21} + 21%|██ | 1483/7045 [4:49:23<18:20:09, 11.87s/it] 21%|██ | 1484/7045 [4:49:34<17:50:53, 11.55s/it] {'loss': 1.0991, 'learning_rate': 4.5845224506311105e-06, 'epoch': 0.21} + 21%|██ | 1484/7045 [4:49:34<17:50:53, 11.55s/it] 21%|██ | 1485/7045 [4:49:45<17:40:53, 11.45s/it] {'loss': 1.1074, 'learning_rate': 4.5838876899241544e-06, 'epoch': 0.21} + 21%|██ | 1485/7045 [4:49:45<17:40:53, 11.45s/it] 21%|██ | 1486/7045 [4:49:56<17:26:09, 11.29s/it] {'loss': 1.1592, 'learning_rate': 4.583252488711831e-06, 'epoch': 0.21} + 21%|██ | 1486/7045 [4:49:56<17:26:09, 11.29s/it] 21%|██ | 1487/7045 [4:50:07<17:31:13, 11.35s/it] {'loss': 1.1572, 'learning_rate': 4.582616847128411e-06, 'epoch': 0.21} + 21%|██ | 1487/7045 [4:50:07<17:31:13, 11.35s/it] 21%|██ | 1488/7045 [4:50:18<17:14:23, 11.17s/it] {'loss': 1.1338, 'learning_rate': 4.5819807653082615e-06, 'epoch': 0.21} + 21%|██ | 1488/7045 [4:50:18<17:14:23, 11.17s/it] 21%|██ | 1489/7045 [4:50:32<18:21:51, 11.90s/it] {'loss': 1.1079, 'learning_rate': 4.5813442433858415e-06, 'epoch': 0.21} + 21%|██ | 1489/7045 [4:50:32<18:21:51, 11.90s/it] 21%|██ | 1490/7045 [4:50:43<18:16:37, 11.84s/it] {'loss': 1.1572, 'learning_rate': 4.580707281495702e-06, 'epoch': 0.21} + 21%|██ | 1490/7045 [4:50:43<18:16:37, 11.84s/it] 21%|██ | 1491/7045 [4:50:54<17:51:59, 11.58s/it] {'loss': 1.0879, 'learning_rate': 4.580069879772491e-06, 'epoch': 0.21} + 21%|██ | 1491/7045 [4:50:54<17:51:59, 11.58s/it] 21%|██ | 1492/7045 [4:51:05<17:39:53, 11.45s/it] {'loss': 1.168, 'learning_rate': 4.579432038350943e-06, 'epoch': 0.21} + 21%|██ | 1492/7045 [4:51:05<17:39:53, 11.45s/it] 21%|██ | 1493/7045 [4:51:17<17:43:37, 11.49s/it] {'loss': 1.1602, 'learning_rate': 4.5787937573658905e-06, 'epoch': 0.21} + 21%|██ | 1493/7045 [4:51:17<17:43:37, 11.49s/it] 21%|██ | 1494/7045 [4:51:28<17:35:40, 11.41s/it] {'loss': 1.1221, 'learning_rate': 4.578155036952258e-06, 'epoch': 0.21} + 21%|██ | 1494/7045 [4:51:28<17:35:40, 11.41s/it] 21%|██ | 1495/7045 [4:51:39<17:22:15, 11.27s/it] {'loss': 1.1299, 'learning_rate': 4.577515877245061e-06, 'epoch': 0.21} + 21%|██ | 1495/7045 [4:51:39<17:22:15, 11.27s/it] 21%|██ | 1496/7045 [4:51:52<18:08:20, 11.77s/it] {'loss': 1.1353, 'learning_rate': 4.57687627837941e-06, 'epoch': 0.21} + 21%|██ | 1496/7045 [4:51:52<18:08:20, 11.77s/it] 21%|██ | 1497/7045 [4:52:06<19:04:13, 12.37s/it] {'loss': 1.1221, 'learning_rate': 4.576236240490507e-06, 'epoch': 0.21} + 21%|██ | 1497/7045 [4:52:06<19:04:13, 12.37s/it] 21%|██▏ | 1498/7045 [4:52:17<18:23:28, 11.94s/it] {'loss': 1.1133, 'learning_rate': 4.575595763713647e-06, 'epoch': 0.21} + 21%|██▏ | 1498/7045 [4:52:17<18:23:28, 11.94s/it] 21%|██▏ | 1499/7045 [4:52:28<18:03:10, 11.72s/it] {'loss': 1.1338, 'learning_rate': 4.574954848184219e-06, 'epoch': 0.21} + 21%|██▏ | 1499/7045 [4:52:28<18:03:10, 11.72s/it] 21%|██▏ | 1500/7045 [4:52:39<17:47:17, 11.55s/it] {'loss': 1.1494, 'learning_rate': 4.5743134940377036e-06, 'epoch': 0.21} + 21%|██▏ | 1500/7045 [4:52:39<17:47:17, 11.55s/it] 21%|██▏ | 1501/7045 [4:52:51<17:43:01, 11.50s/it] {'loss': 1.1172, 'learning_rate': 4.573671701409673e-06, 'epoch': 0.21} + 21%|██▏ | 1501/7045 [4:52:51<17:43:01, 11.50s/it] 21%|██▏ | 1502/7045 [4:53:02<17:31:18, 11.38s/it] {'loss': 1.1475, 'learning_rate': 4.573029470435795e-06, 'epoch': 0.21} + 21%|██▏ | 1502/7045 [4:53:02<17:31:18, 11.38s/it] 21%|██▏ | 1503/7045 [4:53:13<17:21:30, 11.28s/it] {'loss': 1.1934, 'learning_rate': 4.572386801251828e-06, 'epoch': 0.21} + 21%|██▏ | 1503/7045 [4:53:13<17:21:30, 11.28s/it] 21%|██▏ | 1504/7045 [4:53:24<17:32:22, 11.40s/it] {'loss': 1.1064, 'learning_rate': 4.571743693993623e-06, 'epoch': 0.21} + 21%|██▏ | 1504/7045 [4:53:24<17:32:22, 11.40s/it] 21%|██▏ | 1505/7045 [4:53:36<17:29:24, 11.37s/it] {'loss': 1.1836, 'learning_rate': 4.571100148797124e-06, 'epoch': 0.21} + 21%|██▏ | 1505/7045 [4:53:36<17:29:24, 11.37s/it] 21%|██▏ | 1506/7045 [4:53:47<17:26:57, 11.34s/it] {'loss': 1.1729, 'learning_rate': 4.570456165798368e-06, 'epoch': 0.21} + 21%|██▏ | 1506/7045 [4:53:47<17:26:57, 11.34s/it] 21%|██▏ | 1507/7045 [4:53:58<17:19:30, 11.26s/it] {'loss': 1.2031, 'learning_rate': 4.569811745133484e-06, 'epoch': 0.21} + 21%|██▏ | 1507/7045 [4:53:58<17:19:30, 11.26s/it] 21%|██▏ | 1508/7045 [4:54:10<17:48:06, 11.57s/it] {'loss': 1.1143, 'learning_rate': 4.569166886938695e-06, 'epoch': 0.21} + 21%|██▏ | 1508/7045 [4:54:10<17:48:06, 11.57s/it] 21%|██▏ | 1509/7045 [4:54:22<17:41:07, 11.50s/it] {'loss': 1.1338, 'learning_rate': 4.568521591350313e-06, 'epoch': 0.21} + 21%|██▏ | 1509/7045 [4:54:22<17:41:07, 11.50s/it] 21%|██▏ | 1510/7045 [4:54:35<18:27:57, 12.01s/it] {'loss': 1.124, 'learning_rate': 4.567875858504746e-06, 'epoch': 0.21} + 21%|██▏ | 1510/7045 [4:54:35<18:27:57, 12.01s/it] 21%|██▏ | 1511/7045 [4:54:47<18:31:14, 12.05s/it] {'loss': 1.1504, 'learning_rate': 4.567229688538494e-06, 'epoch': 0.21} + 21%|██▏ | 1511/7045 [4:54:47<18:31:14, 12.05s/it] 21%|██▏ | 1512/7045 [4:54:59<18:34:38, 12.09s/it] {'loss': 1.1348, 'learning_rate': 4.566583081588146e-06, 'epoch': 0.21} + 21%|██▏ | 1512/7045 [4:54:59<18:34:38, 12.09s/it] 21%|██▏ | 1513/7045 [4:55:10<18:01:54, 11.73s/it] {'loss': 1.1221, 'learning_rate': 4.565936037790388e-06, 'epoch': 0.21} + 21%|██▏ | 1513/7045 [4:55:10<18:01:54, 11.73s/it] 21%|██▏ | 1514/7045 [4:55:21<17:47:00, 11.57s/it] {'loss': 1.1416, 'learning_rate': 4.565288557281995e-06, 'epoch': 0.21} + 21%|██▏ | 1514/7045 [4:55:21<17:47:00, 11.57s/it] 22%|██▏ | 1515/7045 [4:55:33<17:49:42, 11.61s/it] {'loss': 1.1465, 'learning_rate': 4.564640640199836e-06, 'epoch': 0.22} + 22%|██▏ | 1515/7045 [4:55:33<17:49:42, 11.61s/it] 22%|██▏ | 1516/7045 [4:55:44<17:34:59, 11.45s/it] {'loss': 1.1602, 'learning_rate': 4.563992286680872e-06, 'epoch': 0.22} + 22%|██▏ | 1516/7045 [4:55:44<17:34:59, 11.45s/it] 22%|██▏ | 1517/7045 [4:55:55<17:30:03, 11.40s/it] {'loss': 1.127, 'learning_rate': 4.563343496862156e-06, 'epoch': 0.22} + 22%|██▏ | 1517/7045 [4:55:55<17:30:03, 11.40s/it] 22%|██▏ | 1518/7045 [4:56:06<17:19:36, 11.29s/it] {'loss': 1.125, 'learning_rate': 4.562694270880833e-06, 'epoch': 0.22} + 22%|██▏ | 1518/7045 [4:56:06<17:19:36, 11.29s/it] 22%|██▏ | 1519/7045 [4:56:17<17:14:07, 11.23s/it] {'loss': 1.1143, 'learning_rate': 4.562044608874141e-06, 'epoch': 0.22} + 22%|██▏ | 1519/7045 [4:56:17<17:14:07, 11.23s/it] 22%|██▏ | 1520/7045 [4:56:29<17:24:24, 11.34s/it] {'loss': 1.2158, 'learning_rate': 4.561394510979409e-06, 'epoch': 0.22} + 22%|██▏ | 1520/7045 [4:56:29<17:24:24, 11.34s/it] 22%|██▏ | 1521/7045 [4:56:40<17:23:20, 11.33s/it] {'loss': 1.1221, 'learning_rate': 4.560743977334059e-06, 'epoch': 0.22} + 22%|██▏ | 1521/7045 [4:56:40<17:23:20, 11.33s/it] 22%|██▏ | 1522/7045 [4:56:53<18:04:47, 11.78s/it] {'loss': 1.1416, 'learning_rate': 4.560093008075606e-06, 'epoch': 0.22} + 22%|██▏ | 1522/7045 [4:56:53<18:04:47, 11.78s/it] 22%|██▏ | 1523/7045 [4:57:05<17:56:06, 11.69s/it] {'loss': 1.1553, 'learning_rate': 4.5594416033416534e-06, 'epoch': 0.22} + 22%|██▏ | 1523/7045 [4:57:05<17:56:06, 11.69s/it] 22%|██▏ | 1524/7045 [4:57:16<17:38:56, 11.51s/it] {'loss': 1.1299, 'learning_rate': 4.558789763269902e-06, 'epoch': 0.22} + 22%|██▏ | 1524/7045 [4:57:16<17:38:56, 11.51s/it] 22%|██▏ | 1525/7045 [4:57:28<17:50:44, 11.64s/it] {'loss': 1.1099, 'learning_rate': 4.558137487998141e-06, 'epoch': 0.22} + 22%|██▏ | 1525/7045 [4:57:28<17:50:44, 11.64s/it] 22%|██▏ | 1526/7045 [4:57:40<18:19:20, 11.95s/it] {'loss': 1.1123, 'learning_rate': 4.557484777664252e-06, 'epoch': 0.22} + 22%|██▏ | 1526/7045 [4:57:40<18:19:20, 11.95s/it] 22%|██▏ | 1527/7045 [4:57:52<18:03:42, 11.78s/it] {'loss': 1.168, 'learning_rate': 4.556831632406209e-06, 'epoch': 0.22} + 22%|██▏ | 1527/7045 [4:57:52<18:03:42, 11.78s/it] 22%|██▏ | 1528/7045 [4:58:03<17:46:47, 11.60s/it] {'loss': 1.0996, 'learning_rate': 4.556178052362078e-06, 'epoch': 0.22} + 22%|██▏ | 1528/7045 [4:58:03<17:46:47, 11.60s/it] 22%|██▏ | 1529/7045 [4:58:14<17:28:18, 11.40s/it] {'loss': 1.1362, 'learning_rate': 4.555524037670018e-06, 'epoch': 0.22} + 22%|██▏ | 1529/7045 [4:58:14<17:28:18, 11.40s/it] 22%|██▏ | 1530/7045 [4:58:25<17:10:36, 11.21s/it] {'loss': 1.1562, 'learning_rate': 4.554869588468277e-06, 'epoch': 0.22} + 22%|██▏ | 1530/7045 [4:58:25<17:10:36, 11.21s/it] 22%|██▏ | 1531/7045 [4:58:36<17:14:40, 11.26s/it] {'loss': 1.2031, 'learning_rate': 4.554214704895198e-06, 'epoch': 0.22} + 22%|██▏ | 1531/7045 [4:58:36<17:14:40, 11.26s/it] 22%|██▏ | 1532/7045 [4:58:47<17:11:35, 11.23s/it] {'loss': 1.1533, 'learning_rate': 4.553559387089215e-06, 'epoch': 0.22} + 22%|██▏ | 1532/7045 [4:58:47<17:11:35, 11.23s/it] 22%|██▏ | 1533/7045 [4:58:59<17:38:27, 11.52s/it] {'loss': 1.1123, 'learning_rate': 4.552903635188851e-06, 'epoch': 0.22} + 22%|██▏ | 1533/7045 [4:58:59<17:38:27, 11.52s/it] 22%|██▏ | 1534/7045 [4:59:11<17:27:51, 11.41s/it] {'loss': 1.1641, 'learning_rate': 4.5522474493327246e-06, 'epoch': 0.22} + 22%|██▏ | 1534/7045 [4:59:11<17:27:51, 11.41s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 4 bytes but only got 0. + warnings.warn(str(msg)) + 22%|██▏ | 1535/7045 [4:59:23<18:02:22, 11.79s/it] {'loss': 1.1191, 'learning_rate': 4.551590829659544e-06, 'epoch': 0.22} + 22%|██▏ | 1535/7045 [4:59:23<18:02:22, 11.79s/it] 22%|██▏ | 1536/7045 [4:59:34<17:47:03, 11.62s/it] {'loss': 1.1289, 'learning_rate': 4.5509337763081095e-06, 'epoch': 0.22} + 22%|██▏ | 1536/7045 [4:59:34<17:47:03, 11.62s/it] 22%|██▏ | 1537/7045 [4:59:46<17:57:48, 11.74s/it] {'loss': 1.1611, 'learning_rate': 4.550276289417314e-06, 'epoch': 0.22} + 22%|██▏ | 1537/7045 [4:59:46<17:57:48, 11.74s/it] 22%|██▏ | 1538/7045 [4:59:58<17:42:15, 11.57s/it] {'loss': 1.168, 'learning_rate': 4.549618369126141e-06, 'epoch': 0.22} + 22%|██▏ | 1538/7045 [4:59:58<17:42:15, 11.57s/it] 22%|██▏ | 1539/7045 [5:00:10<17:53:38, 11.70s/it] {'loss': 1.1128, 'learning_rate': 4.548960015573664e-06, 'epoch': 0.22} + 22%|██▏ | 1539/7045 [5:00:10<17:53:38, 11.70s/it] 22%|██▏ | 1540/7045 [5:00:21<17:49:20, 11.65s/it] {'loss': 1.1621, 'learning_rate': 4.548301228899053e-06, 'epoch': 0.22} + 22%|██▏ | 1540/7045 [5:00:21<17:49:20, 11.65s/it] 22%|██▏ | 1541/7045 [5:00:32<17:34:33, 11.50s/it] {'loss': 1.1533, 'learning_rate': 4.547642009241565e-06, 'epoch': 0.22} + 22%|██▏ | 1541/7045 [5:00:32<17:34:33, 11.50s/it] 22%|██▏ | 1542/7045 [5:00:43<17:20:03, 11.34s/it] {'loss': 1.1357, 'learning_rate': 4.546982356740549e-06, 'epoch': 0.22} + 22%|██▏ | 1542/7045 [5:00:43<17:20:03, 11.34s/it] 22%|██▏ | 1543/7045 [5:00:54<17:12:57, 11.26s/it] {'loss': 1.1152, 'learning_rate': 4.546322271535448e-06, 'epoch': 0.22} + 22%|██▏ | 1543/7045 [5:00:54<17:12:57, 11.26s/it] 22%|██▏ | 1544/7045 [5:01:08<18:26:06, 12.06s/it] {'loss': 1.1519, 'learning_rate': 4.545661753765794e-06, 'epoch': 0.22} + 22%|██▏ | 1544/7045 [5:01:08<18:26:06, 12.06s/it] 22%|██▏ | 1545/7045 [5:01:20<18:03:49, 11.82s/it] {'loss': 1.1787, 'learning_rate': 4.545000803571212e-06, 'epoch': 0.22} + 22%|██▏ | 1545/7045 [5:01:20<18:03:49, 11.82s/it] 22%|██▏ | 1546/7045 [5:01:31<17:46:46, 11.64s/it] {'loss': 1.1152, 'learning_rate': 4.544339421091419e-06, 'epoch': 0.22} + 22%|██▏ | 1546/7045 [5:01:31<17:46:46, 11.64s/it] 22%|██▏ | 1547/7045 [5:01:44<18:26:45, 12.08s/it] {'loss': 1.1484, 'learning_rate': 4.54367760646622e-06, 'epoch': 0.22} + 22%|██▏ | 1547/7045 [5:01:44<18:26:45, 12.08s/it] 22%|██▏ | 1548/7045 [5:01:56<18:24:03, 12.05s/it] {'loss': 1.1572, 'learning_rate': 4.543015359835514e-06, 'epoch': 0.22} + 22%|██▏ | 1548/7045 [5:01:56<18:24:03, 12.05s/it] 22%|██▏ | 1549/7045 [5:02:07<18:02:50, 11.82s/it] {'loss': 1.1562, 'learning_rate': 4.542352681339292e-06, 'epoch': 0.22} + 22%|██▏ | 1549/7045 [5:02:07<18:02:50, 11.82s/it] 22%|██▏ | 1550/7045 [5:02:19<18:06:45, 11.87s/it] {'loss': 1.105, 'learning_rate': 4.5416895711176356e-06, 'epoch': 0.22} + 22%|██▏ | 1550/7045 [5:02:19<18:06:45, 11.87s/it] 22%|██▏ | 1551/7045 [5:02:30<17:49:48, 11.68s/it] {'loss': 1.1406, 'learning_rate': 4.541026029310714e-06, 'epoch': 0.22} + 22%|██▏ | 1551/7045 [5:02:30<17:49:48, 11.68s/it] 22%|██▏ | 1552/7045 [5:02:44<18:40:14, 12.24s/it] {'loss': 1.1279, 'learning_rate': 4.540362056058796e-06, 'epoch': 0.22} + 22%|██▏ | 1552/7045 [5:02:44<18:40:14, 12.24s/it] 22%|██▏ | 1553/7045 [5:02:55<18:07:56, 11.89s/it] {'loss': 1.0957, 'learning_rate': 4.539697651502231e-06, 'epoch': 0.22} + 22%|██▏ | 1553/7045 [5:02:55<18:07:56, 11.89s/it] 22%|██▏ | 1554/7045 [5:03:06<17:44:38, 11.63s/it] {'loss': 1.1445, 'learning_rate': 4.5390328157814686e-06, 'epoch': 0.22} + 22%|██▏ | 1554/7045 [5:03:06<17:44:38, 11.63s/it] 22%|██▏ | 1555/7045 [5:03:17<17:28:36, 11.46s/it] {'loss': 1.1572, 'learning_rate': 4.5383675490370445e-06, 'epoch': 0.22} + 22%|██▏ | 1555/7045 [5:03:17<17:28:36, 11.46s/it] 22%|██▏ | 1556/7045 [5:03:28<17:12:21, 11.28s/it] {'loss': 1.1475, 'learning_rate': 4.5377018514095885e-06, 'epoch': 0.22} + 22%|██▏ | 1556/7045 [5:03:28<17:12:21, 11.28s/it] 22%|██▏ | 1557/7045 [5:03:40<17:37:32, 11.56s/it] {'loss': 1.1318, 'learning_rate': 4.537035723039819e-06, 'epoch': 0.22} + 22%|██▏ | 1557/7045 [5:03:40<17:37:32, 11.56s/it] 22%|██▏ | 1558/7045 [5:03:52<17:39:59, 11.59s/it] {'loss': 1.1328, 'learning_rate': 4.536369164068546e-06, 'epoch': 0.22} + 22%|██▏ | 1558/7045 [5:03:52<17:39:59, 11.59s/it] 22%|██▏ | 1559/7045 [5:04:06<18:38:02, 12.23s/it] {'loss': 1.1123, 'learning_rate': 4.535702174636671e-06, 'epoch': 0.22} + 22%|██▏ | 1559/7045 [5:04:06<18:38:02, 12.23s/it] 22%|██▏ | 1560/7045 [5:04:17<18:12:02, 11.95s/it] {'loss': 1.0996, 'learning_rate': 4.535034754885187e-06, 'epoch': 0.22} + 22%|██▏ | 1560/7045 [5:04:17<18:12:02, 11.95s/it] 22%|██▏ | 1561/7045 [5:04:28<17:46:50, 11.67s/it] {'loss': 1.1211, 'learning_rate': 4.5343669049551766e-06, 'epoch': 0.22} + 22%|██▏ | 1561/7045 [5:04:28<17:46:50, 11.67s/it] 22%|██▏ | 1562/7045 [5:04:40<17:45:01, 11.65s/it] {'loss': 1.1533, 'learning_rate': 4.533698624987815e-06, 'epoch': 0.22} + 22%|██▏ | 1562/7045 [5:04:40<17:45:01, 11.65s/it] 22%|██▏ | 1563/7045 [5:04:50<17:24:52, 11.44s/it] {'loss': 1.1055, 'learning_rate': 4.533029915124368e-06, 'epoch': 0.22} + 22%|██▏ | 1563/7045 [5:04:50<17:24:52, 11.44s/it] 22%|██▏ | 1564/7045 [5:05:01<17:14:33, 11.33s/it] {'loss': 1.1338, 'learning_rate': 4.532360775506191e-06, 'epoch': 0.22} + 22%|██▏ | 1564/7045 [5:05:01<17:14:33, 11.33s/it] 22%|██▏ | 1565/7045 [5:05:13<17:11:36, 11.29s/it] {'loss': 1.1494, 'learning_rate': 4.53169120627473e-06, 'epoch': 0.22} + 22%|██▏ | 1565/7045 [5:05:13<17:11:36, 11.29s/it] 22%|██▏ | 1566/7045 [5:05:24<17:01:56, 11.19s/it] {'loss': 1.1445, 'learning_rate': 4.531021207571524e-06, 'epoch': 0.22} + 22%|██▏ | 1566/7045 [5:05:24<17:01:56, 11.19s/it] 22%|██▏ | 1567/7045 [5:05:35<16:58:37, 11.16s/it] {'loss': 1.1152, 'learning_rate': 4.5303507795382015e-06, 'epoch': 0.22} + 22%|██▏ | 1567/7045 [5:05:35<16:58:37, 11.16s/it] 22%|██▏ | 1568/7045 [5:05:46<16:59:00, 11.16s/it] {'loss': 1.1445, 'learning_rate': 4.529679922316481e-06, 'epoch': 0.22} + 22%|██▏ | 1568/7045 [5:05:46<16:59:00, 11.16s/it] 22%|██▏ | 1569/7045 [5:05:57<17:01:45, 11.20s/it] {'loss': 1.1494, 'learning_rate': 4.529008636048173e-06, 'epoch': 0.22} + 22%|██▏ | 1569/7045 [5:05:57<17:01:45, 11.20s/it] 22%|██▏ | 1570/7045 [5:06:09<17:08:08, 11.27s/it] {'loss': 1.1299, 'learning_rate': 4.5283369208751794e-06, 'epoch': 0.22} + 22%|██▏ | 1570/7045 [5:06:09<17:08:08, 11.27s/it] 22%|██▏ | 1571/7045 [5:06:20<17:07:19, 11.26s/it] {'loss': 1.1523, 'learning_rate': 4.52766477693949e-06, 'epoch': 0.22} + 22%|██▏ | 1571/7045 [5:06:20<17:07:19, 11.26s/it] 22%|██▏ | 1572/7045 [5:06:31<17:02:43, 11.21s/it] {'loss': 1.168, 'learning_rate': 4.526992204383187e-06, 'epoch': 0.22} + 22%|██▏ | 1572/7045 [5:06:31<17:02:43, 11.21s/it] 22%|██▏ | 1573/7045 [5:06:43<17:23:27, 11.44s/it] {'loss': 1.1484, 'learning_rate': 4.5263192033484444e-06, 'epoch': 0.22} + 22%|██▏ | 1573/7045 [5:06:43<17:23:27, 11.44s/it] 22%|██▏ | 1574/7045 [5:06:55<17:38:12, 11.61s/it] {'loss': 1.0923, 'learning_rate': 4.525645773977524e-06, 'epoch': 0.22} + 22%|██▏ | 1574/7045 [5:06:55<17:38:12, 11.61s/it] 22%|██▏ | 1575/7045 [5:07:06<17:20:57, 11.42s/it] {'loss': 1.1348, 'learning_rate': 4.5249719164127796e-06, 'epoch': 0.22} + 22%|██▏ | 1575/7045 [5:07:06<17:20:57, 11.42s/it] 22%|██▏ | 1576/7045 [5:07:18<17:48:31, 11.72s/it] {'loss': 1.1455, 'learning_rate': 4.524297630796656e-06, 'epoch': 0.22} + 22%|██▏ | 1576/7045 [5:07:18<17:48:31, 11.72s/it] 22%|██▏ | 1577/7045 [5:07:32<18:50:58, 12.41s/it] {'loss': 1.0938, 'learning_rate': 4.5236229172716886e-06, 'epoch': 0.22} + 22%|██▏ | 1577/7045 [5:07:32<18:50:58, 12.41s/it] 22%|██▏ | 1578/7045 [5:07:43<18:11:55, 11.98s/it] {'loss': 1.1221, 'learning_rate': 4.5229477759805005e-06, 'epoch': 0.22} + 22%|██▏ | 1578/7045 [5:07:43<18:11:55, 11.98s/it] 22%|██▏ | 1579/7045 [5:07:55<18:10:12, 11.97s/it] {'loss': 1.124, 'learning_rate': 4.52227220706581e-06, 'epoch': 0.22} + 22%|██▏ | 1579/7045 [5:07:55<18:10:12, 11.97s/it] 22%|██▏ | 1580/7045 [5:08:06<17:42:52, 11.67s/it] {'loss': 1.1543, 'learning_rate': 4.521596210670421e-06, 'epoch': 0.22} + 22%|██▏ | 1580/7045 [5:08:06<17:42:52, 11.67s/it] 22%|██▏ | 1581/7045 [5:08:18<17:32:08, 11.55s/it] {'loss': 1.1396, 'learning_rate': 4.5209197869372316e-06, 'epoch': 0.22} + 22%|██▏ | 1581/7045 [5:08:18<17:32:08, 11.55s/it] 22%|██▏ | 1582/7045 [5:08:29<17:22:33, 11.45s/it] {'loss': 1.1338, 'learning_rate': 4.520242936009226e-06, 'epoch': 0.22} + 22%|██▏ | 1582/7045 [5:08:29<17:22:33, 11.45s/it] 22%|██▏ | 1583/7045 [5:08:41<17:48:58, 11.74s/it] {'loss': 1.106, 'learning_rate': 4.519565658029485e-06, 'epoch': 0.22} + 22%|██▏ | 1583/7045 [5:08:41<17:48:58, 11.74s/it] 22%|██▏ | 1584/7045 [5:08:52<17:17:53, 11.40s/it] {'loss': 1.127, 'learning_rate': 4.518887953141172e-06, 'epoch': 0.22} + 22%|██▏ | 1584/7045 [5:08:52<17:17:53, 11.40s/it] 22%|██▏ | 1585/7045 [5:09:03<17:19:37, 11.42s/it] {'loss': 1.1387, 'learning_rate': 4.518209821487547e-06, 'epoch': 0.22} + 22%|██▏ | 1585/7045 [5:09:03<17:19:37, 11.42s/it] 23%|██▎ | 1586/7045 [5:09:15<17:17:18, 11.40s/it] {'loss': 1.1133, 'learning_rate': 4.517531263211958e-06, 'epoch': 0.23} + 23%|██▎ | 1586/7045 [5:09:15<17:17:18, 11.40s/it] 23%|██▎ | 1587/7045 [5:09:26<17:11:28, 11.34s/it] {'loss': 1.1768, 'learning_rate': 4.516852278457841e-06, 'epoch': 0.23} + 23%|██▎ | 1587/7045 [5:09:26<17:11:28, 11.34s/it] 23%|██▎ | 1588/7045 [5:09:37<17:08:13, 11.31s/it] {'loss': 1.1318, 'learning_rate': 4.516172867368725e-06, 'epoch': 0.23} + 23%|██▎ | 1588/7045 [5:09:37<17:08:13, 11.31s/it] 23%|██▎ | 1589/7045 [5:09:48<16:59:46, 11.21s/it] {'loss': 1.1465, 'learning_rate': 4.515493030088229e-06, 'epoch': 0.23} + 23%|██▎ | 1589/7045 [5:09:48<16:59:46, 11.21s/it] 23%|██▎ | 1590/7045 [5:09:59<16:54:05, 11.15s/it] {'loss': 1.124, 'learning_rate': 4.514812766760061e-06, 'epoch': 0.23} + 23%|██▎ | 1590/7045 [5:09:59<16:54:05, 11.15s/it] 23%|██▎ | 1591/7045 [5:10:11<17:11:35, 11.35s/it] {'loss': 1.1123, 'learning_rate': 4.514132077528018e-06, 'epoch': 0.23} + 23%|██▎ | 1591/7045 [5:10:11<17:11:35, 11.35s/it] 23%|██▎ | 1592/7045 [5:10:22<17:11:01, 11.34s/it] {'loss': 1.1514, 'learning_rate': 4.5134509625359904e-06, 'epoch': 0.23} + 23%|██▎ | 1592/7045 [5:10:22<17:11:01, 11.34s/it] 23%|██▎ | 1593/7045 [5:10:34<17:18:41, 11.43s/it] {'loss': 1.1367, 'learning_rate': 4.512769421927955e-06, 'epoch': 0.23} + 23%|██▎ | 1593/7045 [5:10:34<17:18:41, 11.43s/it] 23%|██▎ | 1594/7045 [5:10:46<17:46:25, 11.74s/it] {'loss': 1.1504, 'learning_rate': 4.512087455847981e-06, 'epoch': 0.23} + 23%|██▎ | 1594/7045 [5:10:46<17:46:25, 11.74s/it] 23%|██▎ | 1595/7045 [5:10:57<17:26:08, 11.52s/it] {'loss': 1.1074, 'learning_rate': 4.511405064440226e-06, 'epoch': 0.23} + 23%|██▎ | 1595/7045 [5:10:57<17:26:08, 11.52s/it] 23%|██▎ | 1596/7045 [5:11:09<17:18:59, 11.44s/it] {'loss': 1.1143, 'learning_rate': 4.51072224784894e-06, 'epoch': 0.23} + 23%|██▎ | 1596/7045 [5:11:09<17:18:59, 11.44s/it] 23%|██▎ | 1597/7045 [5:11:20<17:07:25, 11.32s/it] {'loss': 1.1289, 'learning_rate': 4.510039006218459e-06, 'epoch': 0.23} + 23%|██▎ | 1597/7045 [5:11:20<17:07:25, 11.32s/it] 23%|██▎ | 1598/7045 [5:11:31<16:59:59, 11.24s/it] {'loss': 1.1367, 'learning_rate': 4.509355339693212e-06, 'epoch': 0.23} + 23%|██▎ | 1598/7045 [5:11:31<16:59:59, 11.24s/it] 23%|██▎ | 1599/7045 [5:11:45<18:23:26, 12.16s/it] {'loss': 1.1509, 'learning_rate': 4.5086712484177164e-06, 'epoch': 0.23} + 23%|██▎ | 1599/7045 [5:11:45<18:23:26, 12.16s/it] 23%|██▎ | 1600/7045 [5:11:56<18:00:21, 11.90s/it] {'loss': 1.1572, 'learning_rate': 4.50798673253658e-06, 'epoch': 0.23} + 23%|██▎ | 1600/7045 [5:11:56<18:00:21, 11.90s/it] 23%|██▎ | 1601/7045 [5:12:09<18:13:43, 12.05s/it] {'loss': 1.1455, 'learning_rate': 4.5073017921945e-06, 'epoch': 0.23} + 23%|██▎ | 1601/7045 [5:12:09<18:13:43, 12.05s/it] 23%|██▎ | 1602/7045 [5:12:21<18:15:41, 12.08s/it] {'loss': 1.126, 'learning_rate': 4.506616427536262e-06, 'epoch': 0.23} + 23%|██▎ | 1602/7045 [5:12:21<18:15:41, 12.08s/it] 23%|██▎ | 1603/7045 [5:12:32<17:43:36, 11.73s/it] {'loss': 1.1338, 'learning_rate': 4.5059306387067455e-06, 'epoch': 0.23} + 23%|██▎ | 1603/7045 [5:12:32<17:43:36, 11.73s/it] 23%|██▎ | 1604/7045 [5:12:43<17:31:09, 11.59s/it] {'loss': 1.165, 'learning_rate': 4.505244425850915e-06, 'epoch': 0.23} + 23%|██▎ | 1604/7045 [5:12:43<17:31:09, 11.59s/it] 23%|██▎ | 1605/7045 [5:12:54<17:10:54, 11.37s/it] {'loss': 1.1455, 'learning_rate': 4.504557789113827e-06, 'epoch': 0.23} + 23%|██▎ | 1605/7045 [5:12:54<17:10:54, 11.37s/it] 23%|██▎ | 1606/7045 [5:13:07<18:10:03, 12.02s/it] {'loss': 1.1094, 'learning_rate': 4.503870728640628e-06, 'epoch': 0.23} + 23%|██▎ | 1606/7045 [5:13:07<18:10:03, 12.02s/it] 23%|██▎ | 1607/7045 [5:13:20<18:20:17, 12.14s/it] {'loss': 1.0869, 'learning_rate': 4.503183244576551e-06, 'epoch': 0.23} + 23%|██▎ | 1607/7045 [5:13:20<18:20:17, 12.14s/it] 23%|██▎ | 1608/7045 [5:13:31<17:51:57, 11.83s/it] {'loss': 1.1152, 'learning_rate': 4.502495337066923e-06, 'epoch': 0.23} + 23%|██▎ | 1608/7045 [5:13:31<17:51:57, 11.83s/it] 23%|██▎ | 1609/7045 [5:13:44<18:32:16, 12.28s/it] {'loss': 1.1289, 'learning_rate': 4.5018070062571575e-06, 'epoch': 0.23} + 23%|██▎ | 1609/7045 [5:13:44<18:32:16, 12.28s/it] 23%|██▎ | 1610/7045 [5:13:55<18:00:53, 11.93s/it] {'loss': 1.1021, 'learning_rate': 4.501118252292756e-06, 'epoch': 0.23} + 23%|██▎ | 1610/7045 [5:13:55<18:00:53, 11.93s/it] 23%|██▎ | 1611/7045 [5:14:07<17:44:01, 11.75s/it] {'loss': 1.1602, 'learning_rate': 4.500429075319316e-06, 'epoch': 0.23} + 23%|██▎ | 1611/7045 [5:14:07<17:44:01, 11.75s/it] 23%|██▎ | 1612/7045 [5:14:19<17:49:52, 11.82s/it] {'loss': 1.125, 'learning_rate': 4.499739475482516e-06, 'epoch': 0.23} + 23%|██▎ | 1612/7045 [5:14:19<17:49:52, 11.82s/it] 23%|██▎ | 1613/7045 [5:14:30<17:43:07, 11.74s/it] {'loss': 1.1279, 'learning_rate': 4.499049452928131e-06, 'epoch': 0.23} + 23%|██▎ | 1613/7045 [5:14:30<17:43:07, 11.74s/it] 23%|██▎ | 1614/7045 [5:14:42<17:31:46, 11.62s/it] {'loss': 1.1328, 'learning_rate': 4.498359007802021e-06, 'epoch': 0.23} + 23%|██▎ | 1614/7045 [5:14:42<17:31:46, 11.62s/it] 23%|██▎ | 1615/7045 [5:14:53<17:18:00, 11.47s/it] {'loss': 1.1787, 'learning_rate': 4.497668140250137e-06, 'epoch': 0.23} + 23%|██▎ | 1615/7045 [5:14:53<17:18:00, 11.47s/it] 23%|██▎ | 1616/7045 [5:15:05<17:43:56, 11.76s/it] {'loss': 1.1196, 'learning_rate': 4.496976850418517e-06, 'epoch': 0.23} + 23%|██▎ | 1616/7045 [5:15:05<17:43:56, 11.76s/it] 23%|██▎ | 1617/7045 [5:15:17<17:40:27, 11.72s/it] {'loss': 1.1182, 'learning_rate': 4.496285138453294e-06, 'epoch': 0.23} + 23%|██▎ | 1617/7045 [5:15:17<17:40:27, 11.72s/it] 23%|██▎ | 1618/7045 [5:15:28<17:29:51, 11.61s/it] {'loss': 1.1602, 'learning_rate': 4.495593004500683e-06, 'epoch': 0.23} + 23%|██▎ | 1618/7045 [5:15:28<17:29:51, 11.61s/it] 23%|██▎ | 1619/7045 [5:15:39<17:16:53, 11.47s/it] {'loss': 1.1104, 'learning_rate': 4.494900448706994e-06, 'epoch': 0.23} + 23%|██▎ | 1619/7045 [5:15:39<17:16:53, 11.47s/it] 23%|██▎ | 1620/7045 [5:15:51<17:34:03, 11.66s/it] {'loss': 1.1479, 'learning_rate': 4.4942074712186215e-06, 'epoch': 0.23} + 23%|██▎ | 1620/7045 [5:15:51<17:34:03, 11.66s/it] 23%|██▎ | 1621/7045 [5:16:02<17:20:13, 11.51s/it] {'loss': 1.1475, 'learning_rate': 4.493514072182053e-06, 'epoch': 0.23} + 23%|██▎ | 1621/7045 [5:16:02<17:20:13, 11.51s/it] 23%|██▎ | 1622/7045 [5:16:15<17:48:30, 11.82s/it] {'loss': 1.1162, 'learning_rate': 4.4928202517438655e-06, 'epoch': 0.23} + 23%|██▎ | 1622/7045 [5:16:15<17:48:30, 11.82s/it] 23%|██▎ | 1623/7045 [5:16:27<17:41:38, 11.75s/it] {'loss': 1.1357, 'learning_rate': 4.492126010050719e-06, 'epoch': 0.23} + 23%|██▎ | 1623/7045 [5:16:27<17:41:38, 11.75s/it] 23%|██▎ | 1624/7045 [5:16:38<17:38:07, 11.71s/it] {'loss': 1.165, 'learning_rate': 4.49143134724937e-06, 'epoch': 0.23} + 23%|██▎ | 1624/7045 [5:16:38<17:38:07, 11.71s/it] 23%|██▎ | 1625/7045 [5:16:51<18:03:54, 12.00s/it] {'loss': 1.1279, 'learning_rate': 4.490736263486658e-06, 'epoch': 0.23} + 23%|██▎ | 1625/7045 [5:16:51<18:03:54, 12.00s/it] 23%|██▎ | 1626/7045 [5:17:02<17:43:23, 11.77s/it] {'loss': 1.1729, 'learning_rate': 4.490040758909517e-06, 'epoch': 0.23} + 23%|██▎ | 1626/7045 [5:17:02<17:43:23, 11.77s/it] 23%|██▎ | 1627/7045 [5:17:15<18:12:19, 12.10s/it] {'loss': 1.1143, 'learning_rate': 4.489344833664965e-06, 'epoch': 0.23} + 23%|██▎ | 1627/7045 [5:17:15<18:12:19, 12.10s/it] 23%|██▎ | 1628/7045 [5:17:26<17:49:46, 11.85s/it] {'loss': 1.1348, 'learning_rate': 4.488648487900112e-06, 'epoch': 0.23} + 23%|██▎ | 1628/7045 [5:17:26<17:49:46, 11.85s/it] 23%|██▎ | 1629/7045 [5:17:37<17:30:03, 11.63s/it] {'loss': 1.1152, 'learning_rate': 4.487951721762156e-06, 'epoch': 0.23} + 23%|██▎ | 1629/7045 [5:17:37<17:30:03, 11.63s/it] 23%|██▎ | 1630/7045 [5:17:49<17:17:42, 11.50s/it] {'loss': 1.1035, 'learning_rate': 4.487254535398384e-06, 'epoch': 0.23} + 23%|██▎ | 1630/7045 [5:17:49<17:17:42, 11.50s/it] 23%|██▎ | 1631/7045 [5:18:00<17:13:57, 11.46s/it] {'loss': 1.1396, 'learning_rate': 4.4865569289561714e-06, 'epoch': 0.23} + 23%|██▎ | 1631/7045 [5:18:00<17:13:57, 11.46s/it] 23%|██▎ | 1632/7045 [5:18:12<17:22:14, 11.55s/it] {'loss': 1.127, 'learning_rate': 4.485858902582982e-06, 'epoch': 0.23} + 23%|██▎ | 1632/7045 [5:18:12<17:22:14, 11.55s/it] 23%|██▎ | 1633/7045 [5:18:23<17:13:04, 11.45s/it] {'loss': 1.165, 'learning_rate': 4.4851604564263705e-06, 'epoch': 0.23} + 23%|██▎ | 1633/7045 [5:18:23<17:13:04, 11.45s/it] 23%|██▎ | 1634/7045 [5:18:36<18:04:41, 12.03s/it] {'loss': 1.1367, 'learning_rate': 4.484461590633978e-06, 'epoch': 0.23} + 23%|██▎ | 1634/7045 [5:18:36<18:04:41, 12.03s/it] 23%|██▎ | 1635/7045 [5:18:48<17:43:44, 11.80s/it] {'loss': 1.1157, 'learning_rate': 4.483762305353535e-06, 'epoch': 0.23} + 23%|██▎ | 1635/7045 [5:18:48<17:43:44, 11.80s/it] 23%|██▎ | 1636/7045 [5:18:59<17:43:21, 11.80s/it] {'loss': 1.1045, 'learning_rate': 4.483062600732861e-06, 'epoch': 0.23} + 23%|██▎ | 1636/7045 [5:18:59<17:43:21, 11.80s/it] 23%|██▎ | 1637/7045 [5:19:10<17:22:36, 11.57s/it] {'loss': 1.1377, 'learning_rate': 4.482362476919864e-06, 'epoch': 0.23} + 23%|██▎ | 1637/7045 [5:19:10<17:22:36, 11.57s/it] 23%|██▎ | 1638/7045 [5:19:22<17:24:24, 11.59s/it] {'loss': 1.1406, 'learning_rate': 4.4816619340625415e-06, 'epoch': 0.23} + 23%|██▎ | 1638/7045 [5:19:22<17:24:24, 11.59s/it] 23%|██▎ | 1639/7045 [5:19:34<17:29:02, 11.64s/it] {'loss': 1.1221, 'learning_rate': 4.480960972308977e-06, 'epoch': 0.23} + 23%|██▎ | 1639/7045 [5:19:34<17:29:02, 11.64s/it] 23%|██▎ | 1640/7045 [5:19:47<18:10:01, 12.10s/it] {'loss': 1.064, 'learning_rate': 4.480259591807344e-06, 'epoch': 0.23} + 23%|██▎ | 1640/7045 [5:19:47<18:10:01, 12.10s/it] 23%|██▎ | 1641/7045 [5:19:59<17:55:37, 11.94s/it] {'loss': 1.1094, 'learning_rate': 4.4795577927059076e-06, 'epoch': 0.23} + 23%|██▎ | 1641/7045 [5:19:59<17:55:37, 11.94s/it] 23%|██▎ | 1642/7045 [5:20:10<17:33:05, 11.69s/it] {'loss': 1.1719, 'learning_rate': 4.478855575153016e-06, 'epoch': 0.23} + 23%|██▎ | 1642/7045 [5:20:10<17:33:05, 11.69s/it] 23%|██▎ | 1643/7045 [5:20:21<17:20:49, 11.56s/it] {'loss': 1.1338, 'learning_rate': 4.47815293929711e-06, 'epoch': 0.23} + 23%|██▎ | 1643/7045 [5:20:21<17:20:49, 11.56s/it] 23%|██▎ | 1644/7045 [5:20:32<17:05:54, 11.40s/it] {'loss': 1.1328, 'learning_rate': 4.477449885286716e-06, 'epoch': 0.23} + 23%|██▎ | 1644/7045 [5:20:32<17:05:54, 11.40s/it] 23%|██▎ | 1645/7045 [5:20:43<16:56:08, 11.29s/it] {'loss': 1.1318, 'learning_rate': 4.4767464132704495e-06, 'epoch': 0.23} + 23%|██▎ | 1645/7045 [5:20:43<16:56:08, 11.29s/it] 23%|██▎ | 1646/7045 [5:20:54<17:00:49, 11.34s/it] {'loss': 1.1123, 'learning_rate': 4.476042523397017e-06, 'epoch': 0.23} + 23%|██▎ | 1646/7045 [5:20:54<17:00:49, 11.34s/it] 23%|██▎ | 1647/7045 [5:21:08<18:07:49, 12.09s/it] {'loss': 1.1084, 'learning_rate': 4.4753382158152095e-06, 'epoch': 0.23} + 23%|██▎ | 1647/7045 [5:21:08<18:07:49, 12.09s/it] 23%|██▎ | 1648/7045 [5:21:19<17:37:50, 11.76s/it] {'loss': 1.1377, 'learning_rate': 4.474633490673908e-06, 'epoch': 0.23} + 23%|██▎ | 1648/7045 [5:21:19<17:37:50, 11.76s/it] 23%|██▎ | 1649/7045 [5:21:32<17:55:22, 11.96s/it] {'loss': 1.0747, 'learning_rate': 4.473928348122083e-06, 'epoch': 0.23} + 23%|██▎ | 1649/7045 [5:21:32<17:55:22, 11.96s/it] 23%|██▎ | 1650/7045 [5:21:43<17:34:37, 11.73s/it] {'loss': 1.1382, 'learning_rate': 4.473222788308791e-06, 'epoch': 0.23} + 23%|██▎ | 1650/7045 [5:21:43<17:34:37, 11.73s/it] 23%|██▎ | 1651/7045 [5:21:55<17:56:42, 11.98s/it] {'loss': 1.1504, 'learning_rate': 4.4725168113831774e-06, 'epoch': 0.23} + 23%|██▎ | 1651/7045 [5:21:55<17:56:42, 11.98s/it] 23%|██▎ | 1652/7045 [5:22:09<18:45:42, 12.52s/it] {'loss': 1.1206, 'learning_rate': 4.471810417494477e-06, 'epoch': 0.23} + 23%|██▎ | 1652/7045 [5:22:09<18:45:42, 12.52s/it] 23%|██▎ | 1653/7045 [5:22:20<18:08:24, 12.11s/it] {'loss': 1.0947, 'learning_rate': 4.471103606792011e-06, 'epoch': 0.23} + 23%|██▎ | 1653/7045 [5:22:20<18:08:24, 12.11s/it] 23%|██▎ | 1654/7045 [5:22:33<18:13:31, 12.17s/it] {'loss': 1.0713, 'learning_rate': 4.4703963794251895e-06, 'epoch': 0.23} + 23%|██▎ | 1654/7045 [5:22:33<18:13:31, 12.17s/it] 23%|██▎ | 1655/7045 [5:22:44<17:40:02, 11.80s/it] {'loss': 1.1328, 'learning_rate': 4.469688735543512e-06, 'epoch': 0.23} + 23%|██▎ | 1655/7045 [5:22:44<17:40:02, 11.80s/it] 24%|██▎ | 1656/7045 [5:22:55<17:33:18, 11.73s/it] {'loss': 1.1299, 'learning_rate': 4.468980675296563e-06, 'epoch': 0.24} + 24%|██▎ | 1656/7045 [5:22:55<17:33:18, 11.73s/it] 24%|██▎ | 1657/7045 [5:23:06<17:17:21, 11.55s/it] {'loss': 1.1455, 'learning_rate': 4.4682721988340185e-06, 'epoch': 0.24} + 24%|██▎ | 1657/7045 [5:23:06<17:17:21, 11.55s/it] 24%|██▎ | 1658/7045 [5:23:19<17:46:26, 11.88s/it] {'loss': 1.0786, 'learning_rate': 4.46756330630564e-06, 'epoch': 0.24} + 24%|██▎ | 1658/7045 [5:23:19<17:46:26, 11.88s/it] 24%|██▎ | 1659/7045 [5:23:31<18:02:14, 12.06s/it] {'loss': 1.1201, 'learning_rate': 4.466853997861277e-06, 'epoch': 0.24} + 24%|██▎ | 1659/7045 [5:23:31<18:02:14, 12.06s/it] 24%|██▎ | 1660/7045 [5:23:44<18:17:30, 12.23s/it] {'loss': 1.147, 'learning_rate': 4.466144273650867e-06, 'epoch': 0.24} + 24%|██▎ | 1660/7045 [5:23:44<18:17:30, 12.23s/it] 24%|██▎ | 1661/7045 [5:23:55<17:52:24, 11.95s/it] {'loss': 1.125, 'learning_rate': 4.465434133824439e-06, 'epoch': 0.24} + 24%|██▎ | 1661/7045 [5:23:55<17:52:24, 11.95s/it] 24%|██▎ | 1662/7045 [5:24:06<17:27:21, 11.67s/it] {'loss': 1.1396, 'learning_rate': 4.464723578532104e-06, 'epoch': 0.24} + 24%|██▎ | 1662/7045 [5:24:06<17:27:21, 11.67s/it] 24%|██▎ | 1663/7045 [5:24:18<17:28:01, 11.68s/it] {'loss': 1.1035, 'learning_rate': 4.464012607924065e-06, 'epoch': 0.24} + 24%|██▎ | 1663/7045 [5:24:18<17:28:01, 11.68s/it] 24%|██▎ | 1664/7045 [5:24:29<17:10:05, 11.49s/it] {'loss': 1.1514, 'learning_rate': 4.4633012221506114e-06, 'epoch': 0.24} + 24%|██▎ | 1664/7045 [5:24:29<17:10:05, 11.49s/it] 24%|██▎ | 1665/7045 [5:24:41<17:10:40, 11.49s/it] {'loss': 1.1631, 'learning_rate': 4.462589421362119e-06, 'epoch': 0.24} + 24%|██▎ | 1665/7045 [5:24:41<17:10:40, 11.49s/it] 24%|██▎ | 1666/7045 [5:24:52<17:00:08, 11.38s/it] {'loss': 1.0996, 'learning_rate': 4.461877205709055e-06, 'epoch': 0.24} + 24%|██▎ | 1666/7045 [5:24:52<17:00:08, 11.38s/it] 24%|██▎ | 1667/7045 [5:25:04<17:32:13, 11.74s/it] {'loss': 1.1074, 'learning_rate': 4.461164575341971e-06, 'epoch': 0.24} + 24%|██▎ | 1667/7045 [5:25:04<17:32:13, 11.74s/it] 24%|██▎ | 1668/7045 [5:25:15<17:13:19, 11.53s/it] {'loss': 1.1055, 'learning_rate': 4.460451530411507e-06, 'epoch': 0.24} + 24%|██▎ | 1668/7045 [5:25:15<17:13:19, 11.53s/it] 24%|██▎ | 1669/7045 [5:25:26<17:00:04, 11.38s/it] {'loss': 1.1055, 'learning_rate': 4.459738071068391e-06, 'epoch': 0.24} + 24%|██▎ | 1669/7045 [5:25:26<17:00:04, 11.38s/it] 24%|██▎ | 1670/7045 [5:25:38<16:52:48, 11.31s/it] {'loss': 1.1641, 'learning_rate': 4.459024197463438e-06, 'epoch': 0.24} + 24%|██▎ | 1670/7045 [5:25:38<16:52:48, 11.31s/it] 24%|██▎ | 1671/7045 [5:25:49<17:00:13, 11.39s/it] {'loss': 1.1553, 'learning_rate': 4.458309909747554e-06, 'epoch': 0.24} + 24%|██▎ | 1671/7045 [5:25:49<17:00:13, 11.39s/it] 24%|██▎ | 1672/7045 [5:26:02<17:48:30, 11.93s/it] {'loss': 1.0732, 'learning_rate': 4.457595208071726e-06, 'epoch': 0.24} + 24%|██▎ | 1672/7045 [5:26:02<17:48:30, 11.93s/it] 24%|██▎ | 1673/7045 [5:26:15<17:58:30, 12.05s/it] {'loss': 1.0825, 'learning_rate': 4.4568800925870335e-06, 'epoch': 0.24} + 24%|██▎ | 1673/7045 [5:26:15<17:58:30, 12.05s/it] 24%|██▍ | 1674/7045 [5:26:26<17:38:25, 11.82s/it] {'loss': 1.1348, 'learning_rate': 4.456164563444643e-06, 'epoch': 0.24} + 24%|██▍ | 1674/7045 [5:26:26<17:38:25, 11.82s/it] 24%|██▍ | 1675/7045 [5:26:43<19:57:07, 13.38s/it] {'loss': 1.1602, 'learning_rate': 4.455448620795807e-06, 'epoch': 0.24} + 24%|██▍ | 1675/7045 [5:26:43<19:57:07, 13.38s/it] 24%|██▍ | 1676/7045 [5:26:57<20:09:58, 13.52s/it] {'loss': 1.1304, 'learning_rate': 4.454732264791866e-06, 'epoch': 0.24} + 24%|██▍ | 1676/7045 [5:26:57<20:09:58, 13.52s/it] 24%|██▍ | 1677/7045 [5:27:08<19:05:11, 12.80s/it] {'loss': 1.0806, 'learning_rate': 4.454015495584247e-06, 'epoch': 0.24} + 24%|██▍ | 1677/7045 [5:27:08<19:05:11, 12.80s/it] 24%|██▍ | 1678/7045 [5:27:19<18:13:02, 12.22s/it] {'loss': 1.1523, 'learning_rate': 4.4532983133244665e-06, 'epoch': 0.24} + 24%|██▍ | 1678/7045 [5:27:19<18:13:02, 12.22s/it] 24%|██▍ | 1679/7045 [5:27:30<17:36:16, 11.81s/it] {'loss': 1.1279, 'learning_rate': 4.452580718164127e-06, 'epoch': 0.24} + 24%|██▍ | 1679/7045 [5:27:30<17:36:16, 11.81s/it] 24%|██▍ | 1680/7045 [5:27:41<17:13:02, 11.55s/it] {'loss': 1.1357, 'learning_rate': 4.451862710254916e-06, 'epoch': 0.24} + 24%|██▍ | 1680/7045 [5:27:41<17:13:02, 11.55s/it] 24%|██▍ | 1681/7045 [5:27:51<16:53:26, 11.34s/it] {'loss': 1.1113, 'learning_rate': 4.451144289748614e-06, 'epoch': 0.24} + 24%|██▍ | 1681/7045 [5:27:51<16:53:26, 11.34s/it] 24%|██▍ | 1682/7045 [5:28:02<16:43:40, 11.23s/it] {'loss': 1.1592, 'learning_rate': 4.450425456797084e-06, 'epoch': 0.24} + 24%|██▍ | 1682/7045 [5:28:02<16:43:40, 11.23s/it] 24%|██▍ | 1683/7045 [5:28:15<17:24:32, 11.69s/it] {'loss': 1.0713, 'learning_rate': 4.449706211552276e-06, 'epoch': 0.24} + 24%|██▍ | 1683/7045 [5:28:15<17:24:32, 11.69s/it] 24%|██▍ | 1684/7045 [5:28:26<17:04:37, 11.47s/it] {'loss': 1.1396, 'learning_rate': 4.4489865541662305e-06, 'epoch': 0.24} + 24%|██▍ | 1684/7045 [5:28:26<17:04:37, 11.47s/it] 24%|██▍ | 1685/7045 [5:28:37<17:00:16, 11.42s/it] {'loss': 1.1367, 'learning_rate': 4.448266484791071e-06, 'epoch': 0.24} + 24%|██▍ | 1685/7045 [5:28:37<17:00:16, 11.42s/it] 24%|██▍ | 1686/7045 [5:28:49<16:58:34, 11.40s/it] {'loss': 1.1406, 'learning_rate': 4.447546003579014e-06, 'epoch': 0.24} + 24%|██▍ | 1686/7045 [5:28:49<16:58:34, 11.40s/it] 24%|██▍ | 1687/7045 [5:29:02<17:45:55, 11.94s/it] {'loss': 1.1514, 'learning_rate': 4.446825110682356e-06, 'epoch': 0.24} + 24%|██▍ | 1687/7045 [5:29:02<17:45:55, 11.94s/it] 24%|██▍ | 1688/7045 [5:29:14<17:57:09, 12.06s/it] {'loss': 1.0796, 'learning_rate': 4.446103806253485e-06, 'epoch': 0.24} + 24%|██▍ | 1688/7045 [5:29:14<17:57:09, 12.06s/it] 24%|██▍ | 1689/7045 [5:29:25<17:33:49, 11.81s/it] {'loss': 1.165, 'learning_rate': 4.445382090444875e-06, 'epoch': 0.24} + 24%|██▍ | 1689/7045 [5:29:26<17:33:49, 11.81s/it] 24%|██▍ | 1690/7045 [5:29:38<17:49:11, 11.98s/it] {'loss': 1.124, 'learning_rate': 4.444659963409087e-06, 'epoch': 0.24} + 24%|██▍ | 1690/7045 [5:29:38<17:49:11, 11.98s/it] 24%|██▍ | 1691/7045 [5:29:49<17:33:06, 11.80s/it] {'loss': 1.1475, 'learning_rate': 4.4439374252987685e-06, 'epoch': 0.24} + 24%|██▍ | 1691/7045 [5:29:49<17:33:06, 11.80s/it] 24%|██▍ | 1692/7045 [5:30:01<17:22:52, 11.69s/it] {'loss': 1.1396, 'learning_rate': 4.443214476266655e-06, 'epoch': 0.24} + 24%|██▍ | 1692/7045 [5:30:01<17:22:52, 11.69s/it] 24%|██▍ | 1693/7045 [5:30:13<17:27:41, 11.75s/it] {'loss': 1.123, 'learning_rate': 4.442491116465566e-06, 'epoch': 0.24} + 24%|██▍ | 1693/7045 [5:30:13<17:27:41, 11.75s/it] 24%|██▍ | 1694/7045 [5:30:23<17:03:58, 11.48s/it] {'loss': 1.1182, 'learning_rate': 4.4417673460484125e-06, 'epoch': 0.24} + 24%|██▍ | 1694/7045 [5:30:23<17:03:58, 11.48s/it] 24%|██▍ | 1695/7045 [5:30:35<17:04:23, 11.49s/it] {'loss': 1.2002, 'learning_rate': 4.441043165168187e-06, 'epoch': 0.24} + 24%|██▍ | 1695/7045 [5:30:35<17:04:23, 11.49s/it] 24%|██��� | 1696/7045 [5:30:47<17:13:08, 11.59s/it] {'loss': 1.1494, 'learning_rate': 4.440318573977973e-06, 'epoch': 0.24} + 24%|██▍ | 1696/7045 [5:30:47<17:13:08, 11.59s/it] 24%|██▍ | 1697/7045 [5:31:00<17:53:04, 12.04s/it] {'loss': 1.0938, 'learning_rate': 4.439593572630939e-06, 'epoch': 0.24} + 24%|██▍ | 1697/7045 [5:31:00<17:53:04, 12.04s/it]Token indices sequence length is longer than the specified maximum sequence length for this model (2896 > 2048). Running this sequence through the model will result in indexing errors + 24%|██▍ | 1698/7045 [5:31:11<17:19:34, 11.67s/it] {'loss': 1.0996, 'learning_rate': 4.438868161280342e-06, 'epoch': 0.24} + 24%|██▍ | 1698/7045 [5:31:11<17:19:34, 11.67s/it] 24%|██▍ | 1699/7045 [5:31:22<17:14:59, 11.62s/it] {'loss': 1.1377, 'learning_rate': 4.438142340079521e-06, 'epoch': 0.24} + 24%|██▍ | 1699/7045 [5:31:22<17:14:59, 11.62s/it] 24%|██▍ | 1700/7045 [5:31:33<17:01:34, 11.47s/it] {'loss': 1.124, 'learning_rate': 4.437416109181905e-06, 'epoch': 0.24} + 24%|██▍ | 1700/7045 [5:31:33<17:01:34, 11.47s/it] 24%|██▍ | 1701/7045 [5:31:44<16:54:25, 11.39s/it] {'loss': 1.1328, 'learning_rate': 4.436689468741012e-06, 'epoch': 0.24} + 24%|██▍ | 1701/7045 [5:31:44<16:54:25, 11.39s/it] 24%|██▍ | 1702/7045 [5:31:56<16:59:03, 11.44s/it] {'loss': 1.1157, 'learning_rate': 4.435962418910442e-06, 'epoch': 0.24} + 24%|██▍ | 1702/7045 [5:31:56<16:59:03, 11.44s/it] 24%|██▍ | 1703/7045 [5:32:09<17:41:10, 11.92s/it] {'loss': 1.082, 'learning_rate': 4.4352349598438834e-06, 'epoch': 0.24} + 24%|██▍ | 1703/7045 [5:32:09<17:41:10, 11.92s/it] 24%|██▍ | 1704/7045 [5:32:20<17:18:27, 11.67s/it] {'loss': 1.1377, 'learning_rate': 4.434507091695112e-06, 'epoch': 0.24} + 24%|██▍ | 1704/7045 [5:32:20<17:18:27, 11.67s/it] 24%|██▍ | 1705/7045 [5:32:33<17:54:17, 12.07s/it] {'loss': 1.127, 'learning_rate': 4.433778814617987e-06, 'epoch': 0.24} + 24%|██▍ | 1705/7045 [5:32:33<17:54:17, 12.07s/it] 24%|██▍ | 1706/7045 [5:32:45<17:39:26, 11.91s/it] {'loss': 1.1484, 'learning_rate': 4.433050128766458e-06, 'epoch': 0.24} + 24%|██▍ | 1706/7045 [5:32:45<17:39:26, 11.91s/it] 24%|██▍ | 1707/7045 [5:32:56<17:27:26, 11.77s/it] {'loss': 1.1543, 'learning_rate': 4.43232103429456e-06, 'epoch': 0.24} + 24%|██▍ | 1707/7045 [5:32:56<17:27:26, 11.77s/it] 24%|██▍ | 1708/7045 [5:33:07<17:13:26, 11.62s/it] {'loss': 1.1426, 'learning_rate': 4.431591531356412e-06, 'epoch': 0.24} + 24%|██▍ | 1708/7045 [5:33:07<17:13:26, 11.62s/it] 24%|██▍ | 1709/7045 [5:33:19<17:04:14, 11.52s/it] {'loss': 1.1348, 'learning_rate': 4.430861620106221e-06, 'epoch': 0.24} + 24%|██▍ | 1709/7045 [5:33:19<17:04:14, 11.52s/it] 24%|██▍ | 1710/7045 [5:33:30<16:58:03, 11.45s/it] {'loss': 1.1582, 'learning_rate': 4.43013130069828e-06, 'epoch': 0.24} + 24%|██▍ | 1710/7045 [5:33:30<16:58:03, 11.45s/it] 24%|██▍ | 1711/7045 [5:33:41<16:45:10, 11.31s/it] {'loss': 1.1143, 'learning_rate': 4.429400573286972e-06, 'epoch': 0.24} + 24%|██▍ | 1711/7045 [5:33:41<16:45:10, 11.31s/it] 24%|██▍ | 1712/7045 [5:33:52<16:40:34, 11.26s/it] {'loss': 1.1592, 'learning_rate': 4.428669438026757e-06, 'epoch': 0.24} + 24%|██▍ | 1712/7045 [5:33:52<16:40:34, 11.26s/it] 24%|██▍ | 1713/7045 [5:34:04<16:44:16, 11.30s/it] {'loss': 1.1172, 'learning_rate': 4.427937895072192e-06, 'epoch': 0.24} + 24%|██▍ | 1713/7045 [5:34:04<16:44:16, 11.30s/it] 24%|██▍ | 1714/7045 [5:34:15<16:56:55, 11.45s/it] {'loss': 1.1494, 'learning_rate': 4.427205944577913e-06, 'epoch': 0.24} + 24%|██▍ | 1714/7045 [5:34:15<16:56:55, 11.45s/it] 24%|██▍ | 1715/7045 [5:34:33<19:42:36, 13.31s/it] {'loss': 1.1104, 'learning_rate': 4.426473586698643e-06, 'epoch': 0.24} + 24%|██▍ | 1715/7045 [5:34:33<19:42:36, 13.31s/it] 24%|██▍ | 1716/7045 [5:34:45<19:13:21, 12.99s/it] {'loss': 1.1152, 'learning_rate': 4.425740821589196e-06, 'epoch': 0.24} + 24%|██▍ | 1716/7045 [5:34:45<19:13:21, 12.99s/it] 24%|██▍ | 1717/7045 [5:34:56<18:25:29, 12.45s/it] {'loss': 1.1689, 'learning_rate': 4.425007649404466e-06, 'epoch': 0.24} + 24%|██▍ | 1717/7045 [5:34:56<18:25:29, 12.45s/it] 24%|██▍ | 1718/7045 [5:35:08<18:16:26, 12.35s/it] {'loss': 1.1123, 'learning_rate': 4.424274070299435e-06, 'epoch': 0.24} + 24%|██▍ | 1718/7045 [5:35:08<18:16:26, 12.35s/it] 24%|██▍ | 1719/7045 [5:35:20<18:04:35, 12.22s/it] {'loss': 1.0957, 'learning_rate': 4.423540084429174e-06, 'epoch': 0.24} + 24%|██▍ | 1719/7045 [5:35:20<18:04:35, 12.22s/it] 24%|██▍ | 1720/7045 [5:35:32<17:38:57, 11.93s/it] {'loss': 1.1514, 'learning_rate': 4.422805691948835e-06, 'epoch': 0.24} + 24%|██▍ | 1720/7045 [5:35:32<17:38:57, 11.93s/it] 24%|██▍ | 1721/7045 [5:35:44<17:40:36, 11.95s/it] {'loss': 1.1113, 'learning_rate': 4.4220708930136615e-06, 'epoch': 0.24} + 24%|██▍ | 1721/7045 [5:35:44<17:40:36, 11.95s/it] 24%|██▍ | 1722/7045 [5:35:55<17:22:26, 11.75s/it] {'loss': 1.168, 'learning_rate': 4.421335687778977e-06, 'epoch': 0.24} + 24%|██▍ | 1722/7045 [5:35:55<17:22:26, 11.75s/it] 24%|██▍ | 1723/7045 [5:36:08<17:55:34, 12.13s/it] {'loss': 1.1152, 'learning_rate': 4.4206000764001955e-06, 'epoch': 0.24} + 24%|██▍ | 1723/7045 [5:36:08<17:55:34, 12.13s/it] 24%|██▍ | 1724/7045 [5:36:19<17:33:59, 11.88s/it] {'loss': 1.1309, 'learning_rate': 4.419864059032815e-06, 'epoch': 0.24} + 24%|██▍ | 1724/7045 [5:36:19<17:33:59, 11.88s/it] 24%|██▍ | 1725/7045 [5:36:30<17:07:03, 11.58s/it] {'loss': 1.1133, 'learning_rate': 4.419127635832418e-06, 'epoch': 0.24} + 24%|██▍ | 1725/7045 [5:36:30<17:07:03, 11.58s/it] 24%|██▍ | 1726/7045 [5:36:41<16:48:28, 11.38s/it] {'loss': 1.1387, 'learning_rate': 4.418390806954678e-06, 'epoch': 0.24} + 24%|██▍ | 1726/7045 [5:36:41<16:48:28, 11.38s/it] 25%|██▍ | 1727/7045 [5:36:54<17:19:36, 11.73s/it] {'loss': 1.1631, 'learning_rate': 4.417653572555346e-06, 'epoch': 0.25} + 25%|██▍ | 1727/7045 [5:36:54<17:19:36, 11.73s/it] 25%|██▍ | 1728/7045 [5:37:06<17:38:11, 11.94s/it] {'loss': 1.1118, 'learning_rate': 4.416915932790266e-06, 'epoch': 0.25} + 25%|██▍ | 1728/7045 [5:37:06<17:38:11, 11.94s/it] 25%|██▍ | 1729/7045 [5:37:17<17:16:50, 11.70s/it] {'loss': 1.0977, 'learning_rate': 4.416177887815365e-06, 'epoch': 0.25} + 25%|██▍ | 1729/7045 [5:37:17<17:16:50, 11.70s/it] 25%|██▍ | 1730/7045 [5:37:29<17:06:37, 11.59s/it] {'loss': 1.1367, 'learning_rate': 4.415439437786654e-06, 'epoch': 0.25} + 25%|██▍ | 1730/7045 [5:37:29<17:06:37, 11.59s/it] 25%|██▍ | 1731/7045 [5:37:40<17:00:37, 11.52s/it] {'loss': 1.1455, 'learning_rate': 4.414700582860233e-06, 'epoch': 0.25} + 25%|██▍ | 1731/7045 [5:37:40<17:00:37, 11.52s/it] 25%|██▍ | 1732/7045 [5:37:52<17:04:59, 11.58s/it] {'loss': 1.165, 'learning_rate': 4.413961323192285e-06, 'epoch': 0.25} + 25%|██▍ | 1732/7045 [5:37:52<17:04:59, 11.58s/it] 25%|██▍ | 1733/7045 [5:38:03<16:52:41, 11.44s/it] {'loss': 1.1475, 'learning_rate': 4.413221658939078e-06, 'epoch': 0.25} + 25%|██▍ | 1733/7045 [5:38:03<16:52:41, 11.44s/it] 25%|██▍ | 1734/7045 [5:38:15<17:17:32, 11.72s/it] {'loss': 1.1621, 'learning_rate': 4.41248159025697e-06, 'epoch': 0.25} + 25%|██▍ | 1734/7045 [5:38:15<17:17:32, 11.72s/it] 25%|██▍ | 1735/7045 [5:38:27<17:28:54, 11.85s/it] {'loss': 1.0874, 'learning_rate': 4.4117411173024e-06, 'epoch': 0.25} + 25%|██▍ | 1735/7045 [5:38:27<17:28:54, 11.85s/it] 25%|██▍ | 1736/7045 [5:38:38<17:04:58, 11.58s/it] {'loss': 1.1191, 'learning_rate': 4.411000240231893e-06, 'epoch': 0.25} + 25%|██▍ | 1736/7045 [5:38:38<17:04:58, 11.58s/it] 25%|██▍ | 1737/7045 [5:38:50<17:05:02, 11.59s/it] {'loss': 1.1289, 'learning_rate': 4.41025895920206e-06, 'epoch': 0.25} + 25%|██▍ | 1737/7045 [5:38:50<17:05:02, 11.59s/it] 25%|██▍ | 1738/7045 [5:39:01<17:02:10, 11.56s/it] {'loss': 1.1045, 'learning_rate': 4.4095172743696e-06, 'epoch': 0.25} + 25%|██▍ | 1738/7045 [5:39:01<17:02:10, 11.56s/it] 25%|██▍ | 1739/7045 [5:39:13<16:54:59, 11.48s/it] {'loss': 1.1104, 'learning_rate': 4.408775185891293e-06, 'epoch': 0.25} + 25%|██▍ | 1739/7045 [5:39:13<16:54:59, 11.48s/it] 25%|██▍ | 1740/7045 [5:39:24<16:42:28, 11.34s/it] {'loss': 1.1533, 'learning_rate': 4.408032693924007e-06, 'epoch': 0.25} + 25%|██▍ | 1740/7045 [5:39:24<16:42:28, 11.34s/it] 25%|██▍ | 1741/7045 [5:39:35<16:55:01, 11.48s/it] {'loss': 1.1045, 'learning_rate': 4.407289798624695e-06, 'epoch': 0.25} + 25%|██▍ | 1741/7045 [5:39:35<16:55:01, 11.48s/it] 25%|██▍ | 1742/7045 [5:39:47<16:56:12, 11.50s/it] {'loss': 1.1689, 'learning_rate': 4.406546500150394e-06, 'epoch': 0.25} + 25%|██▍ | 1742/7045 [5:39:47<16:56:12, 11.50s/it] 25%|██▍ | 1743/7045 [5:39:58<16:50:12, 11.43s/it] {'loss': 1.1338, 'learning_rate': 4.405802798658228e-06, 'epoch': 0.25} + 25%|██▍ | 1743/7045 [5:39:58<16:50:12, 11.43s/it] 25%|██▍ | 1744/7045 [5:40:10<17:06:33, 11.62s/it] {'loss': 1.1426, 'learning_rate': 4.405058694305404e-06, 'epoch': 0.25} + 25%|██▍ | 1744/7045 [5:40:10<17:06:33, 11.62s/it] 25%|██▍ | 1745/7045 [5:40:24<17:51:08, 12.13s/it] {'loss': 1.124, 'learning_rate': 4.404314187249218e-06, 'epoch': 0.25} + 25%|██▍ | 1745/7045 [5:40:24<17:51:08, 12.13s/it] 25%|██▍ | 1746/7045 [5:40:36<18:05:52, 12.30s/it] {'loss': 1.1099, 'learning_rate': 4.403569277647046e-06, 'epoch': 0.25} + 25%|██▍ | 1746/7045 [5:40:36<18:05:52, 12.30s/it] 25%|██▍ | 1747/7045 [5:40:48<18:00:27, 12.24s/it] {'loss': 1.1631, 'learning_rate': 4.402823965656354e-06, 'epoch': 0.25} + 25%|██▍ | 1747/7045 [5:40:48<18:00:27, 12.24s/it] 25%|██▍ | 1748/7045 [5:40:59<17:30:02, 11.89s/it] {'loss': 1.1035, 'learning_rate': 4.4020782514346886e-06, 'epoch': 0.25} + 25%|██▍ | 1748/7045 [5:40:59<17:30:02, 11.89s/it] 25%|██▍ | 1749/7045 [5:41:11<17:07:49, 11.64s/it] {'loss': 1.1523, 'learning_rate': 4.401332135139685e-06, 'epoch': 0.25} + 25%|██▍ | 1749/7045 [5:41:11<17:07:49, 11.64s/it] 25%|██▍ | 1750/7045 [5:41:22<17:08:48, 11.66s/it] {'loss': 1.1133, 'learning_rate': 4.4005856169290615e-06, 'epoch': 0.25} + 25%|██▍ | 1750/7045 [5:41:22<17:08:48, 11.66s/it] 25%|██▍ | 1751/7045 [5:41:35<17:35:18, 11.96s/it] {'loss': 1.0869, 'learning_rate': 4.399838696960621e-06, 'epoch': 0.25} + 25%|██▍ | 1751/7045 [5:41:35<17:35:18, 11.96s/it] 25%|██▍ | 1752/7045 [5:41:46<17:22:55, 11.82s/it] {'loss': 1.165, 'learning_rate': 4.399091375392254e-06, 'epoch': 0.25} + 25%|██▍ | 1752/7045 [5:41:46<17:22:55, 11.82s/it] 25%|██▍ | 1753/7045 [5:41:58<17:10:58, 11.69s/it] {'loss': 1.1318, 'learning_rate': 4.398343652381933e-06, 'epoch': 0.25} + 25%|██▍ | 1753/7045 [5:41:58<17:10:58, 11.69s/it] 25%|██▍ | 1754/7045 [5:42:10<17:23:44, 11.84s/it] {'loss': 1.1157, 'learning_rate': 4.397595528087717e-06, 'epoch': 0.25} + 25%|██▍ | 1754/7045 [5:42:10<17:23:44, 11.84s/it] 25%|██▍ | 1755/7045 [5:42:21<16:57:31, 11.54s/it] {'loss': 1.1152, 'learning_rate': 4.396847002667748e-06, 'epoch': 0.25} + 25%|██▍ | 1755/7045 [5:42:21<16:57:31, 11.54s/it] 25%|██▍ | 1756/7045 [5:42:33<17:03:32, 11.61s/it] {'loss': 1.1396, 'learning_rate': 4.396098076280256e-06, 'epoch': 0.25} + 25%|██▍ | 1756/7045 [5:42:33<17:03:32, 11.61s/it] 25%|██▍ | 1757/7045 [5:42:45<17:19:52, 11.80s/it] {'loss': 1.1367, 'learning_rate': 4.39534874908355e-06, 'epoch': 0.25} + 25%|██▍ | 1757/7045 [5:42:45<17:19:52, 11.80s/it] 25%|██▍ | 1758/7045 [5:42:56<17:11:56, 11.71s/it] {'loss': 1.1113, 'learning_rate': 4.3945990212360325e-06, 'epoch': 0.25} + 25%|██▍ | 1758/7045 [5:42:56<17:11:56, 11.71s/it] 25%|██▍ | 1759/7045 [5:43:09<17:27:03, 11.88s/it] {'loss': 1.1172, 'learning_rate': 4.393848892896183e-06, 'epoch': 0.25} + 25%|██▍ | 1759/7045 [5:43:09<17:27:03, 11.88s/it] 25%|██▍ | 1760/7045 [5:43:20<17:11:45, 11.71s/it] {'loss': 1.1729, 'learning_rate': 4.393098364222568e-06, 'epoch': 0.25} + 25%|██▍ | 1760/7045 [5:43:20<17:11:45, 11.71s/it] 25%|██▍ | 1761/7045 [5:43:32<17:23:03, 11.84s/it] {'loss': 1.1221, 'learning_rate': 4.39234743537384e-06, 'epoch': 0.25} + 25%|██▍ | 1761/7045 [5:43:32<17:23:03, 11.84s/it] 25%|██▌ | 1762/7045 [5:43:43<17:07:53, 11.67s/it] {'loss': 1.1387, 'learning_rate': 4.391596106508735e-06, 'epoch': 0.25} + 25%|██▌ | 1762/7045 [5:43:43<17:07:53, 11.67s/it] 25%|██▌ | 1763/7045 [5:43:55<17:14:20, 11.75s/it] {'loss': 1.1079, 'learning_rate': 4.390844377786074e-06, 'epoch': 0.25} + 25%|██▌ | 1763/7045 [5:43:55<17:14:20, 11.75s/it] 25%|██▌ | 1764/7045 [5:44:07<17:18:15, 11.80s/it] {'loss': 1.1084, 'learning_rate': 4.390092249364761e-06, 'epoch': 0.25} + 25%|██▌ | 1764/7045 [5:44:07<17:18:15, 11.80s/it] 25%|██▌ | 1765/7045 [5:44:18<16:57:49, 11.57s/it] {'loss': 1.1309, 'learning_rate': 4.389339721403787e-06, 'epoch': 0.25} + 25%|██▌ | 1765/7045 [5:44:18<16:57:49, 11.57s/it] 25%|██▌ | 1766/7045 [5:44:29<16:46:11, 11.44s/it] {'loss': 1.1133, 'learning_rate': 4.388586794062225e-06, 'epoch': 0.25} + 25%|██▌ | 1766/7045 [5:44:29<16:46:11, 11.44s/it] 25%|██▌ | 1767/7045 [5:44:42<17:13:54, 11.75s/it] {'loss': 1.126, 'learning_rate': 4.387833467499234e-06, 'epoch': 0.25} + 25%|██▌ | 1767/7045 [5:44:42<17:13:54, 11.75s/it] 25%|██▌ | 1768/7045 [5:44:53<16:59:24, 11.59s/it] {'loss': 1.1289, 'learning_rate': 4.3870797418740575e-06, 'epoch': 0.25} + 25%|██▌ | 1768/7045 [5:44:53<16:59:24, 11.59s/it] 25%|██▌ | 1769/7045 [5:45:04<16:48:58, 11.47s/it] {'loss': 1.124, 'learning_rate': 4.386325617346021e-06, 'epoch': 0.25} + 25%|██▌ | 1769/7045 [5:45:04<16:48:58, 11.47s/it] 25%|██▌ | 1770/7045 [5:45:15<16:35:25, 11.32s/it] {'loss': 1.1011, 'learning_rate': 4.3855710940745384e-06, 'epoch': 0.25} + 25%|██▌ | 1770/7045 [5:45:15<16:35:25, 11.32s/it] 25%|██▌ | 1771/7045 [5:45:27<16:45:14, 11.44s/it] {'loss': 1.1211, 'learning_rate': 4.384816172219104e-06, 'epoch': 0.25} + 25%|██▌ | 1771/7045 [5:45:27<16:45:14, 11.44s/it] 25%|██▌ | 1772/7045 [5:45:39<17:15:43, 11.79s/it] {'loss': 1.0957, 'learning_rate': 4.384060851939299e-06, 'epoch': 0.25} + 25%|██▌ | 1772/7045 [5:45:39<17:15:43, 11.79s/it] 25%|██▌ | 1773/7045 [5:45:51<16:59:15, 11.60s/it] {'loss': 1.1543, 'learning_rate': 4.3833051333947876e-06, 'epoch': 0.25} + 25%|██▌ | 1773/7045 [5:45:51<16:59:15, 11.60s/it] 25%|██▌ | 1774/7045 [5:46:03<17:26:02, 11.91s/it] {'loss': 1.1221, 'learning_rate': 4.382549016745317e-06, 'epoch': 0.25} + 25%|██▌ | 1774/7045 [5:46:03<17:26:02, 11.91s/it] 25%|██▌ | 1775/7045 [5:46:15<17:10:20, 11.73s/it] {'loss': 1.0957, 'learning_rate': 4.381792502150721e-06, 'epoch': 0.25} + 25%|██▌ | 1775/7045 [5:46:15<17:10:20, 11.73s/it] 25%|██▌ | 1776/7045 [5:46:26<16:52:26, 11.53s/it] {'loss': 1.1299, 'learning_rate': 4.381035589770918e-06, 'epoch': 0.25} + 25%|██▌ | 1776/7045 [5:46:26<16:52:26, 11.53s/it] 25%|██▌ | 1777/7045 [5:46:37<16:37:56, 11.37s/it] {'loss': 1.1235, 'learning_rate': 4.380278279765906e-06, 'epoch': 0.25} + 25%|██▌ | 1777/7045 [5:46:37<16:37:56, 11.37s/it] 25%|██▌ | 1778/7045 [5:46:49<16:50:55, 11.52s/it] {'loss': 1.1445, 'learning_rate': 4.379520572295771e-06, 'epoch': 0.25} + 25%|██▌ | 1778/7045 [5:46:49<16:50:55, 11.52s/it] 25%|██▌ | 1779/7045 [5:46:59<16:36:01, 11.35s/it] {'loss': 1.1104, 'learning_rate': 4.378762467520683e-06, 'epoch': 0.25} + 25%|██▌ | 1779/7045 [5:46:59<16:36:01, 11.35s/it] 25%|██▌ | 1780/7045 [5:47:11<16:30:36, 11.29s/it] {'loss': 1.1377, 'learning_rate': 4.378003965600895e-06, 'epoch': 0.25} + 25%|██▌ | 1780/7045 [5:47:11<16:30:36, 11.29s/it] 25%|██▌ | 1781/7045 [5:47:23<16:57:23, 11.60s/it] {'loss': 1.0977, 'learning_rate': 4.377245066696742e-06, 'epoch': 0.25} + 25%|██▌ | 1781/7045 [5:47:23<16:57:23, 11.60s/it] 25%|██▌ | 1782/7045 [5:47:35<17:11:52, 11.76s/it] {'loss': 1.1104, 'learning_rate': 4.376485770968647e-06, 'epoch': 0.25} + 25%|██▌ | 1782/7045 [5:47:35<17:11:52, 11.76s/it] 25%|██▌ | 1783/7045 [5:47:46<16:55:15, 11.58s/it] {'loss': 1.0918, 'learning_rate': 4.3757260785771134e-06, 'epoch': 0.25} + 25%|██▌ | 1783/7045 [5:47:46<16:55:15, 11.58s/it] 25%|██▌ | 1784/7045 [5:47:58<17:02:49, 11.66s/it] {'loss': 1.1084, 'learning_rate': 4.37496598968273e-06, 'epoch': 0.25} + 25%|██▌ | 1784/7045 [5:47:58<17:02:49, 11.66s/it] 25%|██▌ | 1785/7045 [5:48:11<17:22:12, 11.89s/it] {'loss': 1.1133, 'learning_rate': 4.37420550444617e-06, 'epoch': 0.25} + 25%|██▌ | 1785/7045 [5:48:11<17:22:12, 11.89s/it] 25%|██▌ | 1786/7045 [5:48:22<17:14:09, 11.80s/it] {'loss': 1.1602, 'learning_rate': 4.373444623028189e-06, 'epoch': 0.25} + 25%|██▌ | 1786/7045 [5:48:22<17:14:09, 11.80s/it] 25%|██▌ | 1787/7045 [5:48:34<17:27:45, 11.96s/it] {'loss': 1.0942, 'learning_rate': 4.372683345589627e-06, 'epoch': 0.25} + 25%|██▌ | 1787/7045 [5:48:34<17:27:45, 11.96s/it] 25%|██▌ | 1788/7045 [5:48:48<18:05:32, 12.39s/it] {'loss': 1.1182, 'learning_rate': 4.371921672291407e-06, 'epoch': 0.25} + 25%|██▌ | 1788/7045 [5:48:48<18:05:32, 12.39s/it] 25%|██▌ | 1789/7045 [5:48:59<17:32:24, 12.01s/it] {'loss': 1.1699, 'learning_rate': 4.371159603294538e-06, 'epoch': 0.25} + 25%|██▌ | 1789/7045 [5:48:59<17:32:24, 12.01s/it] 25%|██▌ | 1790/7045 [5:49:10<17:11:46, 11.78s/it] {'loss': 1.1543, 'learning_rate': 4.370397138760109e-06, 'epoch': 0.25} + 25%|██▌ | 1790/7045 [5:49:10<17:11:46, 11.78s/it] 25%|██▌ | 1791/7045 [5:49:23<17:42:26, 12.13s/it] {'loss': 1.0796, 'learning_rate': 4.369634278849295e-06, 'epoch': 0.25} + 25%|██▌ | 1791/7045 [5:49:23<17:42:26, 12.13s/it] 25%|██▌ | 1792/7045 [5:49:34<17:17:46, 11.85s/it] {'loss': 1.1211, 'learning_rate': 4.368871023723355e-06, 'epoch': 0.25} + 25%|██▌ | 1792/7045 [5:49:34<17:17:46, 11.85s/it] 25%|██▌ | 1793/7045 [5:49:46<17:05:37, 11.72s/it] {'loss': 1.1709, 'learning_rate': 4.368107373543631e-06, 'epoch': 0.25} + 25%|██▌ | 1793/7045 [5:49:46<17:05:37, 11.72s/it] 25%|██▌ | 1794/7045 [5:49:58<17:29:35, 11.99s/it] {'loss': 1.1582, 'learning_rate': 4.367343328471547e-06, 'epoch': 0.25} + 25%|██▌ | 1794/7045 [5:49:58<17:29:35, 11.99s/it] 25%|██▌ | 1795/7045 [5:50:10<17:08:44, 11.76s/it] {'loss': 1.1426, 'learning_rate': 4.3665788886686126e-06, 'epoch': 0.25} + 25%|██▌ | 1795/7045 [5:50:10<17:08:44, 11.76s/it] 25%|██▌ | 1796/7045 [5:50:23<17:39:27, 12.11s/it] {'loss': 1.0952, 'learning_rate': 4.365814054296419e-06, 'epoch': 0.25} + 25%|██▌ | 1796/7045 [5:50:23<17:39:27, 12.11s/it] 26%|██▌ | 1797/7045 [5:50:34<17:30:14, 12.01s/it] {'loss': 1.1094, 'learning_rate': 4.365048825516642e-06, 'epoch': 0.26} + 26%|██▌ | 1797/7045 [5:50:34<17:30:14, 12.01s/it] 26%|██▌ | 1798/7045 [5:50:45<17:07:29, 11.75s/it] {'loss': 1.1387, 'learning_rate': 4.364283202491042e-06, 'epoch': 0.26} + 26%|██▌ | 1798/7045 [5:50:45<17:07:29, 11.75s/it] 26%|██▌ | 1799/7045 [5:50:57<17:04:27, 11.72s/it] {'loss': 1.1064, 'learning_rate': 4.363517185381459e-06, 'epoch': 0.26} + 26%|██▌ | 1799/7045 [5:50:57<17:04:27, 11.72s/it] 26%|██▌ | 1800/7045 [5:51:11<17:49:04, 12.23s/it] {'loss': 1.0996, 'learning_rate': 4.36275077434982e-06, 'epoch': 0.26} + 26%|██▌ | 1800/7045 [5:51:11<17:49:04, 12.23s/it] 26%|██▌ | 1801/7045 [5:51:22<17:29:37, 12.01s/it] {'loss': 1.1211, 'learning_rate': 4.3619839695581326e-06, 'epoch': 0.26} + 26%|██▌ | 1801/7045 [5:51:22<17:29:37, 12.01s/it] 26%|██▌ | 1802/7045 [5:51:36<18:09:01, 12.46s/it] {'loss': 1.1465, 'learning_rate': 4.361216771168491e-06, 'epoch': 0.26} + 26%|██▌ | 1802/7045 [5:51:36<18:09:01, 12.46s/it] 26%|██▌ | 1803/7045 [5:51:47<17:38:05, 12.11s/it] {'loss': 1.1172, 'learning_rate': 4.360449179343068e-06, 'epoch': 0.26} + 26%|██▌ | 1803/7045 [5:51:47<17:38:05, 12.11s/it] 26%|██▌ | 1804/7045 [5:51:59<17:52:05, 12.27s/it] {'loss': 1.1523, 'learning_rate': 4.359681194244123e-06, 'epoch': 0.26} + 26%|██▌ | 1804/7045 [5:51:59<17:52:05, 12.27s/it] 26%|██▌ | 1805/7045 [5:52:13<18:27:20, 12.68s/it] {'loss': 1.0898, 'learning_rate': 4.358912816033998e-06, 'epoch': 0.26} + 26%|██▌ | 1805/7045 [5:52:13<18:27:20, 12.68s/it] 26%|██▌ | 1806/7045 [5:52:24<17:46:32, 12.21s/it] {'loss': 1.1152, 'learning_rate': 4.358144044875118e-06, 'epoch': 0.26} + 26%|██▌ | 1806/7045 [5:52:24<17:46:32, 12.21s/it] 26%|██▌ | 1807/7045 [5:52:35<17:17:46, 11.89s/it] {'loss': 1.166, 'learning_rate': 4.357374880929988e-06, 'epoch': 0.26} + 26%|██▌ | 1807/7045 [5:52:35<17:17:46, 11.89s/it] 26%|██▌ | 1808/7045 [5:52:47<17:11:42, 11.82s/it] {'loss': 1.1069, 'learning_rate': 4.356605324361203e-06, 'epoch': 0.26} + 26%|██▌ | 1808/7045 [5:52:47<17:11:42, 11.82s/it] 26%|██▌ | 1809/7045 [5:52:59<17:23:41, 11.96s/it] {'loss': 1.1338, 'learning_rate': 4.355835375331433e-06, 'epoch': 0.26} + 26%|██▌ | 1809/7045 [5:52:59<17:23:41, 11.96s/it] 26%|██▌ | 1810/7045 [5:53:10<17:02:09, 11.72s/it] {'loss': 1.1455, 'learning_rate': 4.3550650340034364e-06, 'epoch': 0.26} + 26%|██▌ | 1810/7045 [5:53:10<17:02:09, 11.72s/it] 26%|██▌ | 1811/7045 [5:53:22<16:53:12, 11.61s/it] {'loss': 1.1943, 'learning_rate': 4.354294300540053e-06, 'epoch': 0.26} + 26%|██▌ | 1811/7045 [5:53:22<16:53:12, 11.61s/it] 26%|██▌ | 1812/7045 [5:53:33<16:41:14, 11.48s/it] {'loss': 1.1289, 'learning_rate': 4.353523175104205e-06, 'epoch': 0.26} + 26%|██▌ | 1812/7045 [5:53:33<16:41:14, 11.48s/it] 26%|██▌ | 1813/7045 [5:53:44<16:31:44, 11.37s/it] {'loss': 1.1279, 'learning_rate': 4.352751657858897e-06, 'epoch': 0.26} + 26%|██▌ | 1813/7045 [5:53:44<16:31:44, 11.37s/it] 26%|██▌ | 1814/7045 [5:53:55<16:19:57, 11.24s/it] {'loss': 1.1211, 'learning_rate': 4.351979748967219e-06, 'epoch': 0.26} + 26%|██▌ | 1814/7045 [5:53:55<16:19:57, 11.24s/it] 26%|██▌ | 1815/7045 [5:54:06<16:24:59, 11.30s/it] {'loss': 1.1465, 'learning_rate': 4.351207448592339e-06, 'epoch': 0.26} + 26%|██▌ | 1815/7045 [5:54:06<16:24:59, 11.30s/it] 26%|██▌ | 1816/7045 [5:54:17<16:15:07, 11.19s/it] {'loss': 1.1504, 'learning_rate': 4.350434756897515e-06, 'epoch': 0.26} + 26%|██▌ | 1816/7045 [5:54:17<16:15:07, 11.19s/it] 26%|██▌ | 1817/7045 [5:54:29<16:13:21, 11.17s/it] {'loss': 1.1143, 'learning_rate': 4.349661674046079e-06, 'epoch': 0.26} + 26%|██▌ | 1817/7045 [5:54:29<16:13:21, 11.17s/it] 26%|██▌ | 1818/7045 [5:54:40<16:15:08, 11.19s/it] {'loss': 1.1523, 'learning_rate': 4.348888200201454e-06, 'epoch': 0.26} + 26%|██▌ | 1818/7045 [5:54:40<16:15:08, 11.19s/it] 26%|██▌ | 1819/7045 [5:54:51<16:13:05, 11.17s/it] {'loss': 1.1343, 'learning_rate': 4.348114335527138e-06, 'epoch': 0.26} + 26%|██▌ | 1819/7045 [5:54:51<16:13:05, 11.17s/it] 26%|██▌ | 1820/7045 [5:55:02<16:07:11, 11.11s/it] {'loss': 1.1484, 'learning_rate': 4.3473400801867195e-06, 'epoch': 0.26} + 26%|██▌ | 1820/7045 [5:55:02<16:07:11, 11.11s/it] 26%|██▌ | 1821/7045 [5:55:13<16:17:11, 11.22s/it] {'loss': 1.1182, 'learning_rate': 4.346565434343862e-06, 'epoch': 0.26} + 26%|██▌ | 1821/7045 [5:55:13<16:17:11, 11.22s/it] 26%|██▌ | 1822/7045 [5:55:24<16:11:44, 11.16s/it] {'loss': 1.1748, 'learning_rate': 4.345790398162317e-06, 'epoch': 0.26} + 26%|██▌ | 1822/7045 [5:55:24<16:11:44, 11.16s/it] 26%|██▌ | 1823/7045 [5:55:36<16:19:09, 11.25s/it] {'loss': 1.1367, 'learning_rate': 4.345014971805916e-06, 'epoch': 0.26} + 26%|██▌ | 1823/7045 [5:55:36<16:19:09, 11.25s/it] 26%|██▌ | 1824/7045 [5:55:48<16:53:17, 11.64s/it] {'loss': 1.0894, 'learning_rate': 4.344239155438573e-06, 'epoch': 0.26} + 26%|██▌ | 1824/7045 [5:55:48<16:53:17, 11.64s/it] 26%|██▌ | 1825/7045 [5:55:59<16:35:08, 11.44s/it] {'loss': 1.1523, 'learning_rate': 4.343462949224286e-06, 'epoch': 0.26} + 26%|██▌ | 1825/7045 [5:55:59<16:35:08, 11.44s/it] 26%|██▌ | 1826/7045 [5:56:11<16:30:00, 11.38s/it] {'loss': 1.1182, 'learning_rate': 4.342686353327135e-06, 'epoch': 0.26} + 26%|██▌ | 1826/7045 [5:56:11<16:30:00, 11.38s/it] 26%|██▌ | 1827/7045 [5:56:21<16:14:20, 11.20s/it] {'loss': 1.1455, 'learning_rate': 4.341909367911279e-06, 'epoch': 0.26} + 26%|██▌ | 1827/7045 [5:56:21<16:14:20, 11.20s/it] 26%|██▌ | 1828/7045 [5:56:33<16:33:02, 11.42s/it] {'loss': 1.1318, 'learning_rate': 4.341131993140964e-06, 'epoch': 0.26} + 26%|██▌ | 1828/7045 [5:56:33<16:33:02, 11.42s/it] 26%|██▌ | 1829/7045 [5:56:45<16:28:19, 11.37s/it] {'loss': 1.1328, 'learning_rate': 4.340354229180517e-06, 'epoch': 0.26} + 26%|██▌ | 1829/7045 [5:56:45<16:28:19, 11.37s/it] 26%|██▌ | 1830/7045 [5:56:56<16:18:50, 11.26s/it] {'loss': 1.1514, 'learning_rate': 4.339576076194346e-06, 'epoch': 0.26} + 26%|██▌ | 1830/7045 [5:56:56<16:18:50, 11.26s/it] 26%|██▌ | 1831/7045 [5:57:08<16:42:48, 11.54s/it] {'loss': 1.0884, 'learning_rate': 4.33879753434694e-06, 'epoch': 0.26} + 26%|██▌ | 1831/7045 [5:57:08<16:42:48, 11.54s/it] 26%|██▌ | 1832/7045 [5:57:19<16:45:56, 11.58s/it] {'loss': 1.1465, 'learning_rate': 4.338018603802875e-06, 'epoch': 0.26} + 26%|██▌ | 1832/7045 [5:57:19<16:45:56, 11.58s/it] 26%|██▌ | 1833/7045 [5:57:31<16:37:02, 11.48s/it] {'loss': 1.1328, 'learning_rate': 4.3372392847268055e-06, 'epoch': 0.26} + 26%|██▌ | 1833/7045 [5:57:31<16:37:02, 11.48s/it] 26%|██▌ | 1834/7045 [5:57:44<17:18:34, 11.96s/it] {'loss': 1.0659, 'learning_rate': 4.336459577283468e-06, 'epoch': 0.26} + 26%|██▌ | 1834/7045 [5:57:44<17:18:34, 11.96s/it] 26%|██▌ | 1835/7045 [5:57:57<17:38:44, 12.19s/it] {'loss': 1.1426, 'learning_rate': 4.335679481637682e-06, 'epoch': 0.26} + 26%|██▌ | 1835/7045 [5:57:57<17:38:44, 12.19s/it] 26%|██▌ | 1836/7045 [5:58:08<17:21:17, 11.99s/it] {'loss': 1.1289, 'learning_rate': 4.334898997954351e-06, 'epoch': 0.26} + 26%|██▌ | 1836/7045 [5:58:08<17:21:17, 11.99s/it] 26%|██▌ | 1837/7045 [5:58:20<17:30:57, 12.11s/it] {'loss': 1.1167, 'learning_rate': 4.334118126398456e-06, 'epoch': 0.26} + 26%|██▌ | 1837/7045 [5:58:20<17:30:57, 12.11s/it] 26%|██▌ | 1838/7045 [5:58:31<17:02:25, 11.78s/it] {'loss': 1.1357, 'learning_rate': 4.333336867135064e-06, 'epoch': 0.26} + 26%|██▌ | 1838/7045 [5:58:31<17:02:25, 11.78s/it] 26%|██▌ | 1839/7045 [5:58:43<16:45:13, 11.59s/it] {'loss': 1.1201, 'learning_rate': 4.332555220329322e-06, 'epoch': 0.26} + 26%|██▌ | 1839/7045 [5:58:43<16:45:13, 11.59s/it] 26%|██▌ | 1840/7045 [5:58:55<17:09:46, 11.87s/it] {'loss': 1.1221, 'learning_rate': 4.331773186146459e-06, 'epoch': 0.26} + 26%|██▌ | 1840/7045 [5:58:55<17:09:46, 11.87s/it] 26%|██▌ | 1841/7045 [5:59:06<16:54:56, 11.70s/it] {'loss': 1.1045, 'learning_rate': 4.330990764751788e-06, 'epoch': 0.26} + 26%|██▌ | 1841/7045 [5:59:06<16:54:56, 11.70s/it] 26%|██▌ | 1842/7045 [5:59:18<16:45:26, 11.59s/it] {'loss': 1.1123, 'learning_rate': 4.330207956310701e-06, 'epoch': 0.26} + 26%|██▌ | 1842/7045 [5:59:18<16:45:26, 11.59s/it] 26%|██▌ | 1843/7045 [5:59:31<17:26:36, 12.07s/it] {'loss': 1.1045, 'learning_rate': 4.329424760988673e-06, 'epoch': 0.26} + 26%|██▌ | 1843/7045 [5:59:31<17:26:36, 12.07s/it] 26%|██▌ | 1844/7045 [5:59:43<17:27:52, 12.09s/it] {'loss': 1.1436, 'learning_rate': 4.328641178951262e-06, 'epoch': 0.26} + 26%|██▌ | 1844/7045 [5:59:43<17:27:52, 12.09s/it] 26%|██▌ | 1845/7045 [5:59:54<16:58:24, 11.75s/it] {'loss': 1.1475, 'learning_rate': 4.327857210364104e-06, 'epoch': 0.26} + 26%|██▌ | 1845/7045 [5:59:54<16:58:24, 11.75s/it] 26%|██▌ | 1846/7045 [6:00:05<16:49:36, 11.65s/it] {'loss': 1.1504, 'learning_rate': 4.327072855392922e-06, 'epoch': 0.26} + 26%|██▌ | 1846/7045 [6:00:05<16:49:36, 11.65s/it] 26%|██▌ | 1847/7045 [6:00:19<17:29:43, 12.12s/it] {'loss': 1.0977, 'learning_rate': 4.326288114203516e-06, 'epoch': 0.26} + 26%|██▌ | 1847/7045 [6:00:19<17:29:43, 12.12s/it] 26%|██▌ | 1848/7045 [6:00:30<17:05:21, 11.84s/it] {'loss': 1.1621, 'learning_rate': 4.3255029869617694e-06, 'epoch': 0.26} + 26%|██▌ | 1848/7045 [6:00:30<17:05:21, 11.84s/it] 26%|██▌ | 1849/7045 [6:00:41<16:47:14, 11.63s/it] {'loss': 1.1426, 'learning_rate': 4.3247174738336494e-06, 'epoch': 0.26} + 26%|██▌ | 1849/7045 [6:00:41<16:47:14, 11.63s/it] 26%|██▋ | 1850/7045 [6:00:52<16:32:02, 11.46s/it] {'loss': 1.1357, 'learning_rate': 4.323931574985202e-06, 'epoch': 0.26} + 26%|██▋ | 1850/7045 [6:00:52<16:32:02, 11.46s/it] 26%|██▋ | 1851/7045 [6:01:05<16:59:16, 11.77s/it] {'loss': 1.0654, 'learning_rate': 4.323145290582554e-06, 'epoch': 0.26} + 26%|██▋ | 1851/7045 [6:01:05<16:59:16, 11.77s/it] 26%|██▋ | 1852/7045 [6:01:16<17:02:38, 11.82s/it] {'loss': 1.1689, 'learning_rate': 4.322358620791916e-06, 'epoch': 0.26} + 26%|██▋ | 1852/7045 [6:01:16<17:02:38, 11.82s/it] 26%|██▋ | 1853/7045 [6:01:28<16:42:21, 11.58s/it] {'loss': 1.1719, 'learning_rate': 4.32157156577958e-06, 'epoch': 0.26} + 26%|██▋ | 1853/7045 [6:01:28<16:42:21, 11.58s/it] 26%|██▋ | 1854/7045 [6:01:39<16:30:05, 11.44s/it] {'loss': 1.1348, 'learning_rate': 4.320784125711918e-06, 'epoch': 0.26} + 26%|██▋ | 1854/7045 [6:01:39<16:30:05, 11.44s/it] 26%|██▋ | 1855/7045 [6:01:50<16:27:29, 11.42s/it] {'loss': 1.1328, 'learning_rate': 4.319996300755384e-06, 'epoch': 0.26} + 26%|██▋ | 1855/7045 [6:01:50<16:27:29, 11.42s/it] 26%|██▋ | 1856/7045 [6:02:02<16:30:34, 11.45s/it] {'loss': 1.1309, 'learning_rate': 4.319208091076513e-06, 'epoch': 0.26} + 26%|██▋ | 1856/7045 [6:02:02<16:30:34, 11.45s/it] 26%|██▋ | 1857/7045 [6:02:13<16:27:14, 11.42s/it] {'loss': 1.1279, 'learning_rate': 4.318419496841923e-06, 'epoch': 0.26} + 26%|██▋ | 1857/7045 [6:02:13<16:27:14, 11.42s/it] 26%|██▋ | 1858/7045 [6:02:24<16:26:52, 11.42s/it] {'loss': 1.1484, 'learning_rate': 4.317630518218312e-06, 'epoch': 0.26} + 26%|██▋ | 1858/7045 [6:02:24<16:26:52, 11.42s/it] 26%|██▋ | 1859/7045 [6:02:37<16:50:52, 11.70s/it] {'loss': 1.1279, 'learning_rate': 4.3168411553724575e-06, 'epoch': 0.26} + 26%|██▋ | 1859/7045 [6:02:37<16:50:52, 11.70s/it] 26%|██▋ | 1860/7045 [6:02:51<17:58:50, 12.48s/it] {'loss': 1.083, 'learning_rate': 4.316051408471222e-06, 'epoch': 0.26} + 26%|██▋ | 1860/7045 [6:02:51<17:58:50, 12.48s/it] 26%|██▋ | 1861/7045 [6:03:03<17:44:37, 12.32s/it] {'loss': 1.1123, 'learning_rate': 4.3152612776815465e-06, 'epoch': 0.26} + 26%|██▋ | 1861/7045 [6:03:03<17:44:37, 12.32s/it] 26%|██▋ | 1862/7045 [6:03:14<17:14:30, 11.98s/it] {'loss': 1.1123, 'learning_rate': 4.314470763170453e-06, 'epoch': 0.26} + 26%|██▋ | 1862/7045 [6:03:14<17:14:30, 11.98s/it] 26%|██▋ | 1863/7045 [6:03:25<16:49:33, 11.69s/it] {'loss': 1.126, 'learning_rate': 4.313679865105046e-06, 'epoch': 0.26} + 26%|██▋ | 1863/7045 [6:03:25<16:49:33, 11.69s/it] 26%|██▋ | 1864/7045 [6:03:38<17:13:37, 11.97s/it] {'loss': 1.0688, 'learning_rate': 4.3128885836525114e-06, 'epoch': 0.26} + 26%|██▋ | 1864/7045 [6:03:38<17:13:37, 11.97s/it] 26%|██▋ | 1865/7045 [6:03:49<16:44:16, 11.63s/it] {'loss': 1.0889, 'learning_rate': 4.312096918980114e-06, 'epoch': 0.26} + 26%|██▋ | 1865/7045 [6:03:49<16:44:16, 11.63s/it] 26%|██▋ | 1866/7045 [6:04:00<16:35:29, 11.53s/it] {'loss': 1.1318, 'learning_rate': 4.311304871255202e-06, 'epoch': 0.26} + 26%|██▋ | 1866/7045 [6:04:00<16:35:29, 11.53s/it] 27%|██▋ | 1867/7045 [6:04:11<16:15:11, 11.30s/it] {'loss': 1.1045, 'learning_rate': 4.3105124406452035e-06, 'epoch': 0.27} + 27%|██▋ | 1867/7045 [6:04:11<16:15:11, 11.30s/it] 27%|██▋ | 1868/7045 [6:04:22<16:09:01, 11.23s/it] {'loss': 1.127, 'learning_rate': 4.309719627317625e-06, 'epoch': 0.27} + 27%|██▋ | 1868/7045 [6:04:22<16:09:01, 11.23s/it] 27%|██▋ | 1869/7045 [6:04:33<16:18:07, 11.34s/it] {'loss': 1.1357, 'learning_rate': 4.30892643144006e-06, 'epoch': 0.27} + 27%|██▋ | 1869/7045 [6:04:33<16:18:07, 11.34s/it] 27%|██▋ | 1870/7045 [6:04:44<16:14:35, 11.30s/it] {'loss': 1.1191, 'learning_rate': 4.308132853180177e-06, 'epoch': 0.27} + 27%|██▋ | 1870/7045 [6:04:44<16:14:35, 11.30s/it] 27%|██▋ | 1871/7045 [6:04:56<16:10:29, 11.25s/it] {'loss': 1.1396, 'learning_rate': 4.3073388927057295e-06, 'epoch': 0.27} + 27%|██▋ | 1871/7045 [6:04:56<16:10:29, 11.25s/it] 27%|██▋ | 1872/7045 [6:05:09<16:57:51, 11.81s/it] {'loss': 1.1157, 'learning_rate': 4.306544550184547e-06, 'epoch': 0.27} + 27%|██▋ | 1872/7045 [6:05:09<16:57:51, 11.81s/it] 27%|██▋ | 1873/7045 [6:05:20<16:48:18, 11.70s/it] {'loss': 1.1177, 'learning_rate': 4.305749825784545e-06, 'epoch': 0.27} + 27%|██▋ | 1873/7045 [6:05:20<16:48:18, 11.70s/it] 27%|██▋ | 1874/7045 [6:05:32<16:43:17, 11.64s/it] {'loss': 1.1035, 'learning_rate': 4.3049547196737165e-06, 'epoch': 0.27} + 27%|██▋ | 1874/7045 [6:05:32<16:43:17, 11.64s/it] 27%|██▋ | 1875/7045 [6:05:43<16:25:04, 11.43s/it] {'loss': 1.1143, 'learning_rate': 4.304159232020135e-06, 'epoch': 0.27} + 27%|██▋ | 1875/7045 [6:05:43<16:25:04, 11.43s/it] 27%|██▋ | 1876/7045 [6:05:54<16:17:17, 11.34s/it] {'loss': 1.1699, 'learning_rate': 4.3033633629919595e-06, 'epoch': 0.27} + 27%|██▋ | 1876/7045 [6:05:54<16:17:17, 11.34s/it] 27%|██▋ | 1877/7045 [6:06:05<16:09:46, 11.26s/it] {'loss': 1.1211, 'learning_rate': 4.3025671127574205e-06, 'epoch': 0.27} + 27%|██▋ | 1877/7045 [6:06:05<16:09:46, 11.26s/it] 27%|██▋ | 1878/7045 [6:06:18<17:05:24, 11.91s/it] {'loss': 1.1226, 'learning_rate': 4.301770481484838e-06, 'epoch': 0.27} + 27%|██▋ | 1878/7045 [6:06:18<17:05:24, 11.91s/it] 27%|██▋ | 1879/7045 [6:06:29<16:45:38, 11.68s/it] {'loss': 1.1475, 'learning_rate': 4.300973469342609e-06, 'epoch': 0.27} + 27%|██▋ | 1879/7045 [6:06:29<16:45:38, 11.68s/it] 27%|██▋ | 1880/7045 [6:06:41<16:38:40, 11.60s/it] {'loss': 1.1367, 'learning_rate': 4.30017607649921e-06, 'epoch': 0.27} + 27%|██▋ | 1880/7045 [6:06:41<16:38:40, 11.60s/it] 27%|██▋ | 1881/7045 [6:06:52<16:15:39, 11.34s/it] {'loss': 1.1396, 'learning_rate': 4.299378303123198e-06, 'epoch': 0.27} + 27%|██▋ | 1881/7045 [6:06:52<16:15:39, 11.34s/it] 27%|██▋ | 1882/7045 [6:07:03<16:20:38, 11.40s/it] {'loss': 1.1357, 'learning_rate': 4.298580149383213e-06, 'epoch': 0.27} + 27%|██▋ | 1882/7045 [6:07:03<16:20:38, 11.40s/it] 27%|██▋ | 1883/7045 [6:07:14<16:08:39, 11.26s/it] {'loss': 1.1255, 'learning_rate': 4.2977816154479725e-06, 'epoch': 0.27} + 27%|██▋ | 1883/7045 [6:07:14<16:08:39, 11.26s/it] 27%|██▋ | 1884/7045 [6:07:25<16:05:09, 11.22s/it] {'loss': 1.1514, 'learning_rate': 4.296982701486278e-06, 'epoch': 0.27} + 27%|██▋ | 1884/7045 [6:07:25<16:05:09, 11.22s/it] 27%|██▋ | 1885/7045 [6:07:36<16:07:54, 11.25s/it] {'loss': 1.1436, 'learning_rate': 4.2961834076670055e-06, 'epoch': 0.27} + 27%|██▋ | 1885/7045 [6:07:36<16:07:54, 11.25s/it] 27%|██▋ | 1886/7045 [6:07:48<16:09:18, 11.27s/it] {'loss': 1.1719, 'learning_rate': 4.295383734159118e-06, 'epoch': 0.27} + 27%|██▋ | 1886/7045 [6:07:48<16:09:18, 11.27s/it] 27%|██▋ | 1887/7045 [6:07:59<16:05:35, 11.23s/it] {'loss': 1.1201, 'learning_rate': 4.294583681131653e-06, 'epoch': 0.27} + 27%|██▋ | 1887/7045 [6:07:59<16:05:35, 11.23s/it] 27%|██▋ | 1888/7045 [6:08:12<16:43:30, 11.68s/it] {'loss': 1.1367, 'learning_rate': 4.293783248753732e-06, 'epoch': 0.27} + 27%|██▋ | 1888/7045 [6:08:12<16:43:30, 11.68s/it] 27%|██▋ | 1889/7045 [6:08:24<17:11:38, 12.01s/it] {'loss': 1.1055, 'learning_rate': 4.292982437194556e-06, 'epoch': 0.27} + 27%|██▋ | 1889/7045 [6:08:24<17:11:38, 12.01s/it] 27%|██▋ | 1890/7045 [6:08:36<17:04:55, 11.93s/it] {'loss': 1.0981, 'learning_rate': 4.292181246623405e-06, 'epoch': 0.27} + 27%|██▋ | 1890/7045 [6:08:36<17:04:55, 11.93s/it] 27%|██▋ | 1891/7045 [6:08:48<17:08:45, 11.98s/it] {'loss': 1.1016, 'learning_rate': 4.291379677209641e-06, 'epoch': 0.27} + 27%|██▋ | 1891/7045 [6:08:48<17:08:45, 11.98s/it] 27%|██▋ | 1892/7045 [6:09:02<17:44:56, 12.40s/it] {'loss': 1.1147, 'learning_rate': 4.290577729122701e-06, 'epoch': 0.27} + 27%|██▋ | 1892/7045 [6:09:02<17:44:56, 12.40s/it] 27%|██▋ | 1893/7045 [6:09:13<17:11:59, 12.02s/it] {'loss': 1.1631, 'learning_rate': 4.289775402532111e-06, 'epoch': 0.27} + 27%|██▋ | 1893/7045 [6:09:13<17:11:59, 12.02s/it] 27%|██▋ | 1894/7045 [6:09:24<16:58:34, 11.86s/it] {'loss': 1.103, 'learning_rate': 4.288972697607467e-06, 'epoch': 0.27} + 27%|██▋ | 1894/7045 [6:09:24<16:58:34, 11.86s/it] 27%|██▋ | 1895/7045 [6:09:35<16:34:39, 11.59s/it] {'loss': 1.1045, 'learning_rate': 4.288169614518453e-06, 'epoch': 0.27} + 27%|██▋ | 1895/7045 [6:09:35<16:34:39, 11.59s/it] 27%|██▋ | 1896/7045 [6:09:46<16:24:19, 11.47s/it] {'loss': 1.1533, 'learning_rate': 4.287366153434829e-06, 'epoch': 0.27} + 27%|██▋ | 1896/7045 [6:09:46<16:24:19, 11.47s/it] 27%|██▋ | 1897/7045 [6:09:57<16:10:43, 11.31s/it] {'loss': 1.1162, 'learning_rate': 4.286562314526435e-06, 'epoch': 0.27} + 27%|██▋ | 1897/7045 [6:09:57<16:10:43, 11.31s/it] 27%|██▋ | 1898/7045 [6:10:10<16:38:05, 11.64s/it] {'loss': 1.1021, 'learning_rate': 4.285758097963194e-06, 'epoch': 0.27} + 27%|██▋ | 1898/7045 [6:10:10<16:38:05, 11.64s/it] 27%|██▋ | 1899/7045 [6:10:21<16:29:46, 11.54s/it] {'loss': 1.1104, 'learning_rate': 4.284953503915104e-06, 'epoch': 0.27} + 27%|██▋ | 1899/7045 [6:10:21<16:29:46, 11.54s/it] 27%|██▋ | 1900/7045 [6:10:32<16:21:23, 11.44s/it] {'loss': 1.167, 'learning_rate': 4.284148532552245e-06, 'epoch': 0.27} + 27%|██▋ | 1900/7045 [6:10:32<16:21:23, 11.44s/it] 27%|██▋ | 1901/7045 [6:10:44<16:18:01, 11.41s/it] {'loss': 1.1689, 'learning_rate': 4.283343184044777e-06, 'epoch': 0.27} + 27%|██▋ | 1901/7045 [6:10:44<16:18:01, 11.41s/it] 27%|██▋ | 1902/7045 [6:10:55<16:07:09, 11.28s/it] {'loss': 1.1055, 'learning_rate': 4.282537458562942e-06, 'epoch': 0.27} + 27%|██▋ | 1902/7045 [6:10:55<16:07:09, 11.28s/it] 27%|██▋ | 1903/7045 [6:11:06<16:02:25, 11.23s/it] {'loss': 1.165, 'learning_rate': 4.281731356277057e-06, 'epoch': 0.27} + 27%|██▋ | 1903/7045 [6:11:06<16:02:25, 11.23s/it] 27%|██▋ | 1904/7045 [6:11:19<16:47:22, 11.76s/it] {'loss': 1.1436, 'learning_rate': 4.280924877357521e-06, 'epoch': 0.27} + 27%|██▋ | 1904/7045 [6:11:19<16:47:22, 11.76s/it] 27%|██▋ | 1905/7045 [6:11:30<16:25:41, 11.51s/it] {'loss': 1.123, 'learning_rate': 4.280118021974814e-06, 'epoch': 0.27} + 27%|██▋ | 1905/7045 [6:11:30<16:25:41, 11.51s/it] 27%|██▋ | 1906/7045 [6:11:42<16:53:19, 11.83s/it] {'loss': 1.1963, 'learning_rate': 4.279310790299493e-06, 'epoch': 0.27} + 27%|██▋ | 1906/7045 [6:11:42<16:53:19, 11.83s/it] 27%|██▋ | 1907/7045 [6:11:53<16:26:56, 11.53s/it] {'loss': 1.1309, 'learning_rate': 4.278503182502197e-06, 'epoch': 0.27} + 27%|██▋ | 1907/7045 [6:11:53<16:26:56, 11.53s/it] 27%|██▋ | 1908/7045 [6:12:04<16:15:08, 11.39s/it] {'loss': 1.1631, 'learning_rate': 4.277695198753643e-06, 'epoch': 0.27} + 27%|██▋ | 1908/7045 [6:12:04<16:15:08, 11.39s/it] 27%|██▋ | 1909/7045 [6:12:16<16:19:56, 11.45s/it] {'loss': 1.1514, 'learning_rate': 4.276886839224626e-06, 'epoch': 0.27} + 27%|██▋ | 1909/7045 [6:12:16<16:19:56, 11.45s/it] 27%|██▋ | 1910/7045 [6:12:27<16:11:36, 11.35s/it] {'loss': 1.1152, 'learning_rate': 4.2760781040860245e-06, 'epoch': 0.27} + 27%|██▋ | 1910/7045 [6:12:27<16:11:36, 11.35s/it] 27%|██▋ | 1911/7045 [6:12:38<16:06:05, 11.29s/it] {'loss': 1.1719, 'learning_rate': 4.275268993508792e-06, 'epoch': 0.27} + 27%|██▋ | 1911/7045 [6:12:38<16:06:05, 11.29s/it] 27%|██▋ | 1912/7045 [6:12:49<15:57:52, 11.20s/it] {'loss': 1.1504, 'learning_rate': 4.274459507663965e-06, 'epoch': 0.27} + 27%|██▋ | 1912/7045 [6:12:49<15:57:52, 11.20s/it] 27%|██▋ | 1913/7045 [6:13:00<15:55:11, 11.17s/it] {'loss': 1.1748, 'learning_rate': 4.273649646722656e-06, 'epoch': 0.27} + 27%|██▋ | 1913/7045 [6:13:00<15:55:11, 11.17s/it] 27%|██▋ | 1914/7045 [6:13:11<15:57:40, 11.20s/it] {'loss': 1.1357, 'learning_rate': 4.272839410856061e-06, 'epoch': 0.27} + 27%|██▋ | 1914/7045 [6:13:11<15:57:40, 11.20s/it] 27%|██▋ | 1915/7045 [6:13:23<16:14:50, 11.40s/it] {'loss': 1.1465, 'learning_rate': 4.272028800235451e-06, 'epoch': 0.27} + 27%|██▋ | 1915/7045 [6:13:23<16:14:50, 11.40s/it] 27%|██▋ | 1916/7045 [6:13:34<16:04:16, 11.28s/it] {'loss': 1.127, 'learning_rate': 4.271217815032178e-06, 'epoch': 0.27} + 27%|██▋ | 1916/7045 [6:13:34<16:04:16, 11.28s/it] 27%|██▋ | 1917/7045 [6:13:45<16:05:09, 11.29s/it] {'loss': 1.1523, 'learning_rate': 4.270406455417674e-06, 'epoch': 0.27} + 27%|██▋ | 1917/7045 [6:13:45<16:05:09, 11.29s/it] 27%|██▋ | 1918/7045 [6:13:57<15:58:57, 11.22s/it] {'loss': 1.1279, 'learning_rate': 4.269594721563448e-06, 'epoch': 0.27} + 27%|██▋ | 1918/7045 [6:13:57<15:58:57, 11.22s/it] 27%|██▋ | 1919/7045 [6:14:09<16:29:23, 11.58s/it] {'loss': 1.1147, 'learning_rate': 4.268782613641091e-06, 'epoch': 0.27} + 27%|██▋ | 1919/7045 [6:14:09<16:29:23, 11.58s/it] 27%|██▋ | 1920/7045 [6:14:20<16:16:56, 11.44s/it] {'loss': 1.1572, 'learning_rate': 4.2679701318222714e-06, 'epoch': 0.27} + 27%|██▋ | 1920/7045 [6:14:20<16:16:56, 11.44s/it] 27%|██▋ | 1921/7045 [6:14:31<16:01:05, 11.25s/it] {'loss': 1.126, 'learning_rate': 4.267157276278736e-06, 'epoch': 0.27} + 27%|██▋ | 1921/7045 [6:14:31<16:01:05, 11.25s/it] 27%|██▋ | 1922/7045 [6:14:42<15:59:55, 11.24s/it] {'loss': 1.0889, 'learning_rate': 4.2663440471823115e-06, 'epoch': 0.27} + 27%|██▋ | 1922/7045 [6:14:42<15:59:55, 11.24s/it] 27%|██▋ | 1923/7045 [6:14:53<15:57:41, 11.22s/it] {'loss': 1.126, 'learning_rate': 4.265530444704903e-06, 'epoch': 0.27} + 27%|██▋ | 1923/7045 [6:14:53<15:57:41, 11.22s/it] 27%|██▋ | 1924/7045 [6:15:05<15:59:17, 11.24s/it] {'loss': 1.1455, 'learning_rate': 4.264716469018496e-06, 'epoch': 0.27} + 27%|██▋ | 1924/7045 [6:15:05<15:59:17, 11.24s/it] 27%|██▋ | 1925/7045 [6:15:16<15:53:20, 11.17s/it] {'loss': 1.1064, 'learning_rate': 4.263902120295154e-06, 'epoch': 0.27} + 27%|██▋ | 1925/7045 [6:15:16<15:53:20, 11.17s/it] 27%|██▋ | 1926/7045 [6:15:29<16:42:35, 11.75s/it] {'loss': 1.0986, 'learning_rate': 4.263087398707017e-06, 'epoch': 0.27} + 27%|██▋ | 1926/7045 [6:15:29<16:42:35, 11.75s/it] 27%|██▋ | 1927/7045 [6:15:41<17:06:33, 12.03s/it] {'loss': 1.1343, 'learning_rate': 4.262272304426308e-06, 'epoch': 0.27} + 27%|██▋ | 1927/7045 [6:15:41<17:06:33, 12.03s/it] 27%|██▋ | 1928/7045 [6:15:54<17:13:57, 12.12s/it] {'loss': 1.0889, 'learning_rate': 4.261456837625326e-06, 'epoch': 0.27} + 27%|██▋ | 1928/7045 [6:15:54<17:13:57, 12.12s/it] 27%|██▋ | 1929/7045 [6:16:05<16:47:46, 11.82s/it] {'loss': 1.1348, 'learning_rate': 4.2606409984764494e-06, 'epoch': 0.27} + 27%|██▋ | 1929/7045 [6:16:05<16:47:46, 11.82s/it] 27%|██▋ | 1930/7045 [6:16:16<16:32:17, 11.64s/it] {'loss': 1.1328, 'learning_rate': 4.259824787152137e-06, 'epoch': 0.27} + 27%|██▋ | 1930/7045 [6:16:16<16:32:17, 11.64s/it] 27%|██▋ | 1931/7045 [6:16:27<16:22:41, 11.53s/it] {'loss': 1.1729, 'learning_rate': 4.259008203824923e-06, 'epoch': 0.27} + 27%|██▋ | 1931/7045 [6:16:27<16:22:41, 11.53s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 2 bytes but only got 0. + warnings.warn(str(msg)) + 27%|██▋ | 1932/7045 [6:16:38<16:07:25, 11.35s/it] {'loss': 1.1318, 'learning_rate': 4.258191248667423e-06, 'epoch': 0.27} + 27%|██▋ | 1932/7045 [6:16:38<16:07:25, 11.35s/it] 27%|██▋ | 1933/7045 [6:16:52<17:09:13, 12.08s/it] {'loss': 1.1357, 'learning_rate': 4.2573739218523284e-06, 'epoch': 0.27} + 27%|██▋ | 1933/7045 [6:16:52<17:09:13, 12.08s/it] 27%|██▋ | 1934/7045 [6:17:04<17:02:36, 12.00s/it] {'loss': 1.105, 'learning_rate': 4.256556223552413e-06, 'epoch': 0.27} + 27%|██▋ | 1934/7045 [6:17:04<17:02:36, 12.00s/it] 27%|██▋ | 1935/7045 [6:17:15<16:44:06, 11.79s/it] {'loss': 1.1475, 'learning_rate': 4.255738153940526e-06, 'epoch': 0.27} + 27%|██▋ | 1935/7045 [6:17:15<16:44:06, 11.79s/it] 27%|██▋ | 1936/7045 [6:17:26<16:30:15, 11.63s/it] {'loss': 1.1309, 'learning_rate': 4.254919713189596e-06, 'epoch': 0.27} + 27%|██▋ | 1936/7045 [6:17:26<16:30:15, 11.63s/it] 27%|██▋ | 1937/7045 [6:17:39<17:03:56, 12.03s/it] {'loss': 1.1401, 'learning_rate': 4.254100901472631e-06, 'epoch': 0.27} + 27%|██▋ | 1937/7045 [6:17:39<17:03:56, 12.03s/it] 28%|██▊ | 1938/7045 [6:17:51<16:44:35, 11.80s/it] {'loss': 1.1064, 'learning_rate': 4.253281718962716e-06, 'epoch': 0.28} + 28%|██▊ | 1938/7045 [6:17:51<16:44:35, 11.80s/it] 28%|██▊ | 1939/7045 [6:18:01<16:17:14, 11.48s/it] {'loss': 1.1123, 'learning_rate': 4.252462165833015e-06, 'epoch': 0.28} + 28%|██▊ | 1939/7045 [6:18:01<16:17:14, 11.48s/it] 28%|██▊ | 1940/7045 [6:18:14<16:47:42, 11.84s/it] {'loss': 1.1104, 'learning_rate': 4.25164224225677e-06, 'epoch': 0.28} + 28%|██▊ | 1940/7045 [6:18:14<16:47:42, 11.84s/it] 28%|██▊ | 1941/7045 [6:18:25<16:29:04, 11.63s/it] {'loss': 1.1406, 'learning_rate': 4.250821948407303e-06, 'epoch': 0.28} + 28%|██▊ | 1941/7045 [6:18:25<16:29:04, 11.63s/it] 28%|██▊ | 1942/7045 [6:18:37<16:28:45, 11.63s/it] {'loss': 1.0869, 'learning_rate': 4.250001284458012e-06, 'epoch': 0.28} + 28%|██▊ | 1942/7045 [6:18:37<16:28:45, 11.63s/it] 28%|██▊ | 1943/7045 [6:18:48<16:19:08, 11.51s/it] {'loss': 1.1025, 'learning_rate': 4.249180250582373e-06, 'epoch': 0.28} + 28%|██▊ | 1943/7045 [6:18:48<16:19:08, 11.51s/it] 28%|██▊ | 1944/7045 [6:18:59<16:01:49, 11.31s/it] {'loss': 1.1113, 'learning_rate': 4.248358846953944e-06, 'epoch': 0.28} + 28%|██▊ | 1944/7045 [6:18:59<16:01:49, 11.31s/it] 28%|██▊ | 1945/7045 [6:19:12<16:46:19, 11.84s/it] {'loss': 1.1377, 'learning_rate': 4.247537073746355e-06, 'epoch': 0.28} + 28%|██▊ | 1945/7045 [6:19:12<16:46:19, 11.84s/it] 28%|██▊ | 1946/7045 [6:19:23<16:29:55, 11.65s/it] {'loss': 1.1064, 'learning_rate': 4.246714931133321e-06, 'epoch': 0.28} + 28%|██▊ | 1946/7045 [6:19:23<16:29:55, 11.65s/it] 28%|██▊ | 1947/7045 [6:19:34<16:12:53, 11.45s/it] {'loss': 1.125, 'learning_rate': 4.24589241928863e-06, 'epoch': 0.28} + 28%|██▊ | 1947/7045 [6:19:34<16:12:53, 11.45s/it] 28%|██▊ | 1948/7045 [6:19:47<16:47:19, 11.86s/it] {'loss': 1.1011, 'learning_rate': 4.245069538386149e-06, 'epoch': 0.28} + 28%|██▊ | 1948/7045 [6:19:47<16:47:19, 11.86s/it] 28%|██▊ | 1949/7045 [6:19:58<16:25:55, 11.61s/it] {'loss': 1.1182, 'learning_rate': 4.2442462885998265e-06, 'epoch': 0.28} + 28%|██▊ | 1949/7045 [6:19:58<16:25:55, 11.61s/it] 28%|██▊ | 1950/7045 [6:20:09<16:18:31, 11.52s/it] {'loss': 1.1465, 'learning_rate': 4.243422670103684e-06, 'epoch': 0.28} + 28%|██▊ | 1950/7045 [6:20:09<16:18:31, 11.52s/it] 28%|██▊ | 1951/7045 [6:20:20<16:06:36, 11.39s/it] {'loss': 1.1064, 'learning_rate': 4.242598683071823e-06, 'epoch': 0.28} + 28%|██▊ | 1951/7045 [6:20:20<16:06:36, 11.39s/it] 28%|██▊ | 1952/7045 [6:20:31<15:50:58, 11.20s/it] {'loss': 1.0889, 'learning_rate': 4.241774327678424e-06, 'epoch': 0.28} + 28%|██▊ | 1952/7045 [6:20:31<15:50:58, 11.20s/it] 28%|██▊ | 1953/7045 [6:20:42<15:52:15, 11.22s/it] {'loss': 1.1045, 'learning_rate': 4.2409496040977445e-06, 'epoch': 0.28} + 28%|██▊ | 1953/7045 [6:20:42<15:52:15, 11.22s/it] 28%|██▊ | 1954/7045 [6:20:54<15:53:07, 11.23s/it] {'loss': 1.1045, 'learning_rate': 4.24012451250412e-06, 'epoch': 0.28} + 28%|██▊ | 1954/7045 [6:20:54<15:53:07, 11.23s/it] 28%|██▊ | 1955/7045 [6:21:06<16:32:13, 11.70s/it] {'loss': 1.124, 'learning_rate': 4.239299053071963e-06, 'epoch': 0.28} + 28%|██▊ | 1955/7045 [6:21:06<16:32:13, 11.70s/it] 28%|██▊ | 1956/7045 [6:21:18<16:23:28, 11.60s/it] {'loss': 1.1436, 'learning_rate': 4.238473225975764e-06, 'epoch': 0.28} + 28%|██▊ | 1956/7045 [6:21:18<16:23:28, 11.60s/it] 28%|██▊ | 1957/7045 [6:21:29<16:04:15, 11.37s/it] {'loss': 1.1504, 'learning_rate': 4.237647031390093e-06, 'epoch': 0.28} + 28%|██▊ | 1957/7045 [6:21:29<16:04:15, 11.37s/it] 28%|██▊ | 1958/7045 [6:21:40<15:52:21, 11.23s/it] {'loss': 1.1201, 'learning_rate': 4.236820469489595e-06, 'epoch': 0.28} + 28%|██▊ | 1958/7045 [6:21:40<15:52:21, 11.23s/it] 28%|██▊ | 1959/7045 [6:21:51<16:08:44, 11.43s/it] {'loss': 1.1318, 'learning_rate': 4.235993540448993e-06, 'epoch': 0.28} + 28%|██▊ | 1959/7045 [6:21:51<16:08:44, 11.43s/it] 28%|██▊ | 1960/7045 [6:22:04<16:50:13, 11.92s/it] {'loss': 1.0918, 'learning_rate': 4.235166244443091e-06, 'epoch': 0.28} + 28%|██▊ | 1960/7045 [6:22:04<16:50:13, 11.92s/it] 28%|██▊ | 1961/7045 [6:22:17<16:52:58, 11.95s/it] {'loss': 1.1426, 'learning_rate': 4.234338581646767e-06, 'epoch': 0.28} + 28%|██▊ | 1961/7045 [6:22:17<16:52:58, 11.95s/it] 28%|██▊ | 1962/7045 [6:22:30<17:34:54, 12.45s/it] {'loss': 1.1396, 'learning_rate': 4.233510552234976e-06, 'epoch': 0.28} + 28%|██▊ | 1962/7045 [6:22:30<17:34:54, 12.45s/it] 28%|██▊ | 1963/7045 [6:22:42<17:07:21, 12.13s/it] {'loss': 1.1172, 'learning_rate': 4.232682156382754e-06, 'epoch': 0.28} + 28%|██▊ | 1963/7045 [6:22:42<17:07:21, 12.13s/it] 28%|██▊ | 1964/7045 [6:22:54<17:17:38, 12.25s/it] {'loss': 1.1011, 'learning_rate': 4.231853394265212e-06, 'epoch': 0.28} + 28%|██▊ | 1964/7045 [6:22:54<17:17:38, 12.25s/it] 28%|██▊ | 1965/7045 [6:23:08<17:52:54, 12.67s/it] {'loss': 1.1553, 'learning_rate': 4.231024266057539e-06, 'epoch': 0.28} + 28%|██▊ | 1965/7045 [6:23:08<17:52:54, 12.67s/it] 28%|██▊ | 1966/7045 [6:23:19<17:16:08, 12.24s/it] {'loss': 1.1182, 'learning_rate': 4.230194771935002e-06, 'epoch': 0.28} + 28%|██▊ | 1966/7045 [6:23:19<17:16:08, 12.24s/it] 28%|██▊ | 1967/7045 [6:23:31<17:15:54, 12.24s/it] {'loss': 1.1309, 'learning_rate': 4.229364912072943e-06, 'epoch': 0.28} + 28%|██▊ | 1967/7045 [6:23:31<17:15:54, 12.24s/it] 28%|██▊ | 1968/7045 [6:23:44<17:35:44, 12.48s/it] {'loss': 1.0938, 'learning_rate': 4.228534686646785e-06, 'epoch': 0.28} + 28%|██▊ | 1968/7045 [6:23:44<17:35:44, 12.48s/it] 28%|██▊ | 1969/7045 [6:23:56<17:07:58, 12.15s/it] {'loss': 1.1338, 'learning_rate': 4.227704095832025e-06, 'epoch': 0.28} + 28%|██▊ | 1969/7045 [6:23:56<17:07:58, 12.15s/it] 28%|██▊ | 1970/7045 [6:24:09<17:36:18, 12.49s/it] {'loss': 1.1333, 'learning_rate': 4.226873139804239e-06, 'epoch': 0.28} + 28%|██▊ | 1970/7045 [6:24:09<17:36:18, 12.49s/it] 28%|██▊ | 1971/7045 [6:24:24<18:31:59, 13.15s/it] {'loss': 1.0742, 'learning_rate': 4.226041818739079e-06, 'epoch': 0.28} + 28%|██▊ | 1971/7045 [6:24:24<18:31:59, 13.15s/it] 28%|██▊ | 1972/7045 [6:24:36<18:22:38, 13.04s/it] {'loss': 1.1318, 'learning_rate': 4.225210132812277e-06, 'epoch': 0.28} + 28%|██▊ | 1972/7045 [6:24:36<18:22:38, 13.04s/it] 28%|██▊ | 1973/7045 [6:24:47<17:31:18, 12.44s/it] {'loss': 1.1582, 'learning_rate': 4.2243780821996385e-06, 'epoch': 0.28} + 28%|██▊ | 1973/7045 [6:24:47<17:31:18, 12.44s/it] 28%|██▊ | 1974/7045 [6:25:00<17:27:25, 12.39s/it] {'loss': 1.1133, 'learning_rate': 4.223545667077047e-06, 'epoch': 0.28} + 28%|██▊ | 1974/7045 [6:25:00<17:27:25, 12.39s/it] 28%|██▊ | 1975/7045 [6:25:11<17:06:05, 12.14s/it] {'loss': 1.1289, 'learning_rate': 4.2227128876204644e-06, 'epoch': 0.28} + 28%|██▊ | 1975/7045 [6:25:11<17:06:05, 12.14s/it] 28%|██▊ | 1976/7045 [6:25:22<16:43:16, 11.88s/it] {'loss': 1.1289, 'learning_rate': 4.221879744005929e-06, 'epoch': 0.28} + 28%|██▊ | 1976/7045 [6:25:22<16:43:16, 11.88s/it] 28%|██▊ | 1977/7045 [6:25:35<17:07:35, 12.17s/it] {'loss': 1.1543, 'learning_rate': 4.221046236409556e-06, 'epoch': 0.28} + 28%|██▊ | 1977/7045 [6:25:35<17:07:35, 12.17s/it] 28%|██▊ | 1978/7045 [6:25:48<17:15:39, 12.26s/it] {'loss': 1.1045, 'learning_rate': 4.2202123650075374e-06, 'epoch': 0.28} + 28%|██▊ | 1978/7045 [6:25:48<17:15:39, 12.26s/it] 28%|██▊ | 1979/7045 [6:26:01<17:47:40, 12.65s/it] {'loss': 1.125, 'learning_rate': 4.219378129976142e-06, 'epoch': 0.28} + 28%|██▊ | 1979/7045 [6:26:01<17:47:40, 12.65s/it] 28%|██▊ | 1980/7045 [6:26:12<17:06:54, 12.16s/it] {'loss': 1.1177, 'learning_rate': 4.218543531491717e-06, 'epoch': 0.28} + 28%|██▊ | 1980/7045 [6:26:12<17:06:54, 12.16s/it] 28%|██▊ | 1981/7045 [6:26:24<16:47:16, 11.93s/it] {'loss': 1.1592, 'learning_rate': 4.217708569730683e-06, 'epoch': 0.28} + 28%|██▊ | 1981/7045 [6:26:24<16:47:16, 11.93s/it] 28%|██▊ | 1982/7045 [6:26:37<17:09:52, 12.20s/it] {'loss': 1.0854, 'learning_rate': 4.216873244869541e-06, 'epoch': 0.28} + 28%|██▊ | 1982/7045 [6:26:37<17:09:52, 12.20s/it] 28%|██▊ | 1983/7045 [6:26:48<16:42:00, 11.88s/it] {'loss': 1.1357, 'learning_rate': 4.2160375570848665e-06, 'epoch': 0.28} + 28%|██▊ | 1983/7045 [6:26:48<16:42:00, 11.88s/it] 28%|██▊ | 1984/7045 [6:26:59<16:37:09, 11.82s/it] {'loss': 1.1182, 'learning_rate': 4.2152015065533125e-06, 'epoch': 0.28} + 28%|██▊ | 1984/7045 [6:26:59<16:37:09, 11.82s/it] 28%|██▊ | 1985/7045 [6:27:10<16:17:36, 11.59s/it] {'loss': 1.1123, 'learning_rate': 4.214365093451609e-06, 'epoch': 0.28} + 28%|██▊ | 1985/7045 [6:27:10<16:17:36, 11.59s/it] 28%|██▊ | 1986/7045 [6:27:24<16:58:35, 12.08s/it] {'loss': 1.1104, 'learning_rate': 4.213528317956563e-06, 'epoch': 0.28} + 28%|██▊ | 1986/7045 [6:27:24<16:58:35, 12.08s/it] 28%|██▊ | 1987/7045 [6:27:35<16:35:51, 11.81s/it] {'loss': 1.1104, 'learning_rate': 4.212691180245056e-06, 'epoch': 0.28} + 28%|██▊ | 1987/7045 [6:27:35<16:35:51, 11.81s/it] 28%|██▊ | 1988/7045 [6:27:46<16:11:57, 11.53s/it] {'loss': 1.1318, 'learning_rate': 4.211853680494048e-06, 'epoch': 0.28} + 28%|██▊ | 1988/7045 [6:27:46<16:11:57, 11.53s/it] 28%|██▊ | 1989/7045 [6:27:57<16:14:32, 11.56s/it] {'loss': 1.0996, 'learning_rate': 4.211015818880575e-06, 'epoch': 0.28} + 28%|██▊ | 1989/7045 [6:27:57<16:14:32, 11.56s/it] 28%|██▊ | 1990/7045 [6:28:09<16:09:51, 11.51s/it] {'loss': 1.1377, 'learning_rate': 4.210177595581749e-06, 'epoch': 0.28} + 28%|██▊ | 1990/7045 [6:28:09<16:09:51, 11.51s/it] 28%|██▊ | 1991/7045 [6:28:22<16:45:38, 11.94s/it] {'loss': 1.1201, 'learning_rate': 4.209339010774761e-06, 'epoch': 0.28} + 28%|██▊ | 1991/7045 [6:28:22<16:45:38, 11.94s/it] 28%|██▊ | 1992/7045 [6:28:33<16:26:51, 11.72s/it] {'loss': 1.1543, 'learning_rate': 4.208500064636874e-06, 'epoch': 0.28} + 28%|██▊ | 1992/7045 [6:28:33<16:26:51, 11.72s/it] 28%|██▊ | 1993/7045 [6:28:45<16:40:34, 11.88s/it] {'loss': 1.1084, 'learning_rate': 4.2076607573454304e-06, 'epoch': 0.28} + 28%|██▊ | 1993/7045 [6:28:45<16:40:34, 11.88s/it] 28%|██▊ | 1994/7045 [6:28:59<17:15:49, 12.30s/it] {'loss': 1.1396, 'learning_rate': 4.206821089077848e-06, 'epoch': 0.28} + 28%|██▊ | 1994/7045 [6:28:59<17:15:49, 12.30s/it] 28%|██▊ | 1995/7045 [6:29:10<16:58:01, 12.10s/it] {'loss': 1.1338, 'learning_rate': 4.2059810600116225e-06, 'epoch': 0.28} + 28%|██▊ | 1995/7045 [6:29:10<16:58:01, 12.10s/it] 28%|██▊ | 1996/7045 [6:29:21<16:27:39, 11.74s/it] {'loss': 1.1621, 'learning_rate': 4.205140670324324e-06, 'epoch': 0.28} + 28%|██▊ | 1996/7045 [6:29:21<16:27:39, 11.74s/it] 28%|██▊ | 1997/7045 [6:29:32<16:09:32, 11.52s/it] {'loss': 1.125, 'learning_rate': 4.204299920193599e-06, 'epoch': 0.28} + 28%|██▊ | 1997/7045 [6:29:32<16:09:32, 11.52s/it] 28%|██▊ | 1998/7045 [6:29:43<15:53:31, 11.34s/it] {'loss': 1.1182, 'learning_rate': 4.20345880979717e-06, 'epoch': 0.28} + 28%|██▊ | 1998/7045 [6:29:43<15:53:31, 11.34s/it] 28%|██▊ | 1999/7045 [6:29:54<15:51:49, 11.32s/it] {'loss': 1.1475, 'learning_rate': 4.202617339312838e-06, 'epoch': 0.28} + 28%|██▊ | 1999/7045 [6:29:54<15:51:49, 11.32s/it] 28%|██▊ | 2000/7045 [6:30:06<16:02:44, 11.45s/it] {'loss': 1.1738, 'learning_rate': 4.201775508918477e-06, 'epoch': 0.28} + 28%|██▊ | 2000/7045 [6:30:06<16:02:44, 11.45s/it]/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 28%|██▊ | 2001/7045 [6:30:49<29:19:20, 20.93s/it] {'loss': 1.1045, 'learning_rate': 4.2009333187920375e-06, 'epoch': 0.28} + 28%|██▊ | 2001/7045 [6:30:49<29:19:20, 20.93s/it] 28%|██▊ | 2002/7045 [6:31:02<25:50:12, 18.44s/it] {'loss': 1.0596, 'learning_rate': 4.20009076911155e-06, 'epoch': 0.28} + 28%|██▊ | 2002/7045 [6:31:02<25:50:12, 18.44s/it] 28%|██▊ | 2003/7045 [6:31:13<22:49:17, 16.29s/it] {'loss': 1.1172, 'learning_rate': 4.199247860055115e-06, 'epoch': 0.28} + 28%|██▊ | 2003/7045 [6:31:13<22:49:17, 16.29s/it] 28%|██▊ | 2004/7045 [6:31:24<20:41:07, 14.77s/it] {'loss': 1.147, 'learning_rate': 4.198404591800913e-06, 'epoch': 0.28} + 28%|██▊ | 2004/7045 [6:31:24<20:41:07, 14.77s/it] 28%|██▊ | 2005/7045 [6:31:36<19:33:00, 13.96s/it] {'loss': 1.0967, 'learning_rate': 4.197560964527201e-06, 'epoch': 0.28} + 28%|██▊ | 2005/7045 [6:31:36<19:33:00, 13.96s/it] 28%|██▊ | 2006/7045 [6:31:47<18:17:01, 13.06s/it] {'loss': 1.1182, 'learning_rate': 4.196716978412307e-06, 'epoch': 0.28} + 28%|██▊ | 2006/7045 [6:31:47<18:17:01, 13.06s/it] 28%|██▊ | 2007/7045 [6:31:58<17:27:05, 12.47s/it] {'loss': 1.1304, 'learning_rate': 4.195872633634641e-06, 'epoch': 0.28} + 28%|██▊ | 2007/7045 [6:31:58<17:27:05, 12.47s/it] 29%|██▊ | 2008/7045 [6:32:12<17:50:12, 12.75s/it] {'loss': 1.0908, 'learning_rate': 4.195027930372685e-06, 'epoch': 0.29} + 29%|██▊ | 2008/7045 [6:32:12<17:50:12, 12.75s/it] 29%|██▊ | 2009/7045 [6:32:24<17:32:26, 12.54s/it] {'loss': 1.1475, 'learning_rate': 4.194182868804997e-06, 'epoch': 0.29} + 29%|██▊ | 2009/7045 [6:32:24<17:32:26, 12.54s/it] 29%|██▊ | 2010/7045 [6:32:35<16:54:19, 12.09s/it] {'loss': 1.1533, 'learning_rate': 4.193337449110213e-06, 'epoch': 0.29} + 29%|██▊ | 2010/7045 [6:32:35<16:54:19, 12.09s/it] 29%|██▊ | 2011/7045 [6:32:46<16:37:03, 11.88s/it] {'loss': 1.1436, 'learning_rate': 4.192491671467041e-06, 'epoch': 0.29} + 29%|██▊ | 2011/7045 [6:32:46<16:37:03, 11.88s/it] 29%|██▊ | 2012/7045 [6:32:59<16:57:26, 12.13s/it] {'loss': 1.1133, 'learning_rate': 4.191645536054268e-06, 'epoch': 0.29} + 29%|██▊ | 2012/7045 [6:32:59<16:57:26, 12.13s/it] 29%|██▊ | 2013/7045 [6:33:11<16:54:53, 12.10s/it] {'loss': 1.1465, 'learning_rate': 4.190799043050757e-06, 'epoch': 0.29} + 29%|██▊ | 2013/7045 [6:33:11<16:54:53, 12.10s/it] 29%|██▊ | 2014/7045 [6:33:23<16:52:48, 12.08s/it] {'loss': 1.1313, 'learning_rate': 4.189952192635443e-06, 'epoch': 0.29} + 29%|██▊ | 2014/7045 [6:33:23<16:52:48, 12.08s/it] 29%|██▊ | 2015/7045 [6:33:36<17:05:55, 12.24s/it] {'loss': 1.1484, 'learning_rate': 4.189104984987339e-06, 'epoch': 0.29} + 29%|██▊ | 2015/7045 [6:33:36<17:05:55, 12.24s/it] 29%|██▊ | 2016/7045 [6:33:48<17:12:00, 12.31s/it] {'loss': 1.1025, 'learning_rate': 4.1882574202855334e-06, 'epoch': 0.29} + 29%|██▊ | 2016/7045 [6:33:48<17:12:00, 12.31s/it] 29%|██▊ | 2017/7045 [6:33:59<16:44:55, 11.99s/it] {'loss': 1.1318, 'learning_rate': 4.18740949870919e-06, 'epoch': 0.29} + 29%|██▊ | 2017/7045 [6:33:59<16:44:55, 11.99s/it] 29%|██▊ | 2018/7045 [6:34:12<17:07:23, 12.26s/it] {'loss': 1.1455, 'learning_rate': 4.186561220437546e-06, 'epoch': 0.29} + 29%|██▊ | 2018/7045 [6:34:12<17:07:23, 12.26s/it] 29%|██▊ | 2019/7045 [6:34:23<16:33:47, 11.86s/it] {'loss': 1.1328, 'learning_rate': 4.185712585649919e-06, 'epoch': 0.29} + 29%|██▊ | 2019/7045 [6:34:23<16:33:47, 11.86s/it] 29%|██▊ | 2020/7045 [6:34:35<16:44:09, 11.99s/it] {'loss': 1.0996, 'learning_rate': 4.184863594525697e-06, 'epoch': 0.29} + 29%|██▊ | 2020/7045 [6:34:35<16:44:09, 11.99s/it] 29%|██▊ | 2021/7045 [6:34:48<17:00:23, 12.19s/it] {'loss': 1.1279, 'learning_rate': 4.184014247244344e-06, 'epoch': 0.29} + 29%|██▊ | 2021/7045 [6:34:48<17:00:23, 12.19s/it] 29%|██▊ | 2022/7045 [6:35:00<16:54:59, 12.12s/it] {'loss': 1.1299, 'learning_rate': 4.183164543985402e-06, 'epoch': 0.29} + 29%|██▊ | 2022/7045 [6:35:00<16:54:59, 12.12s/it] 29%|██▊ | 2023/7045 [6:35:12<16:47:13, 12.03s/it] {'loss': 1.1147, 'learning_rate': 4.182314484928487e-06, 'epoch': 0.29} + 29%|██▊ | 2023/7045 [6:35:12<16:47:13, 12.03s/it] 29%|██▊ | 2024/7045 [6:35:23<16:16:24, 11.67s/it] {'loss': 1.1328, 'learning_rate': 4.181464070253289e-06, 'epoch': 0.29} + 29%|██▊ | 2024/7045 [6:35:23<16:16:24, 11.67s/it] 29%|██▊ | 2025/7045 [6:35:34<16:05:29, 11.54s/it] {'loss': 1.1279, 'learning_rate': 4.180613300139575e-06, 'epoch': 0.29} + 29%|██▊ | 2025/7045 [6:35:34<16:05:29, 11.54s/it] 29%|██▉ | 2026/7045 [6:35:45<15:49:58, 11.36s/it] {'loss': 1.1406, 'learning_rate': 4.1797621747671836e-06, 'epoch': 0.29} + 29%|██▉ | 2026/7045 [6:35:45<15:49:58, 11.36s/it] 29%|██▉ | 2027/7045 [6:35:57<15:59:33, 11.47s/it] {'loss': 1.1338, 'learning_rate': 4.178910694316034e-06, 'epoch': 0.29} + 29%|██▉ | 2027/7045 [6:35:57<15:59:33, 11.47s/it] 29%|██▉ | 2028/7045 [6:36:08<15:49:09, 11.35s/it] {'loss': 1.1084, 'learning_rate': 4.178058858966116e-06, 'epoch': 0.29} + 29%|██▉ | 2028/7045 [6:36:08<15:49:09, 11.35s/it] 29%|██▉ | 2029/7045 [6:36:19<15:40:03, 11.24s/it] {'loss': 1.0859, 'learning_rate': 4.177206668897496e-06, 'epoch': 0.29} + 29%|██▉ | 2029/7045 [6:36:19<15:40:03, 11.24s/it] 29%|██▉ | 2030/7045 [6:36:31<16:07:43, 11.58s/it] {'loss': 1.0977, 'learning_rate': 4.176354124290316e-06, 'epoch': 0.29} + 29%|██▉ | 2030/7045 [6:36:31<16:07:43, 11.58s/it] 29%|██▉ | 2031/7045 [6:36:42<15:52:40, 11.40s/it] {'loss': 1.1172, 'learning_rate': 4.175501225324792e-06, 'epoch': 0.29} + 29%|██▉ | 2031/7045 [6:36:42<15:52:40, 11.40s/it] 29%|██▉ | 2032/7045 [6:36:53<15:47:18, 11.34s/it] {'loss': 1.1177, 'learning_rate': 4.174647972181215e-06, 'epoch': 0.29} + 29%|██▉ | 2032/7045 [6:36:53<15:47:18, 11.34s/it] 29%|██▉ | 2033/7045 [6:37:05<16:00:16, 11.50s/it] {'loss': 1.1406, 'learning_rate': 4.173794365039951e-06, 'epoch': 0.29} + 29%|██▉ | 2033/7045 [6:37:05<16:00:16, 11.50s/it] 29%|██▉ | 2034/7045 [6:37:16<15:48:27, 11.36s/it] {'loss': 1.1631, 'learning_rate': 4.172940404081441e-06, 'epoch': 0.29} + 29%|██▉ | 2034/7045 [6:37:16<15:48:27, 11.36s/it] 29%|██▉ | 2035/7045 [6:37:27<15:37:59, 11.23s/it] {'loss': 1.1084, 'learning_rate': 4.172086089486201e-06, 'epoch': 0.29} + 29%|██▉ | 2035/7045 [6:37:27<15:37:59, 11.23s/it] 29%|██▉ | 2036/7045 [6:37:39<15:48:17, 11.36s/it] {'loss': 1.1445, 'learning_rate': 4.171231421434821e-06, 'epoch': 0.29} + 29%|██▉ | 2036/7045 [6:37:39<15:48:17, 11.36s/it] 29%|██▉ | 2037/7045 [6:37:50<15:42:22, 11.29s/it] {'loss': 1.1377, 'learning_rate': 4.170376400107966e-06, 'epoch': 0.29} + 29%|██▉ | 2037/7045 [6:37:50<15:42:22, 11.29s/it] 29%|██▉ | 2038/7045 [6:38:01<15:43:13, 11.30s/it] {'loss': 1.168, 'learning_rate': 4.169521025686377e-06, 'epoch': 0.29} + 29%|██▉ | 2038/7045 [6:38:01<15:43:13, 11.30s/it] 29%|██▉ | 2039/7045 [6:38:13<16:01:27, 11.52s/it] {'loss': 1.0713, 'learning_rate': 4.168665298350867e-06, 'epoch': 0.29} + 29%|██▉ | 2039/7045 [6:38:13<16:01:27, 11.52s/it] 29%|██▉ | 2040/7045 [6:38:24<15:54:58, 11.45s/it] {'loss': 1.1123, 'learning_rate': 4.167809218282325e-06, 'epoch': 0.29} + 29%|██▉ | 2040/7045 [6:38:24<15:54:58, 11.45s/it] 29%|██▉ | 2041/7045 [6:38:38<16:38:16, 11.97s/it] {'loss': 1.0933, 'learning_rate': 4.166952785661716e-06, 'epoch': 0.29} + 29%|██▉ | 2041/7045 [6:38:38<16:38:16, 11.97s/it] 29%|██▉ | 2042/7045 [6:38:51<17:05:17, 12.30s/it] {'loss': 1.1162, 'learning_rate': 4.166096000670077e-06, 'epoch': 0.29} + 29%|██▉ | 2042/7045 [6:38:51<17:05:17, 12.30s/it] 29%|██▉ | 2043/7045 [6:39:02<16:41:51, 12.02s/it] {'loss': 1.124, 'learning_rate': 4.165238863488521e-06, 'epoch': 0.29} + 29%|██▉ | 2043/7045 [6:39:02<16:41:51, 12.02s/it] 29%|██▉ | 2044/7045 [6:39:13<16:23:48, 11.80s/it] {'loss': 1.1143, 'learning_rate': 4.164381374298235e-06, 'epoch': 0.29} + 29%|██▉ | 2044/7045 [6:39:13<16:23:48, 11.80s/it] 29%|██▉ | 2045/7045 [6:39:25<16:12:24, 11.67s/it] {'loss': 1.2021, 'learning_rate': 4.163523533280481e-06, 'epoch': 0.29} + 29%|██▉ | 2045/7045 [6:39:25<16:12:24, 11.67s/it] 29%|██▉ | 2046/7045 [6:39:36<16:02:29, 11.55s/it] {'loss': 1.1377, 'learning_rate': 4.162665340616594e-06, 'epoch': 0.29} + 29%|██▉ | 2046/7045 [6:39:36<16:02:29, 11.55s/it] 29%|██▉ | 2047/7045 [6:39:47<15:59:35, 11.52s/it] {'loss': 1.1309, 'learning_rate': 4.161806796487985e-06, 'epoch': 0.29} + 29%|██▉ | 2047/7045 [6:39:47<15:59:35, 11.52s/it] 29%|██▉ | 2048/7045 [6:39:59<16:05:21, 11.59s/it] {'loss': 1.1143, 'learning_rate': 4.160947901076139e-06, 'epoch': 0.29} + 29%|██▉ | 2048/7045 [6:39:59<16:05:21, 11.59s/it] 29%|██▉ | 2049/7045 [6:40:12<16:27:40, 11.86s/it] {'loss': 1.1494, 'learning_rate': 4.160088654562613e-06, 'epoch': 0.29} + 29%|██▉ | 2049/7045 [6:40:12<16:27:40, 11.86s/it] 29%|██▉ | 2050/7045 [6:40:23<16:16:55, 11.73s/it] {'loss': 1.1387, 'learning_rate': 4.15922905712904e-06, 'epoch': 0.29} + 29%|██▉ | 2050/7045 [6:40:23<16:16:55, 11.73s/it] 29%|██▉ | 2051/7045 [6:40:34<16:03:27, 11.58s/it] {'loss': 1.1299, 'learning_rate': 4.158369108957129e-06, 'epoch': 0.29} + 29%|██▉ | 2051/7045 [6:40:34<16:03:27, 11.58s/it] 29%|██▉ | 2052/7045 [6:40:46<15:54:02, 11.46s/it] {'loss': 1.1641, 'learning_rate': 4.15750881022866e-06, 'epoch': 0.29} + 29%|██▉ | 2052/7045 [6:40:46<15:54:02, 11.46s/it] 29%|██▉ | 2053/7045 [6:40:57<15:51:34, 11.44s/it] {'loss': 1.124, 'learning_rate': 4.156648161125489e-06, 'epoch': 0.29} + 29%|██▉ | 2053/7045 [6:40:57<15:51:34, 11.44s/it] 29%|██▉ | 2054/7045 [6:41:08<15:45:42, 11.37s/it] {'loss': 1.1494, 'learning_rate': 4.155787161829544e-06, 'epoch': 0.29} + 29%|██▉ | 2054/7045 [6:41:08<15:45:42, 11.37s/it] 29%|██▉ | 2055/7045 [6:41:19<15:42:08, 11.33s/it] {'loss': 1.1416, 'learning_rate': 4.154925812522831e-06, 'epoch': 0.29} + 29%|██▉ | 2055/7045 [6:41:19<15:42:08, 11.33s/it] 29%|██▉ | 2056/7045 [6:41:30<15:37:19, 11.27s/it] {'loss': 1.126, 'learning_rate': 4.154064113387426e-06, 'epoch': 0.29} + 29%|██▉ | 2056/7045 [6:41:30<15:37:19, 11.27s/it] 29%|██▉ | 2057/7045 [6:41:42<15:34:49, 11.24s/it] {'loss': 1.1396, 'learning_rate': 4.15320206460548e-06, 'epoch': 0.29} + 29%|██▉ | 2057/7045 [6:41:42<15:34:49, 11.24s/it] 29%|██▉ | 2058/7045 [6:41:54<16:08:44, 11.66s/it] {'loss': 1.1509, 'learning_rate': 4.152339666359219e-06, 'epoch': 0.29} + 29%|██▉ | 2058/7045 [6:41:54<16:08:44, 11.66s/it] 29%|██▉ | 2059/7045 [6:42:05<15:52:47, 11.47s/it] {'loss': 1.1006, 'learning_rate': 4.151476918830941e-06, 'epoch': 0.29} + 29%|██▉ | 2059/7045 [6:42:05<15:52:47, 11.47s/it] 29%|██▉ | 2060/7045 [6:42:17<16:05:56, 11.63s/it] {'loss': 1.1055, 'learning_rate': 4.150613822203021e-06, 'epoch': 0.29} + 29%|██▉ | 2060/7045 [6:42:17<16:05:56, 11.63s/it] 29%|██▉ | 2061/7045 [6:42:29<15:55:28, 11.50s/it] {'loss': 1.1445, 'learning_rate': 4.149750376657905e-06, 'epoch': 0.29} + 29%|██▉ | 2061/7045 [6:42:29<15:55:28, 11.50s/it] 29%|██▉ | 2062/7045 [6:42:40<16:01:28, 11.58s/it] {'loss': 1.1592, 'learning_rate': 4.148886582378113e-06, 'epoch': 0.29} + 29%|██▉ | 2062/7045 [6:42:40<16:01:28, 11.58s/it] 29%|██▉ | 2063/7045 [6:42:51<15:50:11, 11.44s/it] {'loss': 1.1445, 'learning_rate': 4.14802243954624e-06, 'epoch': 0.29} + 29%|██▉ | 2063/7045 [6:42:51<15:50:11, 11.44s/it] 29%|██▉ | 2064/7045 [6:43:02<15:39:04, 11.31s/it] {'loss': 1.1592, 'learning_rate': 4.147157948344954e-06, 'epoch': 0.29} + 29%|██▉ | 2064/7045 [6:43:02<15:39:04, 11.31s/it] 29%|██▉ | 2065/7045 [6:43:13<15:33:12, 11.24s/it] {'loss': 1.1348, 'learning_rate': 4.146293108956996e-06, 'epoch': 0.29} + 29%|██▉ | 2065/7045 [6:43:13<15:33:12, 11.24s/it] 29%|██▉ | 2066/7045 [6:43:25<15:45:36, 11.40s/it] {'loss': 1.1494, 'learning_rate': 4.145427921565182e-06, 'epoch': 0.29} + 29%|██▉ | 2066/7045 [6:43:25<15:45:36, 11.40s/it] 29%|██▉ | 2067/7045 [6:43:37<15:50:17, 11.45s/it] {'loss': 1.1592, 'learning_rate': 4.1445623863524e-06, 'epoch': 0.29} + 29%|██▉ | 2067/7045 [6:43:37<15:50:17, 11.45s/it] 29%|██▉ | 2068/7045 [6:43:48<15:44:55, 11.39s/it] {'loss': 1.127, 'learning_rate': 4.1436965035016124e-06, 'epoch': 0.29} + 29%|██▉ | 2068/7045 [6:43:48<15:44:55, 11.39s/it] 29%|██▉ | 2069/7045 [6:44:02<16:41:11, 12.07s/it] {'loss': 1.0947, 'learning_rate': 4.142830273195856e-06, 'epoch': 0.29} + 29%|██▉ | 2069/7045 [6:44:02<16:41:11, 12.07s/it] 29%|██▉ | 2070/7045 [6:44:15<17:00:19, 12.31s/it] {'loss': 1.0957, 'learning_rate': 4.141963695618239e-06, 'epoch': 0.29} + 29%|██▉ | 2070/7045 [6:44:15<17:00:19, 12.31s/it] 29%|██▉ | 2071/7045 [6:44:27<16:58:29, 12.29s/it] {'loss': 1.0825, 'learning_rate': 4.141096770951945e-06, 'epoch': 0.29} + 29%|██▉ | 2071/7045 [6:44:27<16:58:29, 12.29s/it] 29%|██▉ | 2072/7045 [6:44:38<16:34:32, 12.00s/it] {'loss': 1.124, 'learning_rate': 4.140229499380228e-06, 'epoch': 0.29} + 29%|██▉ | 2072/7045 [6:44:38<16:34:32, 12.00s/it] 29%|██▉ | 2073/7045 [6:44:50<16:39:00, 12.06s/it] {'loss': 1.0776, 'learning_rate': 4.139361881086419e-06, 'epoch': 0.29} + 29%|██▉ | 2073/7045 [6:44:50<16:39:00, 12.06s/it] 29%|██▉ | 2074/7045 [6:45:04<17:14:19, 12.48s/it] {'loss': 1.1182, 'learning_rate': 4.138493916253921e-06, 'epoch': 0.29} + 29%|██▉ | 2074/7045 [6:45:04<17:14:19, 12.48s/it] 29%|██▉ | 2075/7045 [6:45:15<16:41:11, 12.09s/it] {'loss': 1.1367, 'learning_rate': 4.13762560506621e-06, 'epoch': 0.29} + 29%|██▉ | 2075/7045 [6:45:15<16:41:11, 12.09s/it] 29%|██▉ | 2076/7045 [6:45:29<17:20:11, 12.56s/it] {'loss': 1.1187, 'learning_rate': 4.1367569477068335e-06, 'epoch': 0.29} + 29%|██▉ | 2076/7045 [6:45:29<17:20:11, 12.56s/it] 29%|██▉ | 2077/7045 [6:45:40<16:46:59, 12.16s/it] {'loss': 1.1143, 'learning_rate': 4.135887944359415e-06, 'epoch': 0.29} + 29%|██▉ | 2077/7045 [6:45:40<16:46:59, 12.16s/it] 29%|██▉ | 2078/7045 [6:45:51<16:24:34, 11.89s/it] {'loss': 1.1123, 'learning_rate': 4.135018595207649e-06, 'epoch': 0.29} + 29%|██▉ | 2078/7045 [6:45:51<16:24:34, 11.89s/it] 30%|██▉ | 2079/7045 [6:46:03<16:23:11, 11.88s/it] {'loss': 1.1396, 'learning_rate': 4.1341489004353045e-06, 'epoch': 0.3} + 30%|██▉ | 2079/7045 [6:46:03<16:23:11, 11.88s/it] 30%|██▉ | 2080/7045 [6:46:15<16:14:58, 11.78s/it] {'loss': 1.1445, 'learning_rate': 4.1332788602262246e-06, 'epoch': 0.3} + 30%|██▉ | 2080/7045 [6:46:15<16:14:58, 11.78s/it] 30%|██▉ | 2081/7045 [6:46:26<15:56:15, 11.56s/it] {'loss': 1.1079, 'learning_rate': 4.1324084747643215e-06, 'epoch': 0.3} + 30%|██▉ | 2081/7045 [6:46:26<15:56:15, 11.56s/it] 30%|██▉ | 2082/7045 [6:46:37<15:42:04, 11.39s/it] {'loss': 1.1182, 'learning_rate': 4.1315377442335844e-06, 'epoch': 0.3} + 30%|██▉ | 2082/7045 [6:46:37<15:42:04, 11.39s/it] 30%|██▉ | 2083/7045 [6:46:49<16:12:33, 11.76s/it] {'loss': 1.0864, 'learning_rate': 4.130666668818073e-06, 'epoch': 0.3} + 30%|██▉ | 2083/7045 [6:46:49<16:12:33, 11.76s/it] 30%|██▉ | 2084/7045 [6:47:02<16:42:33, 12.13s/it] {'loss': 1.1475, 'learning_rate': 4.129795248701921e-06, 'epoch': 0.3} + 30%|██▉ | 2084/7045 [6:47:02<16:42:33, 12.13s/it] 30%|██▉ | 2085/7045 [6:47:13<16:18:11, 11.83s/it] {'loss': 1.0879, 'learning_rate': 4.128923484069335e-06, 'epoch': 0.3} + 30%|██▉ | 2085/7045 [6:47:13<16:18:11, 11.83s/it] 30%|██▉ | 2086/7045 [6:47:26<16:31:41, 12.00s/it] {'loss': 1.1475, 'learning_rate': 4.128051375104593e-06, 'epoch': 0.3} + 30%|██▉ | 2086/7045 [6:47:26<16:31:41, 12.00s/it] 30%|██▉ | 2087/7045 [6:47:37<16:15:33, 11.81s/it] {'loss': 1.1689, 'learning_rate': 4.1271789219920485e-06, 'epoch': 0.3} + 30%|██▉ | 2087/7045 [6:47:37<16:15:33, 11.81s/it] 30%|██▉ | 2088/7045 [6:47:49<16:12:18, 11.77s/it] {'loss': 1.1553, 'learning_rate': 4.126306124916125e-06, 'epoch': 0.3} + 30%|██▉ | 2088/7045 [6:47:49<16:12:18, 11.77s/it] 30%|██▉ | 2089/7045 [6:48:00<16:01:17, 11.64s/it] {'loss': 1.1553, 'learning_rate': 4.125432984061321e-06, 'epoch': 0.3} + 30%|██▉ | 2089/7045 [6:48:00<16:01:17, 11.64s/it] 30%|██▉ | 2090/7045 [6:48:13<16:35:23, 12.05s/it] {'loss': 1.1348, 'learning_rate': 4.124559499612205e-06, 'epoch': 0.3} + 30%|██▉ | 2090/7045 [6:48:13<16:35:23, 12.05s/it] 30%|██▉ | 2091/7045 [6:48:25<16:20:33, 11.88s/it] {'loss': 1.1338, 'learning_rate': 4.123685671753421e-06, 'epoch': 0.3} + 30%|██▉ | 2091/7045 [6:48:25<16:20:33, 11.88s/it] 30%|██▉ | 2092/7045 [6:48:36<15:59:29, 11.62s/it] {'loss': 1.1631, 'learning_rate': 4.122811500669683e-06, 'epoch': 0.3} + 30%|██▉ | 2092/7045 [6:48:36<15:59:29, 11.62s/it] 30%|██▉ | 2093/7045 [6:48:47<15:50:33, 11.52s/it] {'loss': 1.1426, 'learning_rate': 4.12193698654578e-06, 'epoch': 0.3} + 30%|██▉ | 2093/7045 [6:48:47<15:50:33, 11.52s/it] 30%|██▉ | 2094/7045 [6:48:58<15:38:03, 11.37s/it] {'loss': 1.1309, 'learning_rate': 4.121062129566572e-06, 'epoch': 0.3} + 30%|██▉ | 2094/7045 [6:48:58<15:38:03, 11.37s/it] 30%|██▉ | 2095/7045 [6:49:09<15:31:35, 11.29s/it] {'loss': 1.126, 'learning_rate': 4.120186929916991e-06, 'epoch': 0.3} + 30%|██▉ | 2095/7045 [6:49:09<15:31:35, 11.29s/it] 30%|██▉ | 2096/7045 [6:49:20<15:26:09, 11.23s/it] {'loss': 1.1338, 'learning_rate': 4.119311387782045e-06, 'epoch': 0.3} + 30%|██▉ | 2096/7045 [6:49:20<15:26:09, 11.23s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 12 bytes but only got 10. + warnings.warn(str(msg)) + 30%|██▉ | 2097/7045 [6:49:32<15:34:22, 11.33s/it] {'loss': 1.1416, 'learning_rate': 4.1184355033468074e-06, 'epoch': 0.3} + 30%|██▉ | 2097/7045 [6:49:32<15:34:22, 11.33s/it] 30%|██▉ | 2098/7045 [6:49:43<15:32:59, 11.32s/it] {'loss': 1.1309, 'learning_rate': 4.1175592767964315e-06, 'epoch': 0.3} + 30%|██▉ | 2098/7045 [6:49:43<15:32:59, 11.32s/it] 30%|██▉ | 2099/7045 [6:49:55<16:02:38, 11.68s/it] {'loss': 1.1123, 'learning_rate': 4.1166827083161375e-06, 'epoch': 0.3} + 30%|██▉ | 2099/7045 [6:49:55<16:02:38, 11.68s/it] 30%|██▉ | 2100/7045 [6:50:07<15:53:10, 11.57s/it] {'loss': 1.1436, 'learning_rate': 4.115805798091222e-06, 'epoch': 0.3} + 30%|██▉ | 2100/7045 [6:50:07<15:53:10, 11.57s/it] 30%|██▉ | 2101/7045 [6:50:18<15:50:15, 11.53s/it] {'loss': 1.1836, 'learning_rate': 4.1149285463070505e-06, 'epoch': 0.3} + 30%|██▉ | 2101/7045 [6:50:18<15:50:15, 11.53s/it] 30%|██▉ | 2102/7045 [6:50:29<15:40:11, 11.41s/it] {'loss': 1.1279, 'learning_rate': 4.114050953149062e-06, 'epoch': 0.3} + 30%|██▉ | 2102/7045 [6:50:29<15:40:11, 11.41s/it] 30%|██▉ | 2103/7045 [6:50:40<15:25:40, 11.24s/it] {'loss': 1.1338, 'learning_rate': 4.113173018802768e-06, 'epoch': 0.3} + 30%|██▉ | 2103/7045 [6:50:40<15:25:40, 11.24s/it] 30%|██▉ | 2104/7045 [6:50:51<15:27:08, 11.26s/it] {'loss': 1.1196, 'learning_rate': 4.112294743453752e-06, 'epoch': 0.3} + 30%|██▉ | 2104/7045 [6:50:51<15:27:08, 11.26s/it] 30%|██▉ | 2105/7045 [6:51:03<15:31:03, 11.31s/it] {'loss': 1.1187, 'learning_rate': 4.111416127287669e-06, 'epoch': 0.3} + 30%|██▉ | 2105/7045 [6:51:03<15:31:03, 11.31s/it] 30%|██▉ | 2106/7045 [6:51:14<15:19:46, 11.17s/it] {'loss': 1.1387, 'learning_rate': 4.110537170490247e-06, 'epoch': 0.3} + 30%|██▉ | 2106/7045 [6:51:14<15:19:46, 11.17s/it] 30%|██▉ | 2107/7045 [6:51:25<15:20:04, 11.18s/it] {'loss': 1.1143, 'learning_rate': 4.109657873247286e-06, 'epoch': 0.3} + 30%|██▉ | 2107/7045 [6:51:25<15:20:04, 11.18s/it] 30%|██▉ | 2108/7045 [6:51:38<15:59:57, 11.67s/it] {'loss': 1.1465, 'learning_rate': 4.108778235744656e-06, 'epoch': 0.3} + 30%|██▉ | 2108/7045 [6:51:38<15:59:57, 11.67s/it] 30%|██▉ | 2109/7045 [6:51:49<15:44:10, 11.48s/it] {'loss': 1.1533, 'learning_rate': 4.1078982581683e-06, 'epoch': 0.3} + 30%|██▉ | 2109/7045 [6:51:49<15:44:10, 11.48s/it] 30%|██▉ | 2110/7045 [6:52:01<15:59:57, 11.67s/it] {'loss': 1.1108, 'learning_rate': 4.107017940704235e-06, 'epoch': 0.3} + 30%|██▉ | 2110/7045 [6:52:01<15:59:57, 11.67s/it] 30%|██▉ | 2111/7045 [6:52:12<15:49:48, 11.55s/it] {'loss': 1.1592, 'learning_rate': 4.106137283538547e-06, 'epoch': 0.3} + 30%|██▉ | 2111/7045 [6:52:12<15:49:48, 11.55s/it] 30%|██▉ | 2112/7045 [6:52:24<16:07:26, 11.77s/it] {'loss': 1.0918, 'learning_rate': 4.105256286857395e-06, 'epoch': 0.3} + 30%|██▉ | 2112/7045 [6:52:24<16:07:26, 11.77s/it] 30%|██▉ | 2113/7045 [6:52:36<16:12:45, 11.83s/it] {'loss': 1.166, 'learning_rate': 4.10437495084701e-06, 'epoch': 0.3} + 30%|██▉ | 2113/7045 [6:52:36<16:12:45, 11.83s/it] 30%|███ | 2114/7045 [6:52:48<16:03:53, 11.73s/it] {'loss': 1.1875, 'learning_rate': 4.103493275693695e-06, 'epoch': 0.3} + 30%|███ | 2114/7045 [6:52:48<16:03:53, 11.73s/it] 30%|███ | 2115/7045 [6:52:59<15:53:32, 11.61s/it] {'loss': 1.104, 'learning_rate': 4.102611261583822e-06, 'epoch': 0.3} + 30%|███ | 2115/7045 [6:52:59<15:53:32, 11.61s/it] 30%|███ | 2116/7045 [6:53:13<16:39:02, 12.16s/it] {'loss': 1.0996, 'learning_rate': 4.101728908703838e-06, 'epoch': 0.3} + 30%|███ | 2116/7045 [6:53:13<16:39:02, 12.16s/it] 30%|███ | 2117/7045 [6:53:24<16:10:50, 11.82s/it] {'loss': 1.1191, 'learning_rate': 4.100846217240261e-06, 'epoch': 0.3} + 30%|███ | 2117/7045 [6:53:24<16:10:50, 11.82s/it] 30%|███ | 2118/7045 [6:53:36<16:25:45, 12.00s/it] {'loss': 1.1006, 'learning_rate': 4.099963187379678e-06, 'epoch': 0.3} + 30%|███ | 2118/7045 [6:53:36<16:25:45, 12.00s/it] 30%|███ | 2119/7045 [6:53:48<16:22:06, 11.96s/it] {'loss': 1.1113, 'learning_rate': 4.099079819308751e-06, 'epoch': 0.3} + 30%|███ | 2119/7045 [6:53:48<16:22:06, 11.96s/it] 30%|███ | 2120/7045 [6:53:59<16:00:45, 11.70s/it] {'loss': 1.125, 'learning_rate': 4.098196113214213e-06, 'epoch': 0.3} + 30%|███ | 2120/7045 [6:53:59<16:00:45, 11.70s/it] 30%|███ | 2121/7045 [6:54:12<16:31:28, 12.08s/it] {'loss': 1.1445, 'learning_rate': 4.0973120692828635e-06, 'epoch': 0.3} + 30%|███ | 2121/7045 [6:54:12<16:31:28, 12.08s/it] 30%|███ | 2122/7045 [6:54:23<16:11:24, 11.84s/it] {'loss': 1.1348, 'learning_rate': 4.096427687701581e-06, 'epoch': 0.3} + 30%|███ | 2122/7045 [6:54:23<16:11:24, 11.84s/it] 30%|███ | 2123/7045 [6:54:35<16:06:46, 11.79s/it] {'loss': 1.1094, 'learning_rate': 4.095542968657311e-06, 'epoch': 0.3} + 30%|███ | 2123/7045 [6:54:35<16:06:46, 11.79s/it] 30%|███ | 2124/7045 [6:54:46<15:47:17, 11.55s/it] {'loss': 1.1045, 'learning_rate': 4.09465791233707e-06, 'epoch': 0.3} + 30%|███ | 2124/7045 [6:54:46<15:47:17, 11.55s/it] 30%|███ | 2125/7045 [6:54:59<16:12:20, 11.86s/it] {'loss': 1.1836, 'learning_rate': 4.093772518927947e-06, 'epoch': 0.3} + 30%|███ | 2125/7045 [6:54:59<16:12:20, 11.86s/it] 30%|███ | 2126/7045 [6:55:10<16:01:26, 11.73s/it] {'loss': 1.1699, 'learning_rate': 4.092886788617103e-06, 'epoch': 0.3} + 30%|███ | 2126/7045 [6:55:10<16:01:26, 11.73s/it] 30%|███ | 2127/7045 [6:55:22<16:11:55, 11.86s/it] {'loss': 1.0996, 'learning_rate': 4.092000721591768e-06, 'epoch': 0.3} + 30%|███ | 2127/7045 [6:55:22<16:11:55, 11.86s/it] 30%|███ | 2128/7045 [6:55:36<16:47:07, 12.29s/it] {'loss': 1.124, 'learning_rate': 4.091114318039244e-06, 'epoch': 0.3} + 30%|███ | 2128/7045 [6:55:36<16:47:07, 12.29s/it] 30%|███ | 2129/7045 [6:55:47<16:17:31, 11.93s/it] {'loss': 1.1006, 'learning_rate': 4.090227578146907e-06, 'epoch': 0.3} + 30%|███ | 2129/7045 [6:55:47<16:17:31, 11.93s/it] 30%|███ | 2130/7045 [6:56:00<16:42:14, 12.23s/it] {'loss': 1.0957, 'learning_rate': 4.0893405021022e-06, 'epoch': 0.3} + 30%|███ | 2130/7045 [6:56:00<16:42:14, 12.23s/it] 30%|███ | 2131/7045 [6:56:11<16:11:49, 11.87s/it] {'loss': 1.1445, 'learning_rate': 4.088453090092639e-06, 'epoch': 0.3} + 30%|███ | 2131/7045 [6:56:11<16:11:49, 11.87s/it] 30%|███ | 2132/7045 [6:56:22<16:02:53, 11.76s/it] {'loss': 1.165, 'learning_rate': 4.087565342305811e-06, 'epoch': 0.3} + 30%|███ | 2132/7045 [6:56:22<16:02:53, 11.76s/it] 30%|███ | 2133/7045 [6:56:33<15:43:12, 11.52s/it] {'loss': 1.1123, 'learning_rate': 4.0866772589293735e-06, 'epoch': 0.3} + 30%|███ | 2133/7045 [6:56:33<15:43:12, 11.52s/it] 30%|███ | 2134/7045 [6:56:44<15:30:04, 11.36s/it] {'loss': 1.1621, 'learning_rate': 4.085788840151055e-06, 'epoch': 0.3} + 30%|███ | 2134/7045 [6:56:44<15:30:04, 11.36s/it] 30%|███ | 2135/7045 [6:56:57<16:11:14, 11.87s/it] {'loss': 1.106, 'learning_rate': 4.084900086158655e-06, 'epoch': 0.3} + 30%|███ | 2135/7045 [6:56:57<16:11:14, 11.87s/it] 30%|███ | 2136/7045 [6:57:10<16:39:06, 12.21s/it] {'loss': 1.0869, 'learning_rate': 4.084010997140045e-06, 'epoch': 0.3} + 30%|███ | 2136/7045 [6:57:10<16:39:06, 12.21s/it] 30%|███ | 2137/7045 [6:57:24<17:09:12, 12.58s/it] {'loss': 1.124, 'learning_rate': 4.083121573283167e-06, 'epoch': 0.3} + 30%|███ | 2137/7045 [6:57:24<17:09:12, 12.58s/it] 30%|███ | 2138/7045 [6:57:34<16:29:23, 12.10s/it] {'loss': 1.1582, 'learning_rate': 4.08223181477603e-06, 'epoch': 0.3} + 30%|███ | 2138/7045 [6:57:34<16:29:23, 12.10s/it] 30%|███ | 2139/7045 [6:57:45<16:00:15, 11.74s/it] {'loss': 1.0918, 'learning_rate': 4.081341721806718e-06, 'epoch': 0.3} + 30%|███ | 2139/7045 [6:57:45<16:00:15, 11.74s/it] 30%|███ | 2140/7045 [6:57:58<16:12:58, 11.90s/it] {'loss': 1.1616, 'learning_rate': 4.080451294563384e-06, 'epoch': 0.3} + 30%|███ | 2140/7045 [6:57:58<16:12:58, 11.90s/it] 30%|███ | 2141/7045 [6:58:11<16:43:35, 12.28s/it] {'loss': 1.1191, 'learning_rate': 4.079560533234255e-06, 'epoch': 0.3} + 30%|███ | 2141/7045 [6:58:11<16:43:35, 12.28s/it] 30%|███ | 2142/7045 [6:58:22<16:20:14, 12.00s/it] {'loss': 1.1143, 'learning_rate': 4.078669438007623e-06, 'epoch': 0.3} + 30%|███ | 2142/7045 [6:58:22<16:20:14, 12.00s/it] 30%|███ | 2143/7045 [6:58:33<15:48:21, 11.61s/it] {'loss': 1.1201, 'learning_rate': 4.077778009071854e-06, 'epoch': 0.3} + 30%|███ | 2143/7045 [6:58:33<15:48:21, 11.61s/it] 30%|███ | 2144/7045 [6:58:44<15:34:46, 11.44s/it] {'loss': 1.0732, 'learning_rate': 4.076886246615385e-06, 'epoch': 0.3} + 30%|███ | 2144/7045 [6:58:44<15:34:46, 11.44s/it] 30%|███ | 2145/7045 [6:58:57<16:13:01, 11.91s/it] {'loss': 1.1157, 'learning_rate': 4.075994150826721e-06, 'epoch': 0.3} + 30%|███ | 2145/7045 [6:58:57<16:13:01, 11.91s/it] 30%|███ | 2146/7045 [6:59:08<15:57:42, 11.73s/it] {'loss': 1.1543, 'learning_rate': 4.075101721894438e-06, 'epoch': 0.3} + 30%|███ | 2146/7045 [6:59:08<15:57:42, 11.73s/it] 30%|███ | 2147/7045 [6:59:21<16:17:56, 11.98s/it] {'loss': 1.1279, 'learning_rate': 4.074208960007186e-06, 'epoch': 0.3} + 30%|███ | 2147/7045 [6:59:21<16:17:56, 11.98s/it] 30%|███ | 2148/7045 [6:59:32<16:01:43, 11.78s/it] {'loss': 1.1406, 'learning_rate': 4.07331586535368e-06, 'epoch': 0.3} + 30%|███ | 2148/7045 [6:59:32<16:01:43, 11.78s/it] 31%|███ | 2149/7045 [6:59:43<15:38:53, 11.51s/it] {'loss': 1.1035, 'learning_rate': 4.07242243812271e-06, 'epoch': 0.31} + 31%|███ | 2149/7045 [6:59:43<15:38:53, 11.51s/it] 31%|███ | 2150/7045 [6:59:54<15:31:41, 11.42s/it] {'loss': 1.1016, 'learning_rate': 4.071528678503134e-06, 'epoch': 0.31} + 31%|███ | 2150/7045 [6:59:54<15:31:41, 11.42s/it] 31%|███ | 2151/7045 [7:00:06<15:31:08, 11.42s/it] {'loss': 1.1133, 'learning_rate': 4.0706345866838795e-06, 'epoch': 0.31} + 31%|███ | 2151/7045 [7:00:06<15:31:08, 11.42s/it] 31%|███ | 2152/7045 [7:00:17<15:21:07, 11.30s/it] {'loss': 1.1191, 'learning_rate': 4.0697401628539456e-06, 'epoch': 0.31} + 31%|███ | 2152/7045 [7:00:17<15:21:07, 11.30s/it] 31%|███ | 2153/7045 [7:00:27<15:09:28, 11.15s/it] {'loss': 1.1436, 'learning_rate': 4.068845407202401e-06, 'epoch': 0.31} + 31%|███ | 2153/7045 [7:00:27<15:09:28, 11.15s/it] 31%|███ | 2154/7045 [7:00:39<15:10:13, 11.17s/it] {'loss': 1.1133, 'learning_rate': 4.067950319918386e-06, 'epoch': 0.31} + 31%|███ | 2154/7045 [7:00:39<15:10:13, 11.17s/it] 31%|███ | 2155/7045 [7:00:51<15:30:53, 11.42s/it] {'loss': 1.166, 'learning_rate': 4.0670549011911095e-06, 'epoch': 0.31} + 31%|███ | 2155/7045 [7:00:51<15:30:53, 11.42s/it] 31%|███ | 2156/7045 [7:01:02<15:26:29, 11.37s/it] {'loss': 1.1426, 'learning_rate': 4.066159151209851e-06, 'epoch': 0.31} + 31%|███ | 2156/7045 [7:01:02<15:26:29, 11.37s/it] 31%|███ | 2157/7045 [7:01:13<15:14:13, 11.22s/it] {'loss': 1.1338, 'learning_rate': 4.0652630701639576e-06, 'epoch': 0.31} + 31%|███ | 2157/7045 [7:01:13<15:14:13, 11.22s/it] 31%|███ | 2158/7045 [7:01:24<15:16:02, 11.25s/it] {'loss': 1.166, 'learning_rate': 4.064366658242851e-06, 'epoch': 0.31} + 31%|███ | 2158/7045 [7:01:24<15:16:02, 11.25s/it] 31%|███ | 2159/7045 [7:01:37<15:46:11, 11.62s/it] {'loss': 1.1133, 'learning_rate': 4.063469915636019e-06, 'epoch': 0.31} + 31%|███ | 2159/7045 [7:01:37<15:46:11, 11.62s/it] 31%|███ | 2160/7045 [7:01:47<15:28:25, 11.40s/it] {'loss': 1.1377, 'learning_rate': 4.062572842533023e-06, 'epoch': 0.31} + 31%|███ | 2160/7045 [7:01:47<15:28:25, 11.40s/it] 31%|███ | 2161/7045 [7:01:58<15:15:02, 11.24s/it] {'loss': 1.1865, 'learning_rate': 4.061675439123488e-06, 'epoch': 0.31} + 31%|███ | 2161/7045 [7:01:58<15:15:02, 11.24s/it] 31%|███ | 2162/7045 [7:02:10<15:16:07, 11.26s/it] {'loss': 1.1299, 'learning_rate': 4.060777705597117e-06, 'epoch': 0.31} + 31%|███ | 2162/7045 [7:02:10<15:16:07, 11.26s/it] 31%|███ | 2163/7045 [7:02:21<15:12:36, 11.22s/it] {'loss': 1.1074, 'learning_rate': 4.059879642143676e-06, 'epoch': 0.31} + 31%|███ | 2163/7045 [7:02:21<15:12:36, 11.22s/it] 31%|███ | 2164/7045 [7:02:34<16:07:10, 11.89s/it] {'loss': 1.1196, 'learning_rate': 4.0589812489530046e-06, 'epoch': 0.31} + 31%|███ | 2164/7045 [7:02:34<16:07:10, 11.89s/it] 31%|███ | 2165/7045 [7:02:46<16:04:58, 11.86s/it] {'loss': 1.1191, 'learning_rate': 4.05808252621501e-06, 'epoch': 0.31} + 31%|███ | 2165/7045 [7:02:46<16:04:58, 11.86s/it] 31%|███ | 2166/7045 [7:02:57<15:38:16, 11.54s/it] {'loss': 1.1455, 'learning_rate': 4.05718347411967e-06, 'epoch': 0.31} + 31%|███ | 2166/7045 [7:02:57<15:38:16, 11.54s/it] 31%|███ | 2167/7045 [7:03:10<16:15:50, 12.00s/it] {'loss': 1.1914, 'learning_rate': 4.056284092857032e-06, 'epoch': 0.31} + 31%|███ | 2167/7045 [7:03:10<16:15:50, 12.00s/it] 31%|███ | 2168/7045 [7:03:21<16:02:31, 11.84s/it] {'loss': 1.1777, 'learning_rate': 4.055384382617214e-06, 'epoch': 0.31} + 31%|███ | 2168/7045 [7:03:21<16:02:31, 11.84s/it] 31%|███ | 2169/7045 [7:03:33<15:54:38, 11.75s/it] {'loss': 1.1455, 'learning_rate': 4.054484343590401e-06, 'epoch': 0.31} + 31%|███ | 2169/7045 [7:03:33<15:54:38, 11.75s/it] 31%|██�� | 2170/7045 [7:03:45<15:55:52, 11.76s/it] {'loss': 1.0728, 'learning_rate': 4.05358397596685e-06, 'epoch': 0.31} + 31%|███ | 2170/7045 [7:03:45<15:55:52, 11.76s/it] 31%|███ | 2171/7045 [7:03:55<15:31:11, 11.46s/it] {'loss': 1.0957, 'learning_rate': 4.052683279936885e-06, 'epoch': 0.31} + 31%|███ | 2171/7045 [7:03:55<15:31:11, 11.46s/it] 31%|███ | 2172/7045 [7:04:07<15:27:00, 11.41s/it] {'loss': 1.1035, 'learning_rate': 4.0517822556909015e-06, 'epoch': 0.31} + 31%|███ | 2172/7045 [7:04:07<15:27:00, 11.41s/it] 31%|███ | 2173/7045 [7:04:19<15:41:42, 11.60s/it] {'loss': 1.1377, 'learning_rate': 4.050880903419365e-06, 'epoch': 0.31} + 31%|███ | 2173/7045 [7:04:19<15:41:42, 11.60s/it] 31%|███ | 2174/7045 [7:04:32<16:13:30, 11.99s/it] {'loss': 1.0684, 'learning_rate': 4.0499792233128075e-06, 'epoch': 0.31} + 31%|███ | 2174/7045 [7:04:32<16:13:30, 11.99s/it] 31%|███ | 2175/7045 [7:04:43<15:52:12, 11.73s/it] {'loss': 1.1294, 'learning_rate': 4.049077215561832e-06, 'epoch': 0.31} + 31%|███ | 2175/7045 [7:04:43<15:52:12, 11.73s/it] 31%|███ | 2176/7045 [7:04:54<15:34:59, 11.52s/it] {'loss': 1.0977, 'learning_rate': 4.048174880357111e-06, 'epoch': 0.31} + 31%|███ | 2176/7045 [7:04:54<15:34:59, 11.52s/it] 31%|███ | 2177/7045 [7:05:05<15:30:29, 11.47s/it] {'loss': 1.1475, 'learning_rate': 4.047272217889385e-06, 'epoch': 0.31} + 31%|███ | 2177/7045 [7:05:05<15:30:29, 11.47s/it] 31%|███ | 2178/7045 [7:05:16<15:13:14, 11.26s/it] {'loss': 1.1104, 'learning_rate': 4.046369228349466e-06, 'epoch': 0.31} + 31%|███ | 2178/7045 [7:05:16<15:13:14, 11.26s/it] 31%|███ | 2179/7045 [7:05:28<15:33:09, 11.51s/it] {'loss': 1.1348, 'learning_rate': 4.045465911928233e-06, 'epoch': 0.31} + 31%|███ | 2179/7045 [7:05:28<15:33:09, 11.51s/it] 31%|███ | 2180/7045 [7:05:39<15:23:18, 11.39s/it] {'loss': 1.1377, 'learning_rate': 4.044562268816635e-06, 'epoch': 0.31} + 31%|███ | 2180/7045 [7:05:39<15:23:18, 11.39s/it] 31%|███ | 2181/7045 [7:05:51<15:30:08, 11.47s/it] {'loss': 1.1328, 'learning_rate': 4.043658299205688e-06, 'epoch': 0.31} + 31%|███ | 2181/7045 [7:05:51<15:30:08, 11.47s/it] 31%|███ | 2182/7045 [7:06:03<15:41:56, 11.62s/it] {'loss': 1.1006, 'learning_rate': 4.0427540032864816e-06, 'epoch': 0.31} + 31%|███ | 2182/7045 [7:06:03<15:41:56, 11.62s/it] 31%|███ | 2183/7045 [7:06:14<15:32:10, 11.50s/it] {'loss': 1.1104, 'learning_rate': 4.041849381250169e-06, 'epoch': 0.31} + 31%|███ | 2183/7045 [7:06:14<15:32:10, 11.50s/it] 31%|███ | 2184/7045 [7:06:27<16:19:36, 12.09s/it] {'loss': 1.165, 'learning_rate': 4.040944433287977e-06, 'epoch': 0.31} + 31%|███ | 2184/7045 [7:06:27<16:19:36, 12.09s/it] 31%|███ | 2185/7045 [7:06:39<15:54:59, 11.79s/it] {'loss': 1.1641, 'learning_rate': 4.040039159591198e-06, 'epoch': 0.31} + 31%|███ | 2185/7045 [7:06:39<15:54:59, 11.79s/it] 31%|███ | 2186/7045 [7:06:51<16:10:16, 11.98s/it] {'loss': 1.124, 'learning_rate': 4.039133560351196e-06, 'epoch': 0.31} + 31%|███ | 2186/7045 [7:06:51<16:10:16, 11.98s/it] 31%|███ | 2187/7045 [7:07:02<15:45:12, 11.67s/it] {'loss': 1.127, 'learning_rate': 4.0382276357594e-06, 'epoch': 0.31} + 31%|███ | 2187/7045 [7:07:02<15:45:12, 11.67s/it] 31%|███ | 2188/7045 [7:07:15<16:14:44, 12.04s/it] {'loss': 1.1328, 'learning_rate': 4.037321386007311e-06, 'epoch': 0.31} + 31%|███ | 2188/7045 [7:07:15<16:14:44, 12.04s/it] 31%|███ | 2189/7045 [7:07:26<15:58:14, 11.84s/it] {'loss': 1.1133, 'learning_rate': 4.036414811286498e-06, 'epoch': 0.31} + 31%|███ | 2189/7045 [7:07:26<15:58:14, 11.84s/it] 31%|███ | 2190/7045 [7:07:38<15:46:13, 11.69s/it] {'loss': 1.127, 'learning_rate': 4.0355079117886e-06, 'epoch': 0.31} + 31%|███ | 2190/7045 [7:07:38<15:46:13, 11.69s/it] 31%|███ | 2191/7045 [7:07:49<15:43:13, 11.66s/it] {'loss': 1.1387, 'learning_rate': 4.034600687705321e-06, 'epoch': 0.31} + 31%|███ | 2191/7045 [7:07:49<15:43:13, 11.66s/it] 31%|███ | 2192/7045 [7:08:03<16:37:04, 12.33s/it] {'loss': 1.0996, 'learning_rate': 4.033693139228436e-06, 'epoch': 0.31} + 31%|███ | 2192/7045 [7:08:03<16:37:04, 12.33s/it] 31%|███ | 2193/7045 [7:08:14<16:07:23, 11.96s/it] {'loss': 1.1196, 'learning_rate': 4.032785266549789e-06, 'epoch': 0.31} + 31%|███ | 2193/7045 [7:08:14<16:07:23, 11.96s/it] 31%|███ | 2194/7045 [7:08:28<16:58:51, 12.60s/it] {'loss': 1.1299, 'learning_rate': 4.031877069861292e-06, 'epoch': 0.31} + 31%|███ | 2194/7045 [7:08:28<16:58:51, 12.60s/it] 31%|███ | 2195/7045 [7:08:41<17:14:12, 12.79s/it] {'loss': 1.0903, 'learning_rate': 4.030968549354925e-06, 'epoch': 0.31} + 31%|███ | 2195/7045 [7:08:41<17:14:12, 12.79s/it] 31%|███ | 2196/7045 [7:08:54<17:13:31, 12.79s/it] {'loss': 1.1104, 'learning_rate': 4.030059705222736e-06, 'epoch': 0.31} + 31%|███ | 2196/7045 [7:08:54<17:13:31, 12.79s/it] 31%|███ | 2197/7045 [7:09:05<16:34:10, 12.30s/it] {'loss': 1.1406, 'learning_rate': 4.029150537656844e-06, 'epoch': 0.31} + 31%|███ | 2197/7045 [7:09:05<16:34:10, 12.30s/it] 31%|███ | 2198/7045 [7:09:16<15:57:13, 11.85s/it] {'loss': 1.1104, 'learning_rate': 4.028241046849433e-06, 'epoch': 0.31} + 31%|███ | 2198/7045 [7:09:16<15:57:13, 11.85s/it] 31%|███ | 2199/7045 [7:09:27<15:41:06, 11.65s/it] {'loss': 1.1738, 'learning_rate': 4.027331232992758e-06, 'epoch': 0.31} + 31%|███ | 2199/7045 [7:09:27<15:41:06, 11.65s/it] 31%|███ | 2200/7045 [7:09:38<15:25:38, 11.46s/it] {'loss': 1.1118, 'learning_rate': 4.026421096279141e-06, 'epoch': 0.31} + 31%|███ | 2200/7045 [7:09:38<15:25:38, 11.46s/it] 31%|███ | 2201/7045 [7:09:50<15:16:52, 11.36s/it] {'loss': 1.124, 'learning_rate': 4.025510636900972e-06, 'epoch': 0.31} + 31%|███ | 2201/7045 [7:09:50<15:16:52, 11.36s/it] 31%|███▏ | 2202/7045 [7:10:01<15:18:23, 11.38s/it] {'loss': 1.126, 'learning_rate': 4.0245998550507106e-06, 'epoch': 0.31} + 31%|███▏ | 2202/7045 [7:10:01<15:18:23, 11.38s/it] 31%|███▏ | 2203/7045 [7:10:13<15:35:30, 11.59s/it] {'loss': 1.1191, 'learning_rate': 4.0236887509208825e-06, 'epoch': 0.31} + 31%|███▏ | 2203/7045 [7:10:13<15:35:30, 11.59s/it] 31%|███▏ | 2204/7045 [7:10:25<15:34:18, 11.58s/it] {'loss': 1.1582, 'learning_rate': 4.022777324704083e-06, 'epoch': 0.31} + 31%|███▏ | 2204/7045 [7:10:25<15:34:18, 11.58s/it] 31%|███▏ | 2205/7045 [7:10:38<16:08:37, 12.01s/it] {'loss': 1.0947, 'learning_rate': 4.021865576592975e-06, 'epoch': 0.31} + 31%|███▏ | 2205/7045 [7:10:38<16:08:37, 12.01s/it] 31%|███▏ | 2206/7045 [7:10:50<16:23:03, 12.19s/it] {'loss': 1.1079, 'learning_rate': 4.02095350678029e-06, 'epoch': 0.31} + 31%|███▏ | 2206/7045 [7:10:50<16:23:03, 12.19s/it] 31%|███▏ | 2207/7045 [7:11:03<16:25:11, 12.22s/it] {'loss': 1.1123, 'learning_rate': 4.020041115458827e-06, 'epoch': 0.31} + 31%|███▏ | 2207/7045 [7:11:03<16:25:11, 12.22s/it] 31%|███▏ | 2208/7045 [7:11:13<15:50:19, 11.79s/it] {'loss': 1.1064, 'learning_rate': 4.019128402821453e-06, 'epoch': 0.31} + 31%|███▏ | 2208/7045 [7:11:13<15:50:19, 11.79s/it] 31%|███▏ | 2209/7045 [7:11:25<15:41:49, 11.69s/it] {'loss': 1.1592, 'learning_rate': 4.018215369061103e-06, 'epoch': 0.31} + 31%|██��▏ | 2209/7045 [7:11:25<15:41:49, 11.69s/it] 31%|███▏ | 2210/7045 [7:11:37<15:56:11, 11.87s/it] {'loss': 1.1113, 'learning_rate': 4.017302014370781e-06, 'epoch': 0.31} + 31%|███▏ | 2210/7045 [7:11:37<15:56:11, 11.87s/it] 31%|███▏ | 2211/7045 [7:11:48<15:35:56, 11.62s/it] {'loss': 1.0898, 'learning_rate': 4.016388338943554e-06, 'epoch': 0.31} + 31%|███▏ | 2211/7045 [7:11:48<15:35:56, 11.62s/it] 31%|███▏ | 2212/7045 [7:12:00<15:33:58, 11.59s/it] {'loss': 1.1182, 'learning_rate': 4.015474342972565e-06, 'epoch': 0.31} + 31%|███▏ | 2212/7045 [7:12:00<15:33:58, 11.59s/it] 31%|███▏ | 2213/7045 [7:12:11<15:24:09, 11.48s/it] {'loss': 1.1387, 'learning_rate': 4.014560026651018e-06, 'epoch': 0.31} + 31%|███▏ | 2213/7045 [7:12:11<15:24:09, 11.48s/it] 31%|███▏ | 2214/7045 [7:12:22<15:15:30, 11.37s/it] {'loss': 1.1152, 'learning_rate': 4.013645390172186e-06, 'epoch': 0.31} + 31%|███▏ | 2214/7045 [7:12:22<15:15:30, 11.37s/it] 31%|███▏ | 2215/7045 [7:12:33<15:12:33, 11.34s/it] {'loss': 1.1445, 'learning_rate': 4.012730433729413e-06, 'epoch': 0.31} + 31%|███▏ | 2215/7045 [7:12:33<15:12:33, 11.34s/it] 31%|███▏ | 2216/7045 [7:12:44<15:06:17, 11.26s/it] {'loss': 1.1514, 'learning_rate': 4.0118151575161074e-06, 'epoch': 0.31} + 31%|███▏ | 2216/7045 [7:12:44<15:06:17, 11.26s/it] 31%|███▏ | 2217/7045 [7:12:55<15:02:40, 11.22s/it] {'loss': 1.1533, 'learning_rate': 4.010899561725745e-06, 'epoch': 0.31} + 31%|███▏ | 2217/7045 [7:12:55<15:02:40, 11.22s/it] 31%|███▏ | 2218/7045 [7:13:06<14:53:21, 11.10s/it] {'loss': 1.1279, 'learning_rate': 4.009983646551872e-06, 'epoch': 0.31} + 31%|███▏ | 2218/7045 [7:13:06<14:53:21, 11.10s/it] 31%|███▏ | 2219/7045 [7:13:17<14:53:14, 11.11s/it] {'loss': 1.106, 'learning_rate': 4.009067412188099e-06, 'epoch': 0.31} + 31%|███▏ | 2219/7045 [7:13:17<14:53:14, 11.11s/it] 32%|███▏ | 2220/7045 [7:13:30<15:28:01, 11.54s/it] {'loss': 1.1787, 'learning_rate': 4.008150858828106e-06, 'epoch': 0.32} + 32%|███▏ | 2220/7045 [7:13:30<15:28:01, 11.54s/it] 32%|███▏ | 2221/7045 [7:13:41<15:14:26, 11.37s/it] {'loss': 1.1367, 'learning_rate': 4.00723398666564e-06, 'epoch': 0.32} + 32%|███▏ | 2221/7045 [7:13:41<15:14:26, 11.37s/it] 32%|███▏ | 2222/7045 [7:13:52<15:06:38, 11.28s/it] {'loss': 1.1328, 'learning_rate': 4.0063167958945135e-06, 'epoch': 0.32} + 32%|███▏ | 2222/7045 [7:13:52<15:06:38, 11.28s/it] 32%|███▏ | 2223/7045 [7:14:03<14:57:04, 11.16s/it] {'loss': 1.1826, 'learning_rate': 4.005399286708611e-06, 'epoch': 0.32} + 32%|███▏ | 2223/7045 [7:14:03<14:57:04, 11.16s/it] 32%|███▏ | 2224/7045 [7:14:14<14:55:51, 11.15s/it] {'loss': 1.1826, 'learning_rate': 4.0044814593018784e-06, 'epoch': 0.32} + 32%|███▏ | 2224/7045 [7:14:14<14:55:51, 11.15s/it] 32%|███▏ | 2225/7045 [7:14:25<14:49:12, 11.07s/it] {'loss': 1.0908, 'learning_rate': 4.003563313868335e-06, 'epoch': 0.32} + 32%|███▏ | 2225/7045 [7:14:25<14:49:12, 11.07s/it] 32%|███▏ | 2226/7045 [7:14:36<14:52:25, 11.11s/it] {'loss': 1.0801, 'learning_rate': 4.002644850602062e-06, 'epoch': 0.32} + 32%|███▏ | 2226/7045 [7:14:36<14:52:25, 11.11s/it] 32%|███▏ | 2227/7045 [7:14:47<14:54:11, 11.14s/it] {'loss': 1.125, 'learning_rate': 4.00172606969721e-06, 'epoch': 0.32} + 32%|███▏ | 2227/7045 [7:14:47<14:54:11, 11.14s/it] 32%|███▏ | 2228/7045 [7:14:58<14:53:19, 11.13s/it] {'loss': 1.124, 'learning_rate': 4.000806971347998e-06, 'epoch': 0.32} + 32%|███▏ | 2228/7045 [7:14:58<14:53:19, 11.13s/it] 32%|███▏ | 2229/7045 [7:15:09<14:45:16, 11.03s/it] {'loss': 1.1074, 'learning_rate': 3.99988755574871e-06, 'epoch': 0.32} + 32%|███▏ | 2229/7045 [7:15:09<14:45:16, 11.03s/it] 32%|███▏ | 2230/7045 [7:15:21<15:15:02, 11.40s/it] {'loss': 1.1572, 'learning_rate': 3.998967823093698e-06, 'epoch': 0.32} + 32%|███▏ | 2230/7045 [7:15:21<15:15:02, 11.40s/it] 32%|███▏ | 2231/7045 [7:15:33<15:10:56, 11.35s/it] {'loss': 1.0781, 'learning_rate': 3.99804777357738e-06, 'epoch': 0.32} + 32%|███▏ | 2231/7045 [7:15:33<15:10:56, 11.35s/it] 32%|███▏ | 2232/7045 [7:15:44<14:59:18, 11.21s/it] {'loss': 1.1367, 'learning_rate': 3.997127407394244e-06, 'epoch': 0.32} + 32%|███▏ | 2232/7045 [7:15:44<14:59:18, 11.21s/it] 32%|███▏ | 2233/7045 [7:15:55<14:56:23, 11.18s/it] {'loss': 1.1191, 'learning_rate': 3.996206724738842e-06, 'epoch': 0.32} + 32%|███▏ | 2233/7045 [7:15:55<14:56:23, 11.18s/it] 32%|███▏ | 2234/7045 [7:16:06<14:53:58, 11.15s/it] {'loss': 1.1357, 'learning_rate': 3.995285725805792e-06, 'epoch': 0.32} + 32%|███▏ | 2234/7045 [7:16:06<14:53:58, 11.15s/it] 32%|███▏ | 2235/7045 [7:16:17<15:00:59, 11.24s/it] {'loss': 1.1494, 'learning_rate': 3.9943644107897825e-06, 'epoch': 0.32} + 32%|███▏ | 2235/7045 [7:16:17<15:00:59, 11.24s/it] 32%|███▏ | 2236/7045 [7:16:31<16:02:40, 12.01s/it] {'loss': 1.1558, 'learning_rate': 3.993442779885566e-06, 'epoch': 0.32} + 32%|███▏ | 2236/7045 [7:16:31<16:02:40, 12.01s/it] 32%|███▏ | 2237/7045 [7:16:43<15:54:29, 11.91s/it] {'loss': 1.1416, 'learning_rate': 3.992520833287963e-06, 'epoch': 0.32} + 32%|███▏ | 2237/7045 [7:16:43<15:54:29, 11.91s/it] 32%|███▏ | 2238/7045 [7:16:56<16:25:06, 12.30s/it] {'loss': 1.0884, 'learning_rate': 3.991598571191861e-06, 'epoch': 0.32} + 32%|███▏ | 2238/7045 [7:16:56<16:25:06, 12.30s/it] 32%|███▏ | 2239/7045 [7:17:07<16:02:21, 12.01s/it] {'loss': 1.207, 'learning_rate': 3.990675993792213e-06, 'epoch': 0.32} + 32%|███▏ | 2239/7045 [7:17:07<16:02:21, 12.01s/it] 32%|███▏ | 2240/7045 [7:17:20<16:26:56, 12.32s/it] {'loss': 1.1133, 'learning_rate': 3.9897531012840384e-06, 'epoch': 0.32} + 32%|███▏ | 2240/7045 [7:17:20<16:26:56, 12.32s/it] 32%|███▏ | 2241/7045 [7:17:32<16:01:50, 12.01s/it] {'loss': 1.1074, 'learning_rate': 3.988829893862426e-06, 'epoch': 0.32} + 32%|███▏ | 2241/7045 [7:17:32<16:01:50, 12.01s/it] 32%|███▏ | 2242/7045 [7:17:44<16:09:31, 12.11s/it] {'loss': 1.0791, 'learning_rate': 3.9879063717225275e-06, 'epoch': 0.32} + 32%|███▏ | 2242/7045 [7:17:44<16:09:31, 12.11s/it] 32%|███▏ | 2243/7045 [7:17:55<15:47:16, 11.84s/it] {'loss': 1.1113, 'learning_rate': 3.986982535059563e-06, 'epoch': 0.32} + 32%|███▏ | 2243/7045 [7:17:55<15:47:16, 11.84s/it] 32%|███▏ | 2244/7045 [7:18:07<15:55:44, 11.94s/it] {'loss': 1.1475, 'learning_rate': 3.98605838406882e-06, 'epoch': 0.32} + 32%|███▏ | 2244/7045 [7:18:07<15:55:44, 11.94s/it] 32%|███▏ | 2245/7045 [7:18:18<15:37:04, 11.71s/it] {'loss': 1.1514, 'learning_rate': 3.985133918945652e-06, 'epoch': 0.32} + 32%|███▏ | 2245/7045 [7:18:18<15:37:04, 11.71s/it] 32%|███▏ | 2246/7045 [7:18:29<15:20:36, 11.51s/it] {'loss': 1.1475, 'learning_rate': 3.984209139885476e-06, 'epoch': 0.32} + 32%|███▏ | 2246/7045 [7:18:29<15:20:36, 11.51s/it] 32%|███▏ | 2247/7045 [7:18:41<15:22:15, 11.53s/it] {'loss': 1.1133, 'learning_rate': 3.983284047083778e-06, 'epoch': 0.32} + 32%|███▏ | 2247/7045 [7:18:41<15:22:15, 11.53s/it] 32%|███▏ | 2248/7045 [7:18:52<15:13:52, 11.43s/it] {'loss': 1.1582, 'learning_rate': 3.982358640736112e-06, 'epoch': 0.32} + 32%|███▏ | 2248/7045 [7:18:52<15:13:52, 11.43s/it] 32%|██���▏ | 2249/7045 [7:19:04<15:16:47, 11.47s/it] {'loss': 1.1504, 'learning_rate': 3.981432921038095e-06, 'epoch': 0.32} + 32%|███▏ | 2249/7045 [7:19:04<15:16:47, 11.47s/it] 32%|███▏ | 2250/7045 [7:19:15<15:07:46, 11.36s/it] {'loss': 1.1123, 'learning_rate': 3.98050688818541e-06, 'epoch': 0.32} + 32%|███▏ | 2250/7045 [7:19:15<15:07:46, 11.36s/it] 32%|███▏ | 2251/7045 [7:19:26<15:01:25, 11.28s/it] {'loss': 1.1289, 'learning_rate': 3.979580542373811e-06, 'epoch': 0.32} + 32%|███▏ | 2251/7045 [7:19:26<15:01:25, 11.28s/it] 32%|███▏ | 2252/7045 [7:19:37<15:01:26, 11.28s/it] {'loss': 1.1377, 'learning_rate': 3.9786538837991114e-06, 'epoch': 0.32} + 32%|███▏ | 2252/7045 [7:19:37<15:01:26, 11.28s/it] 32%|███▏ | 2253/7045 [7:19:49<15:18:20, 11.50s/it] {'loss': 1.1514, 'learning_rate': 3.977726912657197e-06, 'epoch': 0.32} + 32%|███▏ | 2253/7045 [7:19:49<15:18:20, 11.50s/it] 32%|███▏ | 2254/7045 [7:20:01<15:17:31, 11.49s/it] {'loss': 1.1416, 'learning_rate': 3.976799629144015e-06, 'epoch': 0.32} + 32%|███▏ | 2254/7045 [7:20:01<15:17:31, 11.49s/it] 32%|███▏ | 2255/7045 [7:20:12<15:07:11, 11.36s/it] {'loss': 1.126, 'learning_rate': 3.975872033455582e-06, 'epoch': 0.32} + 32%|███▏ | 2255/7045 [7:20:12<15:07:11, 11.36s/it] 32%|███▏ | 2256/7045 [7:20:23<15:05:15, 11.34s/it] {'loss': 1.1221, 'learning_rate': 3.974944125787977e-06, 'epoch': 0.32} + 32%|███▏ | 2256/7045 [7:20:23<15:05:15, 11.34s/it] 32%|███▏ | 2257/7045 [7:20:35<15:09:48, 11.40s/it] {'loss': 1.1592, 'learning_rate': 3.974015906337348e-06, 'epoch': 0.32} + 32%|███▏ | 2257/7045 [7:20:35<15:09:48, 11.40s/it] 32%|███▏ | 2258/7045 [7:20:47<15:22:08, 11.56s/it] {'loss': 1.125, 'learning_rate': 3.973087375299909e-06, 'epoch': 0.32} + 32%|███▏ | 2258/7045 [7:20:47<15:22:08, 11.56s/it] 32%|███▏ | 2259/7045 [7:20:58<15:12:58, 11.45s/it] {'loss': 1.106, 'learning_rate': 3.972158532871936e-06, 'epoch': 0.32} + 32%|███▏ | 2259/7045 [7:20:58<15:12:58, 11.45s/it] 32%|███▏ | 2260/7045 [7:21:09<15:11:48, 11.43s/it] {'loss': 1.1465, 'learning_rate': 3.971229379249777e-06, 'epoch': 0.32} + 32%|███▏ | 2260/7045 [7:21:09<15:11:48, 11.43s/it] 32%|███▏ | 2261/7045 [7:21:21<15:24:48, 11.60s/it] {'loss': 1.0615, 'learning_rate': 3.9702999146298395e-06, 'epoch': 0.32} + 32%|███▏ | 2261/7045 [7:21:21<15:24:48, 11.60s/it] 32%|███▏ | 2262/7045 [7:21:33<15:19:06, 11.53s/it] {'loss': 1.1426, 'learning_rate': 3.969370139208601e-06, 'epoch': 0.32} + 32%|███▏ | 2262/7045 [7:21:33<15:19:06, 11.53s/it] 32%|███▏ | 2263/7045 [7:21:45<15:37:55, 11.77s/it] {'loss': 1.1631, 'learning_rate': 3.968440053182604e-06, 'epoch': 0.32} + 32%|███▏ | 2263/7045 [7:21:45<15:37:55, 11.77s/it] 32%|███▏ | 2264/7045 [7:21:56<15:31:08, 11.69s/it] {'loss': 1.1099, 'learning_rate': 3.967509656748455e-06, 'epoch': 0.32} + 32%|███▏ | 2264/7045 [7:21:56<15:31:08, 11.69s/it] 32%|███▏ | 2265/7045 [7:22:09<15:53:23, 11.97s/it] {'loss': 1.0908, 'learning_rate': 3.966578950102827e-06, 'epoch': 0.32} + 32%|███▏ | 2265/7045 [7:22:09<15:53:23, 11.97s/it] 32%|███▏ | 2266/7045 [7:22:22<16:14:55, 12.24s/it] {'loss': 1.0664, 'learning_rate': 3.965647933442459e-06, 'epoch': 0.32} + 32%|███▏ | 2266/7045 [7:22:22<16:14:55, 12.24s/it] 32%|███▏ | 2267/7045 [7:22:34<16:10:17, 12.18s/it] {'loss': 1.125, 'learning_rate': 3.9647166069641545e-06, 'epoch': 0.32} + 32%|███▏ | 2267/7045 [7:22:34<16:10:17, 12.18s/it] 32%|███▏ | 2268/7045 [7:22:46<16:10:06, 12.18s/it] {'loss': 1.125, 'learning_rate': 3.963784970864784e-06, 'epoch': 0.32} + 32%|███▏ | 2268/7045 [7:22:46<16:10:06, 12.18s/it] 32%|███▏ | 2269/7045 [7:22:58<15:54:07, 11.99s/it] {'loss': 1.1055, 'learning_rate': 3.962853025341282e-06, 'epoch': 0.32} + 32%|███▏ | 2269/7045 [7:22:58<15:54:07, 11.99s/it] 32%|███▏ | 2270/7045 [7:23:10<16:01:59, 12.09s/it] {'loss': 1.1357, 'learning_rate': 3.96192077059065e-06, 'epoch': 0.32} + 32%|███▏ | 2270/7045 [7:23:10<16:01:59, 12.09s/it] 32%|███▏ | 2271/7045 [7:23:21<15:35:23, 11.76s/it] {'loss': 1.125, 'learning_rate': 3.960988206809954e-06, 'epoch': 0.32} + 32%|███▏ | 2271/7045 [7:23:21<15:35:23, 11.76s/it] 32%|███▏ | 2272/7045 [7:23:32<15:13:14, 11.48s/it] {'loss': 1.1006, 'learning_rate': 3.960055334196323e-06, 'epoch': 0.32} + 32%|███▏ | 2272/7045 [7:23:32<15:13:14, 11.48s/it] 32%|███▏ | 2273/7045 [7:23:43<15:10:31, 11.45s/it] {'loss': 1.126, 'learning_rate': 3.9591221529469555e-06, 'epoch': 0.32} + 32%|███▏ | 2273/7045 [7:23:43<15:10:31, 11.45s/it] 32%|███▏ | 2274/7045 [7:23:54<15:03:39, 11.36s/it] {'loss': 1.1543, 'learning_rate': 3.958188663259114e-06, 'epoch': 0.32} + 32%|███▏ | 2274/7045 [7:23:54<15:03:39, 11.36s/it] 32%|███▏ | 2275/7045 [7:24:06<15:05:25, 11.39s/it] {'loss': 1.1338, 'learning_rate': 3.957254865330123e-06, 'epoch': 0.32} + 32%|███▏ | 2275/7045 [7:24:06<15:05:25, 11.39s/it] 32%|███▏ | 2276/7045 [7:24:17<14:58:28, 11.30s/it] {'loss': 1.1377, 'learning_rate': 3.956320759357376e-06, 'epoch': 0.32} + 32%|███▏ | 2276/7045 [7:24:17<14:58:28, 11.30s/it] 32%|███▏ | 2277/7045 [7:24:28<14:48:45, 11.18s/it] {'loss': 1.1221, 'learning_rate': 3.955386345538331e-06, 'epoch': 0.32} + 32%|███▏ | 2277/7045 [7:24:28<14:48:45, 11.18s/it] 32%|███▏ | 2278/7045 [7:24:39<14:47:38, 11.17s/it] {'loss': 1.1406, 'learning_rate': 3.9544516240705086e-06, 'epoch': 0.32} + 32%|███▏ | 2278/7045 [7:24:39<14:47:38, 11.17s/it] 32%|███▏ | 2279/7045 [7:24:51<15:10:48, 11.47s/it] {'loss': 1.1416, 'learning_rate': 3.9535165951514974e-06, 'epoch': 0.32} + 32%|███▏ | 2279/7045 [7:24:51<15:10:48, 11.47s/it] 32%|███▏ | 2280/7045 [7:25:02<15:00:39, 11.34s/it] {'loss': 1.1055, 'learning_rate': 3.9525812589789495e-06, 'epoch': 0.32} + 32%|███▏ | 2280/7045 [7:25:02<15:00:39, 11.34s/it] 32%|███▏ | 2281/7045 [7:25:13<14:48:20, 11.19s/it] {'loss': 1.1406, 'learning_rate': 3.951645615750581e-06, 'epoch': 0.32} + 32%|███▏ | 2281/7045 [7:25:13<14:48:20, 11.19s/it] 32%|███▏ | 2282/7045 [7:25:24<14:42:26, 11.12s/it] {'loss': 1.1055, 'learning_rate': 3.950709665664176e-06, 'epoch': 0.32} + 32%|███▏ | 2282/7045 [7:25:24<14:42:26, 11.12s/it] 32%|███▏ | 2283/7045 [7:25:35<14:42:15, 11.12s/it] {'loss': 1.0684, 'learning_rate': 3.949773408917581e-06, 'epoch': 0.32} + 32%|███▏ | 2283/7045 [7:25:35<14:42:15, 11.12s/it] 32%|███▏ | 2284/7045 [7:25:46<14:35:01, 11.03s/it] {'loss': 1.1367, 'learning_rate': 3.9488368457087065e-06, 'epoch': 0.32} + 32%|███▏ | 2284/7045 [7:25:46<14:35:01, 11.03s/it] 32%|███▏ | 2285/7045 [7:25:57<14:35:54, 11.04s/it] {'loss': 1.1201, 'learning_rate': 3.947899976235531e-06, 'epoch': 0.32} + 32%|███▏ | 2285/7045 [7:25:57<14:35:54, 11.04s/it] 32%|███▏ | 2286/7045 [7:26:08<14:35:29, 11.04s/it] {'loss': 1.0986, 'learning_rate': 3.9469628006960944e-06, 'epoch': 0.32} + 32%|███▏ | 2286/7045 [7:26:08<14:35:29, 11.04s/it] 32%|███▏ | 2287/7045 [7:26:19<14:28:44, 10.96s/it] {'loss': 1.1641, 'learning_rate': 3.946025319288504e-06, 'epoch': 0.32} + 32%|███▏ | 2287/7045 [7:26:19<14:28:44, 10.96s/it] 32%|███▏ | 2288/7045 [7:26:32<15:24:56, 11.67s/it] {'loss': 1.1299, 'learning_rate': 3.94508753221093e-06, 'epoch': 0.32} + 32%|███▏ | 2288/7045 [7:26:32<15:24:56, 11.67s/it] 32%|███▏ | 2289/7045 [7:26:45<15:47:58, 11.96s/it] {'loss': 1.1299, 'learning_rate': 3.944149439661608e-06, 'epoch': 0.32} + 32%|███▏ | 2289/7045 [7:26:45<15:47:58, 11.96s/it] 33%|███▎ | 2290/7045 [7:26:56<15:28:08, 11.71s/it] {'loss': 1.1504, 'learning_rate': 3.9432110418388366e-06, 'epoch': 0.33} + 33%|███▎ | 2290/7045 [7:26:56<15:28:08, 11.71s/it] 33%|███▎ | 2291/7045 [7:27:07<15:23:32, 11.66s/it] {'loss': 1.1162, 'learning_rate': 3.942272338940982e-06, 'epoch': 0.33} + 33%|███▎ | 2291/7045 [7:27:07<15:23:32, 11.66s/it] 33%|███▎ | 2292/7045 [7:27:20<15:51:15, 12.01s/it] {'loss': 1.1396, 'learning_rate': 3.941333331166472e-06, 'epoch': 0.33} + 33%|███▎ | 2292/7045 [7:27:20<15:51:15, 12.01s/it] 33%|███▎ | 2293/7045 [7:27:33<16:04:55, 12.18s/it] {'loss': 1.1099, 'learning_rate': 3.940394018713802e-06, 'epoch': 0.33} + 33%|███▎ | 2293/7045 [7:27:33<16:04:55, 12.18s/it] 33%|███▎ | 2294/7045 [7:27:46<16:25:14, 12.44s/it] {'loss': 1.124, 'learning_rate': 3.939454401781528e-06, 'epoch': 0.33} + 33%|███▎ | 2294/7045 [7:27:46<16:25:14, 12.44s/it] 33%|███▎ | 2295/7045 [7:27:57<15:53:17, 12.04s/it] {'loss': 1.1406, 'learning_rate': 3.9385144805682715e-06, 'epoch': 0.33} + 33%|███▎ | 2295/7045 [7:27:57<15:53:17, 12.04s/it] 33%|███▎ | 2296/7045 [7:28:09<15:47:06, 11.97s/it] {'loss': 1.1318, 'learning_rate': 3.937574255272721e-06, 'epoch': 0.33} + 33%|███▎ | 2296/7045 [7:28:09<15:47:06, 11.97s/it] 33%|███▎ | 2297/7045 [7:28:19<15:15:42, 11.57s/it] {'loss': 1.105, 'learning_rate': 3.936633726093626e-06, 'epoch': 0.33} + 33%|███▎ | 2297/7045 [7:28:19<15:15:42, 11.57s/it] 33%|███▎ | 2298/7045 [7:28:31<15:06:11, 11.45s/it] {'loss': 1.0913, 'learning_rate': 3.935692893229802e-06, 'epoch': 0.33} + 33%|███▎ | 2298/7045 [7:28:31<15:06:11, 11.45s/it] 33%|███▎ | 2299/7045 [7:28:41<14:50:17, 11.26s/it] {'loss': 1.1318, 'learning_rate': 3.934751756880127e-06, 'epoch': 0.33} + 33%|███▎ | 2299/7045 [7:28:41<14:50:17, 11.26s/it] 33%|███▎ | 2300/7045 [7:28:54<15:28:05, 11.74s/it] {'loss': 1.1465, 'learning_rate': 3.933810317243547e-06, 'epoch': 0.33} + 33%|███▎ | 2300/7045 [7:28:54<15:28:05, 11.74s/it] 33%|███▎ | 2301/7045 [7:29:06<15:37:22, 11.86s/it] {'loss': 1.1157, 'learning_rate': 3.9328685745190675e-06, 'epoch': 0.33} + 33%|███▎ | 2301/7045 [7:29:06<15:37:22, 11.86s/it] 33%|███▎ | 2302/7045 [7:29:19<15:58:31, 12.13s/it] {'loss': 1.1543, 'learning_rate': 3.9319265289057595e-06, 'epoch': 0.33} + 33%|███▎ | 2302/7045 [7:29:19<15:58:31, 12.13s/it] 33%|███▎ | 2303/7045 [7:29:30<15:33:09, 11.81s/it] {'loss': 1.1172, 'learning_rate': 3.9309841806027604e-06, 'epoch': 0.33} + 33%|███▎ | 2303/7045 [7:29:30<15:33:09, 11.81s/it] 33%|███▎ | 2304/7045 [7:29:44<16:10:20, 12.28s/it] {'loss': 1.0806, 'learning_rate': 3.930041529809269e-06, 'epoch': 0.33} + 33%|███▎ | 2304/7045 [7:29:44<16:10:20, 12.28s/it] 33%|███▎ | 2305/7045 [7:29:55<15:45:48, 11.97s/it] {'loss': 1.1211, 'learning_rate': 3.929098576724547e-06, 'epoch': 0.33} + 33%|███▎ | 2305/7045 [7:29:55<15:45:48, 11.97s/it] 33%|███▎ | 2306/7045 [7:30:06<15:23:38, 11.69s/it] {'loss': 1.1631, 'learning_rate': 3.928155321547925e-06, 'epoch': 0.33} + 33%|███▎ | 2306/7045 [7:30:06<15:23:38, 11.69s/it] 33%|███▎ | 2307/7045 [7:30:17<15:22:46, 11.69s/it] {'loss': 1.1021, 'learning_rate': 3.927211764478792e-06, 'epoch': 0.33} + 33%|███▎ | 2307/7045 [7:30:17<15:22:46, 11.69s/it] 33%|███▎ | 2308/7045 [7:30:29<15:17:05, 11.62s/it] {'loss': 1.1152, 'learning_rate': 3.926267905716603e-06, 'epoch': 0.33} + 33%|███▎ | 2308/7045 [7:30:29<15:17:05, 11.62s/it] 33%|███▎ | 2309/7045 [7:30:40<15:01:14, 11.42s/it] {'loss': 1.0967, 'learning_rate': 3.925323745460879e-06, 'epoch': 0.33} + 33%|███▎ | 2309/7045 [7:30:40<15:01:14, 11.42s/it] 33%|███▎ | 2310/7045 [7:30:53<15:42:28, 11.94s/it] {'loss': 1.1221, 'learning_rate': 3.9243792839112e-06, 'epoch': 0.33} + 33%|███▎ | 2310/7045 [7:30:53<15:42:28, 11.94s/it] 33%|███▎ | 2311/7045 [7:31:05<15:45:32, 11.98s/it] {'loss': 1.1279, 'learning_rate': 3.923434521267214e-06, 'epoch': 0.33} + 33%|███▎ | 2311/7045 [7:31:05<15:45:32, 11.98s/it] 33%|███▎ | 2312/7045 [7:31:16<15:29:08, 11.78s/it] {'loss': 1.1982, 'learning_rate': 3.92248945772863e-06, 'epoch': 0.33} + 33%|███▎ | 2312/7045 [7:31:16<15:29:08, 11.78s/it] 33%|███▎ | 2313/7045 [7:31:28<15:17:39, 11.64s/it] {'loss': 1.0879, 'learning_rate': 3.921544093495222e-06, 'epoch': 0.33} + 33%|███▎ | 2313/7045 [7:31:28<15:17:39, 11.64s/it] 33%|███▎ | 2314/7045 [7:31:39<14:57:56, 11.39s/it] {'loss': 1.1211, 'learning_rate': 3.9205984287668266e-06, 'epoch': 0.33} + 33%|███▎ | 2314/7045 [7:31:39<14:57:56, 11.39s/it] 33%|███▎ | 2315/7045 [7:31:50<15:04:27, 11.47s/it] {'loss': 1.1484, 'learning_rate': 3.919652463743345e-06, 'epoch': 0.33} + 33%|███▎ | 2315/7045 [7:31:50<15:04:27, 11.47s/it] 33%|███▎ | 2316/7045 [7:32:01<14:50:03, 11.29s/it] {'loss': 1.1084, 'learning_rate': 3.918706198624741e-06, 'epoch': 0.33} + 33%|███▎ | 2316/7045 [7:32:01<14:50:03, 11.29s/it] 33%|███▎ | 2317/7045 [7:32:12<14:42:27, 11.20s/it] {'loss': 1.1011, 'learning_rate': 3.9177596336110415e-06, 'epoch': 0.33} + 33%|███▎ | 2317/7045 [7:32:12<14:42:27, 11.20s/it] 33%|███▎ | 2318/7045 [7:32:23<14:33:05, 11.08s/it] {'loss': 1.0918, 'learning_rate': 3.9168127689023375e-06, 'epoch': 0.33} + 33%|███▎ | 2318/7045 [7:32:23<14:33:05, 11.08s/it] 33%|███▎ | 2319/7045 [7:32:34<14:36:48, 11.13s/it] {'loss': 1.1372, 'learning_rate': 3.915865604698785e-06, 'epoch': 0.33} + 33%|███▎ | 2319/7045 [7:32:34<14:36:48, 11.13s/it] 33%|███▎ | 2320/7045 [7:32:47<15:22:33, 11.71s/it] {'loss': 1.1621, 'learning_rate': 3.9149181412005995e-06, 'epoch': 0.33} + 33%|███▎ | 2320/7045 [7:32:47<15:22:33, 11.71s/it] 33%|███▎ | 2321/7045 [7:32:59<15:14:57, 11.62s/it] {'loss': 1.1172, 'learning_rate': 3.913970378608063e-06, 'epoch': 0.33} + 33%|███▎ | 2321/7045 [7:32:59<15:14:57, 11.62s/it] 33%|███▎ | 2322/7045 [7:33:12<15:49:08, 12.06s/it] {'loss': 1.1006, 'learning_rate': 3.913022317121519e-06, 'epoch': 0.33} + 33%|███▎ | 2322/7045 [7:33:12<15:49:08, 12.06s/it] 33%|███▎ | 2323/7045 [7:33:23<15:26:24, 11.77s/it] {'loss': 1.1318, 'learning_rate': 3.912073956941375e-06, 'epoch': 0.33} + 33%|███▎ | 2323/7045 [7:33:23<15:26:24, 11.77s/it] 33%|███▎ | 2324/7045 [7:33:34<15:22:43, 11.73s/it] {'loss': 1.1123, 'learning_rate': 3.9111252982681e-06, 'epoch': 0.33} + 33%|███▎ | 2324/7045 [7:33:34<15:22:43, 11.73s/it] 33%|███▎ | 2325/7045 [7:33:46<15:11:38, 11.59s/it] {'loss': 1.1377, 'learning_rate': 3.9101763413022306e-06, 'epoch': 0.33} + 33%|███▎ | 2325/7045 [7:33:46<15:11:38, 11.59s/it] 33%|███▎ | 2326/7045 [7:33:57<14:59:01, 11.43s/it] {'loss': 1.1279, 'learning_rate': 3.9092270862443605e-06, 'epoch': 0.33} + 33%|███▎ | 2326/7045 [7:33:57<14:59:01, 11.43s/it] 33%|███▎ | 2327/7045 [7:34:10<15:42:45, 11.99s/it] {'loss': 1.1138, 'learning_rate': 3.90827753329515e-06, 'epoch': 0.33} + 33%|███▎ | 2327/7045 [7:34:10<15:42:45, 11.99s/it] 33%|███▎ | 2328/7045 [7:34:21<15:21:46, 11.72s/it] {'loss': 1.0928, 'learning_rate': 3.907327682655322e-06, 'epoch': 0.33} + 33%|███▎ | 2328/7045 [7:34:21<15:21:46, 11.72s/it] 33%|███▎ | 2329/7045 [7:34:32<15:11:13, 11.59s/it] {'loss': 1.1152, 'learning_rate': 3.906377534525662e-06, 'epoch': 0.33} + 33%|███▎ | 2329/7045 [7:34:32<15:11:13, 11.59s/it] 33%|███▎ | 2330/7045 [7:34:43<14:57:36, 11.42s/it] {'loss': 1.0957, 'learning_rate': 3.905427089107019e-06, 'epoch': 0.33} + 33%|███▎ | 2330/7045 [7:34:43<14:57:36, 11.42s/it] 33%|███▎ | 2331/7045 [7:34:55<14:53:27, 11.37s/it] {'loss': 1.1035, 'learning_rate': 3.904476346600302e-06, 'epoch': 0.33} + 33%|███▎ | 2331/7045 [7:34:55<14:53:27, 11.37s/it] 33%|███▎ | 2332/7045 [7:35:06<14:58:51, 11.44s/it] {'loss': 1.062, 'learning_rate': 3.903525307206486e-06, 'epoch': 0.33} + 33%|███▎ | 2332/7045 [7:35:06<14:58:51, 11.44s/it] 33%|███▎ | 2333/7045 [7:35:17<14:46:58, 11.29s/it] {'loss': 1.1299, 'learning_rate': 3.902573971126609e-06, 'epoch': 0.33} + 33%|███▎ | 2333/7045 [7:35:17<14:46:58, 11.29s/it] 33%|███▎ | 2334/7045 [7:35:30<15:19:27, 11.71s/it] {'loss': 1.1226, 'learning_rate': 3.901622338561769e-06, 'epoch': 0.33} + 33%|███▎ | 2334/7045 [7:35:30<15:19:27, 11.71s/it] 33%|███▎ | 2335/7045 [7:35:41<15:09:15, 11.58s/it] {'loss': 1.1387, 'learning_rate': 3.9006704097131275e-06, 'epoch': 0.33} + 33%|███▎ | 2335/7045 [7:35:41<15:09:15, 11.58s/it] 33%|███▎ | 2336/7045 [7:35:52<14:53:35, 11.39s/it] {'loss': 1.1074, 'learning_rate': 3.899718184781911e-06, 'epoch': 0.33} + 33%|███▎ | 2336/7045 [7:35:52<14:53:35, 11.39s/it] 33%|███▎ | 2337/7045 [7:36:04<14:57:50, 11.44s/it] {'loss': 1.1396, 'learning_rate': 3.898765663969407e-06, 'epoch': 0.33} + 33%|███▎ | 2337/7045 [7:36:04<14:57:50, 11.44s/it] 33%|███▎ | 2338/7045 [7:36:15<14:46:42, 11.30s/it] {'loss': 1.1255, 'learning_rate': 3.897812847476963e-06, 'epoch': 0.33} + 33%|███▎ | 2338/7045 [7:36:15<14:46:42, 11.30s/it] 33%|███▎ | 2339/7045 [7:36:26<14:41:24, 11.24s/it] {'loss': 1.165, 'learning_rate': 3.896859735505994e-06, 'epoch': 0.33} + 33%|███▎ | 2339/7045 [7:36:26<14:41:24, 11.24s/it] 33%|███▎ | 2340/7045 [7:36:37<14:37:44, 11.19s/it] {'loss': 1.1011, 'learning_rate': 3.8959063282579735e-06, 'epoch': 0.33} + 33%|███▎ | 2340/7045 [7:36:37<14:37:44, 11.19s/it] 33%|███▎ | 2341/7045 [7:36:48<14:38:50, 11.21s/it] {'loss': 1.1216, 'learning_rate': 3.894952625934439e-06, 'epoch': 0.33} + 33%|███▎ | 2341/7045 [7:36:48<14:38:50, 11.21s/it] 33%|███▎ | 2342/7045 [7:36:59<14:33:41, 11.15s/it] {'loss': 1.1484, 'learning_rate': 3.893998628736988e-06, 'epoch': 0.33} + 33%|███▎ | 2342/7045 [7:36:59<14:33:41, 11.15s/it] 33%|███▎ | 2343/7045 [7:37:10<14:38:09, 11.21s/it] {'loss': 1.1504, 'learning_rate': 3.893044336867287e-06, 'epoch': 0.33} + 33%|███▎ | 2343/7045 [7:37:10<14:38:09, 11.21s/it] 33%|███▎ | 2344/7045 [7:37:23<15:08:46, 11.60s/it] {'loss': 1.1401, 'learning_rate': 3.892089750527057e-06, 'epoch': 0.33} + 33%|███▎ | 2344/7045 [7:37:23<15:08:46, 11.60s/it] 33%|███▎ | 2345/7045 [7:37:35<15:12:59, 11.66s/it] {'loss': 1.1104, 'learning_rate': 3.891134869918085e-06, 'epoch': 0.33} + 33%|███▎ | 2345/7045 [7:37:35<15:12:59, 11.66s/it] 33%|███▎ | 2346/7045 [7:37:46<14:58:49, 11.48s/it] {'loss': 1.1416, 'learning_rate': 3.890179695242221e-06, 'epoch': 0.33} + 33%|███▎ | 2346/7045 [7:37:46<14:58:49, 11.48s/it] 33%|███▎ | 2347/7045 [7:37:57<14:53:34, 11.41s/it] {'loss': 1.1348, 'learning_rate': 3.889224226701373e-06, 'epoch': 0.33} + 33%|███▎ | 2347/7045 [7:37:57<14:53:34, 11.41s/it] 33%|███▎ | 2348/7045 [7:38:08<14:38:23, 11.22s/it] {'loss': 1.1064, 'learning_rate': 3.888268464497518e-06, 'epoch': 0.33} + 33%|███▎ | 2348/7045 [7:38:08<14:38:23, 11.22s/it] 33%|███▎ | 2349/7045 [7:38:19<14:38:44, 11.23s/it] {'loss': 1.126, 'learning_rate': 3.887312408832688e-06, 'epoch': 0.33} + 33%|███▎ | 2349/7045 [7:38:19<14:38:44, 11.23s/it] 33%|███▎ | 2350/7045 [7:38:30<14:34:19, 11.17s/it] {'loss': 1.1191, 'learning_rate': 3.88635605990898e-06, 'epoch': 0.33} + 33%|███▎ | 2350/7045 [7:38:30<14:34:19, 11.17s/it] 33%|███▎ | 2351/7045 [7:38:42<14:40:16, 11.25s/it] {'loss': 1.1709, 'learning_rate': 3.885399417928556e-06, 'epoch': 0.33} + 33%|███▎ | 2351/7045 [7:38:42<14:40:16, 11.25s/it] 33%|███▎ | 2352/7045 [7:38:53<14:41:43, 11.27s/it] {'loss': 1.1035, 'learning_rate': 3.884442483093634e-06, 'epoch': 0.33} + 33%|███▎ | 2352/7045 [7:38:53<14:41:43, 11.27s/it] 33%|███▎ | 2353/7045 [7:39:04<14:37:06, 11.22s/it] {'loss': 1.1006, 'learning_rate': 3.883485255606498e-06, 'epoch': 0.33} + 33%|███▎ | 2353/7045 [7:39:04<14:37:06, 11.22s/it] 33%|███▎ | 2354/7045 [7:39:16<14:46:30, 11.34s/it] {'loss': 1.1455, 'learning_rate': 3.882527735669493e-06, 'epoch': 0.33} + 33%|███▎ | 2354/7045 [7:39:16<14:46:30, 11.34s/it] 33%|███▎ | 2355/7045 [7:39:27<14:44:53, 11.32s/it] {'loss': 1.1689, 'learning_rate': 3.881569923485026e-06, 'epoch': 0.33} + 33%|███▎ | 2355/7045 [7:39:27<14:44:53, 11.32s/it] 33%|███▎ | 2356/7045 [7:39:40<15:29:04, 11.89s/it] {'loss': 1.0645, 'learning_rate': 3.880611819255565e-06, 'epoch': 0.33} + 33%|███▎ | 2356/7045 [7:39:40<15:29:04, 11.89s/it] 33%|███▎ | 2357/7045 [7:39:53<15:45:23, 12.10s/it] {'loss': 1.0967, 'learning_rate': 3.879653423183639e-06, 'epoch': 0.33} + 33%|███▎ | 2357/7045 [7:39:53<15:45:23, 12.10s/it] 33%|███▎ | 2358/7045 [7:40:04<15:22:08, 11.80s/it] {'loss': 1.167, 'learning_rate': 3.878694735471842e-06, 'epoch': 0.33} + 33%|███▎ | 2358/7045 [7:40:04<15:22:08, 11.80s/it] 33%|███▎ | 2359/7045 [7:40:15<14:58:30, 11.50s/it] {'loss': 1.1104, 'learning_rate': 3.877735756322825e-06, 'epoch': 0.33} + 33%|███▎ | 2359/7045 [7:40:15<14:58:30, 11.50s/it] 33%|███▎ | 2360/7045 [7:40:26<14:59:34, 11.52s/it] {'loss': 1.0918, 'learning_rate': 3.876776485939305e-06, 'epoch': 0.33} + 33%|███▎ | 2360/7045 [7:40:26<14:59:34, 11.52s/it] 34%|███▎ | 2361/7045 [7:40:37<14:49:15, 11.39s/it] {'loss': 1.1748, 'learning_rate': 3.875816924524059e-06, 'epoch': 0.34} + 34%|███▎ | 2361/7045 [7:40:37<14:49:15, 11.39s/it] 34%|███▎ | 2362/7045 [7:40:49<14:51:28, 11.42s/it] {'loss': 1.1123, 'learning_rate': 3.874857072279924e-06, 'epoch': 0.34} + 34%|███▎ | 2362/7045 [7:40:49<14:51:28, 11.42s/it] 34%|███▎ | 2363/7045 [7:41:01<15:12:52, 11.70s/it] {'loss': 1.1216, 'learning_rate': 3.873896929409799e-06, 'epoch': 0.34} + 34%|███▎ | 2363/7045 [7:41:01<15:12:52, 11.70s/it] 34%|███▎ | 2364/7045 [7:41:12<15:05:11, 11.60s/it] {'loss': 1.1348, 'learning_rate': 3.8729364961166475e-06, 'epoch': 0.34} + 34%|███▎ | 2364/7045 [7:41:13<15:05:11, 11.60s/it] 34%|███▎ | 2365/7045 [7:41:23<14:48:48, 11.39s/it] {'loss': 1.1538, 'learning_rate': 3.871975772603488e-06, 'epoch': 0.34} + 34%|███▎ | 2365/7045 [7:41:23<14:48:48, 11.39s/it] 34%|███▎ | 2366/7045 [7:41:37<15:45:21, 12.12s/it] {'loss': 1.0908, 'learning_rate': 3.871014759073408e-06, 'epoch': 0.34} + 34%|███▎ | 2366/7045 [7:41:37<15:45:21, 12.12s/it]Token indices sequence length is longer than the specified maximum sequence length for this model (2540 > 2048). Running this sequence through the model will result in indexing errors + 34%|███▎ | 2367/7045 [7:41:48<15:19:57, 11.80s/it] {'loss': 1.1152, 'learning_rate': 3.870053455729552e-06, 'epoch': 0.34} + 34%|███▎ | 2367/7045 [7:41:48<15:19:57, 11.80s/it] 34%|███▎ | 2368/7045 [7:41:59<15:05:38, 11.62s/it] {'loss': 1.1475, 'learning_rate': 3.869091862775125e-06, 'epoch': 0.34} + 34%|███▎ | 2368/7045 [7:41:59<15:05:38, 11.62s/it] 34%|███▎ | 2369/7045 [7:42:11<14:54:58, 11.48s/it] {'loss': 1.166, 'learning_rate': 3.8681299804133955e-06, 'epoch': 0.34} + 34%|███▎ | 2369/7045 [7:42:11<14:54:58, 11.48s/it] 34%|███▎ | 2370/7045 [7:42:22<14:53:11, 11.46s/it] {'loss': 1.1709, 'learning_rate': 3.867167808847693e-06, 'epoch': 0.34} + 34%|███▎ | 2370/7045 [7:42:22<14:53:11, 11.46s/it] 34%|███▎ | 2371/7045 [7:42:35<15:29:48, 11.94s/it] {'loss': 1.1377, 'learning_rate': 3.866205348281405e-06, 'epoch': 0.34} + 34%|███▎ | 2371/7045 [7:42:35<15:29:48, 11.94s/it] 34%|███▎ | 2372/7045 [7:42:46<15:10:59, 11.70s/it] {'loss': 1.1318, 'learning_rate': 3.865242598917985e-06, 'epoch': 0.34} + 34%|███▎ | 2372/7045 [7:42:46<15:10:59, 11.70s/it] 34%|███▎ | 2373/7045 [7:42:57<14:59:56, 11.56s/it] {'loss': 1.0894, 'learning_rate': 3.8642795609609444e-06, 'epoch': 0.34} + 34%|███▎ | 2373/7045 [7:42:57<14:59:56, 11.56s/it] 34%|███▎ | 2374/7045 [7:43:08<14:47:06, 11.40s/it] {'loss': 1.1543, 'learning_rate': 3.863316234613855e-06, 'epoch': 0.34} + 34%|███▎ | 2374/7045 [7:43:08<14:47:06, 11.40s/it] 34%|███▎ | 2375/7045 [7:43:21<15:13:39, 11.74s/it] {'loss': 1.1123, 'learning_rate': 3.862352620080353e-06, 'epoch': 0.34} + 34%|███▎ | 2375/7045 [7:43:21<15:13:39, 11.74s/it] 34%|███▎ | 2376/7045 [7:43:32<15:02:06, 11.59s/it] {'loss': 1.1592, 'learning_rate': 3.86138871756413e-06, 'epoch': 0.34} + 34%|███▎ | 2376/7045 [7:43:32<15:02:06, 11.59s/it] 34%|███▎ | 2377/7045 [7:43:43<14:52:15, 11.47s/it] {'loss': 1.082, 'learning_rate': 3.8604245272689466e-06, 'epoch': 0.34} + 34%|███▎ | 2377/7045 [7:43:43<14:52:15, 11.47s/it] 34%|███▍ | 2378/7045 [7:43:55<14:42:15, 11.34s/it] {'loss': 1.0918, 'learning_rate': 3.859460049398615e-06, 'epoch': 0.34} + 34%|███▍ | 2378/7045 [7:43:55<14:42:15, 11.34s/it] 34%|███▍ | 2379/7045 [7:44:07<14:58:14, 11.55s/it] {'loss': 1.1172, 'learning_rate': 3.858495284157015e-06, 'epoch': 0.34} + 34%|███▍ | 2379/7045 [7:44:07<14:58:14, 11.55s/it] 34%|███▍ | 2380/7045 [7:44:18<14:46:48, 11.41s/it] {'loss': 1.1104, 'learning_rate': 3.857530231748084e-06, 'epoch': 0.34} + 34%|███▍ | 2380/7045 [7:44:18<14:46:48, 11.41s/it] 34%|███▍ | 2381/7045 [7:44:29<14:46:45, 11.41s/it] {'loss': 1.1113, 'learning_rate': 3.8565648923758215e-06, 'epoch': 0.34} + 34%|███▍ | 2381/7045 [7:44:29<14:46:45, 11.41s/it] 34%|███▍ | 2382/7045 [7:44:40<14:42:49, 11.36s/it] {'loss': 1.1006, 'learning_rate': 3.8555992662442865e-06, 'epoch': 0.34} + 34%|███▍ | 2382/7045 [7:44:40<14:42:49, 11.36s/it] 34%|███▍ | 2383/7045 [7:44:52<14:43:53, 11.38s/it] {'loss': 1.1807, 'learning_rate': 3.854633353557599e-06, 'epoch': 0.34} + 34%|███▍ | 2383/7045 [7:44:52<14:43:53, 11.38s/it] 34%|███▍ | 2384/7045 [7:45:09<17:10:49, 13.27s/it] {'loss': 1.1206, 'learning_rate': 3.85366715451994e-06, 'epoch': 0.34} + 34%|███▍ | 2384/7045 [7:45:09<17:10:49, 13.27s/it] 34%|███▍ | 2385/7045 [7:45:21<16:26:14, 12.70s/it] {'loss': 1.1074, 'learning_rate': 3.8527006693355514e-06, 'epoch': 0.34} + 34%|███▍ | 2385/7045 [7:45:21<16:26:14, 12.70s/it] 34%|███▍ | 2386/7045 [7:45:33<16:26:58, 12.71s/it] {'loss': 1.083, 'learning_rate': 3.851733898208734e-06, 'epoch': 0.34} + 34%|███▍ | 2386/7045 [7:45:33<16:26:58, 12.71s/it] 34%|███▍ | 2387/7045 [7:45:45<15:53:38, 12.28s/it] {'loss': 1.1475, 'learning_rate': 3.85076684134385e-06, 'epoch': 0.34} + 34%|███▍ | 2387/7045 [7:45:45<15:53:38, 12.28s/it] 34%|███▍ | 2388/7045 [7:45:57<15:45:30, 12.18s/it] {'loss': 1.1768, 'learning_rate': 3.849799498945324e-06, 'epoch': 0.34} + 34%|███▍ | 2388/7045 [7:45:57<15:45:30, 12.18s/it] 34%|███▍ | 2389/7045 [7:46:09<15:41:50, 12.14s/it] {'loss': 1.1133, 'learning_rate': 3.848831871217635e-06, 'epoch': 0.34} + 34%|███▍ | 2389/7045 [7:46:09<15:41:50, 12.14s/it] 34%|███▍ | 2390/7045 [7:46:20<15:20:23, 11.86s/it] {'loss': 1.1157, 'learning_rate': 3.8478639583653295e-06, 'epoch': 0.34} + 34%|███▍ | 2390/7045 [7:46:20<15:20:23, 11.86s/it] 34%|███▍ | 2391/7045 [7:46:31<15:00:53, 11.61s/it] {'loss': 1.1025, 'learning_rate': 3.8468957605930106e-06, 'epoch': 0.34} + 34%|███▍ | 2391/7045 [7:46:31<15:00:53, 11.61s/it] 34%|███▍ | 2392/7045 [7:46:42<14:55:39, 11.55s/it] {'loss': 1.1123, 'learning_rate': 3.845927278105341e-06, 'epoch': 0.34} + 34%|███▍ | 2392/7045 [7:46:42<14:55:39, 11.55s/it] 34%|███▍ | 2393/7045 [7:46:53<14:43:22, 11.39s/it] {'loss': 1.1494, 'learning_rate': 3.844958511107045e-06, 'epoch': 0.34} + 34%|███▍ | 2393/7045 [7:46:53<14:43:22, 11.39s/it] 34%|███▍ | 2394/7045 [7:47:05<14:43:30, 11.40s/it] {'loss': 1.1406, 'learning_rate': 3.843989459802908e-06, 'epoch': 0.34} + 34%|███▍ | 2394/7045 [7:47:05<14:43:30, 11.40s/it] 34%|███▍ | 2395/7045 [7:47:17<14:54:27, 11.54s/it] {'loss': 1.1807, 'learning_rate': 3.843020124397773e-06, 'epoch': 0.34} + 34%|███▍ | 2395/7045 [7:47:17<14:54:27, 11.54s/it] 34%|███▍ | 2396/7045 [7:47:28<14:41:17, 11.37s/it] {'loss': 1.1318, 'learning_rate': 3.842050505096544e-06, 'epoch': 0.34} + 34%|███▍ | 2396/7045 [7:47:28<14:41:17, 11.37s/it] 34%|███▍ | 2397/7045 [7:47:39<14:38:18, 11.34s/it] {'loss': 1.1094, 'learning_rate': 3.841080602104186e-06, 'epoch': 0.34} + 34%|███▍ | 2397/7045 [7:47:39<14:38:18, 11.34s/it] 34%|███▍ | 2398/7045 [7:47:50<14:32:52, 11.27s/it] {'loss': 1.0791, 'learning_rate': 3.840110415625723e-06, 'epoch': 0.34} + 34%|███▍ | 2398/7045 [7:47:50<14:32:52, 11.27s/it] 34%|███▍ | 2399/7045 [7:48:01<14:28:13, 11.21s/it] {'loss': 1.0781, 'learning_rate': 3.839139945866238e-06, 'epoch': 0.34} + 34%|███▍ | 2399/7045 [7:48:01<14:28:13, 11.21s/it] 34%|███▍ | 2400/7045 [7:48:12<14:24:13, 11.16s/it] {'loss': 1.1592, 'learning_rate': 3.838169193030877e-06, 'epoch': 0.34} + 34%|███▍ | 2400/7045 [7:48:12<14:24:13, 11.16s/it] 34%|███▍ | 2401/7045 [7:48:23<14:16:45, 11.07s/it] {'loss': 1.1035, 'learning_rate': 3.8371981573248425e-06, 'epoch': 0.34} + 34%|███▍ | 2401/7045 [7:48:23<14:16:45, 11.07s/it] 34%|███▍ | 2402/7045 [7:48:34<14:22:38, 11.15s/it] {'loss': 1.1372, 'learning_rate': 3.836226838953399e-06, 'epoch': 0.34} + 34%|███▍ | 2402/7045 [7:48:34<14:22:38, 11.15s/it] 34%|███▍ | 2403/7045 [7:48:48<15:08:56, 11.75s/it] {'loss': 1.1123, 'learning_rate': 3.83525523812187e-06, 'epoch': 0.34} + 34%|███▍ | 2403/7045 [7:48:48<15:08:56, 11.75s/it] 34%|███▍ | 2404/7045 [7:48:58<14:47:03, 11.47s/it] {'loss': 1.1396, 'learning_rate': 3.834283355035637e-06, 'epoch': 0.34} + 34%|███▍ | 2404/7045 [7:48:58<14:47:03, 11.47s/it] 34%|███▍ | 2405/7045 [7:49:10<15:03:10, 11.68s/it] {'loss': 1.1562, 'learning_rate': 3.833311189900145e-06, 'epoch': 0.34} + 34%|███▍ | 2405/7045 [7:49:10<15:03:10, 11.68s/it] 34%|███▍ | 2406/7045 [7:49:22<14:49:32, 11.51s/it] {'loss': 1.105, 'learning_rate': 3.832338742920896e-06, 'epoch': 0.34} + 34%|███▍ | 2406/7045 [7:49:22<14:49:32, 11.51s/it] 34%|███▍ | 2407/7045 [7:49:34<15:14:31, 11.83s/it] {'loss': 1.1162, 'learning_rate': 3.8313660143034504e-06, 'epoch': 0.34} + 34%|███▍ | 2407/7045 [7:49:34<15:14:31, 11.83s/it] 34%|███▍ | 2408/7045 [7:49:46<15:22:48, 11.94s/it] {'loss': 1.083, 'learning_rate': 3.830393004253431e-06, 'epoch': 0.34} + 34%|███▍ | 2408/7045 [7:49:46<15:22:48, 11.94s/it] 34%|███▍ | 2409/7045 [7:49:57<15:01:58, 11.67s/it] {'loss': 1.1025, 'learning_rate': 3.8294197129765185e-06, 'epoch': 0.34} + 34%|███▍ | 2409/7045 [7:49:57<15:01:58, 11.67s/it] 34%|███▍ | 2410/7045 [7:50:09<14:54:16, 11.58s/it] {'loss': 1.1084, 'learning_rate': 3.828446140678454e-06, 'epoch': 0.34} + 34%|███▍ | 2410/7045 [7:50:09<14:54:16, 11.58s/it] 34%|███▍ | 2411/7045 [7:50:20<14:43:36, 11.44s/it] {'loss': 1.1562, 'learning_rate': 3.827472287565036e-06, 'epoch': 0.34} + 34%|███▍ | 2411/7045 [7:50:20<14:43:36, 11.44s/it] 34%|███▍ | 2412/7045 [7:50:33<15:13:43, 11.83s/it] {'loss': 1.1357, 'learning_rate': 3.826498153842125e-06, 'epoch': 0.34} + 34%|███▍ | 2412/7045 [7:50:33<15:13:43, 11.83s/it] 34%|███▍ | 2413/7045 [7:50:46<15:57:36, 12.40s/it] {'loss': 1.0703, 'learning_rate': 3.825523739715639e-06, 'epoch': 0.34} + 34%|███▍ | 2413/7045 [7:50:46<15:57:36, 12.40s/it] 34%|███▍ | 2414/7045 [7:50:57<15:25:21, 11.99s/it] {'loss': 1.1221, 'learning_rate': 3.8245490453915566e-06, 'epoch': 0.34} + 34%|███▍ | 2414/7045 [7:50:57<15:25:21, 11.99s/it] 34%|███▍ | 2415/7045 [7:51:09<15:13:05, 11.83s/it] {'loss': 1.1367, 'learning_rate': 3.8235740710759126e-06, 'epoch': 0.34} + 34%|███▍ | 2415/7045 [7:51:09<15:13:05, 11.83s/it] 34%|███▍ | 2416/7045 [7:51:21<15:19:33, 11.92s/it] {'loss': 1.1152, 'learning_rate': 3.822598816974807e-06, 'epoch': 0.34} + 34%|███▍ | 2416/7045 [7:51:21<15:19:33, 11.92s/it] 34%|███▍ | 2417/7045 [7:51:34<15:34:27, 12.11s/it] {'loss': 1.1016, 'learning_rate': 3.821623283294392e-06, 'epoch': 0.34} + 34%|███▍ | 2417/7045 [7:51:34<15:34:27, 12.11s/it] 34%|███▍ | 2418/7045 [7:51:45<15:17:08, 11.89s/it] {'loss': 1.1035, 'learning_rate': 3.8206474702408825e-06, 'epoch': 0.34} + 34%|███▍ | 2418/7045 [7:51:45<15:17:08, 11.89s/it] 34%|███▍ | 2419/7045 [7:51:56<15:06:17, 11.75s/it] {'loss': 1.1299, 'learning_rate': 3.819671378020553e-06, 'epoch': 0.34} + 34%|███▍ | 2419/7045 [7:51:56<15:06:17, 11.75s/it] 34%|███▍ | 2420/7045 [7:52:08<14:56:34, 11.63s/it] {'loss': 1.1055, 'learning_rate': 3.818695006839736e-06, 'epoch': 0.34} + 34%|███▍ | 2420/7045 [7:52:08<14:56:34, 11.63s/it] 34%|███▍ | 2421/7045 [7:52:19<14:49:46, 11.55s/it] {'loss': 1.1562, 'learning_rate': 3.817718356904823e-06, 'epoch': 0.34} + 34%|███▍ | 2421/7045 [7:52:19<14:49:46, 11.55s/it] 34%|███▍ | 2422/7045 [7:52:31<14:47:36, 11.52s/it] {'loss': 1.126, 'learning_rate': 3.816741428422263e-06, 'epoch': 0.34} + 34%|███▍ | 2422/7045 [7:52:31<14:47:36, 11.52s/it] 34%|███▍ | 2423/7045 [7:52:42<14:53:02, 11.59s/it] {'loss': 1.1484, 'learning_rate': 3.8157642215985665e-06, 'epoch': 0.34} + 34%|███▍ | 2423/7045 [7:52:42<14:53:02, 11.59s/it] 34%|███▍ | 2424/7045 [7:52:54<14:46:21, 11.51s/it] {'loss': 1.1221, 'learning_rate': 3.814786736640303e-06, 'epoch': 0.34} + 34%|███▍ | 2424/7045 [7:52:54<14:46:21, 11.51s/it] 34%|███▍ | 2425/7045 [7:53:05<14:38:41, 11.41s/it] {'loss': 1.1299, 'learning_rate': 3.813808973754098e-06, 'epoch': 0.34} + 34%|███▍ | 2425/7045 [7:53:05<14:38:41, 11.41s/it] 34%|███▍ | 2426/7045 [7:53:16<14:37:07, 11.39s/it] {'loss': 1.1133, 'learning_rate': 3.8128309331466363e-06, 'epoch': 0.34} + 34%|███▍ | 2426/7045 [7:53:16<14:37:07, 11.39s/it] 34%|███▍ | 2427/7045 [7:53:30<15:28:09, 12.06s/it] {'loss': 1.1328, 'learning_rate': 3.811852615024664e-06, 'epoch': 0.34} + 34%|███▍ | 2427/7045 [7:53:30<15:28:09, 12.06s/it] 34%|███▍ | 2428/7045 [7:53:41<15:12:42, 11.86s/it] {'loss': 1.1299, 'learning_rate': 3.8108740195949836e-06, 'epoch': 0.34} + 34%|███▍ | 2428/7045 [7:53:41<15:12:42, 11.86s/it] 34%|███▍ | 2429/7045 [7:53:54<15:38:54, 12.20s/it] {'loss': 1.1294, 'learning_rate': 3.8098951470644563e-06, 'epoch': 0.34} + 34%|███▍ | 2429/7045 [7:53:54<15:38:54, 12.20s/it] 34%|███▍ | 2430/7045 [7:54:06<15:26:58, 12.05s/it] {'loss': 1.1104, 'learning_rate': 3.8089159976400024e-06, 'epoch': 0.34} + 34%|███▍ | 2430/7045 [7:54:06<15:26:58, 12.05s/it] 35%|███▍ | 2431/7045 [7:54:17<15:02:18, 11.73s/it] {'loss': 1.1353, 'learning_rate': 3.8079365715286017e-06, 'epoch': 0.35} + 35%|███▍ | 2431/7045 [7:54:17<15:02:18, 11.73s/it] 35%|███▍ | 2432/7045 [7:54:28<14:48:38, 11.56s/it] {'loss': 1.1265, 'learning_rate': 3.8069568689372906e-06, 'epoch': 0.35} + 35%|███▍ | 2432/7045 [7:54:28<14:48:38, 11.56s/it] 35%|███▍ | 2433/7045 [7:54:39<14:45:26, 11.52s/it] {'loss': 1.0889, 'learning_rate': 3.805976890073165e-06, 'epoch': 0.35} + 35%|███▍ | 2433/7045 [7:54:39<14:45:26, 11.52s/it] 35%|███▍ | 2434/7045 [7:54:52<15:03:01, 11.75s/it] {'loss': 1.125, 'learning_rate': 3.8049966351433787e-06, 'epoch': 0.35} + 35%|███▍ | 2434/7045 [7:54:52<15:03:01, 11.75s/it] 35%|███▍ | 2435/7045 [7:55:03<14:55:13, 11.65s/it] {'loss': 1.1357, 'learning_rate': 3.804016104355146e-06, 'epoch': 0.35} + 35%|███▍ | 2435/7045 [7:55:03<14:55:13, 11.65s/it] 35%|███▍ | 2436/7045 [7:55:14<14:43:31, 11.50s/it] {'loss': 1.125, 'learning_rate': 3.8030352979157352e-06, 'epoch': 0.35} + 35%|███▍ | 2436/7045 [7:55:14<14:43:31, 11.50s/it] 35%|███▍ | 2437/7045 [7:55:25<14:31:16, 11.34s/it] {'loss': 1.1499, 'learning_rate': 3.802054216032477e-06, 'epoch': 0.35} + 35%|███▍ | 2437/7045 [7:55:25<14:31:16, 11.34s/it] 35%|███▍ | 2438/7045 [7:55:37<14:34:21, 11.39s/it] {'loss': 1.1279, 'learning_rate': 3.8010728589127575e-06, 'epoch': 0.35} + 35%|███▍ | 2438/7045 [7:55:37<14:34:21, 11.39s/it] 35%|███▍ | 2439/7045 [7:55:50<15:06:11, 11.80s/it] {'loss': 1.1016, 'learning_rate': 3.8000912267640233e-06, 'epoch': 0.35} + 35%|███▍ | 2439/7045 [7:55:50<15:06:11, 11.80s/it] 35%|███▍ | 2440/7045 [7:56:02<15:28:39, 12.10s/it] {'loss': 1.1299, 'learning_rate': 3.7991093197937777e-06, 'epoch': 0.35} + 35%|███▍ | 2440/7045 [7:56:02<15:28:39, 12.10s/it] 35%|███▍ | 2441/7045 [7:56:14<15:20:43, 12.00s/it] {'loss': 1.1079, 'learning_rate': 3.7981271382095823e-06, 'epoch': 0.35} + 35%|███▍ | 2441/7045 [7:56:14<15:20:43, 12.00s/it] 35%|███▍ | 2442/7045 [7:56:26<15:27:31, 12.09s/it] {'loss': 1.124, 'learning_rate': 3.7971446822190572e-06, 'epoch': 0.35} + 35%|███▍ | 2442/7045 [7:56:26<15:27:31, 12.09s/it] 35%|███▍ | 2443/7045 [7:56:38<15:07:26, 11.83s/it] {'loss': 1.1641, 'learning_rate': 3.7961619520298797e-06, 'epoch': 0.35} + 35%|███▍ | 2443/7045 [7:56:38<15:07:26, 11.83s/it] 35%|███▍ | 2444/7045 [7:56:49<14:50:04, 11.61s/it] {'loss': 1.1328, 'learning_rate': 3.7951789478497846e-06, 'epoch': 0.35} + 35%|███▍ | 2444/7045 [7:56:49<14:50:04, 11.61s/it] 35%|███▍ | 2445/7045 [7:57:02<15:35:25, 12.20s/it] {'loss': 1.1006, 'learning_rate': 3.7941956698865672e-06, 'epoch': 0.35} + 35%|███▍ | 2445/7045 [7:57:02<15:35:25, 12.20s/it] 35%|███▍ | 2446/7045 [7:57:13<15:08:48, 11.86s/it] {'loss': 1.084, 'learning_rate': 3.793212118348079e-06, 'epoch': 0.35} + 35%|███▍ | 2446/7045 [7:57:13<15:08:48, 11.86s/it] 35%|███▍ | 2447/7045 [7:57:26<15:22:19, 12.04s/it] {'loss': 1.1846, 'learning_rate': 3.792228293442228e-06, 'epoch': 0.35} + 35%|███▍ | 2447/7045 [7:57:26<15:22:19, 12.04s/it] 35%|███▍ | 2448/7045 [7:57:37<15:02:09, 11.78s/it] {'loss': 1.1045, 'learning_rate': 3.791244195376983e-06, 'epoch': 0.35} + 35%|███▍ | 2448/7045 [7:57:37<15:02:09, 11.78s/it] 35%|███▍ | 2449/7045 [7:57:49<14:57:35, 11.72s/it] {'loss': 1.1738, 'learning_rate': 3.790259824360367e-06, 'epoch': 0.35} + 35%|███▍ | 2449/7045 [7:57:49<14:57:35, 11.72s/it] 35%|███▍ | 2450/7045 [7:58:00<14:40:17, 11.49s/it] {'loss': 1.1182, 'learning_rate': 3.7892751806004647e-06, 'epoch': 0.35} + 35%|███▍ | 2450/7045 [7:58:00<14:40:17, 11.49s/it] 35%|███▍ | 2451/7045 [7:58:11<14:44:47, 11.56s/it] {'loss': 1.1387, 'learning_rate': 3.788290264305413e-06, 'epoch': 0.35} + 35%|███▍ | 2451/7045 [7:58:11<14:44:47, 11.56s/it] 35%|███▍ | 2452/7045 [7:58:23<14:39:05, 11.48s/it] {'loss': 1.1445, 'learning_rate': 3.787305075683414e-06, 'epoch': 0.35} + 35%|███▍ | 2452/7045 [7:58:23<14:39:05, 11.48s/it] 35%|███▍ | 2453/7045 [7:58:35<14:58:08, 11.74s/it] {'loss': 1.085, 'learning_rate': 3.78631961494272e-06, 'epoch': 0.35} + 35%|███▍ | 2453/7045 [7:58:35<14:58:08, 11.74s/it] 35%|███▍ | 2454/7045 [7:58:46<14:53:53, 11.68s/it] {'loss': 1.1328, 'learning_rate': 3.785333882291645e-06, 'epoch': 0.35} + 35%|███▍ | 2454/7045 [7:58:46<14:53:53, 11.68s/it] 35%|███▍ | 2455/7045 [7:58:57<14:38:40, 11.49s/it] {'loss': 1.1074, 'learning_rate': 3.78434787793856e-06, 'epoch': 0.35} + 35%|███▍ | 2455/7045 [7:58:57<14:38:40, 11.49s/it] 35%|███▍ | 2456/7045 [7:59:09<14:51:27, 11.66s/it] {'loss': 1.0938, 'learning_rate': 3.7833616020918915e-06, 'epoch': 0.35} + 35%|███▍ | 2456/7045 [7:59:09<14:51:27, 11.66s/it] 35%|███▍ | 2457/7045 [7:59:21<14:38:05, 11.48s/it] {'loss': 1.1152, 'learning_rate': 3.782375054960126e-06, 'epoch': 0.35} + 35%|███▍ | 2457/7045 [7:59:21<14:38:05, 11.48s/it] 35%|███▍ | 2458/7045 [7:59:32<14:35:36, 11.45s/it] {'loss': 1.1455, 'learning_rate': 3.7813882367518053e-06, 'epoch': 0.35} + 35%|███▍ | 2458/7045 [7:59:32<14:35:36, 11.45s/it] 35%|███▍ | 2459/7045 [7:59:45<15:12:54, 11.94s/it] {'loss': 1.1235, 'learning_rate': 3.78040114767553e-06, 'epoch': 0.35} + 35%|███▍ | 2459/7045 [7:59:45<15:12:54, 11.94s/it] 35%|███▍ | 2460/7045 [7:59:56<14:57:19, 11.74s/it] {'loss': 1.1396, 'learning_rate': 3.7794137879399558e-06, 'epoch': 0.35} + 35%|███▍ | 2460/7045 [7:59:56<14:57:19, 11.74s/it] 35%|███▍ | 2461/7045 [8:00:08<14:49:57, 11.65s/it] {'loss': 1.0996, 'learning_rate': 3.7784261577537984e-06, 'epoch': 0.35} + 35%|███▍ | 2461/7045 [8:00:08<14:49:57, 11.65s/it] 35%|███▍ | 2462/7045 [8:00:19<14:37:42, 11.49s/it] {'loss': 1.1436, 'learning_rate': 3.777438257325829e-06, 'epoch': 0.35} + 35%|███▍ | 2462/7045 [8:00:19<14:37:42, 11.49s/it] 35%|███▍ | 2463/7045 [8:00:30<14:28:01, 11.37s/it] {'loss': 1.1157, 'learning_rate': 3.776450086864876e-06, 'epoch': 0.35} + 35%|███▍ | 2463/7045 [8:00:30<14:28:01, 11.37s/it] 35%|███▍ | 2464/7045 [8:00:41<14:22:07, 11.29s/it] {'loss': 1.0859, 'learning_rate': 3.7754616465798242e-06, 'epoch': 0.35} + 35%|███▍ | 2464/7045 [8:00:41<14:22:07, 11.29s/it] 35%|███▍ | 2465/7045 [8:00:52<14:20:11, 11.27s/it] {'loss': 1.126, 'learning_rate': 3.774472936679619e-06, 'epoch': 0.35} + 35%|███▍ | 2465/7045 [8:00:52<14:20:11, 11.27s/it] 35%|███▌ | 2466/7045 [8:01:03<14:13:46, 11.19s/it] {'loss': 1.1113, 'learning_rate': 3.773483957373258e-06, 'epoch': 0.35} + 35%|███▌ | 2466/7045 [8:01:03<14:13:46, 11.19s/it] 35%|███▌ | 2467/7045 [8:01:14<14:12:47, 11.18s/it] {'loss': 1.1064, 'learning_rate': 3.7724947088697984e-06, 'epoch': 0.35} + 35%|███▌ | 2467/7045 [8:01:14<14:12:47, 11.18s/it] 35%|███▌ | 2468/7045 [8:01:28<14:57:32, 11.77s/it] {'loss': 1.104, 'learning_rate': 3.771505191378354e-06, 'epoch': 0.35} + 35%|███▌ | 2468/7045 [8:01:28<14:57:32, 11.77s/it] 35%|███▌ | 2469/7045 [8:01:39<14:45:15, 11.61s/it] {'loss': 1.123, 'learning_rate': 3.770515405108095e-06, 'epoch': 0.35} + 35%|███▌ | 2469/7045 [8:01:39<14:45:15, 11.61s/it] 35%|███▌ | 2470/7045 [8:01:50<14:35:35, 11.48s/it] {'loss': 1.1777, 'learning_rate': 3.769525350268249e-06, 'epoch': 0.35} + 35%|███▌ | 2470/7045 [8:01:50<14:35:35, 11.48s/it] 35%|███▌ | 2471/7045 [8:02:01<14:21:51, 11.31s/it] {'loss': 1.1338, 'learning_rate': 3.7685350270681e-06, 'epoch': 0.35} + 35%|███▌ | 2471/7045 [8:02:01<14:21:51, 11.31s/it] 35%|███▌ | 2472/7045 [8:02:12<14:24:54, 11.35s/it] {'loss': 1.127, 'learning_rate': 3.7675444357169887e-06, 'epoch': 0.35} + 35%|███▌ | 2472/7045 [8:02:12<14:24:54, 11.35s/it] 35%|███▌ | 2473/7045 [8:02:24<14:33:25, 11.46s/it] {'loss': 1.1152, 'learning_rate': 3.766553576424312e-06, 'epoch': 0.35} + 35%|███▌ | 2473/7045 [8:02:24<14:33:25, 11.46s/it] 35%|███▌ | 2474/7045 [8:02:35<14:24:36, 11.35s/it] {'loss': 1.1689, 'learning_rate': 3.765562449399525e-06, 'epoch': 0.35} + 35%|███▌ | 2474/7045 [8:02:35<14:24:36, 11.35s/it] 35%|███▌ | 2475/7045 [8:02:46<14:23:44, 11.34s/it] {'loss': 1.1299, 'learning_rate': 3.7645710548521373e-06, 'epoch': 0.35} + 35%|███▌ | 2475/7045 [8:02:46<14:23:44, 11.34s/it] 35%|███▌ | 2476/7045 [8:02:59<14:50:07, 11.69s/it] {'loss': 1.1182, 'learning_rate': 3.7635793929917185e-06, 'epoch': 0.35} + 35%|███▌ | 2476/7045 [8:02:59<14:50:07, 11.69s/it] 35%|███▌ | 2477/7045 [8:03:15<16:17:52, 12.84s/it] {'loss': 1.123, 'learning_rate': 3.762587464027889e-06, 'epoch': 0.35} + 35%|███▌ | 2477/7045 [8:03:15<16:17:52, 12.84s/it] 35%|███▌ | 2478/7045 [8:03:26<15:41:50, 12.37s/it] {'loss': 1.1108, 'learning_rate': 3.7615952681703314e-06, 'epoch': 0.35} + 35%|███▌ | 2478/7045 [8:03:26<15:41:50, 12.37s/it] 35%|███▌ | 2479/7045 [8:03:37<15:26:28, 12.17s/it] {'loss': 1.1445, 'learning_rate': 3.7606028056287817e-06, 'epoch': 0.35} + 35%|███▌ | 2479/7045 [8:03:37<15:26:28, 12.17s/it] 35%|███▌ | 2480/7045 [8:03:50<15:28:34, 12.20s/it] {'loss': 1.1055, 'learning_rate': 3.7596100766130334e-06, 'epoch': 0.35} + 35%|███▌ | 2480/7045 [8:03:50<15:28:34, 12.20s/it] 35%|███▌ | 2481/7045 [8:04:01<15:02:55, 11.87s/it] {'loss': 1.0811, 'learning_rate': 3.758617081332934e-06, 'epoch': 0.35} + 35%|███▌ | 2481/7045 [8:04:01<15:02:55, 11.87s/it] 35%|███▌ | 2482/7045 [8:04:12<14:38:32, 11.55s/it] {'loss': 1.123, 'learning_rate': 3.757623819998391e-06, 'epoch': 0.35} + 35%|███▌ | 2482/7045 [8:04:12<14:38:32, 11.55s/it] 35%|███▌ | 2483/7045 [8:04:25<15:19:52, 12.10s/it] {'loss': 1.1318, 'learning_rate': 3.7566302928193667e-06, 'epoch': 0.35} + 35%|███▌ | 2483/7045 [8:04:25<15:19:52, 12.10s/it] 35%|███▌ | 2484/7045 [8:04:37<15:22:22, 12.13s/it] {'loss': 1.127, 'learning_rate': 3.755636500005877e-06, 'epoch': 0.35} + 35%|███▌ | 2484/7045 [8:04:37<15:22:22, 12.13s/it] 35%|███▌ | 2485/7045 [8:04:50<15:38:08, 12.34s/it] {'loss': 1.1094, 'learning_rate': 3.7546424417679973e-06, 'epoch': 0.35} + 35%|███▌ | 2485/7045 [8:04:50<15:38:08, 12.34s/it] 35%|███▌ | 2486/7045 [8:05:01<15:14:02, 12.03s/it] {'loss': 1.1602, 'learning_rate': 3.753648118315858e-06, 'epoch': 0.35} + 35%|███▌ | 2486/7045 [8:05:01<15:14:02, 12.03s/it] 35%|███▌ | 2487/7045 [8:05:15<15:49:35, 12.50s/it] {'loss': 1.0791, 'learning_rate': 3.7526535298596457e-06, 'epoch': 0.35} + 35%|███▌ | 2487/7045 [8:05:15<15:49:35, 12.50s/it] 35%|███▌ | 2488/7045 [8:05:26<15:18:19, 12.09s/it] {'loss': 1.1167, 'learning_rate': 3.751658676609602e-06, 'epoch': 0.35} + 35%|███▌ | 2488/7045 [8:05:26<15:18:19, 12.09s/it] 35%|███▌ | 2489/7045 [8:05:37<15:00:23, 11.86s/it] {'loss': 1.1719, 'learning_rate': 3.750663558776026e-06, 'epoch': 0.35} + 35%|███▌ | 2489/7045 [8:05:37<15:00:23, 11.86s/it] 35%|███▌ | 2490/7045 [8:05:49<14:49:48, 11.72s/it] {'loss': 1.1338, 'learning_rate': 3.749668176569272e-06, 'epoch': 0.35} + 35%|███▌ | 2490/7045 [8:05:49<14:49:48, 11.72s/it] 35%|███▌ | 2491/7045 [8:06:02<15:24:55, 12.19s/it] {'loss': 1.0898, 'learning_rate': 3.7486725301997497e-06, 'epoch': 0.35} + 35%|███▌ | 2491/7045 [8:06:02<15:24:55, 12.19s/it] 35%|███▌ | 2492/7045 [8:06:13<15:02:00, 11.89s/it] {'loss': 1.0942, 'learning_rate': 3.7476766198779256e-06, 'epoch': 0.35} + 35%|███▌ | 2492/7045 [8:06:13<15:02:00, 11.89s/it] 35%|███▌ | 2493/7045 [8:06:24<14:43:49, 11.65s/it] {'loss': 1.1357, 'learning_rate': 3.7466804458143203e-06, 'epoch': 0.35} + 35%|███▌ | 2493/7045 [8:06:24<14:43:49, 11.65s/it] 35%|███▌ | 2494/7045 [8:06:36<14:44:37, 11.66s/it] {'loss': 1.1436, 'learning_rate': 3.745684008219514e-06, 'epoch': 0.35} + 35%|███▌ | 2494/7045 [8:06:36<14:44:37, 11.66s/it] 35%|███▌ | 2495/7045 [8:06:48<14:41:35, 11.63s/it] {'loss': 1.1357, 'learning_rate': 3.7446873073041375e-06, 'epoch': 0.35} + 35%|███▌ | 2495/7045 [8:06:48<14:41:35, 11.63s/it] 35%|███▌ | 2496/7045 [8:07:00<15:03:17, 11.91s/it] {'loss': 1.123, 'learning_rate': 3.7436903432788806e-06, 'epoch': 0.35} + 35%|███▌ | 2496/7045 [8:07:00<15:03:17, 11.91s/it] 35%|███▌ | 2497/7045 [8:07:11<14:43:35, 11.66s/it] {'loss': 1.0933, 'learning_rate': 3.7426931163544877e-06, 'epoch': 0.35} + 35%|███▌ | 2497/7045 [8:07:11<14:43:35, 11.66s/it] 35%|███▌ | 2498/7045 [8:07:22<14:26:18, 11.43s/it] {'loss': 1.1045, 'learning_rate': 3.7416956267417604e-06, 'epoch': 0.35} + 35%|███▌ | 2498/7045 [8:07:22<14:26:18, 11.43s/it] 35%|███▌ | 2499/7045 [8:07:34<14:28:43, 11.47s/it] {'loss': 1.1426, 'learning_rate': 3.740697874651551e-06, 'epoch': 0.35} + 35%|███▌ | 2499/7045 [8:07:34<14:28:43, 11.47s/it] 35%|███▌ | 2500/7045 [8:07:45<14:31:39, 11.51s/it] {'loss': 1.1504, 'learning_rate': 3.739699860294774e-06, 'epoch': 0.35} + 35%|███▌ | 2500/7045 [8:07:45<14:31:39, 11.51s/it] 36%|███▌ | 2501/7045 [8:07:57<14:26:07, 11.44s/it] {'loss': 1.1221, 'learning_rate': 3.7387015838823947e-06, 'epoch': 0.36} + 36%|███▌ | 2501/7045 [8:07:57<14:26:07, 11.44s/it] 36%|███▌ | 2502/7045 [8:08:08<14:18:29, 11.34s/it] {'loss': 1.1377, 'learning_rate': 3.7377030456254353e-06, 'epoch': 0.36} + 36%|███▌ | 2502/7045 [8:08:08<14:18:29, 11.34s/it] 36%|███▌ | 2503/7045 [8:08:19<14:14:48, 11.29s/it] {'loss': 1.1562, 'learning_rate': 3.7367042457349724e-06, 'epoch': 0.36} + 36%|███▌ | 2503/7045 [8:08:19<14:14:48, 11.29s/it] 36%|███▌ | 2504/7045 [8:08:30<14:09:14, 11.22s/it] {'loss': 1.1396, 'learning_rate': 3.7357051844221394e-06, 'epoch': 0.36} + 36%|███▌ | 2504/7045 [8:08:30<14:09:14, 11.22s/it] 36%|███▌ | 2505/7045 [8:08:43<14:56:21, 11.85s/it] {'loss': 1.105, 'learning_rate': 3.7347058618981243e-06, 'epoch': 0.36} + 36%|███▌ | 2505/7045 [8:08:43<14:56:21, 11.85s/it] 36%|███▌ | 2506/7045 [8:08:57<15:29:24, 12.29s/it] {'loss': 1.1162, 'learning_rate': 3.7337062783741696e-06, 'epoch': 0.36} + 36%|███▌ | 2506/7045 [8:08:57<15:29:24, 12.29s/it] 36%|███▌ | 2507/7045 [8:09:08<15:03:34, 11.95s/it] {'loss': 1.1191, 'learning_rate': 3.732706434061574e-06, 'epoch': 0.36} + 36%|███▌ | 2507/7045 [8:09:08<15:03:34, 11.95s/it] 36%|███▌ | 2508/7045 [8:09:19<14:48:08, 11.75s/it] {'loss': 1.1387, 'learning_rate': 3.7317063291716904e-06, 'epoch': 0.36} + 36%|███▌ | 2508/7045 [8:09:19<14:48:08, 11.75s/it] 36%|███▌ | 2509/7045 [8:09:30<14:36:01, 11.59s/it] {'loss': 1.1289, 'learning_rate': 3.7307059639159288e-06, 'epoch': 0.36} + 36%|███▌ | 2509/7045 [8:09:30<14:36:01, 11.59s/it] 36%|███▌ | 2510/7045 [8:09:42<14:44:36, 11.70s/it] {'loss': 1.0879, 'learning_rate': 3.729705338505751e-06, 'epoch': 0.36} + 36%|███▌ | 2510/7045 [8:09:42<14:44:36, 11.70s/it] 36%|███▌ | 2511/7045 [8:09:55<15:19:57, 12.17s/it] {'loss': 1.0869, 'learning_rate': 3.7287044531526755e-06, 'epoch': 0.36} + 36%|███▌ | 2511/7045 [8:09:55<15:19:57, 12.17s/it] 36%|███▌ | 2512/7045 [8:10:07<14:58:35, 11.89s/it] {'loss': 1.1104, 'learning_rate': 3.7277033080682773e-06, 'epoch': 0.36} + 36%|███▌ | 2512/7045 [8:10:07<14:58:35, 11.89s/it] 36%|███▌ | 2513/7045 [8:10:20<15:30:16, 12.32s/it] {'loss': 1.0889, 'learning_rate': 3.7267019034641843e-06, 'epoch': 0.36} + 36%|███▌ | 2513/7045 [8:10:20<15:30:16, 12.32s/it] 36%|███▌ | 2514/7045 [8:10:33<15:36:13, 12.40s/it] {'loss': 1.1289, 'learning_rate': 3.725700239552079e-06, 'epoch': 0.36} + 36%|███▌ | 2514/7045 [8:10:33<15:36:13, 12.40s/it] 36%|███▌ | 2515/7045 [8:10:45<15:33:46, 12.37s/it] {'loss': 1.124, 'learning_rate': 3.7246983165436994e-06, 'epoch': 0.36} + 36%|███▌ | 2515/7045 [8:10:45<15:33:46, 12.37s/it] 36%|███▌ | 2516/7045 [8:10:56<15:01:31, 11.94s/it] {'loss': 1.1152, 'learning_rate': 3.7236961346508396e-06, 'epoch': 0.36} + 36%|███▌ | 2516/7045 [8:10:56<15:01:31, 11.94s/it] 36%|███▌ | 2517/7045 [8:11:08<14:55:10, 11.86s/it] {'loss': 1.126, 'learning_rate': 3.7226936940853463e-06, 'epoch': 0.36} + 36%|███▌ | 2517/7045 [8:11:08<14:55:10, 11.86s/it] 36%|███▌ | 2518/7045 [8:11:20<15:07:13, 12.02s/it] {'loss': 1.123, 'learning_rate': 3.7216909950591207e-06, 'epoch': 0.36} + 36%|███▌ | 2518/7045 [8:11:20<15:07:13, 12.02s/it] 36%|███▌ | 2519/7045 [8:11:31<14:46:59, 11.76s/it] {'loss': 1.167, 'learning_rate': 3.7206880377841216e-06, 'epoch': 0.36} + 36%|███▌ | 2519/7045 [8:11:31<14:46:59, 11.76s/it] 36%|███▌ | 2520/7045 [8:11:44<15:19:28, 12.19s/it] {'loss': 1.0957, 'learning_rate': 3.7196848224723597e-06, 'epoch': 0.36} + 36%|███▌ | 2520/7045 [8:11:44<15:19:28, 12.19s/it] 36%|███▌ | 2521/7045 [8:11:56<14:58:52, 11.92s/it] {'loss': 1.0894, 'learning_rate': 3.7186813493359007e-06, 'epoch': 0.36} + 36%|███▌ | 2521/7045 [8:11:56<14:58:52, 11.92s/it] 36%|███▌ | 2522/7045 [8:12:06<14:31:31, 11.56s/it] {'loss': 1.1152, 'learning_rate': 3.717677618586866e-06, 'epoch': 0.36} + 36%|███▌ | 2522/7045 [8:12:06<14:31:31, 11.56s/it] 36%|███▌ | 2523/7045 [8:12:18<14:28:35, 11.52s/it] {'loss': 1.1162, 'learning_rate': 3.7166736304374283e-06, 'epoch': 0.36} + 36%|███▌ | 2523/7045 [8:12:18<14:28:35, 11.52s/it] 36%|███▌ | 2524/7045 [8:12:30<14:55:28, 11.88s/it] {'loss': 1.0908, 'learning_rate': 3.7156693850998195e-06, 'epoch': 0.36} + 36%|███▌ | 2524/7045 [8:12:30<14:55:28, 11.88s/it] 36%|███▌ | 2525/7045 [8:12:41<14:36:12, 11.63s/it] {'loss': 1.1484, 'learning_rate': 3.7146648827863224e-06, 'epoch': 0.36} + 36%|███▌ | 2525/7045 [8:12:41<14:36:12, 11.63s/it] 36%|███▌ | 2526/7045 [8:12:53<14:26:22, 11.50s/it] {'loss': 1.1182, 'learning_rate': 3.7136601237092745e-06, 'epoch': 0.36} + 36%|███▌ | 2526/7045 [8:12:53<14:26:22, 11.50s/it] 36%|███▌ | 2527/7045 [8:13:04<14:22:03, 11.45s/it] {'loss': 1.0928, 'learning_rate': 3.712655108081069e-06, 'epoch': 0.36} + 36%|███▌ | 2527/7045 [8:13:04<14:22:03, 11.45s/it] 36%|███▌ | 2528/7045 [8:13:15<14:09:20, 11.28s/it] {'loss': 1.1279, 'learning_rate': 3.711649836114153e-06, 'epoch': 0.36} + 36%|███▌ | 2528/7045 [8:13:15<14:09:20, 11.28s/it] 36%|███▌ | 2529/7045 [8:13:26<13:58:37, 11.14s/it] {'loss': 1.1211, 'learning_rate': 3.7106443080210254e-06, 'epoch': 0.36} + 36%|███▌ | 2529/7045 [8:13:26<13:58:37, 11.14s/it] 36%|███▌ | 2530/7045 [8:13:37<14:00:20, 11.17s/it] {'loss': 1.1426, 'learning_rate': 3.709638524014241e-06, 'epoch': 0.36} + 36%|███▌ | 2530/7045 [8:13:37<14:00:20, 11.17s/it] 36%|███▌ | 2531/7045 [8:13:48<14:04:19, 11.22s/it] {'loss': 1.1582, 'learning_rate': 3.708632484306412e-06, 'epoch': 0.36} + 36%|███▌ | 2531/7045 [8:13:48<14:04:19, 11.22s/it] 36%|███▌ | 2532/7045 [8:14:00<14:09:04, 11.29s/it] {'loss': 1.1211, 'learning_rate': 3.7076261891101985e-06, 'epoch': 0.36} + 36%|███▌ | 2532/7045 [8:14:00<14:09:04, 11.29s/it] 36%|███▌ | 2533/7045 [8:14:11<14:12:23, 11.33s/it] {'loss': 1.1143, 'learning_rate': 3.7066196386383178e-06, 'epoch': 0.36} + 36%|███▌ | 2533/7045 [8:14:11<14:12:23, 11.33s/it] 36%|███▌ | 2534/7045 [8:14:23<14:25:09, 11.51s/it] {'loss': 1.0723, 'learning_rate': 3.7056128331035413e-06, 'epoch': 0.36} + 36%|███▌ | 2534/7045 [8:14:23<14:25:09, 11.51s/it] 36%|███▌ | 2535/7045 [8:14:34<14:21:44, 11.46s/it] {'loss': 1.0947, 'learning_rate': 3.7046057727186946e-06, 'epoch': 0.36} + 36%|███▌ | 2535/7045 [8:14:34<14:21:44, 11.46s/it] 36%|███▌ | 2536/7045 [8:14:46<14:13:23, 11.36s/it] {'loss': 1.1377, 'learning_rate': 3.703598457696656e-06, 'epoch': 0.36} + 36%|███▌ | 2536/7045 [8:14:46<14:13:23, 11.36s/it] 36%|███▌ | 2537/7045 [8:14:57<14:24:34, 11.51s/it] {'loss': 1.1216, 'learning_rate': 3.702590888250358e-06, 'epoch': 0.36} + 36%|███▌ | 2537/7045 [8:14:57<14:24:34, 11.51s/it] 36%|███▌ | 2538/7045 [8:15:08<14:07:35, 11.28s/it] {'loss': 1.0923, 'learning_rate': 3.701583064592787e-06, 'epoch': 0.36} + 36%|███▌ | 2538/7045 [8:15:08<14:07:35, 11.28s/it] 36%|███▌ | 2539/7045 [8:15:19<14:00:16, 11.19s/it] {'loss': 1.1143, 'learning_rate': 3.700574986936984e-06, 'epoch': 0.36} + 36%|███▌ | 2539/7045 [8:15:19<14:00:16, 11.19s/it] 36%|███▌ | 2540/7045 [8:15:32<14:33:16, 11.63s/it] {'loss': 1.1133, 'learning_rate': 3.6995666554960415e-06, 'epoch': 0.36} + 36%|███▌ | 2540/7045 [8:15:32<14:33:16, 11.63s/it] 36%|███▌ | 2541/7045 [8:15:44<14:51:31, 11.88s/it] {'loss': 1.1191, 'learning_rate': 3.698558070483107e-06, 'epoch': 0.36} + 36%|███▌ | 2541/7045 [8:15:44<14:51:31, 11.88s/it] 36%|███▌ | 2542/7045 [8:15:56<14:54:40, 11.92s/it] {'loss': 1.1309, 'learning_rate': 3.6975492321113828e-06, 'epoch': 0.36} + 36%|███▌ | 2542/7045 [8:15:56<14:54:40, 11.92s/it] 36%|███▌ | 2543/7045 [8:16:07<14:33:36, 11.64s/it] {'loss': 1.1279, 'learning_rate': 3.6965401405941225e-06, 'epoch': 0.36} + 36%|███▌ | 2543/7045 [8:16:07<14:33:36, 11.64s/it] 36%|███▌ | 2544/7045 [8:16:18<14:21:31, 11.48s/it] {'loss': 1.0986, 'learning_rate': 3.695530796144635e-06, 'epoch': 0.36} + 36%|███▌ | 2544/7045 [8:16:18<14:21:31, 11.48s/it] 36%|███▌ | 2545/7045 [8:16:30<14:19:46, 11.46s/it] {'loss': 1.1221, 'learning_rate': 3.6945211989762813e-06, 'epoch': 0.36} + 36%|███▌ | 2545/7045 [8:16:30<14:19:46, 11.46s/it] 36%|███▌ | 2546/7045 [8:16:41<14:10:18, 11.34s/it] {'loss': 1.1313, 'learning_rate': 3.6935113493024766e-06, 'epoch': 0.36} + 36%|███▌ | 2546/7045 [8:16:41<14:10:18, 11.34s/it] 36%|███▌ | 2547/7045 [8:16:52<14:11:54, 11.36s/it] {'loss': 1.1221, 'learning_rate': 3.69250124733669e-06, 'epoch': 0.36} + 36%|███▌ | 2547/7045 [8:16:52<14:11:54, 11.36s/it] 36%|███▌ | 2548/7045 [8:17:05<14:37:58, 11.71s/it] {'loss': 1.1055, 'learning_rate': 3.691490893292442e-06, 'epoch': 0.36} + 36%|███▌ | 2548/7045 [8:17:05<14:37:58, 11.71s/it] 36%|███▌ | 2549/7045 [8:17:16<14:23:39, 11.53s/it] {'loss': 1.1211, 'learning_rate': 3.690480287383308e-06, 'epoch': 0.36} + 36%|███▌ | 2549/7045 [8:17:16<14:23:39, 11.53s/it] 36%|███▌ | 2550/7045 [8:17:28<14:27:44, 11.58s/it] {'loss': 1.165, 'learning_rate': 3.689469429822918e-06, 'epoch': 0.36} + 36%|███▌ | 2550/7045 [8:17:28<14:27:44, 11.58s/it] 36%|███▌ | 2551/7045 [8:17:39<14:19:50, 11.48s/it] {'loss': 1.1299, 'learning_rate': 3.6884583208249507e-06, 'epoch': 0.36} + 36%|███▌ | 2551/7045 [8:17:39<14:19:50, 11.48s/it] 36%|███▌ | 2552/7045 [8:17:50<14:10:19, 11.36s/it] {'loss': 1.1162, 'learning_rate': 3.6874469606031427e-06, 'epoch': 0.36} + 36%|███▌ | 2552/7045 [8:17:50<14:10:19, 11.36s/it] 36%|███▌ | 2553/7045 [8:18:03<14:54:06, 11.94s/it] {'loss': 1.1128, 'learning_rate': 3.6864353493712806e-06, 'epoch': 0.36} + 36%|███▌ | 2553/7045 [8:18:03<14:54:06, 11.94s/it] 36%|███▋ | 2554/7045 [8:18:18<15:52:35, 12.73s/it] {'loss': 1.1426, 'learning_rate': 3.685423487343206e-06, 'epoch': 0.36} + 36%|███▋ | 2554/7045 [8:18:18<15:52:35, 12.73s/it] 36%|███▋ | 2555/7045 [8:18:29<15:22:02, 12.32s/it] {'loss': 1.1304, 'learning_rate': 3.684411374732812e-06, 'epoch': 0.36} + 36%|███▋ | 2555/7045 [8:18:29<15:22:02, 12.32s/it] 36%|███▋ | 2556/7045 [8:18:40<14:48:43, 11.88s/it] {'loss': 1.1113, 'learning_rate': 3.6833990117540458e-06, 'epoch': 0.36} + 36%|███▋ | 2556/7045 [8:18:40<14:48:43, 11.88s/it] 36%|███▋ | 2557/7045 [8:18:51<14:24:36, 11.56s/it] {'loss': 1.1348, 'learning_rate': 3.682386398620907e-06, 'epoch': 0.36} + 36%|███▋ | 2557/7045 [8:18:51<14:24:36, 11.56s/it] 36%|███▋ | 2558/7045 [8:19:02<14:23:33, 11.55s/it] {'loss': 1.1719, 'learning_rate': 3.681373535547449e-06, 'epoch': 0.36} + 36%|███▋ | 2558/7045 [8:19:02<14:23:33, 11.55s/it] 36%|███▋ | 2559/7045 [8:19:14<14:19:28, 11.50s/it] {'loss': 1.1494, 'learning_rate': 3.6803604227477752e-06, 'epoch': 0.36} + 36%|███▋ | 2559/7045 [8:19:14<14:19:28, 11.50s/it] 36%|███▋ | 2560/7045 [8:19:25<14:11:09, 11.39s/it] {'loss': 1.1318, 'learning_rate': 3.6793470604360455e-06, 'epoch': 0.36} + 36%|███▋ | 2560/7045 [8:19:25<14:11:09, 11.39s/it] 36%|███▋ | 2561/7045 [8:19:36<14:10:01, 11.37s/it] {'loss': 1.0996, 'learning_rate': 3.6783334488264706e-06, 'epoch': 0.36} + 36%|███▋ | 2561/7045 [8:19:36<14:10:01, 11.37s/it] 36%|███▋ | 2562/7045 [8:19:47<13:58:39, 11.22s/it] {'loss': 1.1094, 'learning_rate': 3.6773195881333135e-06, 'epoch': 0.36} + 36%|███▋ | 2562/7045 [8:19:47<13:58:39, 11.22s/it] 36%|███▋ | 2563/7045 [8:19:58<13:59:49, 11.24s/it] {'loss': 1.1445, 'learning_rate': 3.6763054785708905e-06, 'epoch': 0.36} + 36%|███▋ | 2563/7045 [8:19:58<13:59:49, 11.24s/it] 36%|███▋ | 2564/7045 [8:20:10<13:58:37, 11.23s/it] {'loss': 1.1592, 'learning_rate': 3.6752911203535716e-06, 'epoch': 0.36} + 36%|███▋ | 2564/7045 [8:20:10<13:58:37, 11.23s/it] 36%|███▋ | 2565/7045 [8:20:21<13:59:00, 11.24s/it] {'loss': 1.1367, 'learning_rate': 3.6742765136957774e-06, 'epoch': 0.36} + 36%|███▋ | 2565/7045 [8:20:21<13:59:00, 11.24s/it] 36%|███▋ | 2566/7045 [8:20:32<13:55:15, 11.19s/it] {'loss': 1.127, 'learning_rate': 3.673261658811982e-06, 'epoch': 0.36} + 36%|███▋ | 2566/7045 [8:20:32<13:55:15, 11.19s/it] 36%|███▋ | 2567/7045 [8:20:44<14:21:33, 11.54s/it] {'loss': 1.1089, 'learning_rate': 3.672246555916711e-06, 'epoch': 0.36} + 36%|███▋ | 2567/7045 [8:20:44<14:21:33, 11.54s/it] 36%|███▋ | 2568/7045 [8:20:56<14:26:56, 11.62s/it] {'loss': 1.1338, 'learning_rate': 3.6712312052245447e-06, 'epoch': 0.36} + 36%|███▋ | 2568/7045 [8:20:56<14:26:56, 11.62s/it] 36%|███▋ | 2569/7045 [8:21:07<14:16:44, 11.48s/it] {'loss': 1.1152, 'learning_rate': 3.6702156069501144e-06, 'epoch': 0.36} + 36%|███▋ | 2569/7045 [8:21:07<14:16:44, 11.48s/it] 36%|███▋ | 2570/7045 [8:21:18<14:01:52, 11.29s/it] {'loss': 1.1572, 'learning_rate': 3.6691997613081026e-06, 'epoch': 0.36} + 36%|███▋ | 2570/7045 [8:21:18<14:01:52, 11.29s/it] 36%|███▋ | 2571/7045 [8:21:29<13:46:46, 11.09s/it] {'loss': 1.0908, 'learning_rate': 3.6681836685132456e-06, 'epoch': 0.36} + 36%|███▋ | 2571/7045 [8:21:29<13:46:46, 11.09s/it] 37%|███▋ | 2572/7045 [8:21:40<13:56:04, 11.21s/it] {'loss': 1.1299, 'learning_rate': 3.667167328780331e-06, 'epoch': 0.37} + 37%|███▋ | 2572/7045 [8:21:40<13:56:04, 11.21s/it] 37%|███▋ | 2573/7045 [8:21:51<13:55:08, 11.20s/it] {'loss': 1.1465, 'learning_rate': 3.6661507423242006e-06, 'epoch': 0.37} + 37%|███▋ | 2573/7045 [8:21:51<13:55:08, 11.20s/it] 37%|███▋ | 2574/7045 [8:22:03<13:56:06, 11.22s/it] {'loss': 1.1211, 'learning_rate': 3.665133909359745e-06, 'epoch': 0.37} + 37%|███▋ | 2574/7045 [8:22:03<13:56:06, 11.22s/it] 37%|███▋ | 2575/7045 [8:22:14<14:10:35, 11.42s/it] {'loss': 1.1401, 'learning_rate': 3.6641168301019104e-06, 'epoch': 0.37} + 37%|███▋ | 2575/7045 [8:22:14<14:10:35, 11.42s/it] 37%|███▋ | 2576/7045 [8:22:27<14:28:05, 11.65s/it] {'loss': 1.1079, 'learning_rate': 3.6630995047656932e-06, 'epoch': 0.37} + 37%|███▋ | 2576/7045 [8:22:27<14:28:05, 11.65s/it] 37%|███▋ | 2577/7045 [8:22:38<14:14:35, 11.48s/it] {'loss': 1.1426, 'learning_rate': 3.662081933566141e-06, 'epoch': 0.37} + 37%|███▋ | 2577/7045 [8:22:38<14:14:35, 11.48s/it] 37%|███▋ | 2578/7045 [8:22:50<14:39:52, 11.82s/it] {'loss': 1.1167, 'learning_rate': 3.661064116718355e-06, 'epoch': 0.37} + 37%|███▋ | 2578/7045 [8:22:50<14:39:52, 11.82s/it] 37%|███▋ | 2579/7045 [8:23:01<14:16:02, 11.50s/it] {'loss': 1.125, 'learning_rate': 3.6600460544374875e-06, 'epoch': 0.37} + 37%|███▋ | 2579/7045 [8:23:01<14:16:02, 11.50s/it] 37%|███▋ | 2580/7045 [8:23:13<14:26:02, 11.64s/it] {'loss': 1.1221, 'learning_rate': 3.6590277469387447e-06, 'epoch': 0.37} + 37%|███▋ | 2580/7045 [8:23:13<14:26:02, 11.64s/it] 37%|███▋ | 2581/7045 [8:23:27<15:07:24, 12.20s/it] {'loss': 1.1187, 'learning_rate': 3.6580091944373808e-06, 'epoch': 0.37} + 37%|███▋ | 2581/7045 [8:23:27<15:07:24, 12.20s/it] 37%|███▋ | 2582/7045 [8:23:37<14:35:30, 11.77s/it] {'loss': 1.1182, 'learning_rate': 3.6569903971487048e-06, 'epoch': 0.37} + 37%|███▋ | 2582/7045 [8:23:37<14:35:30, 11.77s/it] 37%|███▋ | 2583/7045 [8:23:49<14:33:20, 11.74s/it] {'loss': 1.1172, 'learning_rate': 3.655971355288076e-06, 'epoch': 0.37} + 37%|███▋ | 2583/7045 [8:23:49<14:33:20, 11.74s/it] 37%|███▋ | 2584/7045 [8:24:00<14:22:58, 11.61s/it] {'loss': 1.1133, 'learning_rate': 3.6549520690709062e-06, 'epoch': 0.37} + 37%|███▋ | 2584/7045 [8:24:00<14:22:58, 11.61s/it] 37%|███▋ | 2585/7045 [8:24:11<14:12:12, 11.46s/it] {'loss': 1.1182, 'learning_rate': 3.6539325387126583e-06, 'epoch': 0.37} + 37%|███▋ | 2585/7045 [8:24:11<14:12:12, 11.46s/it] 37%|███▋ | 2586/7045 [8:24:23<14:02:48, 11.34s/it] {'loss': 1.1084, 'learning_rate': 3.6529127644288476e-06, 'epoch': 0.37} + 37%|███▋ | 2586/7045 [8:24:23<14:02:48, 11.34s/it] 37%|███▋ | 2587/7045 [8:24:34<13:58:36, 11.29s/it] {'loss': 1.0908, 'learning_rate': 3.651892746435041e-06, 'epoch': 0.37} + 37%|███▋ | 2587/7045 [8:24:34<13:58:36, 11.29s/it] 37%|███▋ | 2588/7045 [8:24:47<14:34:19, 11.77s/it] {'loss': 1.1201, 'learning_rate': 3.650872484946855e-06, 'epoch': 0.37} + 37%|███▋ | 2588/7045 [8:24:47<14:34:19, 11.77s/it] 37%|███▋ | 2589/7045 [8:24:59<14:47:49, 11.95s/it] {'loss': 1.1069, 'learning_rate': 3.649851980179959e-06, 'epoch': 0.37} + 37%|███▋ | 2589/7045 [8:24:59<14:47:49, 11.95s/it] 37%|███▋ | 2590/7045 [8:25:10<14:26:40, 11.67s/it] {'loss': 1.1162, 'learning_rate': 3.6488312323500752e-06, 'epoch': 0.37} + 37%|███▋ | 2590/7045 [8:25:10<14:26:40, 11.67s/it] 37%|███▋ | 2591/7045 [8:25:21<14:10:10, 11.45s/it] {'loss': 1.1211, 'learning_rate': 3.6478102416729743e-06, 'epoch': 0.37} + 37%|███▋ | 2591/7045 [8:25:21<14:10:10, 11.45s/it] 37%|███▋ | 2592/7045 [8:25:33<14:14:39, 11.52s/it] {'loss': 1.1133, 'learning_rate': 3.6467890083644803e-06, 'epoch': 0.37} + 37%|███▋ | 2592/7045 [8:25:33<14:14:39, 11.52s/it] 37%|███▋ | 2593/7045 [8:25:44<14:20:32, 11.60s/it] {'loss': 1.1006, 'learning_rate': 3.6457675326404673e-06, 'epoch': 0.37} + 37%|███▋ | 2593/7045 [8:25:44<14:20:32, 11.60s/it] 37%|███▋ | 2594/7045 [8:25:56<14:11:03, 11.47s/it] {'loss': 1.1455, 'learning_rate': 3.644745814716863e-06, 'epoch': 0.37} + 37%|███▋ | 2594/7045 [8:25:56<14:11:03, 11.47s/it] 37%|███▋ | 2595/7045 [8:26:07<14:00:26, 11.33s/it] {'loss': 1.1182, 'learning_rate': 3.643723854809643e-06, 'epoch': 0.37} + 37%|███▋ | 2595/7045 [8:26:07<14:00:26, 11.33s/it] 37%|███▋ | 2596/7045 [8:26:18<13:56:18, 11.28s/it] {'loss': 1.1006, 'learning_rate': 3.6427016531348365e-06, 'epoch': 0.37} + 37%|███▋ | 2596/7045 [8:26:18<13:56:18, 11.28s/it] 37%|███▋ | 2597/7045 [8:26:31<14:32:00, 11.76s/it] {'loss': 1.1406, 'learning_rate': 3.6416792099085217e-06, 'epoch': 0.37} + 37%|███▋ | 2597/7045 [8:26:31<14:32:00, 11.76s/it] 37%|███▋ | 2598/7045 [8:26:42<14:26:25, 11.69s/it] {'loss': 1.1143, 'learning_rate': 3.640656525346831e-06, 'epoch': 0.37} + 37%|███▋ | 2598/7045 [8:26:42<14:26:25, 11.69s/it] 37%|███▋ | 2599/7045 [8:26:54<14:21:26, 11.63s/it] {'loss': 1.0723, 'learning_rate': 3.6396335996659448e-06, 'epoch': 0.37} + 37%|███▋ | 2599/7045 [8:26:54<14:21:26, 11.63s/it] 37%|███▋ | 2600/7045 [8:27:05<14:19:07, 11.60s/it] {'loss': 1.1094, 'learning_rate': 3.638610433082096e-06, 'epoch': 0.37} + 37%|███▋ | 2600/7045 [8:27:05<14:19:07, 11.60s/it] 37%|███▋ | 2601/7045 [8:27:16<14:10:28, 11.48s/it] {'loss': 1.1123, 'learning_rate': 3.6375870258115676e-06, 'epoch': 0.37} + 37%|███▋ | 2601/7045 [8:27:16<14:10:28, 11.48s/it] 37%|███▋ | 2602/7045 [8:27:28<14:04:13, 11.40s/it] {'loss': 1.0962, 'learning_rate': 3.636563378070694e-06, 'epoch': 0.37} + 37%|███▋ | 2602/7045 [8:27:28<14:04:13, 11.40s/it] 37%|███▋ | 2603/7045 [8:27:39<13:57:47, 11.32s/it] {'loss': 1.1069, 'learning_rate': 3.6355394900758608e-06, 'epoch': 0.37} + 37%|███▋ | 2603/7045 [8:27:39<13:57:47, 11.32s/it] 37%|███▋ | 2604/7045 [8:27:50<13:59:44, 11.35s/it] {'loss': 1.127, 'learning_rate': 3.634515362043503e-06, 'epoch': 0.37} + 37%|███▋ | 2604/7045 [8:27:50<13:59:44, 11.35s/it] 37%|███▋ | 2605/7045 [8:28:02<14:08:09, 11.46s/it] {'loss': 1.1582, 'learning_rate': 3.6334909941901087e-06, 'epoch': 0.37} + 37%|███▋ | 2605/7045 [8:28:02<14:08:09, 11.46s/it] 37%|███▋ | 2606/7045 [8:28:14<14:24:03, 11.68s/it] {'loss': 1.1318, 'learning_rate': 3.632466386732214e-06, 'epoch': 0.37} + 37%|███▋ | 2606/7045 [8:28:14<14:24:03, 11.68s/it] 37%|███▋ | 2607/7045 [8:28:25<14:15:10, 11.56s/it] {'loss': 1.1338, 'learning_rate': 3.6314415398864074e-06, 'epoch': 0.37} + 37%|███▋ | 2607/7045 [8:28:25<14:15:10, 11.56s/it] 37%|███▋ | 2608/7045 [8:28:37<14:12:12, 11.52s/it] {'loss': 1.1099, 'learning_rate': 3.6304164538693276e-06, 'epoch': 0.37} + 37%|███▋ | 2608/7045 [8:28:37<14:12:12, 11.52s/it] 37%|███▋ | 2609/7045 [8:28:48<14:09:42, 11.49s/it] {'loss': 1.1221, 'learning_rate': 3.629391128897663e-06, 'epoch': 0.37} + 37%|███▋ | 2609/7045 [8:28:48<14:09:42, 11.49s/it] 37%|███▋ | 2610/7045 [8:28:59<14:02:56, 11.40s/it] {'loss': 1.1221, 'learning_rate': 3.6283655651881546e-06, 'epoch': 0.37} + 37%|███▋ | 2610/7045 [8:28:59<14:02:56, 11.40s/it] 37%|███▋ | 2611/7045 [8:29:11<14:12:08, 11.53s/it] {'loss': 1.1377, 'learning_rate': 3.627339762957591e-06, 'epoch': 0.37} + 37%|███▋ | 2611/7045 [8:29:11<14:12:08, 11.53s/it] 37%|███▋ | 2612/7045 [8:29:22<13:57:48, 11.34s/it] {'loss': 1.1299, 'learning_rate': 3.6263137224228138e-06, 'epoch': 0.37} + 37%|███▋ | 2612/7045 [8:29:22<13:57:48, 11.34s/it] 37%|███▋ | 2613/7045 [8:29:33<13:49:00, 11.22s/it] {'loss': 1.166, 'learning_rate': 3.625287443800715e-06, 'epoch': 0.37} + 37%|███▋ | 2613/7045 [8:29:33<13:49:00, 11.22s/it] 37%|███▋ | 2614/7045 [8:29:45<14:12:48, 11.55s/it] {'loss': 1.0977, 'learning_rate': 3.6242609273082336e-06, 'epoch': 0.37} + 37%|███▋ | 2614/7045 [8:29:45<14:12:48, 11.55s/it] 37%|███▋ | 2615/7045 [8:29:58<14:36:58, 11.88s/it] {'loss': 1.1299, 'learning_rate': 3.6232341731623623e-06, 'epoch': 0.37} + 37%|███▋ | 2615/7045 [8:29:58<14:36:58, 11.88s/it] 37%|███▋ | 2616/7045 [8:30:11<14:52:31, 12.09s/it] {'loss': 1.1196, 'learning_rate': 3.6222071815801425e-06, 'epoch': 0.37} + 37%|███▋ | 2616/7045 [8:30:11<14:52:31, 12.09s/it] 37%|███▋ | 2617/7045 [8:30:22<14:29:21, 11.78s/it] {'loss': 1.1357, 'learning_rate': 3.6211799527786667e-06, 'epoch': 0.37} + 37%|███▋ | 2617/7045 [8:30:22<14:29:21, 11.78s/it] 37%|███▋ | 2618/7045 [8:30:33<14:20:48, 11.67s/it] {'loss': 1.1328, 'learning_rate': 3.620152486975077e-06, 'epoch': 0.37} + 37%|███▋ | 2618/7045 [8:30:33<14:20:48, 11.67s/it] 37%|███▋ | 2619/7045 [8:30:44<14:03:51, 11.44s/it] {'loss': 1.1572, 'learning_rate': 3.619124784386565e-06, 'epoch': 0.37} + 37%|███▋ | 2619/7045 [8:30:44<14:03:51, 11.44s/it] 37%|███▋ | 2620/7045 [8:30:57<14:36:09, 11.88s/it] {'loss': 1.1201, 'learning_rate': 3.6180968452303735e-06, 'epoch': 0.37} + 37%|███▋ | 2620/7045 [8:30:57<14:36:09, 11.88s/it] 37%|███▋ | 2621/7045 [8:31:08<14:16:04, 11.61s/it] {'loss': 1.1162, 'learning_rate': 3.6170686697237945e-06, 'epoch': 0.37} + 37%|███▋ | 2621/7045 [8:31:08<14:16:04, 11.61s/it] 37%|███▋ | 2622/7045 [8:31:19<14:06:48, 11.49s/it] {'loss': 1.1035, 'learning_rate': 3.616040258084171e-06, 'epoch': 0.37} + 37%|███▋ | 2622/7045 [8:31:19<14:06:48, 11.49s/it] 37%|███▋ | 2623/7045 [8:31:32<14:50:29, 12.08s/it] {'loss': 1.104, 'learning_rate': 3.6150116105288942e-06, 'epoch': 0.37} + 37%|███▋ | 2623/7045 [8:31:32<14:50:29, 12.08s/it] 37%|███▋ | 2624/7045 [8:31:43<14:25:21, 11.74s/it] {'loss': 1.0977, 'learning_rate': 3.613982727275407e-06, 'epoch': 0.37} + 37%|███▋ | 2624/7045 [8:31:43<14:25:21, 11.74s/it] 37%|███▋ | 2625/7045 [8:31:55<14:20:37, 11.68s/it] {'loss': 1.2021, 'learning_rate': 3.6129536085411997e-06, 'epoch': 0.37} + 37%|███▋ | 2625/7045 [8:31:55<14:20:37, 11.68s/it] 37%|███▋ | 2626/7045 [8:32:06<14:02:56, 11.45s/it] {'loss': 1.1025, 'learning_rate': 3.6119242545438153e-06, 'epoch': 0.37} + 37%|███▋ | 2626/7045 [8:32:06<14:02:56, 11.45s/it] 37%|███▋ | 2627/7045 [8:32:17<13:57:23, 11.37s/it] {'loss': 1.1602, 'learning_rate': 3.6108946655008447e-06, 'epoch': 0.37} + 37%|███▋ | 2627/7045 [8:32:17<13:57:23, 11.37s/it] 37%|███▋ | 2628/7045 [8:32:28<13:57:01, 11.37s/it] {'loss': 1.1055, 'learning_rate': 3.6098648416299297e-06, 'epoch': 0.37} + 37%|███▋ | 2628/7045 [8:32:28<13:57:01, 11.37s/it] 37%|███▋ | 2629/7045 [8:32:42<14:39:45, 11.95s/it] {'loss': 1.1265, 'learning_rate': 3.6088347831487603e-06, 'epoch': 0.37} + 37%|███▋ | 2629/7045 [8:32:42<14:39:45, 11.95s/it] 37%|███▋ | 2630/7045 [8:32:53<14:18:10, 11.66s/it] {'loss': 1.1797, 'learning_rate': 3.6078044902750765e-06, 'epoch': 0.37} + 37%|███▋ | 2630/7045 [8:32:53<14:18:10, 11.66s/it] 37%|███▋ | 2631/7045 [8:33:04<14:12:17, 11.59s/it] {'loss': 1.0791, 'learning_rate': 3.6067739632266686e-06, 'epoch': 0.37} + 37%|███▋ | 2631/7045 [8:33:04<14:12:17, 11.59s/it] 37%|███▋ | 2632/7045 [8:33:15<14:05:29, 11.50s/it] {'loss': 1.1426, 'learning_rate': 3.605743202221376e-06, 'epoch': 0.37} + 37%|███▋ | 2632/7045 [8:33:15<14:05:29, 11.50s/it] 37%|███▋ | 2633/7045 [8:33:27<14:00:46, 11.43s/it] {'loss': 1.0991, 'learning_rate': 3.604712207477087e-06, 'epoch': 0.37} + 37%|███▋ | 2633/7045 [8:33:27<14:00:46, 11.43s/it] 37%|███▋ | 2634/7045 [8:33:38<13:53:53, 11.34s/it] {'loss': 1.166, 'learning_rate': 3.60368097921174e-06, 'epoch': 0.37} + 37%|███▋ | 2634/7045 [8:33:38<13:53:53, 11.34s/it] 37%|███▋ | 2635/7045 [8:33:49<13:46:14, 11.24s/it] {'loss': 1.1348, 'learning_rate': 3.6026495176433236e-06, 'epoch': 0.37} + 37%|███▋ | 2635/7045 [8:33:49<13:46:14, 11.24s/it] 37%|███▋ | 2636/7045 [8:34:00<13:49:02, 11.28s/it] {'loss': 1.1504, 'learning_rate': 3.601617822989873e-06, 'epoch': 0.37} + 37%|███▋ | 2636/7045 [8:34:00<13:49:02, 11.28s/it] 37%|███▋ | 2637/7045 [8:34:15<14:56:00, 12.20s/it] {'loss': 1.1377, 'learning_rate': 3.600585895469475e-06, 'epoch': 0.37} + 37%|███▋ | 2637/7045 [8:34:15<14:56:00, 12.20s/it] 37%|███▋ | 2638/7045 [8:34:26<14:36:46, 11.94s/it] {'loss': 1.1162, 'learning_rate': 3.5995537353002654e-06, 'epoch': 0.37} + 37%|███▋ | 2638/7045 [8:34:26<14:36:46, 11.94s/it] 37%|███▋ | 2639/7045 [8:34:38<14:44:38, 12.05s/it] {'loss': 1.1318, 'learning_rate': 3.5985213427004277e-06, 'epoch': 0.37} + 37%|███▋ | 2639/7045 [8:34:38<14:44:38, 12.05s/it] 37%|███▋ | 2640/7045 [8:34:49<14:27:24, 11.81s/it] {'loss': 1.1201, 'learning_rate': 3.597488717888197e-06, 'epoch': 0.37} + 37%|███▋ | 2640/7045 [8:34:49<14:27:24, 11.81s/it] 37%|███▋ | 2641/7045 [8:35:01<14:12:06, 11.61s/it] {'loss': 1.1191, 'learning_rate': 3.5964558610818544e-06, 'epoch': 0.37} + 37%|███▋ | 2641/7045 [8:35:01<14:12:06, 11.61s/it] 38%|███▊ | 2642/7045 [8:35:12<14:07:17, 11.55s/it] {'loss': 1.127, 'learning_rate': 3.5954227724997337e-06, 'epoch': 0.38} + 38%|███▊ | 2642/7045 [8:35:12<14:07:17, 11.55s/it] 38%|███▊ | 2643/7045 [8:35:23<13:57:05, 11.41s/it] {'loss': 1.1172, 'learning_rate': 3.5943894523602144e-06, 'epoch': 0.38} + 38%|███▊ | 2643/7045 [8:35:23<13:57:05, 11.41s/it] 38%|███▊ | 2644/7045 [8:35:36<14:29:58, 11.86s/it] {'loss': 1.0874, 'learning_rate': 3.5933559008817265e-06, 'epoch': 0.38} + 38%|███▊ | 2644/7045 [8:35:36<14:29:58, 11.86s/it] 38%|███▊ | 2645/7045 [8:35:47<14:14:50, 11.66s/it] {'loss': 1.1592, 'learning_rate': 3.592322118282748e-06, 'epoch': 0.38} + 38%|███▊ | 2645/7045 [8:35:47<14:14:50, 11.66s/it] 38%|███▊ | 2646/7045 [8:35:58<14:06:25, 11.54s/it] {'loss': 1.1152, 'learning_rate': 3.5912881047818075e-06, 'epoch': 0.38} + 38%|███▊ | 2646/7045 [8:35:58<14:06:25, 11.54s/it] 38%|███▊ | 2647/7045 [8:36:11<14:22:00, 11.76s/it] {'loss': 1.1348, 'learning_rate': 3.5902538605974815e-06, 'epoch': 0.38} + 38%|███▊ | 2647/7045 [8:36:11<14:22:00, 11.76s/it] 38%|███▊ | 2648/7045 [8:36:22<14:12:11, 11.63s/it] {'loss': 1.1494, 'learning_rate': 3.5892193859483938e-06, 'epoch': 0.38} + 38%|███▊ | 2648/7045 [8:36:22<14:12:11, 11.63s/it] 38%|███▊ | 2649/7045 [8:36:34<14:23:17, 11.78s/it] {'loss': 1.1113, 'learning_rate': 3.5881846810532183e-06, 'epoch': 0.38} + 38%|███▊ | 2649/7045 [8:36:34<14:23:17, 11.78s/it] 38%|███▊ | 2650/7045 [8:36:47<14:46:35, 12.10s/it] {'loss': 1.1035, 'learning_rate': 3.587149746130679e-06, 'epoch': 0.38} + 38%|███▊ | 2650/7045 [8:36:47<14:46:35, 12.10s/it] 38%|███▊ | 2651/7045 [8:36:58<14:19:20, 11.73s/it] {'loss': 1.1328, 'learning_rate': 3.5861145813995464e-06, 'epoch': 0.38} + 38%|███▊ | 2651/7045 [8:36:58<14:19:20, 11.73s/it] 38%|███▊ | 2652/7045 [8:37:09<14:07:22, 11.57s/it] {'loss': 1.1309, 'learning_rate': 3.5850791870786383e-06, 'epoch': 0.38} + 38%|███▊ | 2652/7045 [8:37:09<14:07:22, 11.57s/it] 38%|███▊ | 2653/7045 [8:37:22<14:41:27, 12.04s/it] {'loss': 1.1328, 'learning_rate': 3.584043563386825e-06, 'epoch': 0.38} + 38%|███▊ | 2653/7045 [8:37:22<14:41:27, 12.04s/it] 38%|███▊ | 2654/7045 [8:37:33<14:18:40, 11.73s/it] {'loss': 1.1011, 'learning_rate': 3.583007710543023e-06, 'epoch': 0.38} + 38%|███▊ | 2654/7045 [8:37:33<14:18:40, 11.73s/it] 38%|███▊ | 2655/7045 [8:37:46<14:38:28, 12.01s/it] {'loss': 1.0972, 'learning_rate': 3.581971628766197e-06, 'epoch': 0.38} + 38%|███▊ | 2655/7045 [8:37:46<14:38:28, 12.01s/it] 38%|███▊ | 2656/7045 [8:38:00<15:15:51, 12.52s/it] {'loss': 1.0811, 'learning_rate': 3.58093531827536e-06, 'epoch': 0.38} + 38%|███▊ | 2656/7045 [8:38:00<15:15:51, 12.52s/it] 38%|███▊ | 2657/7045 [8:38:11<14:45:23, 12.11s/it] {'loss': 1.1211, 'learning_rate': 3.579898779289575e-06, 'epoch': 0.38} + 38%|███▊ | 2657/7045 [8:38:11<14:45:23, 12.11s/it] 38%|███▊ | 2658/7045 [8:38:22<14:29:10, 11.89s/it] {'loss': 1.1348, 'learning_rate': 3.5788620120279517e-06, 'epoch': 0.38} + 38%|███▊ | 2658/7045 [8:38:22<14:29:10, 11.89s/it] 38%|███▊ | 2659/7045 [8:38:33<14:12:36, 11.66s/it] {'loss': 1.105, 'learning_rate': 3.577825016709648e-06, 'epoch': 0.38} + 38%|███▊ | 2659/7045 [8:38:33<14:12:36, 11.66s/it] 38%|███▊ | 2660/7045 [8:38:45<14:12:54, 11.67s/it] {'loss': 1.1289, 'learning_rate': 3.576787793553871e-06, 'epoch': 0.38} + 38%|███▊ | 2660/7045 [8:38:45<14:12:54, 11.67s/it] 38%|███▊ | 2661/7045 [8:38:56<13:59:23, 11.49s/it] {'loss': 1.1602, 'learning_rate': 3.5757503427798757e-06, 'epoch': 0.38} + 38%|███▊ | 2661/7045 [8:38:56<13:59:23, 11.49s/it] 38%|███▊ | 2662/7045 [8:39:07<13:53:39, 11.41s/it] {'loss': 1.1348, 'learning_rate': 3.574712664606964e-06, 'epoch': 0.38} + 38%|███▊ | 2662/7045 [8:39:07<13:53:39, 11.41s/it] 38%|███▊ | 2663/7045 [8:39:18<13:44:39, 11.29s/it] {'loss': 1.1431, 'learning_rate': 3.5736747592544883e-06, 'epoch': 0.38} + 38%|███▊ | 2663/7045 [8:39:18<13:44:39, 11.29s/it] 38%|███▊ | 2664/7045 [8:39:29<13:40:16, 11.23s/it] {'loss': 1.1504, 'learning_rate': 3.572636626941846e-06, 'epoch': 0.38} + 38%|███▊ | 2664/7045 [8:39:29<13:40:16, 11.23s/it] 38%|███▊ | 2665/7045 [8:39:43<14:34:07, 11.97s/it] {'loss': 1.082, 'learning_rate': 3.5715982678884863e-06, 'epoch': 0.38} + 38%|███▊ | 2665/7045 [8:39:43<14:34:07, 11.97s/it] 38%|███▊ | 2666/7045 [8:39:54<14:14:37, 11.71s/it] {'loss': 1.1523, 'learning_rate': 3.570559682313902e-06, 'epoch': 0.38} + 38%|███▊ | 2666/7045 [8:39:54<14:14:37, 11.71s/it] 38%|███▊ | 2667/7045 [8:40:05<14:04:42, 11.58s/it] {'loss': 1.1289, 'learning_rate': 3.5695208704376373e-06, 'epoch': 0.38} + 38%|███▊ | 2667/7045 [8:40:05<14:04:42, 11.58s/it] 38%|███▊ | 2668/7045 [8:40:18<14:24:44, 11.85s/it] {'loss': 1.1484, 'learning_rate': 3.568481832479282e-06, 'epoch': 0.38} + 38%|███▊ | 2668/7045 [8:40:18<14:24:44, 11.85s/it] 38%|███▊ | 2669/7045 [8:40:29<14:06:15, 11.60s/it] {'loss': 1.1416, 'learning_rate': 3.567442568658475e-06, 'epoch': 0.38} + 38%|███▊ | 2669/7045 [8:40:29<14:06:15, 11.60s/it] 38%|███▊ | 2670/7045 [8:40:40<13:52:12, 11.41s/it] {'loss': 1.1025, 'learning_rate': 3.566403079194901e-06, 'epoch': 0.38} + 38%|███▊ | 2670/7045 [8:40:40<13:52:12, 11.41s/it] 38%|███▊ | 2671/7045 [8:40:51<13:51:07, 11.40s/it] {'loss': 1.1758, 'learning_rate': 3.565363364308296e-06, 'epoch': 0.38} + 38%|███▊ | 2671/7045 [8:40:51<13:51:07, 11.40s/it] 38%|███▊ | 2672/7045 [8:41:02<13:46:14, 11.34s/it] {'loss': 1.0947, 'learning_rate': 3.564323424218441e-06, 'epoch': 0.38} + 38%|███▊ | 2672/7045 [8:41:02<13:46:14, 11.34s/it] 38%|███▊ | 2673/7045 [8:41:15<14:06:51, 11.62s/it] {'loss': 1.0591, 'learning_rate': 3.5632832591451643e-06, 'epoch': 0.38} + 38%|███▊ | 2673/7045 [8:41:15<14:06:51, 11.62s/it] 38%|███▊ | 2674/7045 [8:41:27<14:15:52, 11.75s/it] {'loss': 1.1113, 'learning_rate': 3.5622428693083432e-06, 'epoch': 0.38} + 38%|███▊ | 2674/7045 [8:41:27<14:15:52, 11.75s/it] 38%|███▊ | 2675/7045 [8:41:38<14:01:00, 11.55s/it] {'loss': 1.1455, 'learning_rate': 3.561202254927901e-06, 'epoch': 0.38} + 38%|███▊ | 2675/7045 [8:41:38<14:01:00, 11.55s/it] 38%|███▊ | 2676/7045 [8:41:49<13:50:16, 11.40s/it] {'loss': 1.0879, 'learning_rate': 3.5601614162238107e-06, 'epoch': 0.38} + 38%|███▊ | 2676/7045 [8:41:49<13:50:16, 11.40s/it] 38%|███▊ | 2677/7045 [8:42:00<13:45:36, 11.34s/it] {'loss': 1.167, 'learning_rate': 3.55912035341609e-06, 'epoch': 0.38} + 38%|███▊ | 2677/7045 [8:42:00<13:45:36, 11.34s/it] 38%|███▊ | 2678/7045 [8:42:13<14:22:29, 11.85s/it] {'loss': 1.0938, 'learning_rate': 3.5580790667248065e-06, 'epoch': 0.38} + 38%|███▊ | 2678/7045 [8:42:13<14:22:29, 11.85s/it] 38%|███▊ | 2679/7045 [8:42:24<14:03:14, 11.59s/it] {'loss': 1.0947, 'learning_rate': 3.5570375563700736e-06, 'epoch': 0.38} + 38%|███▊ | 2679/7045 [8:42:24<14:03:14, 11.59s/it] 38%|███▊ | 2680/7045 [8:42:35<13:54:21, 11.47s/it] {'loss': 1.1318, 'learning_rate': 3.5559958225720524e-06, 'epoch': 0.38} + 38%|███▊ | 2680/7045 [8:42:35<13:54:21, 11.47s/it] 38%|███▊ | 2681/7045 [8:42:48<14:14:56, 11.75s/it] {'loss': 1.1035, 'learning_rate': 3.5549538655509514e-06, 'epoch': 0.38} + 38%|███▊ | 2681/7045 [8:42:48<14:14:56, 11.75s/it] 38%|███▊ | 2682/7045 [8:42:59<13:54:43, 11.48s/it] {'loss': 1.125, 'learning_rate': 3.5539116855270247e-06, 'epoch': 0.38} + 38%|███▊ | 2682/7045 [8:42:59<13:54:43, 11.48s/it] 38%|███▊ | 2683/7045 [8:43:10<13:46:51, 11.37s/it] {'loss': 1.1279, 'learning_rate': 3.552869282720577e-06, 'epoch': 0.38} + 38%|███▊ | 2683/7045 [8:43:10<13:46:51, 11.37s/it] 38%|███▊ | 2684/7045 [8:43:21<13:49:54, 11.42s/it] {'loss': 1.1318, 'learning_rate': 3.5518266573519554e-06, 'epoch': 0.38} + 38%|███▊ | 2684/7045 [8:43:21<13:49:54, 11.42s/it] 38%|███▊ | 2685/7045 [8:43:34<14:19:13, 11.82s/it] {'loss': 1.1055, 'learning_rate': 3.5507838096415593e-06, 'epoch': 0.38} + 38%|███▊ | 2685/7045 [8:43:34<14:19:13, 11.82s/it] 38%|███▊ | 2686/7045 [8:43:45<13:55:37, 11.50s/it] {'loss': 1.1455, 'learning_rate': 3.5497407398098314e-06, 'epoch': 0.38} + 38%|███▊ | 2686/7045 [8:43:45<13:55:37, 11.50s/it] 38%|███▊ | 2687/7045 [8:43:57<14:08:43, 11.69s/it] {'loss': 1.1406, 'learning_rate': 3.5486974480772618e-06, 'epoch': 0.38} + 38%|███▊ | 2687/7045 [8:43:57<14:08:43, 11.69s/it] 38%|███▊ | 2688/7045 [8:44:09<14:18:56, 11.83s/it] {'loss': 1.1123, 'learning_rate': 3.547653934664389e-06, 'epoch': 0.38} + 38%|███▊ | 2688/7045 [8:44:09<14:18:56, 11.83s/it] 38%|███▊ | 2689/7045 [8:44:22<14:34:04, 12.04s/it] {'loss': 1.123, 'learning_rate': 3.546610199791797e-06, 'epoch': 0.38} + 38%|███▊ | 2689/7045 [8:44:22<14:34:04, 12.04s/it] 38%|███▊ | 2690/7045 [8:44:34<14:35:37, 12.06s/it] {'loss': 1.0713, 'learning_rate': 3.545566243680117e-06, 'epoch': 0.38} + 38%|███▊ | 2690/7045 [8:44:34<14:35:37, 12.06s/it] 38%|███▊ | 2691/7045 [8:44:45<14:17:22, 11.81s/it] {'loss': 1.1543, 'learning_rate': 3.544522066550028e-06, 'epoch': 0.38} + 38%|███▊ | 2691/7045 [8:44:45<14:17:22, 11.81s/it] 38%|███▊ | 2692/7045 [8:44:58<14:54:32, 12.33s/it] {'loss': 1.1196, 'learning_rate': 3.543477668622253e-06, 'epoch': 0.38} + 38%|███▊ | 2692/7045 [8:44:58<14:54:32, 12.33s/it] 38%|███▊ | 2693/7045 [8:45:10<14:30:17, 12.00s/it] {'loss': 1.1104, 'learning_rate': 3.542433050117565e-06, 'epoch': 0.38} + 38%|███▊ | 2693/7045 [8:45:10<14:30:17, 12.00s/it] 38%|███▊ | 2694/7045 [8:45:21<14:21:06, 11.87s/it] {'loss': 1.1025, 'learning_rate': 3.5413882112567816e-06, 'epoch': 0.38} + 38%|███▊ | 2694/7045 [8:45:21<14:21:06, 11.87s/it] 38%|███▊ | 2695/7045 [8:45:32<14:00:27, 11.59s/it] {'loss': 1.1064, 'learning_rate': 3.540343152260767e-06, 'epoch': 0.38} + 38%|███▊ | 2695/7045 [8:45:32<14:00:27, 11.59s/it] 38%|███▊ | 2696/7045 [8:45:44<13:58:34, 11.57s/it] {'loss': 1.0723, 'learning_rate': 3.5392978733504336e-06, 'epoch': 0.38} + 38%|███▊ | 2696/7045 [8:45:44<13:58:34, 11.57s/it] 38%|███▊ | 2697/7045 [8:45:56<14:21:25, 11.89s/it] {'loss': 1.1299, 'learning_rate': 3.5382523747467374e-06, 'epoch': 0.38} + 38%|███▊ | 2697/7045 [8:45:56<14:21:25, 11.89s/it] 38%|███▊ | 2698/7045 [8:46:07<14:04:25, 11.66s/it] {'loss': 1.125, 'learning_rate': 3.537206656670683e-06, 'epoch': 0.38} + 38%|███▊ | 2698/7045 [8:46:07<14:04:25, 11.66s/it] 38%|███�� | 2699/7045 [8:46:21<14:42:47, 12.19s/it] {'loss': 1.082, 'learning_rate': 3.536160719343323e-06, 'epoch': 0.38} + 38%|███▊ | 2699/7045 [8:46:21<14:42:47, 12.19s/it] 38%|███▊ | 2700/7045 [8:46:33<14:40:36, 12.16s/it] {'loss': 1.1006, 'learning_rate': 3.53511456298575e-06, 'epoch': 0.38} + 38%|███▊ | 2700/7045 [8:46:33<14:40:36, 12.16s/it] 38%|███▊ | 2701/7045 [8:46:46<14:52:31, 12.33s/it] {'loss': 1.1128, 'learning_rate': 3.5340681878191114e-06, 'epoch': 0.38} + 38%|███▊ | 2701/7045 [8:46:46<14:52:31, 12.33s/it] 38%|███▊ | 2702/7045 [8:46:57<14:38:53, 12.14s/it] {'loss': 1.1416, 'learning_rate': 3.5330215940645952e-06, 'epoch': 0.38} + 38%|███▊ | 2702/7045 [8:46:57<14:38:53, 12.14s/it] 38%|███▊ | 2703/7045 [8:47:10<14:56:44, 12.39s/it] {'loss': 1.0972, 'learning_rate': 3.5319747819434356e-06, 'epoch': 0.38} + 38%|███▊ | 2703/7045 [8:47:10<14:56:44, 12.39s/it] 38%|███▊ | 2704/7045 [8:47:22<14:37:36, 12.13s/it] {'loss': 1.1318, 'learning_rate': 3.5309277516769156e-06, 'epoch': 0.38} + 38%|███▊ | 2704/7045 [8:47:22<14:37:36, 12.13s/it] 38%|███▊ | 2705/7045 [8:47:33<14:23:49, 11.94s/it] {'loss': 1.1143, 'learning_rate': 3.5298805034863633e-06, 'epoch': 0.38} + 38%|███▊ | 2705/7045 [8:47:33<14:23:49, 11.94s/it] 38%|███▊ | 2706/7045 [8:47:48<15:11:14, 12.60s/it] {'loss': 1.0801, 'learning_rate': 3.5288330375931516e-06, 'epoch': 0.38} + 38%|███▊ | 2706/7045 [8:47:48<15:11:14, 12.60s/it] 38%|███▊ | 2707/7045 [8:48:00<14:57:49, 12.42s/it] {'loss': 1.0938, 'learning_rate': 3.5277853542187017e-06, 'epoch': 0.38} + 38%|███▊ | 2707/7045 [8:48:00<14:57:49, 12.42s/it] 38%|███▊ | 2708/7045 [8:48:12<15:07:00, 12.55s/it] {'loss': 1.0986, 'learning_rate': 3.526737453584479e-06, 'epoch': 0.38} + 38%|███▊ | 2708/7045 [8:48:12<15:07:00, 12.55s/it] 38%|███▊ | 2709/7045 [8:48:25<15:11:29, 12.61s/it] {'loss': 1.1055, 'learning_rate': 3.5256893359119952e-06, 'epoch': 0.38} + 38%|███▊ | 2709/7045 [8:48:25<15:11:29, 12.61s/it] 38%|███▊ | 2710/7045 [8:48:36<14:42:17, 12.21s/it] {'loss': 1.1592, 'learning_rate': 3.524641001422809e-06, 'epoch': 0.38} + 38%|███▊ | 2710/7045 [8:48:36<14:42:17, 12.21s/it] 38%|███▊ | 2711/7045 [8:48:47<14:13:03, 11.81s/it] {'loss': 1.1367, 'learning_rate': 3.523592450338522e-06, 'epoch': 0.38} + 38%|███▊ | 2711/7045 [8:48:47<14:13:03, 11.81s/it] 38%|███▊ | 2712/7045 [8:49:00<14:27:56, 12.02s/it] {'loss': 1.1221, 'learning_rate': 3.5225436828807847e-06, 'epoch': 0.38} + 38%|███▊ | 2712/7045 [8:49:00<14:27:56, 12.02s/it] 39%|███▊ | 2713/7045 [8:49:12<14:37:04, 12.15s/it] {'loss': 1.085, 'learning_rate': 3.5214946992712932e-06, 'epoch': 0.39} + 39%|███▊ | 2713/7045 [8:49:12<14:37:04, 12.15s/it] 39%|███▊ | 2714/7045 [8:49:24<14:18:07, 11.89s/it] {'loss': 1.0977, 'learning_rate': 3.520445499731787e-06, 'epoch': 0.39} + 39%|███▊ | 2714/7045 [8:49:24<14:18:07, 11.89s/it] 39%|███▊ | 2715/7045 [8:49:37<14:48:13, 12.31s/it] {'loss': 1.1323, 'learning_rate': 3.5193960844840526e-06, 'epoch': 0.39} + 39%|███▊ | 2715/7045 [8:49:37<14:48:13, 12.31s/it] 39%|███▊ | 2716/7045 [8:49:50<15:11:06, 12.63s/it] {'loss': 1.0845, 'learning_rate': 3.5183464537499224e-06, 'epoch': 0.39} + 39%|███▊ | 2716/7045 [8:49:50<15:11:06, 12.63s/it] 39%|███▊ | 2717/7045 [8:50:01<14:40:39, 12.21s/it] {'loss': 1.1113, 'learning_rate': 3.517296607751275e-06, 'epoch': 0.39} + 39%|███▊ | 2717/7045 [8:50:01<14:40:39, 12.21s/it] 39%|███▊ | 2718/7045 [8:50:13<14:18:36, 11.91s/it] {'loss': 1.0996, 'learning_rate': 3.516246546710031e-06, 'epoch': 0.39} + 39%|███▊ | 2718/7045 [8:50:13<14:18:36, 11.91s/it] 39%|███▊ | 2719/7045 [8:50:24<14:11:22, 11.81s/it] {'loss': 1.064, 'learning_rate': 3.51519627084816e-06, 'epoch': 0.39} + 39%|███▊ | 2719/7045 [8:50:24<14:11:22, 11.81s/it] 39%|███▊ | 2720/7045 [8:50:35<13:54:27, 11.58s/it] {'loss': 1.1191, 'learning_rate': 3.5141457803876772e-06, 'epoch': 0.39} + 39%|███▊ | 2720/7045 [8:50:35<13:54:27, 11.58s/it] 39%|███▊ | 2721/7045 [8:50:48<14:14:48, 11.86s/it] {'loss': 1.0996, 'learning_rate': 3.513095075550641e-06, 'epoch': 0.39} + 39%|███▊ | 2721/7045 [8:50:48<14:14:48, 11.86s/it] 39%|███▊ | 2722/7045 [8:51:02<14:54:36, 12.42s/it] {'loss': 1.125, 'learning_rate': 3.512044156559156e-06, 'epoch': 0.39} + 39%|███▊ | 2722/7045 [8:51:02<14:54:36, 12.42s/it] 39%|███▊ | 2723/7045 [8:51:13<14:37:06, 12.18s/it] {'loss': 1.1035, 'learning_rate': 3.5109930236353723e-06, 'epoch': 0.39} + 39%|███▊ | 2723/7045 [8:51:13<14:37:06, 12.18s/it] 39%|███▊ | 2724/7045 [8:51:24<14:11:50, 11.83s/it] {'loss': 1.1182, 'learning_rate': 3.5099416770014848e-06, 'epoch': 0.39} + 39%|███▊ | 2724/7045 [8:51:24<14:11:50, 11.83s/it] 39%|███▊ | 2725/7045 [8:51:37<14:31:10, 12.10s/it] {'loss': 1.0923, 'learning_rate': 3.5088901168797336e-06, 'epoch': 0.39} + 39%|███▊ | 2725/7045 [8:51:37<14:31:10, 12.10s/it] 39%|███▊ | 2726/7045 [8:51:50<14:50:02, 12.36s/it] {'loss': 1.0586, 'learning_rate': 3.5078383434924048e-06, 'epoch': 0.39} + 39%|███▊ | 2726/7045 [8:51:50<14:50:02, 12.36s/it] 39%|███▊ | 2727/7045 [8:52:01<14:27:57, 12.06s/it] {'loss': 1.1279, 'learning_rate': 3.5067863570618283e-06, 'epoch': 0.39} + 39%|███▊ | 2727/7045 [8:52:01<14:27:57, 12.06s/it] 39%|███▊ | 2728/7045 [8:52:13<14:12:25, 11.85s/it] {'loss': 1.1201, 'learning_rate': 3.5057341578103804e-06, 'epoch': 0.39} + 39%|███▊ | 2728/7045 [8:52:13<14:12:25, 11.85s/it] 39%|███▊ | 2729/7045 [8:52:25<14:24:50, 12.02s/it] {'loss': 1.0762, 'learning_rate': 3.504681745960481e-06, 'epoch': 0.39} + 39%|███▊ | 2729/7045 [8:52:25<14:24:50, 12.02s/it] 39%|███▉ | 2730/7045 [8:52:38<14:43:35, 12.29s/it] {'loss': 1.1157, 'learning_rate': 3.503629121734595e-06, 'epoch': 0.39} + 39%|███▉ | 2730/7045 [8:52:38<14:43:35, 12.29s/it] 39%|███▉ | 2731/7045 [8:52:50<14:39:03, 12.23s/it] {'loss': 1.0757, 'learning_rate': 3.502576285355234e-06, 'epoch': 0.39} + 39%|███▉ | 2731/7045 [8:52:50<14:39:03, 12.23s/it] 39%|███▉ | 2732/7045 [8:53:01<14:21:07, 11.98s/it] {'loss': 1.1367, 'learning_rate': 3.501523237044953e-06, 'epoch': 0.39} + 39%|███▉ | 2732/7045 [8:53:01<14:21:07, 11.98s/it] 39%|███▉ | 2733/7045 [8:53:13<14:04:57, 11.76s/it] {'loss': 1.1748, 'learning_rate': 3.5004699770263512e-06, 'epoch': 0.39} + 39%|███▉ | 2733/7045 [8:53:13<14:04:57, 11.76s/it] 39%|███▉ | 2734/7045 [8:53:23<13:37:40, 11.38s/it] {'loss': 1.062, 'learning_rate': 3.4994165055220742e-06, 'epoch': 0.39} + 39%|███▉ | 2734/7045 [8:53:23<13:37:40, 11.38s/it] 39%|███▉ | 2735/7045 [8:53:34<13:33:50, 11.33s/it] {'loss': 1.0996, 'learning_rate': 3.4983628227548112e-06, 'epoch': 0.39} + 39%|███▉ | 2735/7045 [8:53:34<13:33:50, 11.33s/it] 39%|███▉ | 2736/7045 [8:53:46<13:39:31, 11.41s/it] {'loss': 1.1157, 'learning_rate': 3.497308928947297e-06, 'epoch': 0.39} + 39%|███▉ | 2736/7045 [8:53:46<13:39:31, 11.41s/it] 39%|███▉ | 2737/7045 [8:53:58<13:56:49, 11.65s/it] {'loss': 1.0884, 'learning_rate': 3.496254824322308e-06, 'epoch': 0.39} + 39%|███▉ | 2737/7045 [8:53:58<13:56:49, 11.65s/it] 39%|███▉ | 2738/7045 [8:54:11<14:15:22, 11.92s/it] {'loss': 1.1514, 'learning_rate': 3.4952005091026698e-06, 'epoch': 0.39} + 39%|███▉ | 2738/7045 [8:54:11<14:15:22, 11.92s/it] 39%|███▉ | 2739/7045 [8:54:22<14:01:42, 11.73s/it] {'loss': 1.1514, 'learning_rate': 3.49414598351125e-06, 'epoch': 0.39} + 39%|███▉ | 2739/7045 [8:54:22<14:01:42, 11.73s/it] 39%|███▉ | 2740/7045 [8:54:33<13:51:58, 11.60s/it] {'loss': 1.1113, 'learning_rate': 3.49309124777096e-06, 'epoch': 0.39} + 39%|███▉ | 2740/7045 [8:54:33<13:51:58, 11.60s/it] 39%|███▉ | 2741/7045 [8:54:44<13:37:10, 11.39s/it] {'loss': 1.123, 'learning_rate': 3.4920363021047576e-06, 'epoch': 0.39} + 39%|███▉ | 2741/7045 [8:54:44<13:37:10, 11.39s/it] 39%|███▉ | 2742/7045 [8:54:57<14:06:03, 11.80s/it] {'loss': 1.1377, 'learning_rate': 3.4909811467356423e-06, 'epoch': 0.39} + 39%|███▉ | 2742/7045 [8:54:57<14:06:03, 11.80s/it] 39%|███▉ | 2743/7045 [8:55:09<14:21:43, 12.02s/it] {'loss': 1.1157, 'learning_rate': 3.4899257818866614e-06, 'epoch': 0.39} + 39%|███▉ | 2743/7045 [8:55:09<14:21:43, 12.02s/it] 39%|███▉ | 2744/7045 [8:55:23<14:45:31, 12.35s/it] {'loss': 1.0869, 'learning_rate': 3.488870207780903e-06, 'epoch': 0.39} + 39%|███▉ | 2744/7045 [8:55:23<14:45:31, 12.35s/it] 39%|███▉ | 2745/7045 [8:55:34<14:15:51, 11.94s/it] {'loss': 1.147, 'learning_rate': 3.4878144246415026e-06, 'epoch': 0.39} + 39%|███▉ | 2745/7045 [8:55:34<14:15:51, 11.94s/it] 39%|███▉ | 2746/7045 [8:55:45<13:59:21, 11.71s/it] {'loss': 1.1064, 'learning_rate': 3.486758432691637e-06, 'epoch': 0.39} + 39%|███▉ | 2746/7045 [8:55:45<13:59:21, 11.71s/it] 39%|███▉ | 2747/7045 [8:55:56<13:46:31, 11.54s/it] {'loss': 1.1133, 'learning_rate': 3.4857022321545293e-06, 'epoch': 0.39} + 39%|███▉ | 2747/7045 [8:55:56<13:46:31, 11.54s/it] 39%|███▉ | 2748/7045 [8:56:07<13:45:03, 11.52s/it] {'loss': 1.1299, 'learning_rate': 3.4846458232534445e-06, 'epoch': 0.39} + 39%|███▉ | 2748/7045 [8:56:07<13:45:03, 11.52s/it] 39%|███▉ | 2749/7045 [8:56:18<13:35:00, 11.38s/it] {'loss': 1.1006, 'learning_rate': 3.4835892062116943e-06, 'epoch': 0.39} + 39%|███▉ | 2749/7045 [8:56:18<13:35:00, 11.38s/it] 39%|███▉ | 2750/7045 [8:56:29<13:27:47, 11.28s/it] {'loss': 1.1074, 'learning_rate': 3.4825323812526337e-06, 'epoch': 0.39} + 39%|███▉ | 2750/7045 [8:56:29<13:27:47, 11.28s/it] 39%|███▉ | 2751/7045 [8:56:42<13:44:29, 11.52s/it] {'loss': 1.0869, 'learning_rate': 3.4814753485996586e-06, 'epoch': 0.39} + 39%|███▉ | 2751/7045 [8:56:42<13:44:29, 11.52s/it] 39%|███▉ | 2752/7045 [8:56:54<14:14:21, 11.94s/it] {'loss': 1.0928, 'learning_rate': 3.480418108476214e-06, 'epoch': 0.39} + 39%|███▉ | 2752/7045 [8:56:54<14:14:21, 11.94s/it] 39%|███▉ | 2753/7045 [8:57:06<13:57:03, 11.70s/it] {'loss': 1.1152, 'learning_rate': 3.479360661105784e-06, 'epoch': 0.39} + 39%|███▉ | 2753/7045 [8:57:06<13:57:03, 11.70s/it] 39%|███▉ | 2754/7045 [8:57:18<14:08:16, 11.86s/it] {'loss': 1.0977, 'learning_rate': 3.4783030067119e-06, 'epoch': 0.39} + 39%|███▉ | 2754/7045 [8:57:18<14:08:16, 11.86s/it] 39%|███▉ | 2755/7045 [8:57:31<14:34:01, 12.22s/it] {'loss': 1.127, 'learning_rate': 3.4772451455181344e-06, 'epoch': 0.39} + 39%|███▉ | 2755/7045 [8:57:31<14:34:01, 12.22s/it] 39%|███▉ | 2756/7045 [8:57:42<14:14:46, 11.96s/it] {'loss': 1.1328, 'learning_rate': 3.476187077748105e-06, 'epoch': 0.39} + 39%|███▉ | 2756/7045 [8:57:42<14:14:46, 11.96s/it] 39%|███▉ | 2757/7045 [8:57:53<13:55:25, 11.69s/it] {'loss': 1.1299, 'learning_rate': 3.4751288036254732e-06, 'epoch': 0.39} + 39%|███▉ | 2757/7045 [8:57:53<13:55:25, 11.69s/it] 39%|███▉ | 2758/7045 [8:58:05<13:53:24, 11.66s/it] {'loss': 1.1182, 'learning_rate': 3.4740703233739442e-06, 'epoch': 0.39} + 39%|███▉ | 2758/7045 [8:58:05<13:53:24, 11.66s/it] 39%|███▉ | 2759/7045 [8:58:16<13:47:15, 11.58s/it] {'loss': 1.1006, 'learning_rate': 3.473011637217265e-06, 'epoch': 0.39} + 39%|███▉ | 2759/7045 [8:58:16<13:47:15, 11.58s/it] 39%|███▉ | 2760/7045 [8:58:27<13:35:05, 11.41s/it] {'loss': 1.0977, 'learning_rate': 3.471952745379228e-06, 'epoch': 0.39} + 39%|███▉ | 2760/7045 [8:58:27<13:35:05, 11.41s/it] 39%|███▉ | 2761/7045 [8:58:39<13:46:32, 11.58s/it] {'loss': 1.1025, 'learning_rate': 3.4708936480836686e-06, 'epoch': 0.39} + 39%|███▉ | 2761/7045 [8:58:39<13:46:32, 11.58s/it] 39%|███▉ | 2762/7045 [8:58:51<13:38:36, 11.47s/it] {'loss': 1.1309, 'learning_rate': 3.4698343455544646e-06, 'epoch': 0.39} + 39%|███▉ | 2762/7045 [8:58:51<13:38:36, 11.47s/it] 39%|███▉ | 2763/7045 [8:59:02<13:33:17, 11.40s/it] {'loss': 1.0977, 'learning_rate': 3.4687748380155394e-06, 'epoch': 0.39} + 39%|███▉ | 2763/7045 [8:59:02<13:33:17, 11.40s/it] 39%|███▉ | 2764/7045 [8:59:13<13:37:44, 11.46s/it] {'loss': 1.1143, 'learning_rate': 3.467715125690857e-06, 'epoch': 0.39} + 39%|███▉ | 2764/7045 [8:59:13<13:37:44, 11.46s/it] 39%|███▉ | 2765/7045 [8:59:26<13:58:32, 11.76s/it] {'loss': 1.1016, 'learning_rate': 3.466655208804428e-06, 'epoch': 0.39} + 39%|███▉ | 2765/7045 [8:59:26<13:58:32, 11.76s/it] 39%|███▉ | 2766/7045 [8:59:38<14:18:11, 12.03s/it] {'loss': 1.1162, 'learning_rate': 3.465595087580303e-06, 'epoch': 0.39} + 39%|███▉ | 2766/7045 [8:59:38<14:18:11, 12.03s/it] 39%|███▉ | 2767/7045 [8:59:50<14:06:13, 11.87s/it] {'loss': 1.1221, 'learning_rate': 3.4645347622425764e-06, 'epoch': 0.39} + 39%|███▉ | 2767/7045 [8:59:50<14:06:13, 11.87s/it] 39%|███▉ | 2768/7045 [9:00:03<14:34:42, 12.27s/it] {'loss': 1.0586, 'learning_rate': 3.463474233015388e-06, 'epoch': 0.39} + 39%|███▉ | 2768/7045 [9:00:03<14:34:42, 12.27s/it] 39%|███▉ | 2769/7045 [9:00:15<14:19:35, 12.06s/it] {'loss': 1.1011, 'learning_rate': 3.462413500122919e-06, 'epoch': 0.39} + 39%|███▉ | 2769/7045 [9:00:15<14:19:35, 12.06s/it] 39%|███▉ | 2770/7045 [9:00:27<14:23:28, 12.12s/it] {'loss': 1.1099, 'learning_rate': 3.461352563789393e-06, 'epoch': 0.39} + 39%|███▉ | 2770/7045 [9:00:27<14:23:28, 12.12s/it] 39%|███▉ | 2771/7045 [9:00:38<14:07:17, 11.89s/it] {'loss': 1.1533, 'learning_rate': 3.460291424239078e-06, 'epoch': 0.39} + 39%|███▉ | 2771/7045 [9:00:38<14:07:17, 11.89s/it] 39%|███▉ | 2772/7045 [9:00:49<13:50:52, 11.67s/it] {'loss': 1.1279, 'learning_rate': 3.4592300816962844e-06, 'epoch': 0.39} + 39%|███▉ | 2772/7045 [9:00:49<13:50:52, 11.67s/it] 39%|███▉ | 2773/7045 [9:01:01<13:39:10, 11.51s/it] {'loss': 1.0654, 'learning_rate': 3.4581685363853644e-06, 'epoch': 0.39} + 39%|███▉ | 2773/7045 [9:01:01<13:39:10, 11.51s/it] 39%|███▉ | 2774/7045 [9:01:14<14:12:01, 11.97s/it] {'loss': 1.1001, 'learning_rate': 3.4571067885307165e-06, 'epoch': 0.39} + 39%|███▉ | 2774/7045 [9:01:14<14:12:01, 11.97s/it] 39%|███▉ | 2775/7045 [9:01:26<14:27:20, 12.19s/it] {'loss': 1.1143, 'learning_rate': 3.4560448383567773e-06, 'epoch': 0.39} + 39%|███▉ | 2775/7045 [9:01:26<14:27:20, 12.19s/it] 39%|███▉ | 2776/7045 [9:01:37<14:01:12, 11.82s/it] {'loss': 1.124, 'learning_rate': 3.4549826860880305e-06, 'epoch': 0.39} + 39%|███▉ | 2776/7045 [9:01:37<14:01:12, 11.82s/it] 39%|███▉ | 2777/7045 [9:01:49<13:50:50, 11.68s/it] {'loss': 1.1357, 'learning_rate': 3.453920331948999e-06, 'epoch': 0.39} + 39%|███▉ | 2777/7045 [9:01:49<13:50:50, 11.68s/it] 39%|███▉ | 2778/7045 [9:02:00<13:35:22, 11.47s/it] {'loss': 1.126, 'learning_rate': 3.452857776164251e-06, 'epoch': 0.39} + 39%|███▉ | 2778/7045 [9:02:00<13:35:22, 11.47s/it] 39%|███▉ | 2779/7045 [9:02:14<14:27:07, 12.20s/it] {'loss': 1.1719, 'learning_rate': 3.451795018958394e-06, 'epoch': 0.39} + 39%|███▉ | 2779/7045 [9:02:14<14:27:07, 12.20s/it] 39%|███▉ | 2780/7045 [9:02:26<14:41:44, 12.40s/it] {'loss': 1.1396, 'learning_rate': 3.4507320605560844e-06, 'epoch': 0.39} + 39%|███▉ | 2780/7045 [9:02:26<14:41:44, 12.40s/it] 39%|███▉ | 2781/7045 [9:02:38<14:16:59, 12.06s/it] {'loss': 1.127, 'learning_rate': 3.4496689011820146e-06, 'epoch': 0.39} + 39%|███▉ | 2781/7045 [9:02:38<14:16:59, 12.06s/it] 39%|███▉ | 2782/7045 [9:02:51<14:33:25, 12.29s/it] {'loss': 1.0659, 'learning_rate': 3.4486055410609216e-06, 'epoch': 0.39} + 39%|███▉ | 2782/7045 [9:02:51<14:33:25, 12.29s/it] 40%|███▉ | 2783/7045 [9:03:02<14:08:59, 11.95s/it] {'loss': 1.0776, 'learning_rate': 3.447541980417586e-06, 'epoch': 0.4} + 40%|███▉ | 2783/7045 [9:03:02<14:08:59, 11.95s/it] 40%|███▉ | 2784/7045 [9:03:14<14:14:09, 12.03s/it] {'loss': 1.0791, 'learning_rate': 3.44647821947683e-06, 'epoch': 0.4} + 40%|███▉ | 2784/7045 [9:03:14<14:14:09, 12.03s/it] 40%|███▉ | 2785/7045 [9:03:25<13:54:38, 11.76s/it] {'loss': 1.1592, 'learning_rate': 3.4454142584635175e-06, 'epoch': 0.4} + 40%|███▉ | 2785/7045 [9:03:25<13:54:38, 11.76s/it] 40%|███▉ | 2786/7045 [9:03:36<13:39:57, 11.55s/it] {'loss': 1.124, 'learning_rate': 3.4443500976025557e-06, 'epoch': 0.4} + 40%|███▉ | 2786/7045 [9:03:36<13:39:57, 11.55s/it] 40%|███▉ | 2787/7045 [9:03:47<13:29:08, 11.40s/it] {'loss': 1.0791, 'learning_rate': 3.4432857371188937e-06, 'epoch': 0.4} + 40%|███▉ | 2787/7045 [9:03:47<13:29:08, 11.40s/it] 40%|███▉ | 2788/7045 [9:03:59<13:46:15, 11.65s/it] {'loss': 1.1201, 'learning_rate': 3.442221177237523e-06, 'epoch': 0.4} + 40%|███▉ | 2788/7045 [9:03:59<13:46:15, 11.65s/it] 40%|███▉ | 2789/7045 [9:04:11<13:47:10, 11.66s/it] {'loss': 1.0854, 'learning_rate': 3.4411564181834765e-06, 'epoch': 0.4} + 40%|███▉ | 2789/7045 [9:04:11<13:47:10, 11.66s/it] 40%|███▉ | 2790/7045 [9:04:22<13:39:42, 11.56s/it] {'loss': 1.1123, 'learning_rate': 3.4400914601818303e-06, 'epoch': 0.4} + 40%|███▉ | 2790/7045 [9:04:22<13:39:42, 11.56s/it] 40%|███▉ | 2791/7045 [9:04:33<13:29:50, 11.42s/it] {'loss': 1.1123, 'learning_rate': 3.439026303457702e-06, 'epoch': 0.4} + 40%|███▉ | 2791/7045 [9:04:33<13:29:50, 11.42s/it] 40%|███▉ | 2792/7045 [9:04:44<13:18:28, 11.26s/it] {'loss': 1.124, 'learning_rate': 3.4379609482362495e-06, 'epoch': 0.4} + 40%|███▉ | 2792/7045 [9:04:44<13:18:28, 11.26s/it] 40%|███▉ | 2793/7045 [9:04:56<13:28:09, 11.40s/it] {'loss': 1.1133, 'learning_rate': 3.4368953947426765e-06, 'epoch': 0.4} + 40%|███▉ | 2793/7045 [9:04:56<13:28:09, 11.40s/it] 40%|███▉ | 2794/7045 [9:05:09<14:10:06, 12.00s/it] {'loss': 1.0898, 'learning_rate': 3.4358296432022247e-06, 'epoch': 0.4} + 40%|███▉ | 2794/7045 [9:05:09<14:10:06, 12.00s/it] 40%|███▉ | 2795/7045 [9:05:21<13:48:49, 11.70s/it] {'loss': 1.1074, 'learning_rate': 3.4347636938401813e-06, 'epoch': 0.4} + 40%|███▉ | 2795/7045 [9:05:21<13:48:49, 11.70s/it] 40%|███▉ | 2796/7045 [9:05:32<13:36:32, 11.53s/it] {'loss': 1.1113, 'learning_rate': 3.4336975468818723e-06, 'epoch': 0.4} + 40%|███▉ | 2796/7045 [9:05:32<13:36:32, 11.53s/it] 40%|███��� | 2797/7045 [9:05:43<13:24:11, 11.36s/it] {'loss': 1.0859, 'learning_rate': 3.432631202552666e-06, 'epoch': 0.4} + 40%|███▉ | 2797/7045 [9:05:43<13:24:11, 11.36s/it] 40%|███▉ | 2798/7045 [9:05:54<13:22:17, 11.33s/it] {'loss': 1.1055, 'learning_rate': 3.431564661077974e-06, 'epoch': 0.4} + 40%|███▉ | 2798/7045 [9:05:54<13:22:17, 11.33s/it] 40%|███▉ | 2799/7045 [9:06:06<13:48:52, 11.71s/it] {'loss': 1.1289, 'learning_rate': 3.430497922683248e-06, 'epoch': 0.4} + 40%|███▉ | 2799/7045 [9:06:06<13:48:52, 11.71s/it] 40%|███▉ | 2800/7045 [9:06:17<13:30:59, 11.46s/it] {'loss': 1.0962, 'learning_rate': 3.4294309875939823e-06, 'epoch': 0.4} + 40%|███▉ | 2800/7045 [9:06:17<13:30:59, 11.46s/it] 40%|███▉ | 2801/7045 [9:06:31<14:23:31, 12.21s/it] {'loss': 1.1294, 'learning_rate': 3.4283638560357124e-06, 'epoch': 0.4} + 40%|███▉ | 2801/7045 [9:06:31<14:23:31, 12.21s/it] 40%|███▉ | 2802/7045 [9:06:43<14:02:47, 11.92s/it] {'loss': 1.1406, 'learning_rate': 3.427296528234016e-06, 'epoch': 0.4} + 40%|███▉ | 2802/7045 [9:06:43<14:02:47, 11.92s/it] 40%|███▉ | 2803/7045 [9:06:54<13:43:31, 11.65s/it] {'loss': 1.0859, 'learning_rate': 3.4262290044145096e-06, 'epoch': 0.4} + 40%|███▉ | 2803/7045 [9:06:54<13:43:31, 11.65s/it] 40%|███▉ | 2804/7045 [9:07:07<14:17:47, 12.14s/it] {'loss': 1.1338, 'learning_rate': 3.4251612848028542e-06, 'epoch': 0.4} + 40%|███▉ | 2804/7045 [9:07:07<14:17:47, 12.14s/it] 40%|███▉ | 2805/7045 [9:07:18<14:05:32, 11.97s/it] {'loss': 1.1436, 'learning_rate': 3.424093369624752e-06, 'epoch': 0.4} + 40%|███▉ | 2805/7045 [9:07:18<14:05:32, 11.97s/it] 40%|███▉ | 2806/7045 [9:07:32<14:43:10, 12.50s/it] {'loss': 1.1426, 'learning_rate': 3.4230252591059455e-06, 'epoch': 0.4} + 40%|███▉ | 2806/7045 [9:07:32<14:43:10, 12.50s/it] 40%|███▉ | 2807/7045 [9:07:45<14:47:19, 12.56s/it] {'loss': 1.0981, 'learning_rate': 3.421956953472217e-06, 'epoch': 0.4} + 40%|███▉ | 2807/7045 [9:07:45<14:47:19, 12.56s/it] 40%|███▉ | 2808/7045 [9:07:56<14:22:06, 12.21s/it] {'loss': 1.1182, 'learning_rate': 3.420888452949393e-06, 'epoch': 0.4} + 40%|███▉ | 2808/7045 [9:07:56<14:22:06, 12.21s/it] 40%|███▉ | 2809/7045 [9:08:08<14:06:35, 11.99s/it] {'loss': 1.1084, 'learning_rate': 3.4198197577633396e-06, 'epoch': 0.4} + 40%|███▉ | 2809/7045 [9:08:08<14:06:35, 11.99s/it] 40%|███▉ | 2810/7045 [9:08:19<13:49:03, 11.75s/it] {'loss': 1.1484, 'learning_rate': 3.418750868139965e-06, 'epoch': 0.4} + 40%|███▉ | 2810/7045 [9:08:19<13:49:03, 11.75s/it] 40%|███▉ | 2811/7045 [9:08:30<13:38:06, 11.59s/it] {'loss': 1.1152, 'learning_rate': 3.4176817843052168e-06, 'epoch': 0.4} + 40%|███▉ | 2811/7045 [9:08:30<13:38:06, 11.59s/it] 40%|███▉ | 2812/7045 [9:08:43<14:13:01, 12.09s/it] {'loss': 1.123, 'learning_rate': 3.416612506485085e-06, 'epoch': 0.4} + 40%|███▉ | 2812/7045 [9:08:43<14:13:01, 12.09s/it] 40%|███▉ | 2813/7045 [9:08:55<14:01:55, 11.94s/it] {'loss': 1.0986, 'learning_rate': 3.4155430349056017e-06, 'epoch': 0.4} + 40%|███▉ | 2813/7045 [9:08:55<14:01:55, 11.94s/it] 40%|███▉ | 2814/7045 [9:09:06<13:48:55, 11.75s/it] {'loss': 1.1162, 'learning_rate': 3.4144733697928368e-06, 'epoch': 0.4} + 40%|███▉ | 2814/7045 [9:09:06<13:48:55, 11.75s/it] 40%|███▉ | 2815/7045 [9:09:18<13:50:40, 11.78s/it] {'loss': 1.0986, 'learning_rate': 3.4134035113729032e-06, 'epoch': 0.4} + 40%|███▉ | 2815/7045 [9:09:18<13:50:40, 11.78s/it] 40%|███▉ | 2816/7045 [9:09:32<14:28:23, 12.32s/it] {'loss': 1.1216, 'learning_rate': 3.412333459871954e-06, 'epoch': 0.4} + 40%|███▉ | 2816/7045 [9:09:32<14:28:23, 12.32s/it] 40%|███▉ | 2817/7045 [9:09:43<14:07:34, 12.03s/it] {'loss': 1.1084, 'learning_rate': 3.4112632155161856e-06, 'epoch': 0.4} + 40%|███▉ | 2817/7045 [9:09:43<14:07:34, 12.03s/it] 40%|████ | 2818/7045 [9:09:54<13:44:08, 11.70s/it] {'loss': 1.1436, 'learning_rate': 3.4101927785318306e-06, 'epoch': 0.4} + 40%|████ | 2818/7045 [9:09:54<13:44:08, 11.70s/it] 40%|████ | 2819/7045 [9:10:07<14:16:45, 12.16s/it] {'loss': 1.1226, 'learning_rate': 3.409122149145166e-06, 'epoch': 0.4} + 40%|████ | 2819/7045 [9:10:07<14:16:45, 12.16s/it] 40%|████ | 2820/7045 [9:10:21<14:48:15, 12.61s/it] {'loss': 1.1113, 'learning_rate': 3.4080513275825077e-06, 'epoch': 0.4} + 40%|████ | 2820/7045 [9:10:21<14:48:15, 12.61s/it] 40%|████ | 2821/7045 [9:10:33<14:33:12, 12.40s/it] {'loss': 1.1357, 'learning_rate': 3.4069803140702136e-06, 'epoch': 0.4} + 40%|████ | 2821/7045 [9:10:33<14:33:12, 12.40s/it] 40%|████ | 2822/7045 [9:10:45<14:19:47, 12.22s/it] {'loss': 1.0908, 'learning_rate': 3.4059091088346793e-06, 'epoch': 0.4} + 40%|████ | 2822/7045 [9:10:45<14:19:47, 12.22s/it] 40%|████ | 2823/7045 [9:10:57<14:22:28, 12.26s/it] {'loss': 1.1172, 'learning_rate': 3.4048377121023444e-06, 'epoch': 0.4} + 40%|████ | 2823/7045 [9:10:57<14:22:28, 12.26s/it] 40%|████ | 2824/7045 [9:11:09<14:15:43, 12.16s/it] {'loss': 1.1543, 'learning_rate': 3.403766124099689e-06, 'epoch': 0.4} + 40%|████ | 2824/7045 [9:11:09<14:15:43, 12.16s/it] 40%|████ | 2825/7045 [9:11:21<14:21:22, 12.25s/it] {'loss': 1.0469, 'learning_rate': 3.402694345053229e-06, 'epoch': 0.4} + 40%|████ | 2825/7045 [9:11:21<14:21:22, 12.25s/it] 40%|████ | 2826/7045 [9:11:32<13:57:01, 11.90s/it] {'loss': 1.1084, 'learning_rate': 3.4016223751895263e-06, 'epoch': 0.4} + 40%|████ | 2826/7045 [9:11:32<13:57:01, 11.90s/it] 40%|████ | 2827/7045 [9:11:44<13:46:12, 11.75s/it] {'loss': 1.1426, 'learning_rate': 3.4005502147351786e-06, 'epoch': 0.4} + 40%|████ | 2827/7045 [9:11:44<13:46:12, 11.75s/it] 40%|████ | 2828/7045 [9:11:57<14:07:09, 12.05s/it] {'loss': 1.1309, 'learning_rate': 3.399477863916828e-06, 'epoch': 0.4} + 40%|████ | 2828/7045 [9:11:57<14:07:09, 12.05s/it] 40%|████ | 2829/7045 [9:12:08<13:43:07, 11.71s/it] {'loss': 1.1035, 'learning_rate': 3.3984053229611537e-06, 'epoch': 0.4} + 40%|████ | 2829/7045 [9:12:08<13:43:07, 11.71s/it] 40%|████ | 2830/7045 [9:12:20<14:03:50, 12.01s/it] {'loss': 1.125, 'learning_rate': 3.3973325920948763e-06, 'epoch': 0.4} + 40%|████ | 2830/7045 [9:12:20<14:03:50, 12.01s/it] 40%|████ | 2831/7045 [9:12:31<13:41:19, 11.69s/it] {'loss': 1.1436, 'learning_rate': 3.3962596715447565e-06, 'epoch': 0.4} + 40%|████ | 2831/7045 [9:12:31<13:41:19, 11.69s/it] 40%|████ | 2832/7045 [9:12:45<14:19:06, 12.24s/it] {'loss': 1.1021, 'learning_rate': 3.395186561537596e-06, 'epoch': 0.4} + 40%|████ | 2832/7045 [9:12:45<14:19:06, 12.24s/it] 40%|████ | 2833/7045 [9:12:57<14:12:43, 12.15s/it] {'loss': 1.0884, 'learning_rate': 3.3941132623002337e-06, 'epoch': 0.4} + 40%|████ | 2833/7045 [9:12:57<14:12:43, 12.15s/it] 40%|████ | 2834/7045 [9:13:07<13:38:45, 11.67s/it] {'loss': 1.083, 'learning_rate': 3.393039774059551e-06, 'epoch': 0.4} + 40%|████ | 2834/7045 [9:13:07<13:38:45, 11.67s/it] 40%|████ | 2835/7045 [9:13:18<13:28:53, 11.53s/it] {'loss': 1.1621, 'learning_rate': 3.3919660970424707e-06, 'epoch': 0.4} + 40%|████ | 2835/7045 [9:13:18<13:28:53, 11.53s/it] 40%|████ | 2836/7045 [9:13:30<13:22:29, 11.44s/it] {'loss': 1.1357, 'learning_rate': 3.3908922314759502e-06, 'epoch': 0.4} + 40%|████ | 2836/7045 [9:13:30<13:22:29, 11.44s/it] 40%|████ | 2837/7045 [9:13:41<13:21:27, 11.43s/it] {'loss': 1.1113, 'learning_rate': 3.389818177586992e-06, 'epoch': 0.4} + 40%|████ | 2837/7045 [9:13:41<13:21:27, 11.43s/it] 40%|████ | 2838/7045 [9:13:54<14:00:46, 11.99s/it] {'loss': 1.0879, 'learning_rate': 3.3887439356026356e-06, 'epoch': 0.4} + 40%|████ | 2838/7045 [9:13:54<14:00:46, 11.99s/it] 40%|████ | 2839/7045 [9:14:05<13:43:29, 11.75s/it] {'loss': 1.1094, 'learning_rate': 3.387669505749962e-06, 'epoch': 0.4} + 40%|████ | 2839/7045 [9:14:05<13:43:29, 11.75s/it] 40%|████ | 2840/7045 [9:14:17<13:28:41, 11.54s/it] {'loss': 1.1553, 'learning_rate': 3.3865948882560902e-06, 'epoch': 0.4} + 40%|████ | 2840/7045 [9:14:17<13:28:41, 11.54s/it] 40%|████ | 2841/7045 [9:14:28<13:19:32, 11.41s/it] {'loss': 1.1406, 'learning_rate': 3.38552008334818e-06, 'epoch': 0.4} + 40%|████ | 2841/7045 [9:14:28<13:19:32, 11.41s/it] 40%|████ | 2842/7045 [9:14:41<13:50:03, 11.85s/it] {'loss': 1.1162, 'learning_rate': 3.38444509125343e-06, 'epoch': 0.4} + 40%|████ | 2842/7045 [9:14:41<13:50:03, 11.85s/it] 40%|████ | 2843/7045 [9:14:53<13:56:21, 11.94s/it] {'loss': 1.0854, 'learning_rate': 3.38336991219908e-06, 'epoch': 0.4} + 40%|████ | 2843/7045 [9:14:53<13:56:21, 11.94s/it] 40%|████ | 2844/7045 [9:15:04<13:32:46, 11.61s/it] {'loss': 1.124, 'learning_rate': 3.3822945464124065e-06, 'epoch': 0.4} + 40%|████ | 2844/7045 [9:15:04<13:32:46, 11.61s/it] 40%|████ | 2845/7045 [9:15:14<13:18:21, 11.41s/it] {'loss': 1.1641, 'learning_rate': 3.381218994120728e-06, 'epoch': 0.4} + 40%|████ | 2845/7045 [9:15:14<13:18:21, 11.41s/it] 40%|████ | 2846/7045 [9:15:27<13:46:10, 11.81s/it] {'loss': 1.1323, 'learning_rate': 3.3801432555514014e-06, 'epoch': 0.4} + 40%|████ | 2846/7045 [9:15:27<13:46:10, 11.81s/it] 40%|████ | 2847/7045 [9:15:39<13:52:27, 11.90s/it] {'loss': 1.0957, 'learning_rate': 3.3790673309318233e-06, 'epoch': 0.4} + 40%|████ | 2847/7045 [9:15:39<13:52:27, 11.90s/it] 40%|████ | 2848/7045 [9:15:51<13:42:12, 11.75s/it] {'loss': 1.1021, 'learning_rate': 3.377991220489429e-06, 'epoch': 0.4} + 40%|████ | 2848/7045 [9:15:51<13:42:12, 11.75s/it] 40%|████ | 2849/7045 [9:16:02<13:40:06, 11.73s/it] {'loss': 1.0928, 'learning_rate': 3.376914924451694e-06, 'epoch': 0.4} + 40%|████ | 2849/7045 [9:16:02<13:40:06, 11.73s/it] 40%|████ | 2850/7045 [9:16:15<13:56:47, 11.97s/it] {'loss': 1.1348, 'learning_rate': 3.3758384430461325e-06, 'epoch': 0.4} + 40%|████ | 2850/7045 [9:16:15<13:56:47, 11.97s/it] 40%|████ | 2851/7045 [9:16:26<13:39:47, 11.73s/it] {'loss': 1.0957, 'learning_rate': 3.3747617765002993e-06, 'epoch': 0.4} + 40%|████ | 2851/7045 [9:16:26<13:39:47, 11.73s/it] 40%|████ | 2852/7045 [9:16:37<13:29:33, 11.58s/it] {'loss': 1.085, 'learning_rate': 3.373684925041784e-06, 'epoch': 0.4} + 40%|████ | 2852/7045 [9:16:37<13:29:33, 11.58s/it] 40%|████ | 2853/7045 [9:16:49<13:41:25, 11.76s/it] {'loss': 1.1377, 'learning_rate': 3.37260788889822e-06, 'epoch': 0.4} + 40%|████ | 2853/7045 [9:16:49<13:41:25, 11.76s/it] 41%|████ | 2854/7045 [9:17:01<13:43:03, 11.78s/it] {'loss': 1.1562, 'learning_rate': 3.371530668297278e-06, 'epoch': 0.41} + 41%|████ | 2854/7045 [9:17:01<13:43:03, 11.78s/it] 41%|████ | 2855/7045 [9:17:13<13:31:59, 11.63s/it] {'loss': 1.1406, 'learning_rate': 3.3704532634666683e-06, 'epoch': 0.41} + 41%|████ | 2855/7045 [9:17:13<13:31:59, 11.63s/it] 41%|█��██ | 2856/7045 [9:17:24<13:28:35, 11.58s/it] {'loss': 1.123, 'learning_rate': 3.3693756746341384e-06, 'epoch': 0.41} + 41%|████ | 2856/7045 [9:17:24<13:28:35, 11.58s/it] 41%|████ | 2857/7045 [9:17:35<13:20:37, 11.47s/it] {'loss': 1.1182, 'learning_rate': 3.368297902027476e-06, 'epoch': 0.41} + 41%|████ | 2857/7045 [9:17:35<13:20:37, 11.47s/it] 41%|████ | 2858/7045 [9:17:46<13:13:26, 11.37s/it] {'loss': 1.1377, 'learning_rate': 3.3672199458745085e-06, 'epoch': 0.41} + 41%|████ | 2858/7045 [9:17:46<13:13:26, 11.37s/it] 41%|████ | 2859/7045 [9:17:58<13:27:25, 11.57s/it] {'loss': 1.1338, 'learning_rate': 3.3661418064030993e-06, 'epoch': 0.41} + 41%|████ | 2859/7045 [9:17:58<13:27:25, 11.57s/it] 41%|████ | 2860/7045 [9:18:11<13:48:30, 11.88s/it] {'loss': 1.126, 'learning_rate': 3.3650634838411545e-06, 'epoch': 0.41} + 41%|████ | 2860/7045 [9:18:11<13:48:30, 11.88s/it] 41%|████ | 2861/7045 [9:18:22<13:36:13, 11.70s/it] {'loss': 1.1436, 'learning_rate': 3.3639849784166147e-06, 'epoch': 0.41} + 41%|████ | 2861/7045 [9:18:22<13:36:13, 11.70s/it] 41%|████ | 2862/7045 [9:18:34<13:27:41, 11.59s/it] {'loss': 1.1582, 'learning_rate': 3.3629062903574633e-06, 'epoch': 0.41} + 41%|████ | 2862/7045 [9:18:34<13:27:41, 11.59s/it] 41%|████ | 2863/7045 [9:18:45<13:20:54, 11.49s/it] {'loss': 1.0879, 'learning_rate': 3.3618274198917184e-06, 'epoch': 0.41} + 41%|████ | 2863/7045 [9:18:45<13:20:54, 11.49s/it] 41%|████ | 2864/7045 [9:18:56<13:12:34, 11.37s/it] {'loss': 1.1875, 'learning_rate': 3.3607483672474396e-06, 'epoch': 0.41} + 41%|████ | 2864/7045 [9:18:56<13:12:34, 11.37s/it] 41%|████ | 2865/7045 [9:19:07<13:13:07, 11.38s/it] {'loss': 1.1104, 'learning_rate': 3.359669132652723e-06, 'epoch': 0.41} + 41%|████ | 2865/7045 [9:19:07<13:13:07, 11.38s/it] 41%|████ | 2866/7045 [9:19:19<13:16:46, 11.44s/it] {'loss': 1.1001, 'learning_rate': 3.3585897163357044e-06, 'epoch': 0.41} + 41%|████ | 2866/7045 [9:19:19<13:16:46, 11.44s/it] 41%|████ | 2867/7045 [9:19:32<13:51:07, 11.94s/it] {'loss': 1.1143, 'learning_rate': 3.357510118524558e-06, 'epoch': 0.41} + 41%|████ | 2867/7045 [9:19:32<13:51:07, 11.94s/it] 41%|████ | 2868/7045 [9:19:43<13:32:57, 11.68s/it] {'loss': 1.1221, 'learning_rate': 3.3564303394474958e-06, 'epoch': 0.41} + 41%|████ | 2868/7045 [9:19:43<13:32:57, 11.68s/it] 41%|████ | 2869/7045 [9:19:54<13:19:58, 11.49s/it] {'loss': 1.1514, 'learning_rate': 3.3553503793327685e-06, 'epoch': 0.41} + 41%|████ | 2869/7045 [9:19:54<13:19:58, 11.49s/it] 41%|████ | 2870/7045 [9:20:06<13:17:12, 11.46s/it] {'loss': 1.1357, 'learning_rate': 3.3542702384086646e-06, 'epoch': 0.41} + 41%|████ | 2870/7045 [9:20:06<13:17:12, 11.46s/it] 41%|████ | 2871/7045 [9:20:17<13:16:41, 11.45s/it] {'loss': 1.1416, 'learning_rate': 3.35318991690351e-06, 'epoch': 0.41} + 41%|████ | 2871/7045 [9:20:17<13:16:41, 11.45s/it] 41%|████ | 2872/7045 [9:20:28<13:09:46, 11.36s/it] {'loss': 1.1338, 'learning_rate': 3.3521094150456725e-06, 'epoch': 0.41} + 41%|████ | 2872/7045 [9:20:28<13:09:46, 11.36s/it] 41%|████ | 2873/7045 [9:20:39<13:04:04, 11.28s/it] {'loss': 1.1558, 'learning_rate': 3.351028733063554e-06, 'epoch': 0.41} + 41%|████ | 2873/7045 [9:20:39<13:04:04, 11.28s/it] 41%|████ | 2874/7045 [9:20:52<13:37:12, 11.76s/it] {'loss': 1.1318, 'learning_rate': 3.3499478711855947e-06, 'epoch': 0.41} + 41%|████ | 2874/7045 [9:20:52<13:37:12, 11.76s/it] 41%|████ | 2875/7045 [9:21:05<14:01:30, 12.11s/it] {'loss': 1.0947, 'learning_rate': 3.348866829640276e-06, 'epoch': 0.41} + 41%|████ | 2875/7045 [9:21:05<14:01:30, 12.11s/it] 41%|████ | 2876/7045 [9:21:17<14:00:34, 12.10s/it] {'loss': 1.0918, 'learning_rate': 3.347785608656115e-06, 'epoch': 0.41} + 41%|████ | 2876/7045 [9:21:17<14:00:34, 12.10s/it] 41%|████ | 2877/7045 [9:21:28<13:42:55, 11.85s/it] {'loss': 1.0864, 'learning_rate': 3.3467042084616665e-06, 'epoch': 0.41} + 41%|████ | 2877/7045 [9:21:28<13:42:55, 11.85s/it] 41%|████ | 2878/7045 [9:21:40<13:27:48, 11.63s/it] {'loss': 1.1016, 'learning_rate': 3.345622629285523e-06, 'epoch': 0.41} + 41%|████ | 2878/7045 [9:21:40<13:27:48, 11.63s/it] 41%|████ | 2879/7045 [9:21:51<13:29:31, 11.66s/it] {'loss': 1.127, 'learning_rate': 3.3445408713563167e-06, 'epoch': 0.41} + 41%|████ | 2879/7045 [9:21:51<13:29:31, 11.66s/it] 41%|████ | 2880/7045 [9:22:02<13:15:33, 11.46s/it] {'loss': 1.1074, 'learning_rate': 3.343458934902717e-06, 'epoch': 0.41} + 41%|████ | 2880/7045 [9:22:02<13:15:33, 11.46s/it] 41%|████ | 2881/7045 [9:22:13<12:56:35, 11.19s/it] {'loss': 1.1011, 'learning_rate': 3.3423768201534283e-06, 'epoch': 0.41} + 41%|████ | 2881/7045 [9:22:13<12:56:35, 11.19s/it] 41%|████ | 2882/7045 [9:22:24<12:52:02, 11.13s/it] {'loss': 1.1055, 'learning_rate': 3.3412945273371964e-06, 'epoch': 0.41} + 41%|████ | 2882/7045 [9:22:24<12:52:02, 11.13s/it] 41%|████ | 2883/7045 [9:22:35<12:47:28, 11.06s/it] {'loss': 1.0757, 'learning_rate': 3.3402120566828033e-06, 'epoch': 0.41} + 41%|████ | 2883/7045 [9:22:35<12:47:28, 11.06s/it] 41%|████ | 2884/7045 [9:22:46<12:55:39, 11.18s/it] {'loss': 1.1279, 'learning_rate': 3.3391294084190686e-06, 'epoch': 0.41} + 41%|████ | 2884/7045 [9:22:46<12:55:39, 11.18s/it] 41%|████ | 2885/7045 [9:22:57<12:52:43, 11.15s/it] {'loss': 1.0771, 'learning_rate': 3.3380465827748482e-06, 'epoch': 0.41} + 41%|████ | 2885/7045 [9:22:57<12:52:43, 11.15s/it] 41%|████ | 2886/7045 [9:23:10<13:18:16, 11.52s/it] {'loss': 1.1006, 'learning_rate': 3.3369635799790372e-06, 'epoch': 0.41} + 41%|████ | 2886/7045 [9:23:10<13:18:16, 11.52s/it] 41%|████ | 2887/7045 [9:23:20<13:03:38, 11.31s/it] {'loss': 1.0986, 'learning_rate': 3.335880400260568e-06, 'epoch': 0.41} + 41%|████ | 2887/7045 [9:23:20<13:03:38, 11.31s/it] 41%|████ | 2888/7045 [9:23:32<13:04:55, 11.33s/it] {'loss': 1.1455, 'learning_rate': 3.33479704384841e-06, 'epoch': 0.41} + 41%|████ | 2888/7045 [9:23:32<13:04:55, 11.33s/it] 41%|████ | 2889/7045 [9:23:43<13:02:23, 11.30s/it] {'loss': 1.0942, 'learning_rate': 3.3337135109715684e-06, 'epoch': 0.41} + 41%|████ | 2889/7045 [9:23:43<13:02:23, 11.30s/it] 41%|████ | 2890/7045 [9:23:54<12:59:43, 11.26s/it] {'loss': 1.1836, 'learning_rate': 3.3326298018590897e-06, 'epoch': 0.41} + 41%|████ | 2890/7045 [9:23:54<12:59:43, 11.26s/it] 41%|████ | 2891/7045 [9:24:08<13:42:27, 11.88s/it] {'loss': 1.0776, 'learning_rate': 3.331545916740054e-06, 'epoch': 0.41} + 41%|████ | 2891/7045 [9:24:08<13:42:27, 11.88s/it] 41%|████ | 2892/7045 [9:24:19<13:25:58, 11.64s/it] {'loss': 1.1396, 'learning_rate': 3.3304618558435787e-06, 'epoch': 0.41} + 41%|████ | 2892/7045 [9:24:19<13:25:58, 11.64s/it] 41%|████ | 2893/7045 [9:24:29<13:08:06, 11.39s/it] {'loss': 1.1152, 'learning_rate': 3.3293776193988213e-06, 'epoch': 0.41} + 41%|████ | 2893/7045 [9:24:29<13:08:06, 11.39s/it] 41%|████ | 2894/7045 [9:24:41<13:19:13, 11.55s/it] {'loss': 1.1016, 'learning_rate': 3.328293207634973e-06, 'epoch': 0.41} + 41%|████ | 2894/7045 [9:24:41<13:19:13, 11.55s/it] 41%|████ | 2895/7045 [9:24:53<13:28:17, 11.69s/it] {'loss': 1.1401, 'learning_rate': 3.3272086207812655e-06, 'epoch': 0.41} + 41%|████ | 2895/7045 [9:24:53<13:28:17, 11.69s/it] 41%|████ | 2896/7045 [9:25:06<13:42:00, 11.89s/it] {'loss': 1.124, 'learning_rate': 3.3261238590669635e-06, 'epoch': 0.41} + 41%|████ | 2896/7045 [9:25:06<13:42:00, 11.89s/it] 41%|████ | 2897/7045 [9:25:18<13:46:33, 11.96s/it] {'loss': 1.1318, 'learning_rate': 3.325038922721372e-06, 'epoch': 0.41} + 41%|████ | 2897/7045 [9:25:18<13:46:33, 11.96s/it] 41%|████ | 2898/7045 [9:25:29<13:35:48, 11.80s/it] {'loss': 1.1152, 'learning_rate': 3.3239538119738318e-06, 'epoch': 0.41} + 41%|████ | 2898/7045 [9:25:29<13:35:48, 11.80s/it] 41%|████ | 2899/7045 [9:25:40<13:19:45, 11.57s/it] {'loss': 1.1172, 'learning_rate': 3.322868527053721e-06, 'epoch': 0.41} + 41%|████ | 2899/7045 [9:25:40<13:19:45, 11.57s/it] 41%|████ | 2900/7045 [9:25:55<14:28:26, 12.57s/it] {'loss': 1.1191, 'learning_rate': 3.3217830681904516e-06, 'epoch': 0.41} + 41%|████ | 2900/7045 [9:25:55<14:28:26, 12.57s/it] 41%|████ | 2901/7045 [9:26:07<14:09:57, 12.31s/it] {'loss': 1.1157, 'learning_rate': 3.320697435613477e-06, 'epoch': 0.41} + 41%|████ | 2901/7045 [9:26:07<14:09:57, 12.31s/it] 41%|████ | 2902/7045 [9:26:18<13:48:01, 11.99s/it] {'loss': 1.0977, 'learning_rate': 3.319611629552285e-06, 'epoch': 0.41} + 41%|████ | 2902/7045 [9:26:18<13:48:01, 11.99s/it] 41%|████ | 2903/7045 [9:26:31<14:00:26, 12.17s/it] {'loss': 1.123, 'learning_rate': 3.318525650236399e-06, 'epoch': 0.41} + 41%|████ | 2903/7045 [9:26:31<14:00:26, 12.17s/it] 41%|████ | 2904/7045 [9:26:43<14:11:53, 12.34s/it] {'loss': 1.1494, 'learning_rate': 3.3174394978953805e-06, 'epoch': 0.41} + 41%|████ | 2904/7045 [9:26:43<14:11:53, 12.34s/it] 41%|████ | 2905/7045 [9:26:55<13:46:44, 11.98s/it] {'loss': 1.1631, 'learning_rate': 3.316353172758828e-06, 'epoch': 0.41} + 41%|████ | 2905/7045 [9:26:55<13:46:44, 11.98s/it] 41%|████ | 2906/7045 [9:27:06<13:29:33, 11.74s/it] {'loss': 1.1523, 'learning_rate': 3.3152666750563757e-06, 'epoch': 0.41} + 41%|████ | 2906/7045 [9:27:06<13:29:33, 11.74s/it] 41%|████▏ | 2907/7045 [9:27:17<13:11:13, 11.47s/it] {'loss': 1.1758, 'learning_rate': 3.314180005017693e-06, 'epoch': 0.41} + 41%|████▏ | 2907/7045 [9:27:17<13:11:13, 11.47s/it] 41%|████▏ | 2908/7045 [9:27:28<13:12:44, 11.50s/it] {'loss': 1.1211, 'learning_rate': 3.3130931628724885e-06, 'epoch': 0.41} + 41%|████▏ | 2908/7045 [9:27:28<13:12:44, 11.50s/it] 41%|████▏ | 2909/7045 [9:27:39<13:05:35, 11.40s/it] {'loss': 1.1553, 'learning_rate': 3.312006148850505e-06, 'epoch': 0.41} + 41%|████▏ | 2909/7045 [9:27:39<13:05:35, 11.40s/it] 41%|████▏ | 2910/7045 [9:27:50<12:56:28, 11.27s/it] {'loss': 1.0894, 'learning_rate': 3.310918963181524e-06, 'epoch': 0.41} + 41%|████▏ | 2910/7045 [9:27:50<12:56:28, 11.27s/it] 41%|████▏ | 2911/7045 [9:28:02<13:13:19, 11.51s/it] {'loss': 1.1201, 'learning_rate': 3.3098316060953595e-06, 'epoch': 0.41} + 41%|████▏ | 2911/7045 [9:28:02<13:13:19, 11.51s/it] 41%|████▏ | 2912/7045 [9:28:13<13:03:09, 11.37s/it] {'loss': 1.0762, 'learning_rate': 3.308744077821865e-06, 'epoch': 0.41} + 41%|████▏ | 2912/7045 [9:28:13<13:03:09, 11.37s/it] 41%|████▏ | 2913/7045 [9:28:25<13:03:30, 11.38s/it] {'loss': 1.1982, 'learning_rate': 3.3076563785909288e-06, 'epoch': 0.41} + 41%|████▏ | 2913/7045 [9:28:25<13:03:30, 11.38s/it] 41%|████▏ | 2914/7045 [9:28:36<13:03:54, 11.39s/it] {'loss': 1.1621, 'learning_rate': 3.306568508632476e-06, 'epoch': 0.41} + 41%|████▏ | 2914/7045 [9:28:36<13:03:54, 11.39s/it] 41%|████▏ | 2915/7045 [9:28:47<12:59:41, 11.33s/it] {'loss': 1.1387, 'learning_rate': 3.305480468176467e-06, 'epoch': 0.41} + 41%|████▏ | 2915/7045 [9:28:47<12:59:41, 11.33s/it] 41%|████▏ | 2916/7045 [9:29:01<13:35:09, 11.85s/it] {'loss': 1.0996, 'learning_rate': 3.304392257452899e-06, 'epoch': 0.41} + 41%|████▏ | 2916/7045 [9:29:01<13:35:09, 11.85s/it] 41%|████▏ | 2917/7045 [9:29:13<13:42:27, 11.95s/it] {'loss': 1.0908, 'learning_rate': 3.303303876691805e-06, 'epoch': 0.41} + 41%|████▏ | 2917/7045 [9:29:13<13:42:27, 11.95s/it] 41%|████▏ | 2918/7045 [9:29:24<13:38:44, 11.90s/it] {'loss': 1.1328, 'learning_rate': 3.3022153261232537e-06, 'epoch': 0.41} + 41%|████▏ | 2918/7045 [9:29:24<13:38:44, 11.90s/it] 41%|████▏ | 2919/7045 [9:29:36<13:29:38, 11.77s/it] {'loss': 1.1113, 'learning_rate': 3.3011266059773493e-06, 'epoch': 0.41} + 41%|████▏ | 2919/7045 [9:29:36<13:29:38, 11.77s/it] 41%|████▏ | 2920/7045 [9:29:47<13:11:29, 11.51s/it] {'loss': 1.0977, 'learning_rate': 3.3000377164842324e-06, 'epoch': 0.41} + 41%|████▏ | 2920/7045 [9:29:47<13:11:29, 11.51s/it] 41%|████▏ | 2921/7045 [9:30:00<13:42:26, 11.97s/it] {'loss': 1.1523, 'learning_rate': 3.298948657874081e-06, 'epoch': 0.41} + 41%|████▏ | 2921/7045 [9:30:00<13:42:26, 11.97s/it] 41%|████▏ | 2922/7045 [9:30:11<13:17:54, 11.61s/it] {'loss': 1.0908, 'learning_rate': 3.297859430377105e-06, 'epoch': 0.41} + 41%|████▏ | 2922/7045 [9:30:11<13:17:54, 11.61s/it] 41%|████▏ | 2923/7045 [9:30:22<13:07:24, 11.46s/it] {'loss': 1.1025, 'learning_rate': 3.296770034223553e-06, 'epoch': 0.41} + 41%|████▏ | 2923/7045 [9:30:22<13:07:24, 11.46s/it] 42%|████▏ | 2924/7045 [9:30:33<13:02:34, 11.39s/it] {'loss': 1.1016, 'learning_rate': 3.295680469643709e-06, 'epoch': 0.42} + 42%|████▏ | 2924/7045 [9:30:33<13:02:34, 11.39s/it] 42%|████▏ | 2925/7045 [9:30:46<13:39:00, 11.93s/it] {'loss': 1.1016, 'learning_rate': 3.294590736867892e-06, 'epoch': 0.42} + 42%|████▏ | 2925/7045 [9:30:46<13:39:00, 11.93s/it] 42%|████▏ | 2926/7045 [9:30:59<14:04:29, 12.30s/it] {'loss': 1.125, 'learning_rate': 3.293500836126455e-06, 'epoch': 0.42} + 42%|████▏ | 2926/7045 [9:30:59<14:04:29, 12.30s/it] 42%|████▏ | 2927/7045 [9:31:12<14:18:43, 12.51s/it] {'loss': 1.0957, 'learning_rate': 3.29241076764979e-06, 'epoch': 0.42} + 42%|████▏ | 2927/7045 [9:31:12<14:18:43, 12.51s/it] 42%|████▏ | 2928/7045 [9:31:23<13:48:27, 12.07s/it] {'loss': 1.1133, 'learning_rate': 3.2913205316683227e-06, 'epoch': 0.42} + 42%|████▏ | 2928/7045 [9:31:23<13:48:27, 12.07s/it] 42%|████▏ | 2929/7045 [9:31:36<13:53:13, 12.15s/it] {'loss': 1.1152, 'learning_rate': 3.2902301284125125e-06, 'epoch': 0.42} + 42%|████▏ | 2929/7045 [9:31:36<13:53:13, 12.15s/it] 42%|████▏ | 2930/7045 [9:31:47<13:26:50, 11.76s/it] {'loss': 1.1689, 'learning_rate': 3.289139558112857e-06, 'epoch': 0.42} + 42%|████▏ | 2930/7045 [9:31:47<13:26:50, 11.76s/it] 42%|████▏ | 2931/7045 [9:32:00<13:54:19, 12.17s/it] {'loss': 1.0957, 'learning_rate': 3.2880488209998874e-06, 'epoch': 0.42} + 42%|████▏ | 2931/7045 [9:32:00<13:54:19, 12.17s/it] 42%|████▏ | 2932/7045 [9:32:11<13:29:01, 11.80s/it] {'loss': 1.1211, 'learning_rate': 3.28695791730417e-06, 'epoch': 0.42} + 42%|████▏ | 2932/7045 [9:32:11<13:29:01, 11.80s/it] 42%|████▏ | 2933/7045 [9:32:24<13:54:26, 12.18s/it] {'loss': 1.0962, 'learning_rate': 3.2858668472563082e-06, 'epoch': 0.42} + 42%|████▏ | 2933/7045 [9:32:24<13:54:26, 12.18s/it] 42%|████▏ | 2934/7045 [9:32:35<13:39:47, 11.96s/it] {'loss': 1.1533, 'learning_rate': 3.284775611086938e-06, 'epoch': 0.42} + 42%|████▏ | 2934/7045 [9:32:35<13:39:47, 11.96s/it] 42%|████▏ | 2935/7045 [9:32:46<13:18:09, 11.65s/it] {'loss': 1.1016, 'learning_rate': 3.283684209026733e-06, 'epoch': 0.42} + 42%|████▏ | 2935/7045 [9:32:46<13:18:09, 11.65s/it] 42%|████▏ | 2936/7045 [9:32:57<13:01:32, 11.41s/it] {'loss': 1.1113, 'learning_rate': 3.2825926413064e-06, 'epoch': 0.42} + 42%|████▏ | 2936/7045 [9:32:57<13:01:32, 11.41s/it] 42%|████▏ | 2937/7045 [9:33:08<12:59:10, 11.38s/it] {'loss': 1.0898, 'learning_rate': 3.2815009081566818e-06, 'epoch': 0.42} + 42%|████▏ | 2937/7045 [9:33:08<12:59:10, 11.38s/it] 42%|████▏ | 2938/7045 [9:33:19<12:53:35, 11.30s/it] {'loss': 1.0977, 'learning_rate': 3.280409009808355e-06, 'epoch': 0.42} + 42%|████▏ | 2938/7045 [9:33:19<12:53:35, 11.30s/it] 42%|████▏ | 2939/7045 [9:33:30<12:47:41, 11.22s/it] {'loss': 1.1318, 'learning_rate': 3.2793169464922332e-06, 'epoch': 0.42} + 42%|████▏ | 2939/7045 [9:33:30<12:47:41, 11.22s/it] 42%|████▏ | 2940/7045 [9:33:42<12:48:39, 11.24s/it] {'loss': 1.1294, 'learning_rate': 3.278224718439163e-06, 'epoch': 0.42} + 42%|████▏ | 2940/7045 [9:33:42<12:48:39, 11.24s/it] 42%|████▏ | 2941/7045 [9:33:53<12:51:02, 11.27s/it] {'loss': 1.1523, 'learning_rate': 3.2771323258800268e-06, 'epoch': 0.42} + 42%|████▏ | 2941/7045 [9:33:53<12:51:02, 11.27s/it] 42%|████▏ | 2942/7045 [9:34:04<12:52:36, 11.30s/it] {'loss': 1.1328, 'learning_rate': 3.276039769045741e-06, 'epoch': 0.42} + 42%|████▏ | 2942/7045 [9:34:04<12:52:36, 11.30s/it] 42%|████▏ | 2943/7045 [9:34:17<13:21:58, 11.73s/it] {'loss': 1.1167, 'learning_rate': 3.274947048167258e-06, 'epoch': 0.42} + 42%|████▏ | 2943/7045 [9:34:17<13:21:58, 11.73s/it] 42%|████▏ | 2944/7045 [9:34:29<13:32:11, 11.88s/it] {'loss': 1.1206, 'learning_rate': 3.2738541634755624e-06, 'epoch': 0.42} + 42%|████▏ | 2944/7045 [9:34:29<13:32:11, 11.88s/it] 42%|████▏ | 2945/7045 [9:34:41<13:16:44, 11.66s/it] {'loss': 1.1758, 'learning_rate': 3.2727611152016763e-06, 'epoch': 0.42} + 42%|████▏ | 2945/7045 [9:34:41<13:16:44, 11.66s/it] 42%|████▏ | 2946/7045 [9:34:51<13:02:14, 11.45s/it] {'loss': 1.1543, 'learning_rate': 3.271667903576656e-06, 'epoch': 0.42} + 42%|████▏ | 2946/7045 [9:34:51<13:02:14, 11.45s/it] 42%|████▏ | 2947/7045 [9:35:03<12:59:37, 11.41s/it] {'loss': 1.1143, 'learning_rate': 3.27057452883159e-06, 'epoch': 0.42} + 42%|████▏ | 2947/7045 [9:35:03<12:59:37, 11.41s/it] 42%|████▏ | 2948/7045 [9:35:16<13:38:19, 11.98s/it] {'loss': 1.084, 'learning_rate': 3.2694809911976034e-06, 'epoch': 0.42} + 42%|████▏ | 2948/7045 [9:35:16<13:38:19, 11.98s/it] 42%|████▏ | 2949/7045 [9:35:27<13:22:38, 11.76s/it] {'loss': 1.125, 'learning_rate': 3.2683872909058545e-06, 'epoch': 0.42} + 42%|████▏ | 2949/7045 [9:35:27<13:22:38, 11.76s/it] 42%|████▏ | 2950/7045 [9:35:38<13:08:37, 11.55s/it] {'loss': 1.1299, 'learning_rate': 3.267293428187538e-06, 'epoch': 0.42} + 42%|████▏ | 2950/7045 [9:35:38<13:08:37, 11.55s/it] 42%|████▏ | 2951/7045 [9:35:50<13:05:50, 11.52s/it] {'loss': 1.1553, 'learning_rate': 3.26619940327388e-06, 'epoch': 0.42} + 42%|████▏ | 2951/7045 [9:35:50<13:05:50, 11.52s/it] 42%|████▏ | 2952/7045 [9:36:01<13:00:06, 11.44s/it] {'loss': 1.0957, 'learning_rate': 3.2651052163961426e-06, 'epoch': 0.42} + 42%|████▏ | 2952/7045 [9:36:01<13:00:06, 11.44s/it] 42%|████▏ | 2953/7045 [9:36:12<12:51:19, 11.31s/it] {'loss': 1.125, 'learning_rate': 3.264010867785623e-06, 'epoch': 0.42} + 42%|████▏ | 2953/7045 [9:36:12<12:51:19, 11.31s/it] 42%|████▏ | 2954/7045 [9:36:25<13:13:01, 11.63s/it] {'loss': 1.0928, 'learning_rate': 3.2629163576736513e-06, 'epoch': 0.42} + 42%|████▏ | 2954/7045 [9:36:25<13:13:01, 11.63s/it] 42%|████▏ | 2955/7045 [9:36:37<13:30:42, 11.89s/it] {'loss': 1.1064, 'learning_rate': 3.2618216862915906e-06, 'epoch': 0.42} + 42%|████▏ | 2955/7045 [9:36:37<13:30:42, 11.89s/it] 42%|████▏ | 2956/7045 [9:36:48<13:13:39, 11.65s/it] {'loss': 1.1475, 'learning_rate': 3.2607268538708404e-06, 'epoch': 0.42} + 42%|████▏ | 2956/7045 [9:36:48<13:13:39, 11.65s/it] 42%|████▏ | 2957/7045 [9:37:00<13:14:31, 11.66s/it] {'loss': 1.1113, 'learning_rate': 3.259631860642834e-06, 'epoch': 0.42} + 42%|████▏ | 2957/7045 [9:37:00<13:14:31, 11.66s/it] 42%|████▏ | 2958/7045 [9:37:11<13:03:56, 11.51s/it] {'loss': 1.1582, 'learning_rate': 3.258536706839038e-06, 'epoch': 0.42} + 42%|████▏ | 2958/7045 [9:37:11<13:03:56, 11.51s/it] 42%|████▏ | 2959/7045 [9:37:24<13:25:44, 11.83s/it] {'loss': 1.1553, 'learning_rate': 3.2574413926909508e-06, 'epoch': 0.42} + 42%|████▏ | 2959/7045 [9:37:24<13:25:44, 11.83s/it] 42%|████▏ | 2960/7045 [9:37:35<13:13:43, 11.66s/it] {'loss': 1.0967, 'learning_rate': 3.2563459184301093e-06, 'epoch': 0.42} + 42%|████▏ | 2960/7045 [9:37:35<13:13:43, 11.66s/it] 42%|████▏ | 2961/7045 [9:37:49<13:56:02, 12.28s/it] {'loss': 1.0933, 'learning_rate': 3.2552502842880806e-06, 'epoch': 0.42} + 42%|████▏ | 2961/7045 [9:37:49<13:56:02, 12.28s/it] 42%|████▏ | 2962/7045 [9:38:01<13:55:15, 12.27s/it] {'loss': 1.0811, 'learning_rate': 3.2541544904964668e-06, 'epoch': 0.42} + 42%|████▏ | 2962/7045 [9:38:01<13:55:15, 12.27s/it] 42%|████▏ | 2963/7045 [9:38:14<14:14:58, 12.57s/it] {'loss': 1.0718, 'learning_rate': 3.253058537286904e-06, 'epoch': 0.42} + 42%|████▏ | 2963/7045 [9:38:14<14:14:58, 12.57s/it] 42%|████▏ | 2964/7045 [9:38:25<13:45:24, 12.14s/it] {'loss': 1.1367, 'learning_rate': 3.2519624248910613e-06, 'epoch': 0.42} + 42%|████▏ | 2964/7045 [9:38:25<13:45:24, 12.14s/it] 42%|████▏ | 2965/7045 [9:38:36<13:24:02, 11.82s/it] {'loss': 1.126, 'learning_rate': 3.2508661535406434e-06, 'epoch': 0.42} + 42%|████▏ | 2965/7045 [9:38:36<13:24:02, 11.82s/it] 42%|████▏ | 2966/7045 [9:38:48<13:14:52, 11.69s/it] {'loss': 1.1377, 'learning_rate': 3.249769723467385e-06, 'epoch': 0.42} + 42%|████▏ | 2966/7045 [9:38:48<13:14:52, 11.69s/it] 42%|████▏ | 2967/7045 [9:39:01<13:39:59, 12.06s/it] {'loss': 1.0811, 'learning_rate': 3.248673134903057e-06, 'epoch': 0.42} + 42%|████▏ | 2967/7045 [9:39:01<13:39:59, 12.06s/it] 42%|████▏ | 2968/7045 [9:39:13<13:48:24, 12.19s/it] {'loss': 1.0781, 'learning_rate': 3.2475763880794635e-06, 'epoch': 0.42} + 42%|████▏ | 2968/7045 [9:39:13<13:48:24, 12.19s/it] 42%|████▏ | 2969/7045 [9:39:24<13:27:40, 11.89s/it] {'loss': 1.1084, 'learning_rate': 3.246479483228443e-06, 'epoch': 0.42} + 42%|████▏ | 2969/7045 [9:39:24<13:27:40, 11.89s/it] 42%|████▏ | 2970/7045 [9:39:37<13:39:32, 12.07s/it] {'loss': 1.0884, 'learning_rate': 3.245382420581864e-06, 'epoch': 0.42} + 42%|████▏ | 2970/7045 [9:39:37<13:39:32, 12.07s/it] 42%|████▏ | 2971/7045 [9:39:48<13:23:59, 11.84s/it] {'loss': 1.1387, 'learning_rate': 3.244285200371632e-06, 'epoch': 0.42} + 42%|████▏ | 2971/7045 [9:39:48<13:23:59, 11.84s/it] 42%|████▏ | 2972/7045 [9:40:01<13:47:09, 12.19s/it] {'loss': 1.0698, 'learning_rate': 3.243187822829684e-06, 'epoch': 0.42} + 42%|████▏ | 2972/7045 [9:40:01<13:47:09, 12.19s/it] 42%|████▏ | 2973/7045 [9:40:14<14:02:50, 12.42s/it] {'loss': 1.1025, 'learning_rate': 3.2420902881879903e-06, 'epoch': 0.42} + 42%|████▏ | 2973/7045 [9:40:14<14:02:50, 12.42s/it] 42%|████▏ | 2974/7045 [9:40:25<13:32:07, 11.97s/it] {'loss': 1.1211, 'learning_rate': 3.2409925966785545e-06, 'epoch': 0.42} + 42%|████▏ | 2974/7045 [9:40:25<13:32:07, 11.97s/it] 42%|████▏ | 2975/7045 [9:40:37<13:40:14, 12.09s/it] {'loss': 1.0669, 'learning_rate': 3.2398947485334144e-06, 'epoch': 0.42} + 42%|████▏ | 2975/7045 [9:40:37<13:40:14, 12.09s/it] 42%|████▏ | 2976/7045 [9:40:49<13:24:16, 11.86s/it] {'loss': 1.1289, 'learning_rate': 3.23879674398464e-06, 'epoch': 0.42} + 42%|████▏ | 2976/7045 [9:40:49<13:24:16, 11.86s/it] 42%|████▏ | 2977/7045 [9:41:00<13:18:06, 11.77s/it] {'loss': 1.1143, 'learning_rate': 3.2376985832643344e-06, 'epoch': 0.42} + 42%|████▏ | 2977/7045 [9:41:00<13:18:06, 11.77s/it] 42%|████▏ | 2978/7045 [9:41:11<13:08:21, 11.63s/it] {'loss': 1.0938, 'learning_rate': 3.2366002666046333e-06, 'epoch': 0.42} + 42%|████▏ | 2978/7045 [9:41:11<13:08:21, 11.63s/it] 42%|████▏ | 2979/7045 [9:41:23<12:58:23, 11.49s/it] {'loss': 1.1445, 'learning_rate': 3.2355017942377055e-06, 'epoch': 0.42} + 42%|████▏ | 2979/7045 [9:41:23<12:58:23, 11.49s/it] 42%|████▏ | 2980/7045 [9:41:34<12:51:48, 11.39s/it] {'loss': 1.1445, 'learning_rate': 3.2344031663957543e-06, 'epoch': 0.42} + 42%|████▏ | 2980/7045 [9:41:34<12:51:48, 11.39s/it] 42%|████▏ | 2981/7045 [9:41:45<12:46:09, 11.31s/it] {'loss': 1.1523, 'learning_rate': 3.2333043833110135e-06, 'epoch': 0.42} + 42%|████▏ | 2981/7045 [9:41:45<12:46:09, 11.31s/it] 42%|████▏ | 2982/7045 [9:41:56<12:44:10, 11.28s/it] {'loss': 1.1338, 'learning_rate': 3.232205445215751e-06, 'epoch': 0.42} + 42%|████▏ | 2982/7045 [9:41:56<12:44:10, 11.28s/it] 42%|████▏ | 2983/7045 [9:42:07<12:44:30, 11.29s/it] {'loss': 1.1572, 'learning_rate': 3.2311063523422674e-06, 'epoch': 0.42} + 42%|████▏ | 2983/7045 [9:42:07<12:44:30, 11.29s/it] 42%|████▏ | 2984/7045 [9:42:19<12:47:26, 11.34s/it] {'loss': 1.1309, 'learning_rate': 3.230007104922898e-06, 'epoch': 0.42} + 42%|████▏ | 2984/7045 [9:42:19<12:47:26, 11.34s/it] 42%|████▏ | 2985/7045 [9:42:30<12:40:53, 11.24s/it] {'loss': 1.0962, 'learning_rate': 3.228907703190004e-06, 'epoch': 0.42} + 42%|████▏ | 2985/7045 [9:42:30<12:40:53, 11.24s/it] 42%|████▏ | 2986/7045 [9:42:42<12:59:42, 11.53s/it] {'loss': 1.1187, 'learning_rate': 3.227808147375988e-06, 'epoch': 0.42} + 42%|████▏ | 2986/7045 [9:42:42<12:59:42, 11.53s/it] 42%|████▏ | 2987/7045 [9:42:54<13:11:56, 11.71s/it] {'loss': 1.1191, 'learning_rate': 3.2267084377132796e-06, 'epoch': 0.42} + 42%|████▏ | 2987/7045 [9:42:54<13:11:56, 11.71s/it] 42%|████▏ | 2988/7045 [9:43:06<13:22:28, 11.87s/it] {'loss': 1.1191, 'learning_rate': 3.2256085744343425e-06, 'epoch': 0.42} + 42%|████▏ | 2988/7045 [9:43:06<13:22:28, 11.87s/it] 42%|████▏ | 2989/7045 [9:43:18<13:06:53, 11.64s/it] {'loss': 1.168, 'learning_rate': 3.224508557771672e-06, 'epoch': 0.42} + 42%|████▏ | 2989/7045 [9:43:18<13:06:53, 11.64s/it] 42%|████▏ | 2990/7045 [9:43:29<13:02:17, 11.58s/it] {'loss': 1.0986, 'learning_rate': 3.2234083879577977e-06, 'epoch': 0.42} + 42%|████▏ | 2990/7045 [9:43:29<13:02:17, 11.58s/it] 42%|████▏ | 2991/7045 [9:43:40<12:50:10, 11.40s/it] {'loss': 1.1367, 'learning_rate': 3.2223080652252802e-06, 'epoch': 0.42} + 42%|████▏ | 2991/7045 [9:43:40<12:50:10, 11.40s/it] 42%|████▏ | 2992/7045 [9:43:51<12:46:23, 11.35s/it] {'loss': 1.1289, 'learning_rate': 3.221207589806713e-06, 'epoch': 0.42} + 42%|████▏ | 2992/7045 [9:43:51<12:46:23, 11.35s/it] 42%|████▏ | 2993/7045 [9:44:05<13:32:45, 12.03s/it] {'loss': 1.1216, 'learning_rate': 3.2201069619347195e-06, 'epoch': 0.42} + 42%|████▏ | 2993/7045 [9:44:05<13:32:45, 12.03s/it] 42%|████▏ | 2994/7045 [9:44:16<13:17:12, 11.81s/it] {'loss': 1.1357, 'learning_rate': 3.2190061818419604e-06, 'epoch': 0.42} + 42%|████▏ | 2994/7045 [9:44:16<13:17:12, 11.81s/it] 43%|████▎ | 2995/7045 [9:44:29<13:29:18, 11.99s/it] {'loss': 1.1074, 'learning_rate': 3.217905249761124e-06, 'epoch': 0.43} + 43%|████▎ | 2995/7045 [9:44:29<13:29:18, 11.99s/it] 43%|████▎ | 2996/7045 [9:44:42<13:50:32, 12.31s/it] {'loss': 1.1074, 'learning_rate': 3.216804165924933e-06, 'epoch': 0.43} + 43%|████▎ | 2996/7045 [9:44:42<13:50:32, 12.31s/it] 43%|████▎ | 2997/7045 [9:44:53<13:31:39, 12.03s/it] {'loss': 1.1221, 'learning_rate': 3.2157029305661397e-06, 'epoch': 0.43} + 43%|████▎ | 2997/7045 [9:44:53<13:31:39, 12.03s/it] 43%|████▎ | 2998/7045 [9:45:04<13:08:47, 11.69s/it] {'loss': 1.1182, 'learning_rate': 3.2146015439175314e-06, 'epoch': 0.43} + 43%|████▎ | 2998/7045 [9:45:04<13:08:47, 11.69s/it] 43%|████▎ | 2999/7045 [9:45:15<12:53:52, 11.48s/it] {'loss': 1.124, 'learning_rate': 3.213500006211927e-06, 'epoch': 0.43} + 43%|████▎ | 2999/7045 [9:45:15<12:53:52, 11.48s/it] 43%|████▎ | 3000/7045 [9:45:27<13:06:28, 11.67s/it] {'loss': 1.1338, 'learning_rate': 3.212398317682175e-06, 'epoch': 0.43} + 43%|████▎ | 3000/7045 [9:45:27<13:06:28, 11.67s/it] 43%|████▎ | 3001/7045 [9:45:38<12:55:53, 11.51s/it] {'loss': 1.1338, 'learning_rate': 3.2112964785611583e-06, 'epoch': 0.43} + 43%|████▎ | 3001/7045 [9:45:38<12:55:53, 11.51s/it] 43%|████▎ | 3002/7045 [9:45:49<12:47:26, 11.39s/it] {'loss': 1.1128, 'learning_rate': 3.2101944890817904e-06, 'epoch': 0.43} + 43%|████▎ | 3002/7045 [9:45:49<12:47:26, 11.39s/it] 43%|████▎ | 3003/7045 [9:46:00<12:39:50, 11.28s/it] {'loss': 1.1113, 'learning_rate': 3.209092349477017e-06, 'epoch': 0.43} + 43%|████▎ | 3003/7045 [9:46:00<12:39:50, 11.28s/it] 43%|████▎ | 3004/7045 [9:46:12<12:51:18, 11.45s/it] {'loss': 1.165, 'learning_rate': 3.2079900599798148e-06, 'epoch': 0.43} + 43%|████▎ | 3004/7045 [9:46:12<12:51:18, 11.45s/it] 43%|████▎ | 3005/7045 [9:46:25<13:17:40, 11.85s/it] {'loss': 1.0737, 'learning_rate': 3.2068876208231925e-06, 'epoch': 0.43} + 43%|████▎ | 3005/7045 [9:46:25<13:17:40, 11.85s/it] 43%|████▎ | 3006/7045 [9:46:36<13:01:20, 11.61s/it] {'loss': 1.082, 'learning_rate': 3.2057850322401916e-06, 'epoch': 0.43} + 43%|████▎ | 3006/7045 [9:46:36<13:01:20, 11.61s/it] 43%|████▎ | 3007/7045 [9:46:48<13:09:15, 11.73s/it] {'loss': 1.1055, 'learning_rate': 3.204682294463884e-06, 'epoch': 0.43} + 43%|████▎ | 3007/7045 [9:46:48<13:09:15, 11.73s/it] 43%|████▎ | 3008/7045 [9:46:59<13:01:43, 11.62s/it] {'loss': 1.1445, 'learning_rate': 3.203579407727373e-06, 'epoch': 0.43} + 43%|████▎ | 3008/7045 [9:46:59<13:01:43, 11.62s/it] 43%|████▎ | 3009/7045 [9:47:14<13:55:11, 12.42s/it] {'loss': 1.0752, 'learning_rate': 3.202476372263794e-06, 'epoch': 0.43} + 43%|████▎ | 3009/7045 [9:47:14<13:55:11, 12.42s/it] 43%|████▎ | 3010/7045 [9:47:25<13:27:56, 12.01s/it] {'loss': 1.1426, 'learning_rate': 3.2013731883063143e-06, 'epoch': 0.43} + 43%|████▎ | 3010/7045 [9:47:25<13:27:56, 12.01s/it] 43%|████▎ | 3011/7045 [9:47:36<13:22:03, 11.93s/it] {'loss': 1.1475, 'learning_rate': 3.2002698560881296e-06, 'epoch': 0.43} + 43%|████▎ | 3011/7045 [9:47:36<13:22:03, 11.93s/it] 43%|████▎ | 3012/7045 [9:47:49<13:28:56, 12.03s/it] {'loss': 1.1328, 'learning_rate': 3.199166375842472e-06, 'epoch': 0.43} + 43%|████▎ | 3012/7045 [9:47:49<13:28:56, 12.03s/it] 43%|████▎ | 3013/7045 [9:48:00<13:11:22, 11.78s/it] {'loss': 1.1133, 'learning_rate': 3.198062747802601e-06, 'epoch': 0.43} + 43%|████▎ | 3013/7045 [9:48:00<13:11:22, 11.78s/it] 43%|████▎ | 3014/7045 [9:48:11<12:57:26, 11.57s/it] {'loss': 1.1377, 'learning_rate': 3.1969589722018084e-06, 'epoch': 0.43} + 43%|████▎ | 3014/7045 [9:48:11<12:57:26, 11.57s/it] 43%|████▎ | 3015/7045 [9:48:22<12:46:44, 11.42s/it] {'loss': 1.1631, 'learning_rate': 3.1958550492734176e-06, 'epoch': 0.43} + 43%|████▎ | 3015/7045 [9:48:22<12:46:44, 11.42s/it] 43%|████▎ | 3016/7045 [9:48:34<12:56:09, 11.56s/it] {'loss': 1.1367, 'learning_rate': 3.194750979250782e-06, 'epoch': 0.43} + 43%|████▎ | 3016/7045 [9:48:34<12:56:09, 11.56s/it] 43%|████▎ | 3017/7045 [9:48:45<12:47:55, 11.44s/it] {'loss': 1.0918, 'learning_rate': 3.1936467623672873e-06, 'epoch': 0.43} + 43%|████▎ | 3017/7045 [9:48:45<12:47:55, 11.44s/it] 43%|████▎ | 3018/7045 [9:48:57<12:48:34, 11.45s/it] {'loss': 1.0996, 'learning_rate': 3.1925423988563505e-06, 'epoch': 0.43} + 43%|████▎ | 3018/7045 [9:48:57<12:48:34, 11.45s/it] 43%|████▎ | 3019/7045 [9:49:08<12:43:02, 11.37s/it] {'loss': 1.1689, 'learning_rate': 3.191437888951418e-06, 'epoch': 0.43} + 43%|████▎ | 3019/7045 [9:49:08<12:43:02, 11.37s/it] 43%|████▎ | 3020/7045 [9:49:19<12:38:17, 11.30s/it] {'loss': 1.1494, 'learning_rate': 3.1903332328859694e-06, 'epoch': 0.43} + 43%|████▎ | 3020/7045 [9:49:19<12:38:17, 11.30s/it] 43%|████▎ | 3021/7045 [9:49:30<12:35:38, 11.27s/it] {'loss': 1.085, 'learning_rate': 3.189228430893513e-06, 'epoch': 0.43} + 43%|████▎ | 3021/7045 [9:49:30<12:35:38, 11.27s/it] 43%|████▎ | 3022/7045 [9:49:42<12:50:13, 11.49s/it] {'loss': 1.1313, 'learning_rate': 3.188123483207588e-06, 'epoch': 0.43} + 43%|████▎ | 3022/7045 [9:49:42<12:50:13, 11.49s/it] 43%|████▎ | 3023/7045 [9:49:53<12:42:43, 11.38s/it] {'loss': 1.0938, 'learning_rate': 3.1870183900617662e-06, 'epoch': 0.43} + 43%|████▎ | 3023/7045 [9:49:53<12:42:43, 11.38s/it] 43%|████▎ | 3024/7045 [9:50:05<12:46:07, 11.43s/it] {'loss': 1.1885, 'learning_rate': 3.1859131516896502e-06, 'epoch': 0.43} + 43%|████▎ | 3024/7045 [9:50:05<12:46:07, 11.43s/it] 43%|████▎ | 3025/7045 [9:50:18<13:18:13, 11.91s/it] {'loss': 1.1123, 'learning_rate': 3.18480776832487e-06, 'epoch': 0.43} + 43%|████▎ | 3025/7045 [9:50:18<13:18:13, 11.91s/it] 43%|████▎ | 3026/7045 [9:50:29<13:09:26, 11.79s/it] {'loss': 1.165, 'learning_rate': 3.183702240201091e-06, 'epoch': 0.43} + 43%|████▎ | 3026/7045 [9:50:29<13:09:26, 11.79s/it] 43%|████▎ | 3027/7045 [9:50:40<12:58:54, 11.63s/it] {'loss': 1.166, 'learning_rate': 3.1825965675520043e-06, 'epoch': 0.43} + 43%|████▎ | 3027/7045 [9:50:40<12:58:54, 11.63s/it] 43%|████▎ | 3028/7045 [9:50:53<13:18:44, 11.93s/it] {'loss': 1.0811, 'learning_rate': 3.181490750611336e-06, 'epoch': 0.43} + 43%|████▎ | 3028/7045 [9:50:53<13:18:44, 11.93s/it] 43%|████▎ | 3029/7045 [9:51:04<13:01:57, 11.68s/it] {'loss': 1.1494, 'learning_rate': 3.180384789612839e-06, 'epoch': 0.43} + 43%|████▎ | 3029/7045 [9:51:04<13:01:57, 11.68s/it] 43%|████▎ | 3030/7045 [9:51:16<13:08:50, 11.79s/it] {'loss': 1.1191, 'learning_rate': 3.1792786847902995e-06, 'epoch': 0.43} + 43%|████▎ | 3030/7045 [9:51:16<13:08:50, 11.79s/it] 43%|████▎ | 3031/7045 [9:51:27<12:53:55, 11.57s/it] {'loss': 1.1372, 'learning_rate': 3.178172436377532e-06, 'epoch': 0.43} + 43%|████▎ | 3031/7045 [9:51:27<12:53:55, 11.57s/it] 43%|████▎ | 3032/7045 [9:51:38<12:45:13, 11.44s/it] {'loss': 1.1045, 'learning_rate': 3.1770660446083845e-06, 'epoch': 0.43} + 43%|████▎ | 3032/7045 [9:51:38<12:45:13, 11.44s/it] 43%|████▎ | 3033/7045 [9:51:51<13:13:34, 11.87s/it] {'loss': 1.1265, 'learning_rate': 3.1759595097167306e-06, 'epoch': 0.43} + 43%|████▎ | 3033/7045 [9:51:51<13:13:34, 11.87s/it] 43%|████▎ | 3034/7045 [9:52:04<13:39:25, 12.26s/it] {'loss': 1.0786, 'learning_rate': 3.1748528319364775e-06, 'epoch': 0.43} + 43%|████▎ | 3034/7045 [9:52:04<13:39:25, 12.26s/it] 43%|████▎ | 3035/7045 [9:52:17<13:37:46, 12.24s/it] {'loss': 1.1035, 'learning_rate': 3.1737460115015616e-06, 'epoch': 0.43} + 43%|████▎ | 3035/7045 [9:52:17<13:37:46, 12.24s/it] 43%|████▎ | 3036/7045 [9:52:28<13:14:21, 11.89s/it] {'loss': 1.1377, 'learning_rate': 3.17263904864595e-06, 'epoch': 0.43} + 43%|████▎ | 3036/7045 [9:52:28<13:14:21, 11.89s/it] 43%|████▎ | 3037/7045 [9:52:39<13:06:23, 11.77s/it] {'loss': 1.1172, 'learning_rate': 3.171531943603639e-06, 'epoch': 0.43} + 43%|████▎ | 3037/7045 [9:52:39<13:06:23, 11.77s/it] 43%|████▎ | 3038/7045 [9:52:50<12:54:22, 11.60s/it] {'loss': 1.1309, 'learning_rate': 3.1704246966086562e-06, 'epoch': 0.43} + 43%|████▎ | 3038/7045 [9:52:50<12:54:22, 11.60s/it] 43%|████▎ | 3039/7045 [9:53:02<12:57:49, 11.65s/it] {'loss': 1.0938, 'learning_rate': 3.1693173078950577e-06, 'epoch': 0.43} + 43%|████▎ | 3039/7045 [9:53:02<12:57:49, 11.65s/it] 43%|████▎ | 3040/7045 [9:53:15<13:26:17, 12.08s/it] {'loss': 1.0918, 'learning_rate': 3.1682097776969313e-06, 'epoch': 0.43} + 43%|████▎ | 3040/7045 [9:53:15<13:26:17, 12.08s/it] 43%|████▎ | 3041/7045 [9:53:26<13:04:50, 11.76s/it] {'loss': 1.1562, 'learning_rate': 3.167102106248392e-06, 'epoch': 0.43} + 43%|████▎ | 3041/7045 [9:53:26<13:04:50, 11.76s/it] 43%|████▎ | 3042/7045 [9:53:37<12:50:24, 11.55s/it] {'loss': 1.1074, 'learning_rate': 3.1659942937835887e-06, 'epoch': 0.43} + 43%|████▎ | 3042/7045 [9:53:37<12:50:24, 11.55s/it] 43%|████▎ | 3043/7045 [9:53:49<12:43:33, 11.45s/it] {'loss': 1.1133, 'learning_rate': 3.1648863405366966e-06, 'epoch': 0.43} + 43%|████▎ | 3043/7045 [9:53:49<12:43:33, 11.45s/it] 43%|████▎ | 3044/7045 [9:54:01<13:05:03, 11.77s/it] {'loss': 1.1182, 'learning_rate': 3.163778246741921e-06, 'epoch': 0.43} + 43%|████▎ | 3044/7045 [9:54:01<13:05:03, 11.77s/it] 43%|████▎ | 3045/7045 [9:54:13<12:58:23, 11.68s/it] {'loss': 1.1289, 'learning_rate': 3.162670012633499e-06, 'epoch': 0.43} + 43%|████▎ | 3045/7045 [9:54:13<12:58:23, 11.68s/it] 43%|████▎ | 3046/7045 [9:54:24<12:47:48, 11.52s/it] {'loss': 1.1533, 'learning_rate': 3.161561638445696e-06, 'epoch': 0.43} + 43%|████▎ | 3046/7045 [9:54:24<12:47:48, 11.52s/it] 43%|████▎ | 3047/7045 [9:54:35<12:45:04, 11.48s/it] {'loss': 1.1279, 'learning_rate': 3.1604531244128073e-06, 'epoch': 0.43} + 43%|████▎ | 3047/7045 [9:54:35<12:45:04, 11.48s/it] 43%|████▎ | 3048/7045 [9:54:46<12:38:29, 11.39s/it] {'loss': 1.1328, 'learning_rate': 3.159344470769157e-06, 'epoch': 0.43} + 43%|████▎ | 3048/7045 [9:54:46<12:38:29, 11.39s/it] 43%|████▎ | 3049/7045 [9:54:58<12:43:04, 11.46s/it] {'loss': 1.1826, 'learning_rate': 3.1582356777490997e-06, 'epoch': 0.43} + 43%|████▎ | 3049/7045 [9:54:58<12:43:04, 11.46s/it] 43%|████▎ | 3050/7045 [9:55:11<13:14:28, 11.93s/it] {'loss': 1.0708, 'learning_rate': 3.1571267455870186e-06, 'epoch': 0.43} + 43%|████▎ | 3050/7045 [9:55:11<13:14:28, 11.93s/it] 43%|████▎ | 3051/7045 [9:55:24<13:36:22, 12.26s/it] {'loss': 1.0371, 'learning_rate': 3.156017674517328e-06, 'epoch': 0.43} + 43%|████▎ | 3051/7045 [9:55:24<13:36:22, 12.26s/it] 43%|████▎ | 3052/7045 [9:55:36<13:29:21, 12.16s/it] {'loss': 1.126, 'learning_rate': 3.1549084647744688e-06, 'epoch': 0.43} + 43%|████▎ | 3052/7045 [9:55:36<13:29:21, 12.16s/it] 43%|████▎ | 3053/7045 [9:55:49<13:43:57, 12.38s/it] {'loss': 1.1108, 'learning_rate': 3.1537991165929133e-06, 'epoch': 0.43} + 43%|████▎ | 3053/7045 [9:55:49<13:43:57, 12.38s/it] 43%|████▎ | 3054/7045 [9:56:00<13:20:47, 12.04s/it] {'loss': 1.1016, 'learning_rate': 3.152689630207164e-06, 'epoch': 0.43} + 43%|████▎ | 3054/7045 [9:56:00<13:20:47, 12.04s/it] 43%|████▎ | 3055/7045 [9:56:13<13:41:47, 12.36s/it] {'loss': 1.0615, 'learning_rate': 3.1515800058517486e-06, 'epoch': 0.43} + 43%|████▎ | 3055/7045 [9:56:13<13:41:47, 12.36s/it] 43%|████▎ | 3056/7045 [9:56:24<13:14:43, 11.95s/it] {'loss': 1.1074, 'learning_rate': 3.150470243761229e-06, 'epoch': 0.43} + 43%|████▎ | 3056/7045 [9:56:24<13:14:43, 11.95s/it] 43%|████▎ | 3057/7045 [9:56:38<13:44:38, 12.41s/it] {'loss': 1.125, 'learning_rate': 3.149360344170192e-06, 'epoch': 0.43} + 43%|████▎ | 3057/7045 [9:56:38<13:44:38, 12.41s/it] 43%|████▎ | 3058/7045 [9:56:49<13:20:30, 12.05s/it] {'loss': 1.1533, 'learning_rate': 3.1482503073132563e-06, 'epoch': 0.43} + 43%|████▎ | 3058/7045 [9:56:49<13:20:30, 12.05s/it] 43%|████▎ | 3059/7045 [9:57:00<13:06:42, 11.84s/it] {'loss': 1.0737, 'learning_rate': 3.1471401334250675e-06, 'epoch': 0.43} + 43%|████▎ | 3059/7045 [9:57:00<13:06:42, 11.84s/it] 43%|████▎ | 3060/7045 [9:57:12<13:12:13, 11.93s/it] {'loss': 1.1055, 'learning_rate': 3.146029822740301e-06, 'epoch': 0.43} + 43%|████▎ | 3060/7045 [9:57:12<13:12:13, 11.93s/it] 43%|████▎ | 3061/7045 [9:57:23<12:53:06, 11.64s/it] {'loss': 1.1392, 'learning_rate': 3.1449193754936625e-06, 'epoch': 0.43} + 43%|████▎ | 3061/7045 [9:57:23<12:53:06, 11.64s/it] 43%|████▎ | 3062/7045 [9:57:34<12:37:43, 11.41s/it] {'loss': 1.1104, 'learning_rate': 3.1438087919198855e-06, 'epoch': 0.43} + 43%|████▎ | 3062/7045 [9:57:34<12:37:43, 11.41s/it] 43%|████▎ | 3063/7045 [9:57:46<12:41:02, 11.47s/it] {'loss': 1.1484, 'learning_rate': 3.1426980722537315e-06, 'epoch': 0.43} + 43%|████▎ | 3063/7045 [9:57:46<12:41:02, 11.47s/it] 43%|████▎ | 3064/7045 [9:57:57<12:37:02, 11.41s/it] {'loss': 1.0811, 'learning_rate': 3.141587216729991e-06, 'epoch': 0.43} + 43%|████▎ | 3064/7045 [9:57:57<12:37:02, 11.41s/it] 44%|████▎ | 3065/7045 [9:58:08<12:29:01, 11.29s/it] {'loss': 1.1533, 'learning_rate': 3.140476225583484e-06, 'epoch': 0.44} + 44%|████▎ | 3065/7045 [9:58:08<12:29:01, 11.29s/it] 44%|████▎ | 3066/7045 [9:58:19<12:31:26, 11.33s/it] {'loss': 1.1367, 'learning_rate': 3.1393650990490594e-06, 'epoch': 0.44} + 44%|████▎ | 3066/7045 [9:58:19<12:31:26, 11.33s/it] 44%|████▎ | 3067/7045 [9:58:31<12:30:06, 11.31s/it] {'loss': 1.1099, 'learning_rate': 3.1382538373615935e-06, 'epoch': 0.44} + 44%|████▎ | 3067/7045 [9:58:31<12:30:06, 11.31s/it] 44%|████▎ | 3068/7045 [9:58:42<12:26:02, 11.26s/it] {'loss': 1.127, 'learning_rate': 3.137142440755992e-06, 'epoch': 0.44} + 44%|████▎ | 3068/7045 [9:58:42<12:26:02, 11.26s/it] 44%|████▎ | 3069/7045 [9:58:55<12:55:10, 11.70s/it] {'loss': 1.1025, 'learning_rate': 3.13603090946719e-06, 'epoch': 0.44} + 44%|████▎ | 3069/7045 [9:58:55<12:55:10, 11.70s/it] 44%|████▎ | 3070/7045 [9:59:06<12:43:32, 11.53s/it] {'loss': 1.1406, 'learning_rate': 3.1349192437301478e-06, 'epoch': 0.44} + 44%|████▎ | 3070/7045 [9:59:06<12:43:32, 11.53s/it] 44%|████▎ | 3071/7045 [9:59:18<13:01:47, 11.80s/it] {'loss': 1.1182, 'learning_rate': 3.133807443779858e-06, 'epoch': 0.44} + 44%|████▎ | 3071/7045 [9:59:18<13:01:47, 11.80s/it] 44%|████▎ | 3072/7045 [9:59:32<13:33:30, 12.29s/it] {'loss': 1.0796, 'learning_rate': 3.132695509851339e-06, 'epoch': 0.44} + 44%|████▎ | 3072/7045 [9:59:32<13:33:30, 12.29s/it] 44%|████▎ | 3073/7045 [9:59:43<13:13:15, 11.98s/it] {'loss': 1.1172, 'learning_rate': 3.1315834421796405e-06, 'epoch': 0.44} + 44%|████▎ | 3073/7045 [9:59:43<13:13:15, 11.98s/it] 44%|████▎ | 3074/7045 [9:59:54<12:58:05, 11.76s/it] {'loss': 1.0879, 'learning_rate': 3.1304712409998354e-06, 'epoch': 0.44} + 44%|████▎ | 3074/7045 [9:59:54<12:58:05, 11.76s/it] 44%|████▎ | 3075/7045 [10:00:05<12:49:20, 11.63s/it] {'loss': 1.127, 'learning_rate': 3.12935890654703e-06, 'epoch': 0.44} + 44%|████▎ | 3075/7045 [10:00:05<12:49:20, 11.63s/it] 44%|████▎ | 3076/7045 [10:00:17<12:40:19, 11.49s/it] {'loss': 1.126, 'learning_rate': 3.1282464390563548e-06, 'epoch': 0.44} + 44%|████▎ | 3076/7045 [10:00:17<12:40:19, 11.49s/it] 44%|████▎ | 3077/7045 [10:00:27<12:27:07, 11.30s/it] {'loss': 1.1094, 'learning_rate': 3.1271338387629728e-06, 'epoch': 0.44} + 44%|████▎ | 3077/7045 [10:00:27<12:27:07, 11.30s/it] 44%|████▎ | 3078/7045 [10:00:39<12:29:10, 11.33s/it] {'loss': 1.1387, 'learning_rate': 3.1260211059020694e-06, 'epoch': 0.44} + 44%|████▎ | 3078/7045 [10:00:39<12:29:10, 11.33s/it] 44%|████▎ | 3079/7045 [10:00:52<13:02:31, 11.84s/it] {'loss': 1.0723, 'learning_rate': 3.124908240708864e-06, 'epoch': 0.44} + 44%|████▎ | 3079/7045 [10:00:52<13:02:31, 11.84s/it] 44%|████▎ | 3080/7045 [10:01:03<12:56:52, 11.76s/it] {'loss': 1.1299, 'learning_rate': 3.1237952434185993e-06, 'epoch': 0.44} + 44%|████▎ | 3080/7045 [10:01:03<12:56:52, 11.76s/it] 44%|████▎ | 3081/7045 [10:01:15<12:56:40, 11.76s/it] {'loss': 1.1211, 'learning_rate': 3.122682114266548e-06, 'epoch': 0.44} + 44%|████▎ | 3081/7045 [10:01:15<12:56:40, 11.76s/it] 44%|████▎ | 3082/7045 [10:01:26<12:47:04, 11.61s/it] {'loss': 1.1035, 'learning_rate': 3.1215688534880107e-06, 'epoch': 0.44} + 44%|████▎ | 3082/7045 [10:01:26<12:47:04, 11.61s/it] 44%|████▍ | 3083/7045 [10:01:38<12:45:03, 11.59s/it] {'loss': 1.1133, 'learning_rate': 3.120455461318315e-06, 'epoch': 0.44} + 44%|████▍ | 3083/7045 [10:01:38<12:45:03, 11.59s/it] 44%|████▍ | 3084/7045 [10:01:52<13:24:11, 12.18s/it] {'loss': 1.0928, 'learning_rate': 3.1193419379928182e-06, 'epoch': 0.44} + 44%|████▍ | 3084/7045 [10:01:52<13:24:11, 12.18s/it] 44%|████▍ | 3085/7045 [10:02:03<13:00:33, 11.83s/it] {'loss': 1.1113, 'learning_rate': 3.118228283746902e-06, 'epoch': 0.44} + 44%|████▍ | 3085/7045 [10:02:03<13:00:33, 11.83s/it] 44%|████▍ | 3086/7045 [10:02:14<12:50:10, 11.67s/it] {'loss': 1.1416, 'learning_rate': 3.1171144988159787e-06, 'epoch': 0.44} + 44%|████▍ | 3086/7045 [10:02:14<12:50:10, 11.67s/it] 44%|████▍ | 3087/7045 [10:02:25<12:41:30, 11.54s/it] {'loss': 1.1094, 'learning_rate': 3.1160005834354867e-06, 'epoch': 0.44} + 44%|████▍ | 3087/7045 [10:02:25<12:41:30, 11.54s/it] 44%|████▍ | 3088/7045 [10:02:37<12:41:58, 11.55s/it] {'loss': 1.1152, 'learning_rate': 3.114886537840894e-06, 'epoch': 0.44} + 44%|████▍ | 3088/7045 [10:02:37<12:41:58, 11.55s/it] 44%|████▍ | 3089/7045 [10:02:48<12:33:15, 11.42s/it] {'loss': 1.1504, 'learning_rate': 3.1137723622676925e-06, 'epoch': 0.44} + 44%|████▍ | 3089/7045 [10:02:48<12:33:15, 11.42s/it] 44%|████▍ | 3090/7045 [10:02:59<12:29:39, 11.37s/it] {'loss': 1.1807, 'learning_rate': 3.112658056951404e-06, 'epoch': 0.44} + 44%|████▍ | 3090/7045 [10:02:59<12:29:39, 11.37s/it] 44%|████▍ | 3091/7045 [10:03:10<12:24:33, 11.30s/it] {'loss': 1.1343, 'learning_rate': 3.111543622127579e-06, 'epoch': 0.44} + 44%|████▍ | 3091/7045 [10:03:10<12:24:33, 11.30s/it] 44%|████▍ | 3092/7045 [10:03:21<12:18:29, 11.21s/it] {'loss': 1.1191, 'learning_rate': 3.110429058031793e-06, 'epoch': 0.44} + 44%|████▍ | 3092/7045 [10:03:21<12:18:29, 11.21s/it] 44%|████▍ | 3093/7045 [10:03:32<12:16:14, 11.18s/it] {'loss': 1.0918, 'learning_rate': 3.109314364899649e-06, 'epoch': 0.44} + 44%|████▍ | 3093/7045 [10:03:32<12:16:14, 11.18s/it] 44%|████▍ | 3094/7045 [10:03:43<12:14:13, 11.15s/it] {'loss': 1.0869, 'learning_rate': 3.1081995429667784e-06, 'epoch': 0.44} + 44%|████▍ | 3094/7045 [10:03:43<12:14:13, 11.15s/it] 44%|████▍ | 3095/7045 [10:03:55<12:13:52, 11.15s/it] {'loss': 1.1582, 'learning_rate': 3.10708459246884e-06, 'epoch': 0.44} + 44%|████▍ | 3095/7045 [10:03:55<12:13:52, 11.15s/it] 44%|████▍ | 3096/7045 [10:04:06<12:15:15, 11.17s/it] {'loss': 1.1396, 'learning_rate': 3.1059695136415168e-06, 'epoch': 0.44} + 44%|████▍ | 3096/7045 [10:04:06<12:15:15, 11.17s/it] 44%|████▍ | 3097/7045 [10:04:17<12:15:50, 11.18s/it] {'loss': 1.1426, 'learning_rate': 3.1048543067205232e-06, 'epoch': 0.44} + 44%|████▍ | 3097/7045 [10:04:17<12:15:50, 11.18s/it] 44%|████▍ | 3098/7045 [10:04:28<12:13:16, 11.15s/it] {'loss': 1.1279, 'learning_rate': 3.1037389719415987e-06, 'epoch': 0.44} + 44%|████▍ | 3098/7045 [10:04:28<12:13:16, 11.15s/it] 44%|████▍ | 3099/7045 [10:04:39<12:09:24, 11.09s/it] {'loss': 1.1255, 'learning_rate': 3.10262350954051e-06, 'epoch': 0.44} + 44%|████▍ | 3099/7045 [10:04:39<12:09:24, 11.09s/it] 44%|████▍ | 3100/7045 [10:04:51<12:28:30, 11.38s/it] {'loss': 1.166, 'learning_rate': 3.101507919753049e-06, 'epoch': 0.44} + 44%|████▍ | 3100/7045 [10:04:51<12:28:30, 11.38s/it] 44%|████▍ | 3101/7045 [10:05:03<12:30:25, 11.42s/it] {'loss': 1.1182, 'learning_rate': 3.100392202815037e-06, 'epoch': 0.44} + 44%|████▍ | 3101/7045 [10:05:03<12:30:25, 11.42s/it] 44%|████▍ | 3102/7045 [10:05:15<12:49:05, 11.70s/it] {'loss': 1.1113, 'learning_rate': 3.099276358962322e-06, 'epoch': 0.44} + 44%|████▍ | 3102/7045 [10:05:15<12:49:05, 11.70s/it] 44%|████▍ | 3103/7045 [10:05:26<12:36:34, 11.52s/it] {'loss': 1.1602, 'learning_rate': 3.098160388430777e-06, 'epoch': 0.44} + 44%|████▍ | 3103/7045 [10:05:26<12:36:34, 11.52s/it] 44%|████▍ | 3104/7045 [10:05:39<13:08:54, 12.01s/it] {'loss': 1.1235, 'learning_rate': 3.097044291456304e-06, 'epoch': 0.44} + 44%|████▍ | 3104/7045 [10:05:39<13:08:54, 12.01s/it] 44%|████▍ | 3105/7045 [10:05:52<13:18:01, 12.15s/it] {'loss': 1.1699, 'learning_rate': 3.095928068274829e-06, 'epoch': 0.44} + 44%|████▍ | 3105/7045 [10:05:52<13:18:01, 12.15s/it] 44%|████▍ | 3106/7045 [10:06:04<13:12:34, 12.07s/it] {'loss': 1.1104, 'learning_rate': 3.0948117191223093e-06, 'epoch': 0.44} + 44%|████▍ | 3106/7045 [10:06:04<13:12:34, 12.07s/it] 44%|████▍ | 3107/7045 [10:06:16<13:21:35, 12.21s/it] {'loss': 1.1172, 'learning_rate': 3.093695244234723e-06, 'epoch': 0.44} + 44%|████▍ | 3107/7045 [10:06:16<13:21:35, 12.21s/it] 44%|████▍ | 3108/7045 [10:06:29<13:33:18, 12.39s/it] {'loss': 1.0796, 'learning_rate': 3.092578643848078e-06, 'epoch': 0.44} + 44%|████▍ | 3108/7045 [10:06:29<13:33:18, 12.39s/it] 44%|████▍ | 3109/7045 [10:06:41<13:18:27, 12.17s/it] {'loss': 1.1318, 'learning_rate': 3.0914619181984095e-06, 'epoch': 0.44} + 44%|████▍ | 3109/7045 [10:06:41<13:18:27, 12.17s/it] 44%|████▍ | 3110/7045 [10:06:52<12:54:28, 11.81s/it] {'loss': 1.1377, 'learning_rate': 3.0903450675217778e-06, 'epoch': 0.44} + 44%|████▍ | 3110/7045 [10:06:52<12:54:28, 11.81s/it] 44%|████▍ | 3111/7045 [10:07:04<13:06:30, 12.00s/it] {'loss': 1.0962, 'learning_rate': 3.0892280920542692e-06, 'epoch': 0.44} + 44%|████▍ | 3111/7045 [10:07:04<13:06:30, 12.00s/it] 44%|████▍ | 3112/7045 [10:07:15<12:46:59, 11.70s/it] {'loss': 1.1562, 'learning_rate': 3.0881109920319973e-06, 'epoch': 0.44} + 44%|████▍ | 3112/7045 [10:07:15<12:46:59, 11.70s/it] 44%|████▍ | 3113/7045 [10:07:29<13:25:00, 12.28s/it] {'loss': 1.1162, 'learning_rate': 3.086993767691102e-06, 'epoch': 0.44} + 44%|████▍ | 3113/7045 [10:07:29<13:25:00, 12.28s/it] 44%|████▍ | 3114/7045 [10:07:40<13:07:44, 12.02s/it] {'loss': 1.127, 'learning_rate': 3.0858764192677493e-06, 'epoch': 0.44} + 44%|████▍ | 3114/7045 [10:07:40<13:07:44, 12.02s/it] 44%|████▍ | 3115/7045 [10:07:51<12:48:52, 11.74s/it] {'loss': 1.082, 'learning_rate': 3.084758946998131e-06, 'epoch': 0.44} + 44%|████▍ | 3115/7045 [10:07:51<12:48:52, 11.74s/it] 44%|████▍ | 3116/7045 [10:08:02<12:36:13, 11.55s/it] {'loss': 1.1357, 'learning_rate': 3.083641351118466e-06, 'epoch': 0.44} + 44%|████▍ | 3116/7045 [10:08:02<12:36:13, 11.55s/it] 44%|████▍ | 3117/7045 [10:08:15<13:01:07, 11.93s/it] {'loss': 1.1025, 'learning_rate': 3.0825236318649985e-06, 'epoch': 0.44} + 44%|████▍ | 3117/7045 [10:08:15<13:01:07, 11.93s/it] 44%|████▍ | 3118/7045 [10:08:26<12:46:18, 11.71s/it] {'loss': 1.1211, 'learning_rate': 3.081405789473999e-06, 'epoch': 0.44} + 44%|████▍ | 3118/7045 [10:08:26<12:46:18, 11.71s/it] 44%|████▍ | 3119/7045 [10:08:38<12:48:02, 11.74s/it] {'loss': 1.1367, 'learning_rate': 3.0802878241817634e-06, 'epoch': 0.44} + 44%|████▍ | 3119/7045 [10:08:38<12:48:02, 11.74s/it] 44%|████▍ | 3120/7045 [10:08:50<12:45:48, 11.71s/it] {'loss': 1.1089, 'learning_rate': 3.0791697362246154e-06, 'epoch': 0.44} + 44%|████▍ | 3120/7045 [10:08:50<12:45:48, 11.71s/it] 44%|████▍ | 3121/7045 [10:09:01<12:38:06, 11.59s/it] {'loss': 1.1309, 'learning_rate': 3.078051525838903e-06, 'epoch': 0.44} + 44%|████▍ | 3121/7045 [10:09:01<12:38:06, 11.59s/it] 44%|████▍ | 3122/7045 [10:09:13<12:41:09, 11.64s/it] {'loss': 1.0903, 'learning_rate': 3.0769331932610015e-06, 'epoch': 0.44} + 44%|████▍ | 3122/7045 [10:09:13<12:41:09, 11.64s/it] 44%|████▍ | 3123/7045 [10:09:24<12:31:00, 11.49s/it] {'loss': 1.1455, 'learning_rate': 3.0758147387273097e-06, 'epoch': 0.44} + 44%|████▍ | 3123/7045 [10:09:24<12:31:00, 11.49s/it] 44%|████▍ | 3124/7045 [10:09:35<12:22:37, 11.36s/it] {'loss': 1.0908, 'learning_rate': 3.074696162474254e-06, 'epoch': 0.44} + 44%|████▍ | 3124/7045 [10:09:35<12:22:37, 11.36s/it] 44%|████▍ | 3125/7045 [10:09:46<12:15:22, 11.26s/it] {'loss': 1.1338, 'learning_rate': 3.073577464738287e-06, 'epoch': 0.44} + 44%|████▍ | 3125/7045 [10:09:46<12:15:22, 11.26s/it] 44%|████▍ | 3126/7045 [10:09:58<12:25:19, 11.41s/it] {'loss': 1.1729, 'learning_rate': 3.0724586457558846e-06, 'epoch': 0.44} + 44%|████▍ | 3126/7045 [10:09:58<12:25:19, 11.41s/it] 44%|████▍ | 3127/7045 [10:10:09<12:15:36, 11.27s/it] {'loss': 1.1328, 'learning_rate': 3.0713397057635507e-06, 'epoch': 0.44} + 44%|████▍ | 3127/7045 [10:10:09<12:15:36, 11.27s/it] 44%|████▍ | 3128/7045 [10:10:21<12:31:24, 11.51s/it] {'loss': 1.1143, 'learning_rate': 3.070220644997815e-06, 'epoch': 0.44} + 44%|████▍ | 3128/7045 [10:10:21<12:31:24, 11.51s/it] 44%|████▍ | 3129/7045 [10:10:34<12:57:23, 11.91s/it] {'loss': 1.1045, 'learning_rate': 3.0691014636952297e-06, 'epoch': 0.44} + 44%|████▍ | 3129/7045 [10:10:34<12:57:23, 11.91s/it] 44%|████▍ | 3130/7045 [10:10:45<12:53:58, 11.86s/it] {'loss': 1.1377, 'learning_rate': 3.0679821620923757e-06, 'epoch': 0.44} + 44%|████▍ | 3130/7045 [10:10:45<12:53:58, 11.86s/it] 44%|████▍ | 3131/7045 [10:10:59<13:20:13, 12.27s/it] {'loss': 1.1221, 'learning_rate': 3.066862740425857e-06, 'epoch': 0.44} + 44%|████▍ | 3131/7045 [10:10:59<13:20:13, 12.27s/it] 44%|████▍ | 3132/7045 [10:11:10<12:55:58, 11.90s/it] {'loss': 1.1475, 'learning_rate': 3.0657431989323054e-06, 'epoch': 0.44} + 44%|████▍ | 3132/7045 [10:11:10<12:55:58, 11.90s/it] 44%|████▍ | 3133/7045 [10:11:22<13:01:24, 11.98s/it] {'loss': 1.0723, 'learning_rate': 3.064623537848376e-06, 'epoch': 0.44} + 44%|████▍ | 3133/7045 [10:11:22<13:01:24, 11.98s/it] 44%|████▍ | 3134/7045 [10:11:33<12:49:58, 11.81s/it] {'loss': 1.1133, 'learning_rate': 3.063503757410749e-06, 'epoch': 0.44} + 44%|████▍ | 3134/7045 [10:11:33<12:49:58, 11.81s/it] 44%|████▍ | 3135/7045 [10:11:47<13:23:07, 12.32s/it] {'loss': 1.1162, 'learning_rate': 3.0623838578561317e-06, 'epoch': 0.44} + 44%|████▍ | 3135/7045 [10:11:47<13:23:07, 12.32s/it] 45%|████▍ | 3136/7045 [10:11:58<13:06:04, 12.07s/it] {'loss': 1.1016, 'learning_rate': 3.0612638394212558e-06, 'epoch': 0.45} + 45%|████▍ | 3136/7045 [10:11:58<13:06:04, 12.07s/it] 45%|████▍ | 3137/7045 [10:12:11<13:19:45, 12.28s/it] {'loss': 1.1348, 'learning_rate': 3.060143702342877e-06, 'epoch': 0.45} + 45%|████▍ | 3137/7045 [10:12:11<13:19:45, 12.28s/it] 45%|████▍ | 3138/7045 [10:12:23<13:06:48, 12.08s/it] {'loss': 1.1562, 'learning_rate': 3.059023446857777e-06, 'epoch': 0.45} + 45%|████▍ | 3138/7045 [10:12:23<13:06:48, 12.08s/it] 45%|████▍ | 3139/7045 [10:12:36<13:25:34, 12.37s/it] {'loss': 1.1406, 'learning_rate': 3.0579030732027636e-06, 'epoch': 0.45} + 45%|████▍ | 3139/7045 [10:12:36<13:25:34, 12.37s/it] 45%|████▍ | 3140/7045 [10:12:48<13:20:37, 12.30s/it] {'loss': 1.1143, 'learning_rate': 3.056782581614668e-06, 'epoch': 0.45} + 45%|████▍ | 3140/7045 [10:12:48<13:20:37, 12.30s/it] 45%|████▍ | 3141/7045 [10:13:00<13:28:07, 12.42s/it] {'loss': 1.1494, 'learning_rate': 3.0556619723303453e-06, 'epoch': 0.45} + 45%|████▍ | 3141/7045 [10:13:00<13:28:07, 12.42s/it] 45%|████▍ | 3142/7045 [10:13:12<13:05:25, 12.07s/it] {'loss': 1.1689, 'learning_rate': 3.054541245586679e-06, 'epoch': 0.45} + 45%|████▍ | 3142/7045 [10:13:12<13:05:25, 12.07s/it] 45%|████▍ | 3143/7045 [10:13:23<12:47:02, 11.79s/it] {'loss': 1.1416, 'learning_rate': 3.053420401620575e-06, 'epoch': 0.45} + 45%|████▍ | 3143/7045 [10:13:23<12:47:02, 11.79s/it] 45%|████▍ | 3144/7045 [10:13:34<12:35:26, 11.62s/it] {'loss': 1.1318, 'learning_rate': 3.052299440668964e-06, 'epoch': 0.45} + 45%|████▍ | 3144/7045 [10:13:34<12:35:26, 11.62s/it] 45%|████▍ | 3145/7045 [10:13:46<12:45:09, 11.77s/it] {'loss': 1.1138, 'learning_rate': 3.051178362968801e-06, 'epoch': 0.45} + 45%|████▍ | 3145/7045 [10:13:46<12:45:09, 11.77s/it] 45%|████▍ | 3146/7045 [10:13:57<12:34:28, 11.61s/it] {'loss': 1.1553, 'learning_rate': 3.0500571687570673e-06, 'epoch': 0.45} + 45%|████▍ | 3146/7045 [10:13:57<12:34:28, 11.61s/it] 45%|████▍ | 3147/7045 [10:14:08<12:22:03, 11.42s/it] {'loss': 1.0376, 'learning_rate': 3.048935858270769e-06, 'epoch': 0.45} + 45%|████▍ | 3147/7045 [10:14:08<12:22:03, 11.42s/it] 45%|████▍ | 3148/7045 [10:14:19<12:11:06, 11.26s/it] {'loss': 1.0596, 'learning_rate': 3.047814431746934e-06, 'epoch': 0.45} + 45%|████▍ | 3148/7045 [10:14:19<12:11:06, 11.26s/it] 45%|████▍ | 3149/7045 [10:14:31<12:30:08, 11.55s/it] {'loss': 1.1445, 'learning_rate': 3.046692889422618e-06, 'epoch': 0.45} + 45%|████▍ | 3149/7045 [10:14:31<12:30:08, 11.55s/it] 45%|████▍ | 3150/7045 [10:14:43<12:29:40, 11.55s/it] {'loss': 1.0811, 'learning_rate': 3.045571231534899e-06, 'epoch': 0.45} + 45%|████▍ | 3150/7045 [10:14:43<12:29:40, 11.55s/it] 45%|████▍ | 3151/7045 [10:14:54<12:25:02, 11.48s/it] {'loss': 1.1064, 'learning_rate': 3.0444494583208807e-06, 'epoch': 0.45} + 45%|████▍ | 3151/7045 [10:14:54<12:25:02, 11.48s/it] 45%|████▍ | 3152/7045 [10:15:07<12:40:26, 11.72s/it] {'loss': 1.1113, 'learning_rate': 3.0433275700176894e-06, 'epoch': 0.45} + 45%|████▍ | 3152/7045 [10:15:07<12:40:26, 11.72s/it] 45%|████▍ | 3153/7045 [10:15:18<12:27:38, 11.53s/it] {'loss': 1.1377, 'learning_rate': 3.0422055668624777e-06, 'epoch': 0.45} + 45%|████▍ | 3153/7045 [10:15:18<12:27:38, 11.53s/it] 45%|████▍ | 3154/7045 [10:15:28<12:11:31, 11.28s/it] {'loss': 1.0732, 'learning_rate': 3.0410834490924223e-06, 'epoch': 0.45} + 45%|████▍ | 3154/7045 [10:15:28<12:11:31, 11.28s/it] 45%|████▍ | 3155/7045 [10:15:40<12:16:08, 11.35s/it] {'loss': 1.1025, 'learning_rate': 3.0399612169447234e-06, 'epoch': 0.45} + 45%|████▍ | 3155/7045 [10:15:40<12:16:08, 11.35s/it] 45%|████▍ | 3156/7045 [10:15:51<12:10:42, 11.27s/it] {'loss': 1.1143, 'learning_rate': 3.0388388706566045e-06, 'epoch': 0.45} + 45%|████▍ | 3156/7045 [10:15:51<12:10:42, 11.27s/it] 45%|████▍ | 3157/7045 [10:16:05<12:54:36, 11.95s/it] {'loss': 1.1318, 'learning_rate': 3.0377164104653143e-06, 'epoch': 0.45} + 45%|████▍ | 3157/7045 [10:16:05<12:54:36, 11.95s/it] 45%|████▍ | 3158/7045 [10:16:15<12:32:05, 11.61s/it] {'loss': 1.1182, 'learning_rate': 3.0365938366081277e-06, 'epoch': 0.45} + 45%|████▍ | 3158/7045 [10:16:15<12:32:05, 11.61s/it] 45%|████▍ | 3159/7045 [10:16:26<12:18:44, 11.41s/it] {'loss': 1.1387, 'learning_rate': 3.0354711493223395e-06, 'epoch': 0.45} + 45%|████▍ | 3159/7045 [10:16:26<12:18:44, 11.41s/it] 45%|████▍ | 3160/7045 [10:16:38<12:19:45, 11.42s/it] {'loss': 1.061, 'learning_rate': 3.0343483488452702e-06, 'epoch': 0.45} + 45%|████▍ | 3160/7045 [10:16:38<12:19:45, 11.42s/it] 45%|████▍ | 3161/7045 [10:16:49<12:20:46, 11.44s/it] {'loss': 1.0996, 'learning_rate': 3.0332254354142653e-06, 'epoch': 0.45} + 45%|████▍ | 3161/7045 [10:16:49<12:20:46, 11.44s/it] 45%|████▍ | 3162/7045 [10:17:00<12:07:55, 11.25s/it] {'loss': 1.0947, 'learning_rate': 3.032102409266694e-06, 'epoch': 0.45} + 45%|████▍ | 3162/7045 [10:17:00<12:07:55, 11.25s/it] 45%|████▍ | 3163/7045 [10:17:11<12:01:27, 11.15s/it] {'loss': 1.124, 'learning_rate': 3.0309792706399464e-06, 'epoch': 0.45} + 45%|████▍ | 3163/7045 [10:17:11<12:01:27, 11.15s/it] 45%|████▍ | 3164/7045 [10:17:22<12:00:06, 11.13s/it] {'loss': 1.106, 'learning_rate': 3.029856019771441e-06, 'epoch': 0.45} + 45%|████▍ | 3164/7045 [10:17:22<12:00:06, 11.13s/it] 45%|████▍ | 3165/7045 [10:17:33<11:57:45, 11.10s/it] {'loss': 1.082, 'learning_rate': 3.0287326568986175e-06, 'epoch': 0.45} + 45%|████▍ | 3165/7045 [10:17:33<11:57:45, 11.10s/it] 45%|████▍ | 3166/7045 [10:17:45<12:08:23, 11.27s/it] {'loss': 1.1426, 'learning_rate': 3.0276091822589376e-06, 'epoch': 0.45} + 45%|████▍ | 3166/7045 [10:17:45<12:08:23, 11.27s/it] 45%|████▍ | 3167/7045 [10:17:57<12:21:35, 11.47s/it] {'loss': 1.126, 'learning_rate': 3.0264855960898905e-06, 'epoch': 0.45} + 45%|████▍ | 3167/7045 [10:17:57<12:21:35, 11.47s/it] 45%|████▍ | 3168/7045 [10:18:08<12:14:57, 11.37s/it] {'loss': 1.1045, 'learning_rate': 3.025361898628985e-06, 'epoch': 0.45} + 45%|████▍ | 3168/7045 [10:18:08<12:14:57, 11.37s/it] 45%|████▍ | 3169/7045 [10:18:20<12:22:46, 11.50s/it] {'loss': 1.1377, 'learning_rate': 3.0242380901137575e-06, 'epoch': 0.45} + 45%|████▍ | 3169/7045 [10:18:20<12:22:46, 11.50s/it] 45%|████▍ | 3170/7045 [10:18:32<12:41:36, 11.79s/it] {'loss': 1.1152, 'learning_rate': 3.0231141707817642e-06, 'epoch': 0.45} + 45%|████▍ | 3170/7045 [10:18:32<12:41:36, 11.79s/it] 45%|████▌ | 3171/7045 [10:18:44<12:35:11, 11.70s/it] {'loss': 1.1104, 'learning_rate': 3.0219901408705867e-06, 'epoch': 0.45} + 45%|████▌ | 3171/7045 [10:18:44<12:35:11, 11.70s/it] 45%|████▌ | 3172/7045 [10:18:55<12:25:25, 11.55s/it] {'loss': 1.1279, 'learning_rate': 3.0208660006178297e-06, 'epoch': 0.45} + 45%|████▌ | 3172/7045 [10:18:55<12:25:25, 11.55s/it] 45%|████▌ | 3173/7045 [10:19:06<12:20:02, 11.47s/it] {'loss': 1.1279, 'learning_rate': 3.0197417502611213e-06, 'epoch': 0.45} + 45%|████▌ | 3173/7045 [10:19:06<12:20:02, 11.47s/it] 45%|████▌ | 3174/7045 [10:19:18<12:27:51, 11.59s/it] {'loss': 1.1011, 'learning_rate': 3.0186173900381126e-06, 'epoch': 0.45} + 45%|████▌ | 3174/7045 [10:19:18<12:27:51, 11.59s/it] 45%|████▌ | 3175/7045 [10:19:29<12:22:48, 11.52s/it] {'loss': 1.124, 'learning_rate': 3.0174929201864765e-06, 'epoch': 0.45} + 45%|████▌ | 3175/7045 [10:19:29<12:22:48, 11.52s/it] 45%|████▌ | 3176/7045 [10:19:42<12:42:44, 11.83s/it] {'loss': 1.0923, 'learning_rate': 3.0163683409439136e-06, 'epoch': 0.45} + 45%|████▌ | 3176/7045 [10:19:42<12:42:44, 11.83s/it] 45%|████▌ | 3177/7045 [10:19:55<13:17:16, 12.37s/it] {'loss': 1.1128, 'learning_rate': 3.015243652548142e-06, 'epoch': 0.45} + 45%|████▌ | 3177/7045 [10:19:55<13:17:16, 12.37s/it] 45%|████▌ | 3178/7045 [10:20:07<12:52:56, 11.99s/it] {'loss': 1.0864, 'learning_rate': 3.014118855236906e-06, 'epoch': 0.45} + 45%|████▌ | 3178/7045 [10:20:07<12:52:56, 11.99s/it] 45%|████▌ | 3179/7045 [10:20:18<12:50:00, 11.95s/it] {'loss': 1.1387, 'learning_rate': 3.0129939492479733e-06, 'epoch': 0.45} + 45%|████▌ | 3179/7045 [10:20:18<12:50:00, 11.95s/it] 45%|████▌ | 3180/7045 [10:20:31<12:56:12, 12.05s/it] {'loss': 1.1201, 'learning_rate': 3.011868934819132e-06, 'epoch': 0.45} + 45%|████▌ | 3180/7045 [10:20:31<12:56:12, 12.05s/it] 45%|████▌ | 3181/7045 [10:20:43<13:02:51, 12.16s/it] {'loss': 1.1162, 'learning_rate': 3.0107438121881967e-06, 'epoch': 0.45} + 45%|████▌ | 3181/7045 [10:20:43<13:02:51, 12.16s/it] 45%|████▌ | 3182/7045 [10:20:55<13:02:19, 12.15s/it] {'loss': 1.1045, 'learning_rate': 3.0096185815930013e-06, 'epoch': 0.45} + 45%|████▌ | 3182/7045 [10:20:55<13:02:19, 12.15s/it] 45%|████▌ | 3183/7045 [10:21:07<12:44:52, 11.88s/it] {'loss': 1.1201, 'learning_rate': 3.0084932432714053e-06, 'epoch': 0.45} + 45%|████▌ | 3183/7045 [10:21:07<12:44:52, 11.88s/it] 45%|████▌ | 3184/7045 [10:21:18<12:30:41, 11.67s/it] {'loss': 1.0688, 'learning_rate': 3.0073677974612897e-06, 'epoch': 0.45} + 45%|████▌ | 3184/7045 [10:21:18<12:30:41, 11.67s/it] 45%|████▌ | 3185/7045 [10:21:29<12:30:20, 11.66s/it] {'loss': 1.1226, 'learning_rate': 3.0062422444005573e-06, 'epoch': 0.45} + 45%|████▌ | 3185/7045 [10:21:29<12:30:20, 11.66s/it] 45%|████▌ | 3186/7045 [10:21:41<12:25:45, 11.60s/it] {'loss': 1.1357, 'learning_rate': 3.005116584327136e-06, 'epoch': 0.45} + 45%|████▌ | 3186/7045 [10:21:41<12:25:45, 11.60s/it] 45%|████▌ | 3187/7045 [10:21:52<12:18:07, 11.48s/it] {'loss': 1.0737, 'learning_rate': 3.0039908174789734e-06, 'epoch': 0.45} + 45%|████▌ | 3187/7045 [10:21:52<12:18:07, 11.48s/it] 45%|████▌ | 3188/7045 [10:22:03<12:13:23, 11.41s/it] {'loss': 1.1123, 'learning_rate': 3.0028649440940427e-06, 'epoch': 0.45} + 45%|████▌ | 3188/7045 [10:22:03<12:13:23, 11.41s/it] 45%|████▌ | 3189/7045 [10:22:15<12:11:18, 11.38s/it] {'loss': 1.0928, 'learning_rate': 3.001738964410338e-06, 'epoch': 0.45} + 45%|████▌ | 3189/7045 [10:22:15<12:11:18, 11.38s/it] 45%|████▌ | 3190/7045 [10:22:26<12:07:08, 11.32s/it] {'loss': 1.1123, 'learning_rate': 3.000612878665875e-06, 'epoch': 0.45} + 45%|████▌ | 3190/7045 [10:22:26<12:07:08, 11.32s/it] 45%|████▌ | 3191/7045 [10:22:37<12:13:10, 11.41s/it] {'loss': 1.1143, 'learning_rate': 2.9994866870986937e-06, 'epoch': 0.45} + 45%|████▌ | 3191/7045 [10:22:37<12:13:10, 11.41s/it] 45%|████▌ | 3192/7045 [10:22:49<12:22:15, 11.56s/it] {'loss': 1.1411, 'learning_rate': 2.9983603899468555e-06, 'epoch': 0.45} + 45%|████▌ | 3192/7045 [10:22:49<12:22:15, 11.56s/it] 45%|████▌ | 3193/7045 [10:23:00<12:12:21, 11.41s/it] {'loss': 1.1279, 'learning_rate': 2.9972339874484442e-06, 'epoch': 0.45} + 45%|████▌ | 3193/7045 [10:23:00<12:12:21, 11.41s/it] 45%|████▌ | 3194/7045 [10:23:13<12:36:12, 11.78s/it] {'loss': 1.1201, 'learning_rate': 2.9961074798415646e-06, 'epoch': 0.45} + 45%|████▌ | 3194/7045 [10:23:13<12:36:12, 11.78s/it] 45%|████▌ | 3195/7045 [10:23:24<12:23:11, 11.58s/it] {'loss': 1.1445, 'learning_rate': 2.9949808673643477e-06, 'epoch': 0.45} + 45%|████▌ | 3195/7045 [10:23:24<12:23:11, 11.58s/it] 45%|████▌ | 3196/7045 [10:23:35<12:14:37, 11.45s/it] {'loss': 1.1338, 'learning_rate': 2.993854150254941e-06, 'epoch': 0.45} + 45%|████▌ | 3196/7045 [10:23:35<12:14:37, 11.45s/it] 45%|████▌ | 3197/7045 [10:23:47<12:20:56, 11.55s/it] {'loss': 1.0977, 'learning_rate': 2.9927273287515196e-06, 'epoch': 0.45} + 45%|████▌ | 3197/7045 [10:23:47<12:20:56, 11.55s/it] 45%|████▌ | 3198/7045 [10:24:01<13:01:34, 12.19s/it] {'loss': 1.1094, 'learning_rate': 2.9916004030922768e-06, 'epoch': 0.45} + 45%|████▌ | 3198/7045 [10:24:01<13:01:34, 12.19s/it] 45%|████▌ | 3199/7045 [10:24:12<12:47:14, 11.97s/it] {'loss': 1.1123, 'learning_rate': 2.99047337351543e-06, 'epoch': 0.45} + 45%|████▌ | 3199/7045 [10:24:12<12:47:14, 11.97s/it] 45%|████▌ | 3200/7045 [10:24:24<12:42:03, 11.89s/it] {'loss': 1.1001, 'learning_rate': 2.989346240259216e-06, 'epoch': 0.45} + 45%|████▌ | 3200/7045 [10:24:24<12:42:03, 11.89s/it] 45%|████▌ | 3201/7045 [10:24:35<12:32:02, 11.74s/it] {'loss': 1.1396, 'learning_rate': 2.9882190035618976e-06, 'epoch': 0.45} + 45%|████▌ | 3201/7045 [10:24:35<12:32:02, 11.74s/it] 45%|████▌ | 3202/7045 [10:24:46<12:17:08, 11.51s/it] {'loss': 1.0952, 'learning_rate': 2.9870916636617564e-06, 'epoch': 0.45} + 45%|████▌ | 3202/7045 [10:24:46<12:17:08, 11.51s/it] 45%|████▌ | 3203/7045 [10:24:58<12:16:10, 11.50s/it] {'loss': 1.1367, 'learning_rate': 2.9859642207970975e-06, 'epoch': 0.45} + 45%|████▌ | 3203/7045 [10:24:58<12:16:10, 11.50s/it] 45%|████▌ | 3204/7045 [10:25:10<12:33:14, 11.77s/it] {'loss': 1.0312, 'learning_rate': 2.984836675206245e-06, 'epoch': 0.45} + 45%|████▌ | 3204/7045 [10:25:10<12:33:14, 11.77s/it] 45%|████▌ | 3205/7045 [10:25:21<12:23:33, 11.62s/it] {'loss': 1.1152, 'learning_rate': 2.9837090271275476e-06, 'epoch': 0.45} + 45%|████▌ | 3205/7045 [10:25:21<12:23:33, 11.62s/it] 46%|████▌ | 3206/7045 [10:25:33<12:22:45, 11.61s/it] {'loss': 1.1318, 'learning_rate': 2.982581276799375e-06, 'epoch': 0.46} + 46%|████▌ | 3206/7045 [10:25:33<12:22:45, 11.61s/it] 46%|████▌ | 3207/7045 [10:25:44<12:15:36, 11.50s/it] {'loss': 1.1416, 'learning_rate': 2.981453424460118e-06, 'epoch': 0.46} + 46%|████▌ | 3207/7045 [10:25:44<12:15:36, 11.50s/it] 46%|████▌ | 3208/7045 [10:25:55<12:11:15, 11.43s/it] {'loss': 1.1357, 'learning_rate': 2.980325470348189e-06, 'epoch': 0.46} + 46%|████▌ | 3208/7045 [10:25:55<12:11:15, 11.43s/it] 46%|████▌ | 3209/7045 [10:26:08<12:35:38, 11.82s/it] {'loss': 1.1108, 'learning_rate': 2.979197414702022e-06, 'epoch': 0.46} + 46%|████▌ | 3209/7045 [10:26:08<12:35:38, 11.82s/it] 46%|████▌ | 3210/7045 [10:26:21<12:46:12, 11.99s/it] {'loss': 1.0967, 'learning_rate': 2.978069257760074e-06, 'epoch': 0.46} + 46%|████▌ | 3210/7045 [10:26:21<12:46:12, 11.99s/it] 46%|████▌ | 3211/7045 [10:26:34<13:11:22, 12.38s/it] {'loss': 1.0898, 'learning_rate': 2.9769409997608194e-06, 'epoch': 0.46} + 46%|████▌ | 3211/7045 [10:26:34<13:11:22, 12.38s/it] 46%|████▌ | 3212/7045 [10:26:45<12:46:41, 12.00s/it] {'loss': 1.0801, 'learning_rate': 2.9758126409427575e-06, 'epoch': 0.46} + 46%|████▌ | 3212/7045 [10:26:45<12:46:41, 12.00s/it] 46%|████▌ | 3213/7045 [10:26:56<12:34:52, 11.82s/it] {'loss': 1.0957, 'learning_rate': 2.974684181544409e-06, 'epoch': 0.46} + 46%|████▌ | 3213/7045 [10:26:56<12:34:52, 11.82s/it] 46%|████▌ | 3214/7045 [10:27:07<12:16:40, 11.54s/it] {'loss': 1.1387, 'learning_rate': 2.973555621804314e-06, 'epoch': 0.46} + 46%|████▌ | 3214/7045 [10:27:07<12:16:40, 11.54s/it] 46%|████▌ | 3215/7045 [10:27:18<12:03:33, 11.34s/it] {'loss': 1.1411, 'learning_rate': 2.9724269619610347e-06, 'epoch': 0.46} + 46%|████▌ | 3215/7045 [10:27:18<12:03:33, 11.34s/it] 46%|████▌ | 3216/7045 [10:27:30<12:09:56, 11.44s/it] {'loss': 1.1689, 'learning_rate': 2.971298202253154e-06, 'epoch': 0.46} + 46%|████▌ | 3216/7045 [10:27:30<12:09:56, 11.44s/it] 46%|████▌ | 3217/7045 [10:27:42<12:24:13, 11.66s/it] {'loss': 1.1289, 'learning_rate': 2.9701693429192767e-06, 'epoch': 0.46} + 46%|████▌ | 3217/7045 [10:27:42<12:24:13, 11.66s/it] 46%|████▌ | 3218/7045 [10:27:54<12:21:17, 11.62s/it] {'loss': 1.1143, 'learning_rate': 2.969040384198028e-06, 'epoch': 0.46} + 46%|████▌ | 3218/7045 [10:27:54<12:21:17, 11.62s/it] 46%|████▌ | 3219/7045 [10:28:05<12:12:05, 11.48s/it] {'loss': 1.0938, 'learning_rate': 2.9679113263280544e-06, 'epoch': 0.46} + 46%|████▌ | 3219/7045 [10:28:05<12:12:05, 11.48s/it] 46%|████▌ | 3220/7045 [10:28:16<12:06:54, 11.40s/it] {'loss': 1.126, 'learning_rate': 2.9667821695480237e-06, 'epoch': 0.46} + 46%|████▌ | 3220/7045 [10:28:16<12:06:54, 11.40s/it] 46%|████▌ | 3221/7045 [10:28:28<12:21:17, 11.63s/it] {'loss': 1.1006, 'learning_rate': 2.965652914096624e-06, 'epoch': 0.46} + 46%|████▌ | 3221/7045 [10:28:28<12:21:17, 11.63s/it] 46%|████▌ | 3222/7045 [10:28:40<12:26:15, 11.71s/it] {'loss': 1.1177, 'learning_rate': 2.964523560212564e-06, 'epoch': 0.46} + 46%|████▌ | 3222/7045 [10:28:40<12:26:15, 11.71s/it] 46%|████▌ | 3223/7045 [10:28:52<12:32:50, 11.82s/it] {'loss': 1.0938, 'learning_rate': 2.9633941081345733e-06, 'epoch': 0.46} + 46%|████▌ | 3223/7045 [10:28:52<12:32:50, 11.82s/it] 46%|████▌ | 3224/7045 [10:29:03<12:21:03, 11.64s/it] {'loss': 1.1016, 'learning_rate': 2.9622645581014038e-06, 'epoch': 0.46} + 46%|████▌ | 3224/7045 [10:29:03<12:21:03, 11.64s/it] 46%|████▌ | 3225/7045 [10:29:16<12:48:59, 12.08s/it] {'loss': 1.0488, 'learning_rate': 2.9611349103518267e-06, 'epoch': 0.46} + 46%|████▌ | 3225/7045 [10:29:16<12:48:59, 12.08s/it] 46%|████▌ | 3226/7045 [10:29:27<12:26:46, 11.73s/it] {'loss': 1.1133, 'learning_rate': 2.9600051651246337e-06, 'epoch': 0.46} + 46%|████▌ | 3226/7045 [10:29:27<12:26:46, 11.73s/it] 46%|████▌ | 3227/7045 [10:29:41<13:05:05, 12.34s/it] {'loss': 1.0508, 'learning_rate': 2.9588753226586374e-06, 'epoch': 0.46} + 46%|████▌ | 3227/7045 [10:29:41<13:05:05, 12.34s/it] 46%|████▌ | 3228/7045 [10:29:52<12:47:39, 12.07s/it] {'loss': 1.1216, 'learning_rate': 2.9577453831926713e-06, 'epoch': 0.46} + 46%|████▌ | 3228/7045 [10:29:52<12:47:39, 12.07s/it] 46%|████▌ | 3229/7045 [10:30:05<13:03:58, 12.33s/it] {'loss': 1.0664, 'learning_rate': 2.9566153469655894e-06, 'epoch': 0.46} + 46%|████▌ | 3229/7045 [10:30:05<13:03:58, 12.33s/it] 46%|████▌ | 3230/7045 [10:30:17<12:42:20, 11.99s/it] {'loss': 1.1416, 'learning_rate': 2.9554852142162647e-06, 'epoch': 0.46} + 46%|████▌ | 3230/7045 [10:30:17<12:42:20, 11.99s/it] 46%|████▌ | 3231/7045 [10:30:27<12:20:16, 11.65s/it] {'loss': 1.0957, 'learning_rate': 2.954354985183594e-06, 'epoch': 0.46} + 46%|████▌ | 3231/7045 [10:30:27<12:20:16, 11.65s/it] 46%|████▌ | 3232/7045 [10:30:40<12:42:51, 12.00s/it] {'loss': 1.0898, 'learning_rate': 2.9532246601064907e-06, 'epoch': 0.46} + 46%|████▌ | 3232/7045 [10:30:40<12:42:51, 12.00s/it] 46%|████▌ | 3233/7045 [10:30:51<12:25:22, 11.73s/it] {'loss': 1.1201, 'learning_rate': 2.9520942392238903e-06, 'epoch': 0.46} + 46%|████▌ | 3233/7045 [10:30:51<12:25:22, 11.73s/it] 46%|████▌ | 3234/7045 [10:31:03<12:31:59, 11.84s/it] {'loss': 1.0776, 'learning_rate': 2.9509637227747485e-06, 'epoch': 0.46} + 46%|████▌ | 3234/7045 [10:31:03<12:31:59, 11.84s/it] 46%|████▌ | 3235/7045 [10:31:15<12:28:04, 11.78s/it] {'loss': 1.126, 'learning_rate': 2.9498331109980404e-06, 'epoch': 0.46} + 46%|████▌ | 3235/7045 [10:31:15<12:28:04, 11.78s/it] 46%|████▌ | 3236/7045 [10:31:26<12:18:40, 11.64s/it] {'loss': 1.0918, 'learning_rate': 2.9487024041327627e-06, 'epoch': 0.46} + 46%|████▌ | 3236/7045 [10:31:26<12:18:40, 11.64s/it] 46%|████▌ | 3237/7045 [10:31:39<12:35:40, 11.91s/it] {'loss': 1.083, 'learning_rate': 2.947571602417931e-06, 'epoch': 0.46} + 46%|████▌ | 3237/7045 [10:31:39<12:35:40, 11.91s/it] 46%|████▌ | 3238/7045 [10:31:50<12:25:40, 11.75s/it] {'loss': 1.1309, 'learning_rate': 2.9464407060925815e-06, 'epoch': 0.46} + 46%|████▌ | 3238/7045 [10:31:50<12:25:40, 11.75s/it] 46%|████▌ | 3239/7045 [10:32:02<12:18:38, 11.64s/it] {'loss': 1.1338, 'learning_rate': 2.9453097153957704e-06, 'epoch': 0.46} + 46%|████▌ | 3239/7045 [10:32:02<12:18:38, 11.64s/it] 46%|████▌ | 3240/7045 [10:32:13<12:09:05, 11.50s/it] {'loss': 1.1514, 'learning_rate': 2.944178630566574e-06, 'epoch': 0.46} + 46%|████▌ | 3240/7045 [10:32:13<12:09:05, 11.50s/it] 46%|████▌ | 3241/7045 [10:32:24<12:09:17, 11.50s/it] {'loss': 1.0913, 'learning_rate': 2.943047451844086e-06, 'epoch': 0.46} + 46%|████▌ | 3241/7045 [10:32:24<12:09:17, 11.50s/it] 46%|████▌ | 3242/7045 [10:32:36<12:06:13, 11.46s/it] {'loss': 1.0967, 'learning_rate': 2.9419161794674238e-06, 'epoch': 0.46} + 46%|████▌ | 3242/7045 [10:32:36<12:06:13, 11.46s/it] 46%|████▌ | 3243/7045 [10:32:49<12:43:51, 12.05s/it] {'loss': 1.0552, 'learning_rate': 2.940784813675724e-06, 'epoch': 0.46} + 46%|████▌ | 3243/7045 [10:32:49<12:43:51, 12.05s/it] 46%|████▌ | 3244/7045 [10:33:00<12:24:19, 11.75s/it] {'loss': 1.1211, 'learning_rate': 2.9396533547081394e-06, 'epoch': 0.46} + 46%|████▌ | 3244/7045 [10:33:00<12:24:19, 11.75s/it] 46%|████▌ | 3245/7045 [10:33:12<12:17:37, 11.65s/it] {'loss': 1.1152, 'learning_rate': 2.938521802803847e-06, 'epoch': 0.46} + 46%|████▌ | 3245/7045 [10:33:12<12:17:37, 11.65s/it] 46%|████▌ | 3246/7045 [10:33:24<12:35:45, 11.94s/it] {'loss': 1.1562, 'learning_rate': 2.9373901582020403e-06, 'epoch': 0.46} + 46%|████▌ | 3246/7045 [10:33:24<12:35:45, 11.94s/it] 46%|████▌ | 3247/7045 [10:33:35<12:13:10, 11.58s/it] {'loss': 1.085, 'learning_rate': 2.9362584211419348e-06, 'epoch': 0.46} + 46%|████▌ | 3247/7045 [10:33:35<12:13:10, 11.58s/it] 46%|████▌ | 3248/7045 [10:33:48<12:30:38, 11.86s/it] {'loss': 1.165, 'learning_rate': 2.9351265918627616e-06, 'epoch': 0.46} + 46%|████▌ | 3248/7045 [10:33:48<12:30:38, 11.86s/it] 46%|████▌ | 3249/7045 [10:33:59<12:26:19, 11.80s/it] {'loss': 1.1099, 'learning_rate': 2.9339946706037762e-06, 'epoch': 0.46} + 46%|████▌ | 3249/7045 [10:33:59<12:26:19, 11.80s/it] 46%|████▌ | 3250/7045 [10:34:10<12:10:30, 11.55s/it] {'loss': 1.0996, 'learning_rate': 2.9328626576042507e-06, 'epoch': 0.46} + 46%|████▌ | 3250/7045 [10:34:10<12:10:30, 11.55s/it] 46%|████▌ | 3251/7045 [10:34:21<12:02:44, 11.43s/it] {'loss': 1.1406, 'learning_rate': 2.9317305531034778e-06, 'epoch': 0.46} + 46%|████▌ | 3251/7045 [10:34:21<12:02:44, 11.43s/it] 46%|████▌ | 3252/7045 [10:34:33<11:59:31, 11.38s/it] {'loss': 1.1348, 'learning_rate': 2.930598357340768e-06, 'epoch': 0.46} + 46%|████▌ | 3252/7045 [10:34:33<11:59:31, 11.38s/it] 46%|████▌ | 3253/7045 [10:34:44<12:08:13, 11.52s/it] {'loss': 1.1372, 'learning_rate': 2.9294660705554516e-06, 'epoch': 0.46} + 46%|████▌ | 3253/7045 [10:34:44<12:08:13, 11.52s/it] 46%|████▌ | 3254/7045 [10:34:56<12:03:15, 11.45s/it] {'loss': 1.1172, 'learning_rate': 2.92833369298688e-06, 'epoch': 0.46} + 46%|████▌ | 3254/7045 [10:34:56<12:03:15, 11.45s/it] 46%|████▌ | 3255/7045 [10:35:07<12:02:18, 11.43s/it] {'loss': 1.1328, 'learning_rate': 2.927201224874421e-06, 'epoch': 0.46} + 46%|████▌ | 3255/7045 [10:35:07<12:02:18, 11.43s/it] 46%|████▌ | 3256/7045 [10:35:21<12:41:53, 12.06s/it] {'loss': 1.1138, 'learning_rate': 2.9260686664574633e-06, 'epoch': 0.46} + 46%|████▌ | 3256/7045 [10:35:21<12:41:53, 12.06s/it] 46%|████▌ | 3257/7045 [10:35:33<12:48:53, 12.18s/it] {'loss': 1.1621, 'learning_rate': 2.924936017975415e-06, 'epoch': 0.46} + 46%|████▌ | 3257/7045 [10:35:33<12:48:53, 12.18s/it] 46%|████▌ | 3258/7045 [10:35:44<12:30:32, 11.89s/it] {'loss': 1.0654, 'learning_rate': 2.9238032796677017e-06, 'epoch': 0.46} + 46%|████▌ | 3258/7045 [10:35:44<12:30:32, 11.89s/it] 46%|████▋ | 3259/7045 [10:35:56<12:17:52, 11.69s/it] {'loss': 1.1494, 'learning_rate': 2.9226704517737685e-06, 'epoch': 0.46} + 46%|██��█▋ | 3259/7045 [10:35:56<12:17:52, 11.69s/it] 46%|████▋ | 3260/7045 [10:36:09<12:51:54, 12.24s/it] {'loss': 1.0981, 'learning_rate': 2.921537534533081e-06, 'epoch': 0.46} + 46%|████▋ | 3260/7045 [10:36:09<12:51:54, 12.24s/it] 46%|████▋ | 3261/7045 [10:36:20<12:31:12, 11.91s/it] {'loss': 1.1113, 'learning_rate': 2.9204045281851203e-06, 'epoch': 0.46} + 46%|████▋ | 3261/7045 [10:36:20<12:31:12, 11.91s/it] 46%|████▋ | 3262/7045 [10:36:37<14:12:13, 13.52s/it] {'loss': 1.1328, 'learning_rate': 2.919271432969391e-06, 'epoch': 0.46} + 46%|████▋ | 3262/7045 [10:36:37<14:12:13, 13.52s/it] 46%|████▋ | 3263/7045 [10:36:49<13:29:38, 12.84s/it] {'loss': 1.1514, 'learning_rate': 2.9181382491254123e-06, 'epoch': 0.46} + 46%|████▋ | 3263/7045 [10:36:49<13:29:38, 12.84s/it] 46%|████▋ | 3264/7045 [10:37:00<13:03:03, 12.43s/it] {'loss': 1.1348, 'learning_rate': 2.9170049768927245e-06, 'epoch': 0.46} + 46%|████▋ | 3264/7045 [10:37:00<13:03:03, 12.43s/it] 46%|████▋ | 3265/7045 [10:37:13<13:11:43, 12.57s/it] {'loss': 1.0986, 'learning_rate': 2.9158716165108852e-06, 'epoch': 0.46} + 46%|████▋ | 3265/7045 [10:37:13<13:11:43, 12.57s/it] 46%|████▋ | 3266/7045 [10:37:24<12:41:48, 12.10s/it] {'loss': 1.1406, 'learning_rate': 2.914738168219472e-06, 'epoch': 0.46} + 46%|████▋ | 3266/7045 [10:37:24<12:41:48, 12.10s/it] 46%|████▋ | 3267/7045 [10:37:35<12:22:02, 11.78s/it] {'loss': 1.1162, 'learning_rate': 2.9136046322580793e-06, 'epoch': 0.46} + 46%|████▋ | 3267/7045 [10:37:35<12:22:02, 11.78s/it] 46%|████▋ | 3268/7045 [10:37:46<12:10:09, 11.60s/it] {'loss': 1.1328, 'learning_rate': 2.9124710088663224e-06, 'epoch': 0.46} + 46%|████▋ | 3268/7045 [10:37:46<12:10:09, 11.60s/it] 46%|████▋ | 3269/7045 [10:37:58<12:17:27, 11.72s/it] {'loss': 1.1113, 'learning_rate': 2.9113372982838335e-06, 'epoch': 0.46} + 46%|████▋ | 3269/7045 [10:37:58<12:17:27, 11.72s/it] 46%|████▋ | 3270/7045 [10:38:10<12:16:42, 11.71s/it] {'loss': 1.1514, 'learning_rate': 2.9102035007502625e-06, 'epoch': 0.46} + 46%|████▋ | 3270/7045 [10:38:10<12:16:42, 11.71s/it] 46%|████▋ | 3271/7045 [10:38:21<12:06:29, 11.55s/it] {'loss': 1.1621, 'learning_rate': 2.9090696165052794e-06, 'epoch': 0.46} + 46%|████▋ | 3271/7045 [10:38:21<12:06:29, 11.55s/it] 46%|████▋ | 3272/7045 [10:38:32<11:56:15, 11.39s/it] {'loss': 1.1094, 'learning_rate': 2.907935645788572e-06, 'epoch': 0.46} + 46%|████▋ | 3272/7045 [10:38:32<11:56:15, 11.39s/it] 46%|████▋ | 3273/7045 [10:38:43<11:48:57, 11.28s/it] {'loss': 1.168, 'learning_rate': 2.9068015888398464e-06, 'epoch': 0.46} + 46%|████▋ | 3273/7045 [10:38:43<11:48:57, 11.28s/it] 46%|████▋ | 3274/7045 [10:38:55<11:53:50, 11.36s/it] {'loss': 1.1309, 'learning_rate': 2.9056674458988264e-06, 'epoch': 0.46} + 46%|████▋ | 3274/7045 [10:38:55<11:53:50, 11.36s/it] 46%|████▋ | 3275/7045 [10:39:06<11:46:15, 11.24s/it] {'loss': 1.1143, 'learning_rate': 2.904533217205254e-06, 'epoch': 0.46} + 46%|████▋ | 3275/7045 [10:39:06<11:46:15, 11.24s/it] 47%|████▋ | 3276/7045 [10:39:17<11:43:04, 11.19s/it] {'loss': 1.145, 'learning_rate': 2.9033989029988895e-06, 'epoch': 0.47} + 47%|████▋ | 3276/7045 [10:39:17<11:43:04, 11.19s/it] 47%|████▋ | 3277/7045 [10:39:28<11:44:02, 11.21s/it] {'loss': 1.0908, 'learning_rate': 2.9022645035195135e-06, 'epoch': 0.47} + 47%|████▋ | 3277/7045 [10:39:28<11:44:02, 11.21s/it] 47%|████▋ | 3278/7045 [10:39:41<12:21:41, 11.81s/it] {'loss': 1.1333, 'learning_rate': 2.9011300190069196e-06, 'epoch': 0.47} + 47%|████▋ | 3278/7045 [10:39:41<12:21:41, 11.81s/it] 47%|████▋ | 3279/7045 [10:39:52<12:05:28, 11.56s/it] {'loss': 1.126, 'learning_rate': 2.8999954497009232e-06, 'epoch': 0.47} + 47%|████▋ | 3279/7045 [10:39:52<12:05:28, 11.56s/it] 47%|████▋ | 3280/7045 [10:40:03<11:59:10, 11.46s/it] {'loss': 1.1533, 'learning_rate': 2.8988607958413583e-06, 'epoch': 0.47} + 47%|████▋ | 3280/7045 [10:40:03<11:59:10, 11.46s/it] 47%|████▋ | 3281/7045 [10:40:17<12:31:06, 11.97s/it] {'loss': 1.0942, 'learning_rate': 2.8977260576680733e-06, 'epoch': 0.47} + 47%|████▋ | 3281/7045 [10:40:17<12:31:06, 11.97s/it] 47%|████▋ | 3282/7045 [10:40:28<12:24:24, 11.87s/it] {'loss': 1.1084, 'learning_rate': 2.8965912354209373e-06, 'epoch': 0.47} + 47%|████▋ | 3282/7045 [10:40:28<12:24:24, 11.87s/it] 47%|████▋ | 3283/7045 [10:40:41<12:34:02, 12.03s/it] {'loss': 1.0708, 'learning_rate': 2.8954563293398363e-06, 'epoch': 0.47} + 47%|████▋ | 3283/7045 [10:40:41<12:34:02, 12.03s/it] 47%|████▋ | 3284/7045 [10:40:52<12:20:00, 11.81s/it] {'loss': 1.124, 'learning_rate': 2.894321339664673e-06, 'epoch': 0.47} + 47%|████▋ | 3284/7045 [10:40:52<12:20:00, 11.81s/it] 47%|████▋ | 3285/7045 [10:41:04<12:33:43, 12.03s/it] {'loss': 1.0791, 'learning_rate': 2.89318626663537e-06, 'epoch': 0.47} + 47%|████▋ | 3285/7045 [10:41:04<12:33:43, 12.03s/it] 47%|████▋ | 3286/7045 [10:41:15<12:12:48, 11.70s/it] {'loss': 1.1167, 'learning_rate': 2.8920511104918654e-06, 'epoch': 0.47} + 47%|████▋ | 3286/7045 [10:41:15<12:12:48, 11.70s/it] 47%|████▋ | 3287/7045 [10:41:27<12:04:45, 11.57s/it] {'loss': 1.1289, 'learning_rate': 2.8909158714741155e-06, 'epoch': 0.47} + 47%|████▋ | 3287/7045 [10:41:27<12:04:45, 11.57s/it] 47%|████▋ | 3288/7045 [10:41:38<11:58:24, 11.47s/it] {'loss': 1.1592, 'learning_rate': 2.8897805498220954e-06, 'epoch': 0.47} + 47%|████▋ | 3288/7045 [10:41:38<11:58:24, 11.47s/it] 47%|████▋ | 3289/7045 [10:41:49<11:56:40, 11.45s/it] {'loss': 1.1348, 'learning_rate': 2.8886451457757953e-06, 'epoch': 0.47} + 47%|████▋ | 3289/7045 [10:41:49<11:56:40, 11.45s/it] 47%|████▋ | 3290/7045 [10:42:02<12:15:23, 11.75s/it] {'loss': 1.1172, 'learning_rate': 2.887509659575225e-06, 'epoch': 0.47} + 47%|████▋ | 3290/7045 [10:42:02<12:15:23, 11.75s/it] 47%|████▋ | 3291/7045 [10:42:13<12:06:04, 11.60s/it] {'loss': 1.0957, 'learning_rate': 2.88637409146041e-06, 'epoch': 0.47} + 47%|████▋ | 3291/7045 [10:42:13<12:06:04, 11.60s/it] 47%|████▋ | 3292/7045 [10:42:24<11:59:48, 11.51s/it] {'loss': 1.1367, 'learning_rate': 2.8852384416713942e-06, 'epoch': 0.47} + 47%|████▋ | 3292/7045 [10:42:24<11:59:48, 11.51s/it] 47%|████▋ | 3293/7045 [10:42:36<11:55:57, 11.45s/it] {'loss': 1.1299, 'learning_rate': 2.8841027104482388e-06, 'epoch': 0.47} + 47%|████▋ | 3293/7045 [10:42:36<11:55:57, 11.45s/it] 47%|████▋ | 3294/7045 [10:42:47<11:49:36, 11.35s/it] {'loss': 1.1143, 'learning_rate': 2.882966898031021e-06, 'epoch': 0.47} + 47%|████▋ | 3294/7045 [10:42:47<11:49:36, 11.35s/it] 47%|████▋ | 3295/7045 [10:42:58<11:51:45, 11.39s/it] {'loss': 1.0835, 'learning_rate': 2.881831004659837e-06, 'epoch': 0.47} + 47%|████▋ | 3295/7045 [10:42:58<11:51:45, 11.39s/it] 47%|████▋ | 3296/7045 [10:43:10<11:55:17, 11.45s/it] {'loss': 1.084, 'learning_rate': 2.8806950305747993e-06, 'epoch': 0.47} + 47%|████▋ | 3296/7045 [10:43:10<11:55:17, 11.45s/it] 47%|████▋ | 3297/7045 [10:43:21<11:45:29, 11.29s/it] {'loss': 1.1055, 'learning_rate': 2.879558976016036e-06, 'epoch': 0.47} + 47%|████▋ | 3297/7045 [10:43:21<11:45:29, 11.29s/it] 47%|████▋ | 3298/7045 [10:43:32<11:36:25, 11.15s/it] {'loss': 1.1084, 'learning_rate': 2.8784228412236938e-06, 'epoch': 0.47} + 47%|████▋ | 3298/7045 [10:43:32<11:36:25, 11.15s/it] 47%|████▋ | 3299/7045 [10:43:43<11:32:55, 11.10s/it] {'loss': 1.1484, 'learning_rate': 2.877286626437937e-06, 'epoch': 0.47} + 47%|████▋ | 3299/7045 [10:43:43<11:32:55, 11.10s/it] 47%|████▋ | 3300/7045 [10:43:56<12:15:20, 11.78s/it] {'loss': 1.0928, 'learning_rate': 2.876150331898946e-06, 'epoch': 0.47} + 47%|████▋ | 3300/7045 [10:43:56<12:15:20, 11.78s/it] 47%|████▋ | 3301/7045 [10:44:07<12:07:09, 11.65s/it] {'loss': 1.1279, 'learning_rate': 2.875013957846916e-06, 'epoch': 0.47} + 47%|████▋ | 3301/7045 [10:44:07<12:07:09, 11.65s/it] 47%|████▋ | 3302/7045 [10:44:20<12:26:31, 11.97s/it] {'loss': 1.1318, 'learning_rate': 2.873877504522063e-06, 'epoch': 0.47} + 47%|████▋ | 3302/7045 [10:44:20<12:26:31, 11.97s/it] 47%|████▋ | 3303/7045 [10:44:33<12:52:30, 12.39s/it] {'loss': 1.0801, 'learning_rate': 2.8727409721646166e-06, 'epoch': 0.47} + 47%|████▋ | 3303/7045 [10:44:33<12:52:30, 12.39s/it] 47%|████▋ | 3304/7045 [10:44:44<12:24:35, 11.94s/it] {'loss': 1.1108, 'learning_rate': 2.8716043610148246e-06, 'epoch': 0.47} + 47%|████▋ | 3304/7045 [10:44:44<12:24:35, 11.94s/it] 47%|████▋ | 3305/7045 [10:44:56<12:27:05, 11.99s/it] {'loss': 1.1152, 'learning_rate': 2.8704676713129507e-06, 'epoch': 0.47} + 47%|████▋ | 3305/7045 [10:44:56<12:27:05, 11.99s/it] 47%|████▋ | 3306/7045 [10:45:08<12:16:14, 11.81s/it] {'loss': 1.0781, 'learning_rate': 2.869330903299277e-06, 'epoch': 0.47} + 47%|████▋ | 3306/7045 [10:45:08<12:16:14, 11.81s/it] 47%|████▋ | 3307/7045 [10:45:19<12:02:31, 11.60s/it] {'loss': 1.1074, 'learning_rate': 2.868194057214099e-06, 'epoch': 0.47} + 47%|████▋ | 3307/7045 [10:45:19<12:02:31, 11.60s/it] 47%|████▋ | 3308/7045 [10:45:30<11:51:23, 11.42s/it] {'loss': 1.126, 'learning_rate': 2.8670571332977303e-06, 'epoch': 0.47} + 47%|████▋ | 3308/7045 [10:45:30<11:51:23, 11.42s/it] 47%|████▋ | 3309/7045 [10:45:41<11:47:13, 11.36s/it] {'loss': 1.1455, 'learning_rate': 2.865920131790501e-06, 'epoch': 0.47} + 47%|████▋ | 3309/7045 [10:45:41<11:47:13, 11.36s/it] 47%|████▋ | 3310/7045 [10:45:53<11:56:56, 11.52s/it] {'loss': 1.0723, 'learning_rate': 2.8647830529327603e-06, 'epoch': 0.47} + 47%|████▋ | 3310/7045 [10:45:53<11:56:56, 11.52s/it] 47%|████▋ | 3311/7045 [10:46:06<12:17:22, 11.85s/it] {'loss': 1.1113, 'learning_rate': 2.8636458969648684e-06, 'epoch': 0.47} + 47%|████▋ | 3311/7045 [10:46:06<12:17:22, 11.85s/it] 47%|████▋ | 3312/7045 [10:46:17<12:05:34, 11.66s/it] {'loss': 1.168, 'learning_rate': 2.8625086641272047e-06, 'epoch': 0.47} + 47%|████▋ | 3312/7045 [10:46:17<12:05:34, 11.66s/it] 47%|████▋ | 3313/7045 [10:46:28<11:57:01, 11.53s/it] {'loss': 1.1104, 'learning_rate': 2.8613713546601664e-06, 'epoch': 0.47} + 47%|████▋ | 3313/7045 [10:46:28<11:57:01, 11.53s/it] 47%|████▋ | 3314/7045 [10:46:39<11:54:38, 11.49s/it] {'loss': 1.124, 'learning_rate': 2.8602339688041643e-06, 'epoch': 0.47} + 47%|████▋ | 3314/7045 [10:46:39<11:54:38, 11.49s/it] 47%|████▋ | 3315/7045 [10:46:51<11:51:19, 11.44s/it] {'loss': 1.1299, 'learning_rate': 2.8590965067996246e-06, 'epoch': 0.47} + 47%|████▋ | 3315/7045 [10:46:51<11:51:19, 11.44s/it] 47%|████▋ | 3316/7045 [10:47:02<11:39:59, 11.26s/it] {'loss': 1.1035, 'learning_rate': 2.8579589688869937e-06, 'epoch': 0.47} + 47%|████▋ | 3316/7045 [10:47:02<11:39:59, 11.26s/it] 47%|████▋ | 3317/7045 [10:47:13<11:39:45, 11.26s/it] {'loss': 1.1465, 'learning_rate': 2.8568213553067305e-06, 'epoch': 0.47} + 47%|████▋ | 3317/7045 [10:47:13<11:39:45, 11.26s/it] 47%|████▋ | 3318/7045 [10:47:24<11:35:42, 11.20s/it] {'loss': 1.127, 'learning_rate': 2.855683666299311e-06, 'epoch': 0.47} + 47%|████▋ | 3318/7045 [10:47:24<11:35:42, 11.20s/it] 47%|████▋ | 3319/7045 [10:47:37<12:07:34, 11.72s/it] {'loss': 1.1182, 'learning_rate': 2.8545459021052267e-06, 'epoch': 0.47} + 47%|████▋ | 3319/7045 [10:47:37<12:07:34, 11.72s/it] 47%|████▋ | 3320/7045 [10:47:48<12:00:17, 11.60s/it] {'loss': 1.123, 'learning_rate': 2.8534080629649856e-06, 'epoch': 0.47} + 47%|████▋ | 3320/7045 [10:47:48<12:00:17, 11.60s/it] 47%|████▋ | 3321/7045 [10:48:00<11:57:12, 11.56s/it] {'loss': 1.1299, 'learning_rate': 2.8522701491191117e-06, 'epoch': 0.47} + 47%|████▋ | 3321/7045 [10:48:00<11:57:12, 11.56s/it] 47%|████▋ | 3322/7045 [10:48:11<11:57:50, 11.57s/it] {'loss': 1.1074, 'learning_rate': 2.8511321608081443e-06, 'epoch': 0.47} + 47%|████▋ | 3322/7045 [10:48:11<11:57:50, 11.57s/it] 47%|████▋ | 3323/7045 [10:48:23<12:00:39, 11.62s/it] {'loss': 1.1416, 'learning_rate': 2.8499940982726385e-06, 'epoch': 0.47} + 47%|████▋ | 3323/7045 [10:48:23<12:00:39, 11.62s/it] 47%|████▋ | 3324/7045 [10:48:34<11:53:16, 11.50s/it] {'loss': 1.1504, 'learning_rate': 2.8488559617531654e-06, 'epoch': 0.47} + 47%|████▋ | 3324/7045 [10:48:34<11:53:16, 11.50s/it] 47%|████▋ | 3325/7045 [10:48:46<11:59:13, 11.60s/it] {'loss': 1.1016, 'learning_rate': 2.8477177514903114e-06, 'epoch': 0.47} + 47%|████▋ | 3325/7045 [10:48:46<11:59:13, 11.60s/it] 47%|████▋ | 3326/7045 [10:48:57<11:49:55, 11.45s/it] {'loss': 1.1455, 'learning_rate': 2.8465794677246783e-06, 'epoch': 0.47} + 47%|████▋ | 3326/7045 [10:48:57<11:49:55, 11.45s/it] 47%|████▋ | 3327/7045 [10:49:08<11:48:40, 11.44s/it] {'loss': 1.125, 'learning_rate': 2.845441110696884e-06, 'epoch': 0.47} + 47%|████▋ | 3327/7045 [10:49:08<11:48:40, 11.44s/it] 47%|████▋ | 3328/7045 [10:49:21<12:02:32, 11.66s/it] {'loss': 1.0698, 'learning_rate': 2.8443026806475605e-06, 'epoch': 0.47} + 47%|████▋ | 3328/7045 [10:49:21<12:02:32, 11.66s/it] 47%|████▋ | 3329/7045 [10:49:32<11:53:10, 11.52s/it] {'loss': 1.1162, 'learning_rate': 2.843164177817358e-06, 'epoch': 0.47} + 47%|████▋ | 3329/7045 [10:49:32<11:53:10, 11.52s/it] 47%|████▋ | 3330/7045 [10:49:43<11:42:15, 11.34s/it] {'loss': 1.0986, 'learning_rate': 2.84202560244694e-06, 'epoch': 0.47} + 47%|████▋ | 3330/7045 [10:49:43<11:42:15, 11.34s/it] 47%|████▋ | 3331/7045 [10:49:54<11:38:43, 11.29s/it] {'loss': 1.1348, 'learning_rate': 2.840886954776985e-06, 'epoch': 0.47} + 47%|████▋ | 3331/7045 [10:49:54<11:38:43, 11.29s/it] 47%|████▋ | 3332/7045 [10:50:05<11:38:40, 11.29s/it] {'loss': 1.1621, 'learning_rate': 2.839748235048189e-06, 'epoch': 0.47} + 47%|████▋ | 3332/7045 [10:50:05<11:38:40, 11.29s/it] 47%|████▋ | 3333/7045 [10:50:16<11:36:22, 11.26s/it] {'loss': 1.1367, 'learning_rate': 2.838609443501261e-06, 'epoch': 0.47} + 47%|████▋ | 3333/7045 [10:50:16<11:36:22, 11.26s/it] 47%|████▋ | 3334/7045 [10:50:29<12:09:39, 11.80s/it] {'loss': 1.082, 'learning_rate': 2.837470580376924e-06, 'epoch': 0.47} + 47%|████▋ | 3334/7045 [10:50:29<12:09:39, 11.80s/it] 47%|████▋ | 3335/7045 [10:50:41<12:01:54, 11.67s/it] {'loss': 1.1074, 'learning_rate': 2.836331645915921e-06, 'epoch': 0.47} + 47%|████▋ | 3335/7045 [10:50:41<12:01:54, 11.67s/it] 47%|████▋ | 3336/7045 [10:50:52<11:50:20, 11.49s/it] {'loss': 1.1328, 'learning_rate': 2.835192640359007e-06, 'epoch': 0.47} + 47%|████▋ | 3336/7045 [10:50:52<11:50:20, 11.49s/it] 47%|████▋ | 3337/7045 [10:51:04<12:05:27, 11.74s/it] {'loss': 1.0947, 'learning_rate': 2.8340535639469505e-06, 'epoch': 0.47} + 47%|████▋ | 3337/7045 [10:51:04<12:05:27, 11.74s/it] 47%|████▋ | 3338/7045 [10:51:17<12:22:46, 12.02s/it] {'loss': 1.1143, 'learning_rate': 2.8329144169205374e-06, 'epoch': 0.47} + 47%|████▋ | 3338/7045 [10:51:17<12:22:46, 12.02s/it] 47%|████▋ | 3339/7045 [10:51:28<11:59:07, 11.64s/it] {'loss': 1.1465, 'learning_rate': 2.8317751995205683e-06, 'epoch': 0.47} + 47%|████▋ | 3339/7045 [10:51:28<11:59:07, 11.64s/it] 47%|████▋ | 3340/7045 [10:51:39<11:53:43, 11.56s/it] {'loss': 1.0957, 'learning_rate': 2.8306359119878572e-06, 'epoch': 0.47} + 47%|████▋ | 3340/7045 [10:51:39<11:53:43, 11.56s/it] 47%|████▋ | 3341/7045 [10:51:52<12:28:17, 12.12s/it] {'loss': 1.0825, 'learning_rate': 2.829496554563235e-06, 'epoch': 0.47} + 47%|████▋ | 3341/7045 [10:51:52<12:28:17, 12.12s/it] 47%|████▋ | 3342/7045 [10:52:04<12:08:32, 11.80s/it] {'loss': 1.0918, 'learning_rate': 2.828357127487546e-06, 'epoch': 0.47} + 47%|████▋ | 3342/7045 [10:52:04<12:08:32, 11.80s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Truncated File Read + warnings.warn(str(msg)) + 47%|████▋ | 3343/7045 [10:52:17<12:32:29, 12.20s/it] {'loss': 1.0962, 'learning_rate': 2.8272176310016487e-06, 'epoch': 0.47} + 47%|████▋ | 3343/7045 [10:52:17<12:32:29, 12.20s/it] 47%|████▋ | 3344/7045 [10:52:28<12:24:20, 12.07s/it] {'loss': 1.083, 'learning_rate': 2.8260780653464186e-06, 'epoch': 0.47} + 47%|████▋ | 3344/7045 [10:52:28<12:24:20, 12.07s/it] 47%|████▋ | 3345/7045 [10:52:40<12:11:28, 11.86s/it] {'loss': 1.1396, 'learning_rate': 2.8249384307627427e-06, 'epoch': 0.47} + 47%|████▋ | 3345/7045 [10:52:40<12:11:28, 11.86s/it] 47%|████▋ | 3346/7045 [10:52:51<11:56:27, 11.62s/it] {'loss': 1.1309, 'learning_rate': 2.823798727491524e-06, 'epoch': 0.47} + 47%|████▋ | 3346/7045 [10:52:51<11:56:27, 11.62s/it] 48%|████▊ | 3347/7045 [10:53:04<12:22:35, 12.05s/it] {'loss': 1.1133, 'learning_rate': 2.8226589557736824e-06, 'epoch': 0.48} + 48%|████▊ | 3347/7045 [10:53:04<12:22:35, 12.05s/it] 48%|████▊ | 3348/7045 [10:53:16<12:14:31, 11.92s/it] {'loss': 1.0938, 'learning_rate': 2.8215191158501483e-06, 'epoch': 0.48} + 48%|████▊ | 3348/7045 [10:53:16<12:14:31, 11.92s/it] 48%|████▊ | 3349/7045 [10:53:28<12:26:03, 12.11s/it] {'loss': 1.0991, 'learning_rate': 2.820379207961868e-06, 'epoch': 0.48} + 48%|████▊ | 3349/7045 [10:53:28<12:26:03, 12.11s/it] 48%|████▊ | 3350/7045 [10:53:39<12:04:07, 11.76s/it] {'loss': 1.1211, 'learning_rate': 2.819239232349804e-06, 'epoch': 0.48} + 48%|████▊ | 3350/7045 [10:53:39<12:04:07, 11.76s/it] 48%|████▊ | 3351/7045 [10:53:52<12:35:21, 12.27s/it] {'loss': 1.0498, 'learning_rate': 2.8180991892549303e-06, 'epoch': 0.48} + 48%|████▊ | 3351/7045 [10:53:52<12:35:21, 12.27s/it] 48%|████▊ | 3352/7045 [10:54:04<12:15:23, 11.95s/it] {'loss': 1.0977, 'learning_rate': 2.8169590789182365e-06, 'epoch': 0.48} + 48%|████▊ | 3352/7045 [10:54:04<12:15:23, 11.95s/it] 48%|████▊ | 3353/7045 [10:54:15<12:04:22, 11.77s/it] {'loss': 1.167, 'learning_rate': 2.8158189015807267e-06, 'epoch': 0.48} + 48%|████▊ | 3353/7045 [10:54:15<12:04:22, 11.77s/it] 48%|████▊ | 3354/7045 [10:54:27<12:09:40, 11.86s/it] {'loss': 1.1299, 'learning_rate': 2.814678657483419e-06, 'epoch': 0.48} + 48%|████▊ | 3354/7045 [10:54:27<12:09:40, 11.86s/it] 48%|████▊ | 3355/7045 [10:54:38<11:52:38, 11.59s/it] {'loss': 1.0713, 'learning_rate': 2.8135383468673454e-06, 'epoch': 0.48} + 48%|████▊ | 3355/7045 [10:54:38<11:52:38, 11.59s/it] 48%|████▊ | 3356/7045 [10:54:50<11:57:32, 11.67s/it] {'loss': 1.0938, 'learning_rate': 2.8123979699735514e-06, 'epoch': 0.48} + 48%|████▊ | 3356/7045 [10:54:50<11:57:32, 11.67s/it] 48%|████▊ | 3357/7045 [10:55:01<11:48:51, 11.53s/it] {'loss': 1.167, 'learning_rate': 2.8112575270430973e-06, 'epoch': 0.48} + 48%|████▊ | 3357/7045 [10:55:01<11:48:51, 11.53s/it] 48%|████▊ | 3358/7045 [10:55:13<11:59:43, 11.71s/it] {'loss': 1.082, 'learning_rate': 2.8101170183170577e-06, 'epoch': 0.48} + 48%|████▊ | 3358/7045 [10:55:13<11:59:43, 11.71s/it] 48%|████▊ | 3359/7045 [10:55:24<11:46:23, 11.50s/it] {'loss': 1.0947, 'learning_rate': 2.8089764440365196e-06, 'epoch': 0.48} + 48%|████▊ | 3359/7045 [10:55:24<11:46:23, 11.50s/it] 48%|████▊ | 3360/7045 [10:55:37<12:03:43, 11.78s/it] {'loss': 1.1465, 'learning_rate': 2.8078358044425856e-06, 'epoch': 0.48} + 48%|████▊ | 3360/7045 [10:55:37<12:03:43, 11.78s/it] 48%|████▊ | 3361/7045 [10:55:48<11:57:19, 11.68s/it] {'loss': 1.0713, 'learning_rate': 2.8066950997763708e-06, 'epoch': 0.48} + 48%|████▊ | 3361/7045 [10:55:48<11:57:19, 11.68s/it] 48%|████▊ | 3362/7045 [10:56:01<12:15:31, 11.98s/it] {'loss': 1.0781, 'learning_rate': 2.8055543302790055e-06, 'epoch': 0.48} + 48%|████▊ | 3362/7045 [10:56:01<12:15:31, 11.98s/it] 48%|████▊ | 3363/7045 [10:56:12<11:59:12, 11.72s/it] {'loss': 1.1025, 'learning_rate': 2.8044134961916315e-06, 'epoch': 0.48} + 48%|████▊ | 3363/7045 [10:56:12<11:59:12, 11.72s/it] 48%|████▊ | 3364/7045 [10:56:23<11:47:41, 11.54s/it] {'loss': 1.2051, 'learning_rate': 2.8032725977554055e-06, 'epoch': 0.48} + 48%|████▊ | 3364/7045 [10:56:23<11:47:41, 11.54s/it] 48%|████▊ | 3365/7045 [10:56:36<12:13:05, 11.95s/it] {'loss': 1.125, 'learning_rate': 2.8021316352114985e-06, 'epoch': 0.48} + 48%|████▊ | 3365/7045 [10:56:36<12:13:05, 11.95s/it] 48%|████▊ | 3366/7045 [10:56:47<11:55:08, 11.66s/it] {'loss': 1.1357, 'learning_rate': 2.8009906088010956e-06, 'epoch': 0.48} + 48%|████▊ | 3366/7045 [10:56:47<11:55:08, 11.66s/it] 48%|████▊ | 3367/7045 [10:56:58<11:44:25, 11.49s/it] {'loss': 1.1152, 'learning_rate': 2.7998495187653922e-06, 'epoch': 0.48} + 48%|████▊ | 3367/7045 [10:56:58<11:44:25, 11.49s/it] 48%|████▊ | 3368/7045 [10:57:10<11:45:50, 11.52s/it] {'loss': 1.1211, 'learning_rate': 2.7987083653455998e-06, 'epoch': 0.48} + 48%|████▊ | 3368/7045 [10:57:10<11:45:50, 11.52s/it] 48%|████▊ | 3369/7045 [10:57:22<12:03:33, 11.81s/it] {'loss': 1.0669, 'learning_rate': 2.7975671487829426e-06, 'epoch': 0.48} + 48%|████▊ | 3369/7045 [10:57:22<12:03:33, 11.81s/it] 48%|████▊ | 3370/7045 [10:57:34<12:05:46, 11.85s/it] {'loss': 1.0879, 'learning_rate': 2.796425869318658e-06, 'epoch': 0.48} + 48%|████▊ | 3370/7045 [10:57:34<12:05:46, 11.85s/it] 48%|████▊ | 3371/7045 [10:57:45<11:54:11, 11.66s/it] {'loss': 1.1055, 'learning_rate': 2.7952845271939977e-06, 'epoch': 0.48} + 48%|████▊ | 3371/7045 [10:57:45<11:54:11, 11.66s/it] 48%|████▊ | 3372/7045 [10:57:58<12:05:55, 11.86s/it] {'loss': 1.1084, 'learning_rate': 2.794143122650225e-06, 'epoch': 0.48} + 48%|████▊ | 3372/7045 [10:57:58<12:05:55, 11.86s/it] 48%|████▊ | 3373/7045 [10:58:09<11:55:24, 11.69s/it] {'loss': 1.167, 'learning_rate': 2.793001655928618e-06, 'epoch': 0.48} + 48%|████▊ | 3373/7045 [10:58:09<11:55:24, 11.69s/it] 48%|████▊ | 3374/7045 [10:58:21<12:04:06, 11.84s/it] {'loss': 1.1211, 'learning_rate': 2.791860127270466e-06, 'epoch': 0.48} + 48%|████▊ | 3374/7045 [10:58:21<12:04:06, 11.84s/it] 48%|████▊ | 3375/7045 [10:58:32<11:49:52, 11.61s/it] {'loss': 1.1377, 'learning_rate': 2.790718536917073e-06, 'epoch': 0.48} + 48%|████▊ | 3375/7045 [10:58:32<11:49:52, 11.61s/it] 48%|████▊ | 3376/7045 [10:58:46<12:25:11, 12.19s/it] {'loss': 1.0562, 'learning_rate': 2.7895768851097554e-06, 'epoch': 0.48} + 48%|████▊ | 3376/7045 [10:58:46<12:25:11, 12.19s/it] 48%|████▊ | 3377/7045 [10:58:57<12:09:49, 11.94s/it] {'loss': 1.0625, 'learning_rate': 2.7884351720898443e-06, 'epoch': 0.48} + 48%|████▊ | 3377/7045 [10:58:57<12:09:49, 11.94s/it] 48%|████▊ | 3378/7045 [10:59:08<11:56:27, 11.72s/it] {'loss': 1.0967, 'learning_rate': 2.7872933980986806e-06, 'epoch': 0.48} + 48%|████▊ | 3378/7045 [10:59:08<11:56:27, 11.72s/it] 48%|████▊ | 3379/7045 [10:59:21<12:09:25, 11.94s/it] {'loss': 1.1118, 'learning_rate': 2.7861515633776203e-06, 'epoch': 0.48} + 48%|████▊ | 3379/7045 [10:59:21<12:09:25, 11.94s/it] 48%|████▊ | 3380/7045 [10:59:32<11:52:21, 11.66s/it] {'loss': 1.0752, 'learning_rate': 2.785009668168031e-06, 'epoch': 0.48} + 48%|████▊ | 3380/7045 [10:59:32<11:52:21, 11.66s/it] 48%|████▊ | 3381/7045 [10:59:43<11:42:02, 11.50s/it] {'loss': 1.1055, 'learning_rate': 2.783867712711296e-06, 'epoch': 0.48} + 48%|████▊ | 3381/7045 [10:59:43<11:42:02, 11.50s/it] 48%|████▊ | 3382/7045 [10:59:57<12:24:38, 12.20s/it] {'loss': 1.0747, 'learning_rate': 2.782725697248805e-06, 'epoch': 0.48} + 48%|████▊ | 3382/7045 [10:59:57<12:24:38, 12.20s/it] 48%|████▊ | 3383/7045 [11:00:10<12:38:01, 12.42s/it] {'loss': 1.1074, 'learning_rate': 2.7815836220219677e-06, 'epoch': 0.48} + 48%|████▊ | 3383/7045 [11:00:10<12:38:01, 12.42s/it] 48%|████▊ | 3384/7045 [11:00:22<12:46:16, 12.56s/it] {'loss': 1.0854, 'learning_rate': 2.780441487272203e-06, 'epoch': 0.48} + 48%|████▊ | 3384/7045 [11:00:22<12:46:16, 12.56s/it] 48%|████▊ | 3385/7045 [11:00:35<12:49:33, 12.62s/it] {'loss': 1.0566, 'learning_rate': 2.779299293240941e-06, 'epoch': 0.48} + 48%|████▊ | 3385/7045 [11:00:35<12:49:33, 12.62s/it] 48%|████▊ | 3386/7045 [11:00:48<12:53:00, 12.68s/it] {'loss': 1.0835, 'learning_rate': 2.778157040169627e-06, 'epoch': 0.48} + 48%|████▊ | 3386/7045 [11:00:48<12:53:00, 12.68s/it] 48%|████▊ | 3387/7045 [11:00:59<12:20:46, 12.15s/it] {'loss': 1.1006, 'learning_rate': 2.777014728299718e-06, 'epoch': 0.48} + 48%|████▊ | 3387/7045 [11:00:59<12:20:46, 12.15s/it] 48%|████▊ | 3388/7045 [11:01:12<12:41:09, 12.49s/it] {'loss': 1.1094, 'learning_rate': 2.7758723578726815e-06, 'epoch': 0.48} + 48%|████▊ | 3388/7045 [11:01:12<12:41:09, 12.49s/it] 48%|████▊ | 3389/7045 [11:01:24<12:19:18, 12.13s/it] {'loss': 1.0928, 'learning_rate': 2.7747299291300004e-06, 'epoch': 0.48} + 48%|████▊ | 3389/7045 [11:01:24<12:19:18, 12.13s/it] 48%|████▊ | 3390/7045 [11:01:36<12:18:11, 12.12s/it] {'loss': 1.0889, 'learning_rate': 2.773587442313169e-06, 'epoch': 0.48} + 48%|████▊ | 3390/7045 [11:01:36<12:18:11, 12.12s/it] 48%|████▊ | 3391/7045 [11:01:47<11:58:35, 11.80s/it] {'loss': 1.1191, 'learning_rate': 2.772444897663692e-06, 'epoch': 0.48} + 48%|████▊ | 3391/7045 [11:01:47<11:58:35, 11.80s/it] 48%|████▊ | 3392/7045 [11:01:58<11:53:05, 11.71s/it] {'loss': 1.1182, 'learning_rate': 2.7713022954230883e-06, 'epoch': 0.48} + 48%|████��� | 3392/7045 [11:01:58<11:53:05, 11.71s/it] 48%|████▊ | 3393/7045 [11:02:09<11:39:36, 11.49s/it] {'loss': 1.1099, 'learning_rate': 2.7701596358328887e-06, 'epoch': 0.48} + 48%|████▊ | 3393/7045 [11:02:09<11:39:36, 11.49s/it] 48%|████▊ | 3394/7045 [11:02:21<11:37:53, 11.47s/it] {'loss': 1.1357, 'learning_rate': 2.769016919134635e-06, 'epoch': 0.48} + 48%|████▊ | 3394/7045 [11:02:21<11:37:53, 11.47s/it] 48%|████▊ | 3395/7045 [11:02:32<11:33:32, 11.40s/it] {'loss': 1.1025, 'learning_rate': 2.767874145569882e-06, 'epoch': 0.48} + 48%|████▊ | 3395/7045 [11:02:32<11:33:32, 11.40s/it] 48%|████▊ | 3396/7045 [11:02:43<11:27:17, 11.30s/it] {'loss': 1.1089, 'learning_rate': 2.766731315380198e-06, 'epoch': 0.48} + 48%|████▊ | 3396/7045 [11:02:43<11:27:17, 11.30s/it] 48%|████▊ | 3397/7045 [11:02:54<11:29:12, 11.34s/it] {'loss': 1.082, 'learning_rate': 2.7655884288071595e-06, 'epoch': 0.48} + 48%|████▊ | 3397/7045 [11:02:54<11:29:12, 11.34s/it] 48%|████▊ | 3398/7045 [11:03:07<11:54:09, 11.75s/it] {'loss': 1.0801, 'learning_rate': 2.764445486092358e-06, 'epoch': 0.48} + 48%|████▊ | 3398/7045 [11:03:07<11:54:09, 11.75s/it] 48%|████▊ | 3399/7045 [11:03:18<11:40:57, 11.54s/it] {'loss': 1.1416, 'learning_rate': 2.7633024874773968e-06, 'epoch': 0.48} + 48%|████▊ | 3399/7045 [11:03:18<11:40:57, 11.54s/it] 48%|████▊ | 3400/7045 [11:03:30<11:49:16, 11.68s/it] {'loss': 1.0889, 'learning_rate': 2.7621594332038894e-06, 'epoch': 0.48} + 48%|████▊ | 3400/7045 [11:03:30<11:49:16, 11.68s/it] 48%|████▊ | 3401/7045 [11:03:42<11:55:00, 11.77s/it] {'loss': 1.1133, 'learning_rate': 2.7610163235134607e-06, 'epoch': 0.48} + 48%|████▊ | 3401/7045 [11:03:42<11:55:00, 11.77s/it] 48%|████▊ | 3402/7045 [11:03:53<11:47:35, 11.65s/it] {'loss': 1.124, 'learning_rate': 2.75987315864775e-06, 'epoch': 0.48} + 48%|████▊ | 3402/7045 [11:03:53<11:47:35, 11.65s/it] 48%|████▊ | 3403/7045 [11:04:05<11:43:34, 11.59s/it] {'loss': 1.1104, 'learning_rate': 2.758729938848407e-06, 'epoch': 0.48} + 48%|████▊ | 3403/7045 [11:04:05<11:43:34, 11.59s/it] 48%|████▊ | 3404/7045 [11:04:18<12:02:01, 11.90s/it] {'loss': 1.1357, 'learning_rate': 2.757586664357092e-06, 'epoch': 0.48} + 48%|████▊ | 3404/7045 [11:04:18<12:02:01, 11.90s/it] 48%|████▊ | 3405/7045 [11:04:30<12:19:13, 12.18s/it] {'loss': 1.0474, 'learning_rate': 2.7564433354154774e-06, 'epoch': 0.48} + 48%|████▊ | 3405/7045 [11:04:30<12:19:13, 12.18s/it] 48%|████▊ | 3406/7045 [11:04:42<12:15:19, 12.12s/it] {'loss': 1.1445, 'learning_rate': 2.755299952265248e-06, 'epoch': 0.48} + 48%|████▊ | 3406/7045 [11:04:42<12:15:19, 12.12s/it] 48%|████▊ | 3407/7045 [11:04:53<11:55:25, 11.80s/it] {'loss': 1.1309, 'learning_rate': 2.754156515148098e-06, 'epoch': 0.48} + 48%|████▊ | 3407/7045 [11:04:53<11:55:25, 11.80s/it] 48%|████▊ | 3408/7045 [11:05:06<12:17:49, 12.17s/it] {'loss': 1.0718, 'learning_rate': 2.753013024305736e-06, 'epoch': 0.48} + 48%|████▊ | 3408/7045 [11:05:06<12:17:49, 12.17s/it] 48%|████▊ | 3409/7045 [11:05:19<12:23:37, 12.27s/it] {'loss': 1.1113, 'learning_rate': 2.7518694799798797e-06, 'epoch': 0.48} + 48%|████▊ | 3409/7045 [11:05:19<12:23:37, 12.27s/it] 48%|████▊ | 3410/7045 [11:05:30<12:06:07, 11.99s/it] {'loss': 1.0996, 'learning_rate': 2.750725882412259e-06, 'epoch': 0.48} + 48%|████▊ | 3410/7045 [11:05:30<12:06:07, 11.99s/it] 48%|████▊ | 3411/7045 [11:05:42<11:53:46, 11.79s/it] {'loss': 1.0889, 'learning_rate': 2.749582231844614e-06, 'epoch': 0.48} + 48%|████▊ | 3411/7045 [11:05:42<11:53:46, 11.79s/it] 48%|████▊ | 3412/7045 [11:05:53<11:50:27, 11.73s/it] {'loss': 1.1074, 'learning_rate': 2.748438528518697e-06, 'epoch': 0.48} + 48%|████▊ | 3412/7045 [11:05:53<11:50:27, 11.73s/it] 48%|████▊ | 3413/7045 [11:06:04<11:37:59, 11.53s/it] {'loss': 1.1006, 'learning_rate': 2.7472947726762716e-06, 'epoch': 0.48} + 48%|████▊ | 3413/7045 [11:06:04<11:37:59, 11.53s/it] 48%|████▊ | 3414/7045 [11:06:15<11:25:58, 11.34s/it] {'loss': 1.1523, 'learning_rate': 2.746150964559113e-06, 'epoch': 0.48} + 48%|████▊ | 3414/7045 [11:06:15<11:25:58, 11.34s/it] 48%|████▊ | 3415/7045 [11:06:28<11:52:46, 11.78s/it] {'loss': 1.0908, 'learning_rate': 2.745007104409005e-06, 'epoch': 0.48} + 48%|████▊ | 3415/7045 [11:06:28<11:52:46, 11.78s/it] 48%|████▊ | 3416/7045 [11:06:39<11:46:21, 11.68s/it] {'loss': 1.1445, 'learning_rate': 2.7438631924677443e-06, 'epoch': 0.48} + 48%|████▊ | 3416/7045 [11:06:39<11:46:21, 11.68s/it] 49%|████▊ | 3417/7045 [11:06:50<11:34:33, 11.49s/it] {'loss': 1.0938, 'learning_rate': 2.742719228977139e-06, 'epoch': 0.49} + 49%|████▊ | 3417/7045 [11:06:50<11:34:33, 11.49s/it] 49%|████▊ | 3418/7045 [11:07:02<11:42:51, 11.63s/it] {'loss': 1.0981, 'learning_rate': 2.7415752141790075e-06, 'epoch': 0.49} + 49%|████▊ | 3418/7045 [11:07:02<11:42:51, 11.63s/it] 49%|████▊ | 3419/7045 [11:07:14<11:34:39, 11.49s/it] {'loss': 1.0918, 'learning_rate': 2.7404311483151775e-06, 'epoch': 0.49} + 49%|████▊ | 3419/7045 [11:07:14<11:34:39, 11.49s/it] 49%|████▊ | 3420/7045 [11:07:25<11:39:36, 11.58s/it] {'loss': 1.0928, 'learning_rate': 2.73928703162749e-06, 'epoch': 0.49} + 49%|████▊ | 3420/7045 [11:07:25<11:39:36, 11.58s/it] 49%|████▊ | 3421/7045 [11:07:36<11:30:54, 11.44s/it] {'loss': 1.1104, 'learning_rate': 2.7381428643577958e-06, 'epoch': 0.49} + 49%|████▊ | 3421/7045 [11:07:36<11:30:54, 11.44s/it] 49%|████▊ | 3422/7045 [11:07:48<11:30:05, 11.43s/it] {'loss': 1.1348, 'learning_rate': 2.7369986467479553e-06, 'epoch': 0.49} + 49%|████▊ | 3422/7045 [11:07:48<11:30:05, 11.43s/it] 49%|████▊ | 3423/7045 [11:07:59<11:24:48, 11.34s/it] {'loss': 1.1055, 'learning_rate': 2.7358543790398408e-06, 'epoch': 0.49} + 49%|████▊ | 3423/7045 [11:07:59<11:24:48, 11.34s/it] 49%|████▊ | 3424/7045 [11:08:12<11:53:03, 11.82s/it] {'loss': 1.1406, 'learning_rate': 2.7347100614753352e-06, 'epoch': 0.49} + 49%|████▊ | 3424/7045 [11:08:12<11:53:03, 11.82s/it] 49%|████▊ | 3425/7045 [11:08:25<12:15:30, 12.19s/it] {'loss': 1.0918, 'learning_rate': 2.7335656942963313e-06, 'epoch': 0.49} + 49%|████▊ | 3425/7045 [11:08:25<12:15:30, 12.19s/it] 49%|████▊ | 3426/7045 [11:08:37<12:07:41, 12.06s/it] {'loss': 1.1387, 'learning_rate': 2.732421277744733e-06, 'epoch': 0.49} + 49%|████▊ | 3426/7045 [11:08:37<12:07:41, 12.06s/it] 49%|████▊ | 3427/7045 [11:08:48<11:47:18, 11.73s/it] {'loss': 1.0869, 'learning_rate': 2.731276812062453e-06, 'epoch': 0.49} + 49%|████▊ | 3427/7045 [11:08:48<11:47:18, 11.73s/it] 49%|████▊ | 3428/7045 [11:08:59<11:47:31, 11.74s/it] {'loss': 1.1572, 'learning_rate': 2.730132297491418e-06, 'epoch': 0.49} + 49%|████▊ | 3428/7045 [11:08:59<11:47:31, 11.74s/it] 49%|████▊ | 3429/7045 [11:09:11<11:41:26, 11.64s/it] {'loss': 1.1436, 'learning_rate': 2.7289877342735614e-06, 'epoch': 0.49} + 49%|████▊ | 3429/7045 [11:09:11<11:41:26, 11.64s/it] 49%|████▊ | 3430/7045 [11:09:23<11:45:12, 11.70s/it] {'loss': 1.1025, 'learning_rate': 2.7278431226508282e-06, 'epoch': 0.49} + 49%|████▊ | 3430/7045 [11:09:23<11:45:12, 11.70s/it] 49%|████▊ | 3431/7045 [11:09:36<12:05:43, 12.05s/it] {'loss': 1.1353, 'learning_rate': 2.7266984628651732e-06, 'epoch': 0.49} + 49%|████▊ | 3431/7045 [11:09:36<12:05:43, 12.05s/it] 49%|████▊ | 3432/7045 [11:09:47<11:53:11, 11.84s/it] {'loss': 1.1338, 'learning_rate': 2.725553755158563e-06, 'epoch': 0.49} + 49%|████▊ | 3432/7045 [11:09:47<11:53:11, 11.84s/it] 49%|████▊ | 3433/7045 [11:09:58<11:43:53, 11.69s/it] {'loss': 1.1094, 'learning_rate': 2.724408999772973e-06, 'epoch': 0.49} + 49%|████▊ | 3433/7045 [11:09:58<11:43:53, 11.69s/it] 49%|████▊ | 3434/7045 [11:10:10<11:49:45, 11.79s/it] {'loss': 1.1367, 'learning_rate': 2.7232641969503875e-06, 'epoch': 0.49} + 49%|████▊ | 3434/7045 [11:10:10<11:49:45, 11.79s/it] 49%|████▉ | 3435/7045 [11:10:21<11:34:52, 11.55s/it] {'loss': 1.0801, 'learning_rate': 2.7221193469328033e-06, 'epoch': 0.49} + 49%|████▉ | 3435/7045 [11:10:21<11:34:52, 11.55s/it] 49%|████▉ | 3436/7045 [11:10:33<11:44:06, 11.71s/it] {'loss': 1.1021, 'learning_rate': 2.720974449962226e-06, 'epoch': 0.49} + 49%|████▉ | 3436/7045 [11:10:33<11:44:06, 11.71s/it] 49%|████▉ | 3437/7045 [11:10:45<11:39:46, 11.64s/it] {'loss': 1.1455, 'learning_rate': 2.7198295062806706e-06, 'epoch': 0.49} + 49%|████▉ | 3437/7045 [11:10:45<11:39:46, 11.64s/it] 49%|████▉ | 3438/7045 [11:10:56<11:26:28, 11.42s/it] {'loss': 1.1357, 'learning_rate': 2.7186845161301628e-06, 'epoch': 0.49} + 49%|████▉ | 3438/7045 [11:10:56<11:26:28, 11.42s/it] 49%|████▉ | 3439/7045 [11:11:08<11:43:18, 11.70s/it] {'loss': 1.1318, 'learning_rate': 2.7175394797527383e-06, 'epoch': 0.49} + 49%|████▉ | 3439/7045 [11:11:08<11:43:18, 11.70s/it] 49%|████▉ | 3440/7045 [11:11:21<11:56:27, 11.92s/it] {'loss': 1.1406, 'learning_rate': 2.7163943973904422e-06, 'epoch': 0.49} + 49%|████▉ | 3440/7045 [11:11:21<11:56:27, 11.92s/it] 49%|████▉ | 3441/7045 [11:11:32<11:47:21, 11.78s/it] {'loss': 1.1362, 'learning_rate': 2.7152492692853283e-06, 'epoch': 0.49} + 49%|████▉ | 3441/7045 [11:11:32<11:47:21, 11.78s/it] 49%|████▉ | 3442/7045 [11:11:44<11:46:01, 11.76s/it] {'loss': 1.1416, 'learning_rate': 2.7141040956794616e-06, 'epoch': 0.49} + 49%|████▉ | 3442/7045 [11:11:44<11:46:01, 11.76s/it] 49%|████▉ | 3443/7045 [11:11:55<11:33:02, 11.54s/it] {'loss': 1.0928, 'learning_rate': 2.712958876814916e-06, 'epoch': 0.49} + 49%|████▉ | 3443/7045 [11:11:55<11:33:02, 11.54s/it] 49%|████▉ | 3444/7045 [11:12:06<11:23:52, 11.39s/it] {'loss': 1.0654, 'learning_rate': 2.7118136129337757e-06, 'epoch': 0.49} + 49%|████▉ | 3444/7045 [11:12:06<11:23:52, 11.39s/it] 49%|████▉ | 3445/7045 [11:12:18<11:39:16, 11.65s/it] {'loss': 1.0498, 'learning_rate': 2.7106683042781334e-06, 'epoch': 0.49} + 49%|████▉ | 3445/7045 [11:12:18<11:39:16, 11.65s/it] 49%|████▉ | 3446/7045 [11:12:30<11:38:16, 11.64s/it] {'loss': 1.1582, 'learning_rate': 2.7095229510900917e-06, 'epoch': 0.49} + 49%|████▉ | 3446/7045 [11:12:30<11:38:16, 11.64s/it] 49%|████▉ | 3447/7045 [11:12:41<11:30:42, 11.52s/it] {'loss': 1.1348, 'learning_rate': 2.7083775536117623e-06, 'epoch': 0.49} + 49%|████▉ | 3447/7045 [11:12:41<11:30:42, 11.52s/it] 49%|████▉ | 3448/7045 [11:12:54<12:05:24, 12.10s/it] {'loss': 1.1157, 'learning_rate': 2.7072321120852675e-06, 'epoch': 0.49} + 49%|████▉ | 3448/7045 [11:12:54<12:05:24, 12.10s/it] 49%|████▉ | 3449/7045 [11:13:05<11:44:51, 11.76s/it] {'loss': 1.0986, 'learning_rate': 2.706086626752736e-06, 'epoch': 0.49} + 49%|████▉ | 3449/7045 [11:13:05<11:44:51, 11.76s/it] 49%|���███▉ | 3450/7045 [11:13:18<11:54:27, 11.92s/it] {'loss': 1.1445, 'learning_rate': 2.7049410978563097e-06, 'epoch': 0.49} + 49%|████▉ | 3450/7045 [11:13:18<11:54:27, 11.92s/it] 49%|████▉ | 3451/7045 [11:13:29<11:37:18, 11.64s/it] {'loss': 1.1191, 'learning_rate': 2.7037955256381373e-06, 'epoch': 0.49} + 49%|████▉ | 3451/7045 [11:13:29<11:37:18, 11.64s/it] 49%|████▉ | 3452/7045 [11:13:41<11:58:18, 12.00s/it] {'loss': 1.1426, 'learning_rate': 2.702649910340377e-06, 'epoch': 0.49} + 49%|████▉ | 3452/7045 [11:13:41<11:58:18, 12.00s/it] 49%|████▉ | 3453/7045 [11:13:53<11:50:09, 11.86s/it] {'loss': 1.1094, 'learning_rate': 2.7015042522051955e-06, 'epoch': 0.49} + 49%|████▉ | 3453/7045 [11:13:53<11:50:09, 11.86s/it] 49%|████▉ | 3454/7045 [11:14:05<12:01:24, 12.05s/it] {'loss': 1.0752, 'learning_rate': 2.7003585514747705e-06, 'epoch': 0.49} + 49%|████▉ | 3454/7045 [11:14:05<12:01:24, 12.05s/it] 49%|████▉ | 3455/7045 [11:14:19<12:19:18, 12.36s/it] {'loss': 1.0874, 'learning_rate': 2.6992128083912867e-06, 'epoch': 0.49} + 49%|████▉ | 3455/7045 [11:14:19<12:19:18, 12.36s/it] 49%|████▉ | 3456/7045 [11:14:30<11:55:43, 11.97s/it] {'loss': 1.1143, 'learning_rate': 2.6980670231969385e-06, 'epoch': 0.49} + 49%|████▉ | 3456/7045 [11:14:30<11:55:43, 11.97s/it] 49%|████▉ | 3457/7045 [11:14:41<11:46:13, 11.81s/it] {'loss': 1.1123, 'learning_rate': 2.6969211961339292e-06, 'epoch': 0.49} + 49%|████▉ | 3457/7045 [11:14:41<11:46:13, 11.81s/it] 49%|████▉ | 3458/7045 [11:14:52<11:34:53, 11.62s/it] {'loss': 1.1113, 'learning_rate': 2.695775327444472e-06, 'epoch': 0.49} + 49%|████▉ | 3458/7045 [11:14:52<11:34:53, 11.62s/it] 49%|████▉ | 3459/7045 [11:15:03<11:25:30, 11.47s/it] {'loss': 1.1216, 'learning_rate': 2.6946294173707864e-06, 'epoch': 0.49} + 49%|████▉ | 3459/7045 [11:15:03<11:25:30, 11.47s/it] 49%|████▉ | 3460/7045 [11:15:15<11:20:17, 11.39s/it] {'loss': 1.1348, 'learning_rate': 2.6934834661551037e-06, 'epoch': 0.49} + 49%|████▉ | 3460/7045 [11:15:15<11:20:17, 11.39s/it] 49%|████▉ | 3461/7045 [11:15:26<11:20:58, 11.40s/it] {'loss': 1.127, 'learning_rate': 2.69233747403966e-06, 'epoch': 0.49} + 49%|████▉ | 3461/7045 [11:15:26<11:20:58, 11.40s/it] 49%|████▉ | 3462/7045 [11:15:38<11:38:21, 11.69s/it] {'loss': 1.126, 'learning_rate': 2.6911914412667043e-06, 'epoch': 0.49} + 49%|████▉ | 3462/7045 [11:15:38<11:38:21, 11.69s/it] 49%|████▉ | 3463/7045 [11:15:51<11:56:16, 12.00s/it] {'loss': 1.0991, 'learning_rate': 2.6900453680784923e-06, 'epoch': 0.49} + 49%|████▉ | 3463/7045 [11:15:51<11:56:16, 12.00s/it] 49%|████▉ | 3464/7045 [11:16:03<11:49:40, 11.89s/it] {'loss': 1.1138, 'learning_rate': 2.6888992547172872e-06, 'epoch': 0.49} + 49%|████▉ | 3464/7045 [11:16:03<11:49:40, 11.89s/it] 49%|████▉ | 3465/7045 [11:16:14<11:38:14, 11.70s/it] {'loss': 1.1094, 'learning_rate': 2.687753101425362e-06, 'epoch': 0.49} + 49%|████▉ | 3465/7045 [11:16:14<11:38:14, 11.70s/it] 49%|████▉ | 3466/7045 [11:16:25<11:34:09, 11.64s/it] {'loss': 1.1309, 'learning_rate': 2.686606908444999e-06, 'epoch': 0.49} + 49%|████▉ | 3466/7045 [11:16:25<11:34:09, 11.64s/it] 49%|████▉ | 3467/7045 [11:16:37<11:33:16, 11.63s/it] {'loss': 1.1553, 'learning_rate': 2.6854606760184853e-06, 'epoch': 0.49} + 49%|████▉ | 3467/7045 [11:16:37<11:33:16, 11.63s/it] 49%|████▉ | 3468/7045 [11:16:49<11:35:45, 11.67s/it] {'loss': 1.1084, 'learning_rate': 2.6843144043881203e-06, 'epoch': 0.49} + 49%|████▉ | 3468/7045 [11:16:49<11:35:45, 11.67s/it] 49%|████▉ | 3469/7045 [11:17:00<11:28:14, 11.55s/it] {'loss': 1.1396, 'learning_rate': 2.6831680937962107e-06, 'epoch': 0.49} + 49%|████▉ | 3469/7045 [11:17:00<11:28:14, 11.55s/it] 49%|████▉ | 3470/7045 [11:17:11<11:22:52, 11.46s/it] {'loss': 1.1553, 'learning_rate': 2.68202174448507e-06, 'epoch': 0.49} + 49%|████▉ | 3470/7045 [11:17:11<11:22:52, 11.46s/it] 49%|████▉ | 3471/7045 [11:17:23<11:27:51, 11.55s/it] {'loss': 1.1348, 'learning_rate': 2.680875356697021e-06, 'epoch': 0.49} + 49%|████▉ | 3471/7045 [11:17:23<11:27:51, 11.55s/it] 49%|████▉ | 3472/7045 [11:17:37<12:06:13, 12.20s/it] {'loss': 1.0562, 'learning_rate': 2.679728930674394e-06, 'epoch': 0.49} + 49%|████▉ | 3472/7045 [11:17:37<12:06:13, 12.20s/it] 49%|████▉ | 3473/7045 [11:17:48<11:50:35, 11.94s/it] {'loss': 1.1768, 'learning_rate': 2.6785824666595284e-06, 'epoch': 0.49} + 49%|████▉ | 3473/7045 [11:17:48<11:50:35, 11.94s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Truncated File Read + warnings.warn(str(msg)) + 49%|████▉ | 3474/7045 [11:18:01<11:58:45, 12.08s/it] {'loss': 1.1338, 'learning_rate': 2.6774359648947704e-06, 'epoch': 0.49} + 49%|████▉ | 3474/7045 [11:18:01<11:58:45, 12.08s/it] 49%|████▉ | 3475/7045 [11:18:13<12:03:27, 12.16s/it] {'loss': 1.1177, 'learning_rate': 2.676289425622476e-06, 'epoch': 0.49} + 49%|████▉ | 3475/7045 [11:18:13<12:03:27, 12.16s/it] 49%|████▉ | 3476/7045 [11:18:26<12:24:39, 12.52s/it] {'loss': 1.0684, 'learning_rate': 2.675142849085007e-06, 'epoch': 0.49} + 49%|████▉ | 3476/7045 [11:18:26<12:24:39, 12.52s/it] 49%|████▉ | 3477/7045 [11:18:40<12:42:24, 12.82s/it] {'loss': 1.0054, 'learning_rate': 2.673996235524734e-06, 'epoch': 0.49} + 49%|████▉ | 3477/7045 [11:18:40<12:42:24, 12.82s/it] 49%|████▉ | 3478/7045 [11:18:52<12:33:30, 12.67s/it] {'loss': 1.1123, 'learning_rate': 2.672849585184035e-06, 'epoch': 0.49} + 49%|████▉ | 3478/7045 [11:18:52<12:33:30, 12.67s/it] 49%|████▉ | 3479/7045 [11:19:03<12:06:52, 12.23s/it] {'loss': 1.1484, 'learning_rate': 2.6717028983052973e-06, 'epoch': 0.49} + 49%|████▉ | 3479/7045 [11:19:03<12:06:52, 12.23s/it] 49%|████▉ | 3480/7045 [11:19:16<12:10:22, 12.29s/it] {'loss': 1.0815, 'learning_rate': 2.6705561751309143e-06, 'epoch': 0.49} + 49%|████▉ | 3480/7045 [11:19:16<12:10:22, 12.29s/it] 49%|████▉ | 3481/7045 [11:19:28<12:05:44, 12.22s/it] {'loss': 1.0957, 'learning_rate': 2.6694094159032875e-06, 'epoch': 0.49} + 49%|████▉ | 3481/7045 [11:19:28<12:05:44, 12.22s/it] 49%|████▉ | 3482/7045 [11:19:41<12:14:44, 12.37s/it] {'loss': 1.0747, 'learning_rate': 2.668262620864826e-06, 'epoch': 0.49} + 49%|████▉ | 3482/7045 [11:19:41<12:14:44, 12.37s/it] 49%|████▉ | 3483/7045 [11:19:52<11:55:45, 12.06s/it] {'loss': 1.1104, 'learning_rate': 2.667115790257947e-06, 'epoch': 0.49} + 49%|████▉ | 3483/7045 [11:19:52<11:55:45, 12.06s/it] 49%|████▉ | 3484/7045 [11:20:03<11:45:46, 11.89s/it] {'loss': 1.1113, 'learning_rate': 2.665968924325074e-06, 'epoch': 0.49} + 49%|████▉ | 3484/7045 [11:20:03<11:45:46, 11.89s/it] 49%|████▉ | 3485/7045 [11:20:14<11:32:03, 11.66s/it] {'loss': 1.0742, 'learning_rate': 2.664822023308641e-06, 'epoch': 0.49} + 49%|████▉ | 3485/7045 [11:20:14<11:32:03, 11.66s/it] 49%|████▉ | 3486/7045 [11:20:30<12:37:17, 12.77s/it] {'loss': 1.0908, 'learning_rate': 2.6636750874510835e-06, 'epoch': 0.49} + 49%|████▉ | 3486/7045 [11:20:30<12:37:17, 12.77s/it] 49%|████▉ | 3487/7045 [11:20:41<12:10:09, 12.31s/it] {'loss': 1.1045, 'learning_rate': 2.6625281169948515e-06, 'epoch': 0.49} + 49%|████▉ | 3487/7045 [11:20:41<12:10:09, 12.31s/it] 50%|████▉ | 3488/7045 [11:20:55<12:38:27, 12.79s/it] {'loss': 1.0869, 'learning_rate': 2.6613811121823973e-06, 'epoch': 0.5} + 50%|████▉ | 3488/7045 [11:20:55<12:38:27, 12.79s/it] 50%|████▉ | 3489/7045 [11:21:08<12:45:48, 12.92s/it] {'loss': 1.1016, 'learning_rate': 2.6602340732561828e-06, 'epoch': 0.5} + 50%|████▉ | 3489/7045 [11:21:08<12:45:48, 12.92s/it] 50%|████▉ | 3490/7045 [11:21:19<12:15:44, 12.42s/it] {'loss': 1.165, 'learning_rate': 2.659087000458675e-06, 'epoch': 0.5} + 50%|████▉ | 3490/7045 [11:21:19<12:15:44, 12.42s/it] 50%|████▉ | 3491/7045 [11:21:31<11:56:34, 12.10s/it] {'loss': 1.0737, 'learning_rate': 2.65793989403235e-06, 'epoch': 0.5} + 50%|████▉ | 3491/7045 [11:21:31<11:56:34, 12.10s/it] 50%|████▉ | 3492/7045 [11:21:43<12:05:46, 12.26s/it] {'loss': 1.0898, 'learning_rate': 2.656792754219691e-06, 'epoch': 0.5} + 50%|████▉ | 3492/7045 [11:21:43<12:05:46, 12.26s/it] 50%|████▉ | 3493/7045 [11:21:56<12:09:33, 12.32s/it] {'loss': 1.125, 'learning_rate': 2.6556455812631872e-06, 'epoch': 0.5} + 50%|████▉ | 3493/7045 [11:21:56<12:09:33, 12.32s/it] 50%|████▉ | 3494/7045 [11:22:07<11:51:09, 12.02s/it] {'loss': 1.1279, 'learning_rate': 2.6544983754053356e-06, 'epoch': 0.5} + 50%|████▉ | 3494/7045 [11:22:07<11:51:09, 12.02s/it] 50%|████▉ | 3495/7045 [11:22:19<11:47:23, 11.96s/it] {'loss': 1.1006, 'learning_rate': 2.6533511368886393e-06, 'epoch': 0.5} + 50%|████▉ | 3495/7045 [11:22:19<11:47:23, 11.96s/it] 50%|████▉ | 3496/7045 [11:22:30<11:31:52, 11.70s/it] {'loss': 1.0986, 'learning_rate': 2.65220386595561e-06, 'epoch': 0.5} + 50%|████▉ | 3496/7045 [11:22:30<11:31:52, 11.70s/it] 50%|████▉ | 3497/7045 [11:22:43<11:46:34, 11.95s/it] {'loss': 1.0991, 'learning_rate': 2.651056562848764e-06, 'epoch': 0.5} + 50%|████▉ | 3497/7045 [11:22:43<11:46:34, 11.95s/it] 50%|████▉ | 3498/7045 [11:22:54<11:28:04, 11.64s/it] {'loss': 1.0967, 'learning_rate': 2.649909227810624e-06, 'epoch': 0.5} + 50%|████▉ | 3498/7045 [11:22:54<11:28:04, 11.64s/it] 50%|████▉ | 3499/7045 [11:23:05<11:20:22, 11.51s/it] {'loss': 1.1025, 'learning_rate': 2.648761861083725e-06, 'epoch': 0.5} + 50%|████▉ | 3499/7045 [11:23:05<11:20:22, 11.51s/it] 50%|████▉ | 3500/7045 [11:23:16<11:20:03, 11.51s/it] {'loss': 1.0918, 'learning_rate': 2.6476144629106015e-06, 'epoch': 0.5} + 50%|████▉ | 3500/7045 [11:23:16<11:20:03, 11.51s/it] 50%|████▉ | 3501/7045 [11:23:28<11:17:15, 11.47s/it] {'loss': 1.1426, 'learning_rate': 2.646467033533798e-06, 'epoch': 0.5} + 50%|████▉ | 3501/7045 [11:23:28<11:17:15, 11.47s/it] 50%|████▉ | 3502/7045 [11:23:39<11:06:47, 11.29s/it] {'loss': 1.0806, 'learning_rate': 2.6453195731958663e-06, 'epoch': 0.5} + 50%|████▉ | 3502/7045 [11:23:39<11:06:47, 11.29s/it] 50%|████▉ | 3503/7045 [11:23:50<11:06:45, 11.29s/it] {'loss': 1.124, 'learning_rate': 2.6441720821393646e-06, 'epoch': 0.5} + 50%|████▉ | 3503/7045 [11:23:50<11:06:45, 11.29s/it] 50%|████▉ | 3504/7045 [11:24:01<11:00:34, 11.19s/it] {'loss': 1.1064, 'learning_rate': 2.6430245606068543e-06, 'epoch': 0.5} + 50%|████▉ | 3504/7045 [11:24:01<11:00:34, 11.19s/it] 50%|████▉ | 3505/7045 [11:24:12<11:02:58, 11.24s/it] {'loss': 1.1348, 'learning_rate': 2.6418770088409073e-06, 'epoch': 0.5} + 50%|████▉ | 3505/7045 [11:24:12<11:02:58, 11.24s/it] 50%|████▉ | 3506/7045 [11:24:23<10:56:13, 11.13s/it] {'loss': 1.1201, 'learning_rate': 2.640729427084101e-06, 'epoch': 0.5} + 50%|████▉ | 3506/7045 [11:24:23<10:56:13, 11.13s/it] 50%|████▉ | 3507/7045 [11:24:34<10:56:14, 11.13s/it] {'loss': 1.1309, 'learning_rate': 2.639581815579018e-06, 'epoch': 0.5} + 50%|████▉ | 3507/7045 [11:24:34<10:56:14, 11.13s/it] 50%|████▉ | 3508/7045 [11:24:45<10:49:51, 11.02s/it] {'loss': 1.1182, 'learning_rate': 2.638434174568247e-06, 'epoch': 0.5} + 50%|████▉ | 3508/7045 [11:24:45<10:49:51, 11.02s/it] 50%|████▉ | 3509/7045 [11:24:56<10:52:02, 11.06s/it] {'loss': 1.1094, 'learning_rate': 2.6372865042943837e-06, 'epoch': 0.5} + 50%|████▉ | 3509/7045 [11:24:56<10:52:02, 11.06s/it] 50%|████▉ | 3510/7045 [11:25:09<11:31:06, 11.73s/it] {'loss': 1.0796, 'learning_rate': 2.63613880500003e-06, 'epoch': 0.5} + 50%|████▉ | 3510/7045 [11:25:09<11:31:06, 11.73s/it] 50%|████▉ | 3511/7045 [11:25:22<11:54:07, 12.12s/it] {'loss': 1.1167, 'learning_rate': 2.6349910769277954e-06, 'epoch': 0.5} + 50%|████▉ | 3511/7045 [11:25:22<11:54:07, 12.12s/it] 50%|████▉ | 3512/7045 [11:25:33<11:34:19, 11.79s/it] {'loss': 1.1348, 'learning_rate': 2.6338433203202916e-06, 'epoch': 0.5} + 50%|████▉ | 3512/7045 [11:25:33<11:34:19, 11.79s/it] 50%|████▉ | 3513/7045 [11:25:45<11:25:10, 11.64s/it] {'loss': 1.1191, 'learning_rate': 2.63269553542014e-06, 'epoch': 0.5} + 50%|████▉ | 3513/7045 [11:25:45<11:25:10, 11.64s/it] 50%|████▉ | 3514/7045 [11:25:58<11:46:44, 12.01s/it] {'loss': 1.1367, 'learning_rate': 2.6315477224699663e-06, 'epoch': 0.5} + 50%|████▉ | 3514/7045 [11:25:58<11:46:44, 12.01s/it] 50%|████▉ | 3515/7045 [11:26:11<12:06:38, 12.35s/it] {'loss': 1.1104, 'learning_rate': 2.6303998817124027e-06, 'epoch': 0.5} + 50%|████▉ | 3515/7045 [11:26:11<12:06:38, 12.35s/it] 50%|████▉ | 3516/7045 [11:26:22<11:47:46, 12.03s/it] {'loss': 1.0938, 'learning_rate': 2.629252013390086e-06, 'epoch': 0.5} + 50%|████▉ | 3516/7045 [11:26:22<11:47:46, 12.03s/it] 50%|████▉ | 3517/7045 [11:26:33<11:32:18, 11.77s/it] {'loss': 1.0791, 'learning_rate': 2.628104117745661e-06, 'epoch': 0.5} + 50%|████▉ | 3517/7045 [11:26:33<11:32:18, 11.77s/it] 50%|████▉ | 3518/7045 [11:26:45<11:27:29, 11.70s/it] {'loss': 1.1484, 'learning_rate': 2.626956195021778e-06, 'epoch': 0.5} + 50%|████▉ | 3518/7045 [11:26:45<11:27:29, 11.70s/it] 50%|████▉ | 3519/7045 [11:26:56<11:16:15, 11.51s/it] {'loss': 1.1504, 'learning_rate': 2.62580824546109e-06, 'epoch': 0.5} + 50%|████▉ | 3519/7045 [11:26:56<11:16:15, 11.51s/it] 50%|████▉ | 3520/7045 [11:27:07<11:05:52, 11.33s/it] {'loss': 1.0791, 'learning_rate': 2.624660269306259e-06, 'epoch': 0.5} + 50%|████▉ | 3520/7045 [11:27:07<11:05:52, 11.33s/it] 50%|████▉ | 3521/7045 [11:27:18<11:10:23, 11.41s/it] {'loss': 1.1348, 'learning_rate': 2.6235122667999512e-06, 'epoch': 0.5} + 50%|████▉ | 3521/7045 [11:27:18<11:10:23, 11.41s/it] 50%|████▉ | 3522/7045 [11:27:30<11:13:04, 11.46s/it] {'loss': 1.0869, 'learning_rate': 2.6223642381848393e-06, 'epoch': 0.5} + 50%|████▉ | 3522/7045 [11:27:30<11:13:04, 11.46s/it] 50%|█████ | 3523/7045 [11:27:42<11:28:26, 11.73s/it] {'loss': 1.0933, 'learning_rate': 2.6212161837035993e-06, 'epoch': 0.5} + 50%|█████ | 3523/7045 [11:27:42<11:28:26, 11.73s/it] 50%|█████ | 3524/7045 [11:27:55<11:51:44, 12.13s/it] {'loss': 1.1494, 'learning_rate': 2.620068103598916e-06, 'epoch': 0.5} + 50%|█████ | 3524/7045 [11:27:55<11:51:44, 12.13s/it] 50%|█████ | 3525/7045 [11:28:08<12:01:31, 12.30s/it] {'loss': 1.1309, 'learning_rate': 2.6189199981134774e-06, 'epoch': 0.5} + 50%|█████ | 3525/7045 [11:28:08<12:01:31, 12.30s/it] 50%|█████ | 3526/7045 [11:28:19<11:36:27, 11.87s/it] {'loss': 1.0674, 'learning_rate': 2.6177718674899767e-06, 'epoch': 0.5} + 50%|█████ | 3526/7045 [11:28:19<11:36:27, 11.87s/it] 50%|█████ | 3527/7045 [11:28:31<11:40:26, 11.95s/it] {'loss': 1.0903, 'learning_rate': 2.616623711971113e-06, 'epoch': 0.5} + 50%|█████ | 3527/7045 [11:28:31<11:40:26, 11.95s/it] 50%|█████ | 3528/7045 [11:28:42<11:24:56, 11.69s/it] {'loss': 1.1211, 'learning_rate': 2.615475531799591e-06, 'epoch': 0.5} + 50%|█████ | 3528/7045 [11:28:42<11:24:56, 11.69s/it] 50%|█████ | 3529/7045 [11:28:53<11:11:21, 11.46s/it] {'loss': 1.0908, 'learning_rate': 2.61432732721812e-06, 'epoch': 0.5} + 50%|█████ | 3529/7045 [11:28:53<11:11:21, 11.46s/it] 50%|█████ | 3530/7045 [11:29:06<11:30:18, 11.78s/it] {'loss': 1.0996, 'learning_rate': 2.6131790984694155e-06, 'epoch': 0.5} + 50%|█████ | 3530/7045 [11:29:06<11:30:18, 11.78s/it] 50%|█████ | 3531/7045 [11:29:19<11:53:24, 12.18s/it] {'loss': 1.1338, 'learning_rate': 2.6120308457961967e-06, 'epoch': 0.5} + 50%|█████ | 3531/7045 [11:29:19<11:53:24, 12.18s/it] 50%|█████ | 3532/7045 [11:29:30<11:30:28, 11.79s/it] {'loss': 1.1201, 'learning_rate': 2.6108825694411886e-06, 'epoch': 0.5} + 50%|█████ | 3532/7045 [11:29:30<11:30:28, 11.79s/it] 50%|█████ | 3533/7045 [11:29:43<11:52:33, 12.17s/it] {'loss': 1.1167, 'learning_rate': 2.6097342696471225e-06, 'epoch': 0.5} + 50%|█████ | 3533/7045 [11:29:43<11:52:33, 12.17s/it] 50%|█████ | 3534/7045 [11:29:54<11:37:52, 11.93s/it] {'loss': 1.1396, 'learning_rate': 2.6085859466567304e-06, 'epoch': 0.5} + 50%|█████ | 3534/7045 [11:29:54<11:37:52, 11.93s/it] 50%|█████ | 3535/7045 [11:30:05<11:23:59, 11.69s/it] {'loss': 1.1562, 'learning_rate': 2.6074376007127535e-06, 'epoch': 0.5} + 50%|█████ | 3535/7045 [11:30:05<11:23:59, 11.69s/it] 50%|█████ | 3536/7045 [11:30:18<11:37:58, 11.93s/it] {'loss': 1.168, 'learning_rate': 2.6062892320579373e-06, 'epoch': 0.5} + 50%|█████ | 3536/7045 [11:30:18<11:37:58, 11.93s/it] 50%|█████ | 3537/7045 [11:30:29<11:21:37, 11.66s/it] {'loss': 1.126, 'learning_rate': 2.605140840935031e-06, 'epoch': 0.5} + 50%|█████ | 3537/7045 [11:30:29<11:21:37, 11.66s/it] 50%|█████ | 3538/7045 [11:30:40<11:15:52, 11.56s/it] {'loss': 1.0918, 'learning_rate': 2.6039924275867877e-06, 'epoch': 0.5} + 50%|█████ | 3538/7045 [11:30:40<11:15:52, 11.56s/it] 50%|█████ | 3539/7045 [11:30:51<11:06:19, 11.40s/it] {'loss': 1.127, 'learning_rate': 2.602843992255967e-06, 'epoch': 0.5} + 50%|█████ | 3539/7045 [11:30:51<11:06:19, 11.40s/it] 50%|█████ | 3540/7045 [11:31:02<10:58:24, 11.27s/it] {'loss': 1.1699, 'learning_rate': 2.601695535185333e-06, 'epoch': 0.5} + 50%|█████ | 3540/7045 [11:31:02<10:58:24, 11.27s/it] 50%|█████ | 3541/7045 [11:31:17<11:56:34, 12.27s/it] {'loss': 1.0947, 'learning_rate': 2.6005470566176517e-06, 'epoch': 0.5} + 50%|█████ | 3541/7045 [11:31:17<11:56:34, 12.27s/it] 50%|█████ | 3542/7045 [11:31:28<11:35:12, 11.91s/it] {'loss': 1.1387, 'learning_rate': 2.599398556795698e-06, 'epoch': 0.5} + 50%|█████ | 3542/7045 [11:31:28<11:35:12, 11.91s/it] 50%|█████ | 3543/7045 [11:31:40<11:35:27, 11.92s/it] {'loss': 1.1279, 'learning_rate': 2.5982500359622488e-06, 'epoch': 0.5} + 50%|█████ | 3543/7045 [11:31:40<11:35:27, 11.92s/it] 50%|█████ | 3544/7045 [11:31:51<11:22:23, 11.69s/it] {'loss': 1.1582, 'learning_rate': 2.597101494360085e-06, 'epoch': 0.5} + 50%|█████ | 3544/7045 [11:31:51<11:22:23, 11.69s/it] 50%|█████ | 3545/7045 [11:32:04<11:47:35, 12.13s/it] {'loss': 1.1089, 'learning_rate': 2.5959529322319927e-06, 'epoch': 0.5} + 50%|█████ | 3545/7045 [11:32:04<11:47:35, 12.13s/it] 50%|█████ | 3546/7045 [11:32:15<11:31:42, 11.86s/it] {'loss': 1.1475, 'learning_rate': 2.594804349820762e-06, 'epoch': 0.5} + 50%|█████ | 3546/7045 [11:32:15<11:31:42, 11.86s/it] 50%|█████ | 3547/7045 [11:32:26<11:20:56, 11.68s/it] {'loss': 1.1689, 'learning_rate': 2.5936557473691877e-06, 'epoch': 0.5} + 50%|█████ | 3547/7045 [11:32:26<11:20:56, 11.68s/it] 50%|█████ | 3548/7045 [11:32:38<11:28:18, 11.81s/it] {'loss': 1.1436, 'learning_rate': 2.592507125120069e-06, 'epoch': 0.5} + 50%|█████ | 3548/7045 [11:32:38<11:28:18, 11.81s/it] 50%|█████ | 3549/7045 [11:32:49<11:15:01, 11.58s/it] {'loss': 1.0732, 'learning_rate': 2.5913584833162086e-06, 'epoch': 0.5} + 50%|█████ | 3549/7045 [11:32:50<11:15:01, 11.58s/it] 50%|█████ | 3550/7045 [11:33:01<11:07:00, 11.45s/it] {'loss': 1.1348, 'learning_rate': 2.590209822200414e-06, 'epoch': 0.5} + 50%|█████ | 3550/7045 [11:33:01<11:07:00, 11.45s/it] 50%|█████ | 3551/7045 [11:33:11<10:53:01, 11.21s/it] {'loss': 1.0693, 'learning_rate': 2.589061142015497e-06, 'epoch': 0.5} + 50%|█████ | 3551/7045 [11:33:11<10:53:01, 11.21s/it] 50%|█████ | 3552/7045 [11:33:23<11:02:28, 11.38s/it] {'loss': 1.123, 'learning_rate': 2.5879124430042713e-06, 'epoch': 0.5} + 50%|█████ | 3552/7045 [11:33:23<11:02:28, 11.38s/it] 50%|█████ | 3553/7045 [11:33:34<11:01:16, 11.36s/it] {'loss': 1.127, 'learning_rate': 2.586763725409557e-06, 'epoch': 0.5} + 50%|█████ | 3553/7045 [11:33:34<11:01:16, 11.36s/it] 50%|█████ | 3554/7045 [11:33:46<11:06:48, 11.46s/it] {'loss': 1.1318, 'learning_rate': 2.585614989474177e-06, 'epoch': 0.5} + 50%|█████ | 3554/7045 [11:33:46<11:06:48, 11.46s/it] 50%|█████ | 3555/7045 [11:33:57<10:57:21, 11.30s/it] {'loss': 1.1191, 'learning_rate': 2.58446623544096e-06, 'epoch': 0.5} + 50%|█████ | 3555/7045 [11:33:57<10:57:21, 11.30s/it] 50%|█████ | 3556/7045 [11:34:08<10:53:05, 11.23s/it] {'loss': 1.1475, 'learning_rate': 2.5833174635527346e-06, 'epoch': 0.5} + 50%|█████ | 3556/7045 [11:34:08<10:53:05, 11.23s/it] 50%|█████ | 3557/7045 [11:34:19<10:55:48, 11.28s/it] {'loss': 1.1201, 'learning_rate': 2.5821686740523366e-06, 'epoch': 0.5} + 50%|█████ | 3557/7045 [11:34:19<10:55:48, 11.28s/it] 51%|█████ | 3558/7045 [11:34:32<11:17:53, 11.66s/it] {'loss': 1.0869, 'learning_rate': 2.5810198671826043e-06, 'epoch': 0.51} + 51%|█████ | 3558/7045 [11:34:32<11:17:53, 11.66s/it] 51%|█████ | 3559/7045 [11:34:43<11:13:23, 11.59s/it] {'loss': 1.0879, 'learning_rate': 2.579871043186379e-06, 'epoch': 0.51} + 51%|█████ | 3559/7045 [11:34:43<11:13:23, 11.59s/it] 51%|█████ | 3560/7045 [11:34:55<11:05:53, 11.46s/it] {'loss': 1.085, 'learning_rate': 2.5787222023065084e-06, 'epoch': 0.51} + 51%|█████ | 3560/7045 [11:34:55<11:05:53, 11.46s/it] 51%|█████ | 3561/7045 [11:35:06<11:06:00, 11.47s/it] {'loss': 1.1064, 'learning_rate': 2.57757334478584e-06, 'epoch': 0.51} + 51%|█████ | 3561/7045 [11:35:06<11:06:00, 11.47s/it] 51%|█████ | 3562/7045 [11:35:17<10:54:55, 11.28s/it] {'loss': 1.0791, 'learning_rate': 2.576424470867227e-06, 'epoch': 0.51} + 51%|█████ | 3562/7045 [11:35:17<10:54:55, 11.28s/it] 51%|█████ | 3563/7045 [11:35:30<11:26:40, 11.83s/it] {'loss': 1.1602, 'learning_rate': 2.5752755807935255e-06, 'epoch': 0.51} + 51%|█████ | 3563/7045 [11:35:30<11:26:40, 11.83s/it] 51%|█████ | 3564/7045 [11:35:42<11:20:20, 11.73s/it] {'loss': 1.1221, 'learning_rate': 2.5741266748075956e-06, 'epoch': 0.51} + 51%|█████ | 3564/7045 [11:35:42<11:20:20, 11.73s/it] 51%|█████ | 3565/7045 [11:35:53<11:07:20, 11.51s/it] {'loss': 1.1074, 'learning_rate': 2.5729777531522993e-06, 'epoch': 0.51} + 51%|█████ | 3565/7045 [11:35:53<11:07:20, 11.51s/it] 51%|█████ | 3566/7045 [11:36:04<10:58:36, 11.36s/it] {'loss': 1.1094, 'learning_rate': 2.5718288160705045e-06, 'epoch': 0.51} + 51%|█████ | 3566/7045 [11:36:04<10:58:36, 11.36s/it] 51%|█████ | 3567/7045 [11:36:16<11:09:14, 11.55s/it] {'loss': 1.1475, 'learning_rate': 2.5706798638050795e-06, 'epoch': 0.51} + 51%|█████ | 3567/7045 [11:36:16<11:09:14, 11.55s/it] 51%|█████ | 3568/7045 [11:36:27<11:05:07, 11.48s/it] {'loss': 1.1196, 'learning_rate': 2.569530896598898e-06, 'epoch': 0.51} + 51%|█████ | 3568/7045 [11:36:27<11:05:07, 11.48s/it] 51%|█████ | 3569/7045 [11:36:38<10:56:41, 11.34s/it] {'loss': 1.1201, 'learning_rate': 2.568381914694835e-06, 'epoch': 0.51} + 51%|█████ | 3569/7045 [11:36:38<10:56:41, 11.34s/it] 51%|█████ | 3570/7045 [11:36:50<11:08:29, 11.54s/it] {'loss': 1.1406, 'learning_rate': 2.567232918335771e-06, 'epoch': 0.51} + 51%|█████ | 3570/7045 [11:36:50<11:08:29, 11.54s/it] 51%|█████ | 3571/7045 [11:37:02<11:16:07, 11.68s/it] {'loss': 1.0928, 'learning_rate': 2.566083907764586e-06, 'epoch': 0.51} + 51%|█████ | 3571/7045 [11:37:02<11:16:07, 11.68s/it] 51%|█████ | 3572/7045 [11:37:13<11:09:33, 11.57s/it] {'loss': 1.1533, 'learning_rate': 2.5649348832241666e-06, 'epoch': 0.51} + 51%|█████ | 3572/7045 [11:37:13<11:09:33, 11.57s/it] 51%|█████ | 3573/7045 [11:37:24<11:04:06, 11.48s/it] {'loss': 1.1035, 'learning_rate': 2.5637858449574016e-06, 'epoch': 0.51} + 51%|█████ | 3573/7045 [11:37:24<11:04:06, 11.48s/it] 51%|█████ | 3574/7045 [11:37:37<11:18:57, 11.74s/it] {'loss': 1.0947, 'learning_rate': 2.5626367932071806e-06, 'epoch': 0.51} + 51%|█████ | 3574/7045 [11:37:37<11:18:57, 11.74s/it] 51%|█████ | 3575/7045 [11:37:48<11:05:31, 11.51s/it] {'loss': 1.0693, 'learning_rate': 2.5614877282163973e-06, 'epoch': 0.51} + 51%|█████ | 3575/7045 [11:37:48<11:05:31, 11.51s/it] 51%|█████ | 3576/7045 [11:38:00<11:12:51, 11.64s/it] {'loss': 1.062, 'learning_rate': 2.5603386502279494e-06, 'epoch': 0.51} + 51%|█████ | 3576/7045 [11:38:00<11:12:51, 11.64s/it] 51%|█████ | 3577/7045 [11:38:11<11:03:03, 11.47s/it] {'loss': 1.1426, 'learning_rate': 2.5591895594847353e-06, 'epoch': 0.51} + 51%|█████ | 3577/7045 [11:38:11<11:03:03, 11.47s/it] 51%|█████ | 3578/7045 [11:38:23<11:18:04, 11.73s/it] {'loss': 1.0874, 'learning_rate': 2.5580404562296574e-06, 'epoch': 0.51} + 51%|█████ | 3578/7045 [11:38:23<11:18:04, 11.73s/it] 51%|█████ | 3579/7045 [11:38:34<11:10:02, 11.60s/it] {'loss': 1.0649, 'learning_rate': 2.556891340705621e-06, 'epoch': 0.51} + 51%|█████ | 3579/7045 [11:38:34<11:10:02, 11.60s/it] 51%|█████ | 3580/7045 [11:38:46<11:17:17, 11.73s/it] {'loss': 1.0679, 'learning_rate': 2.5557422131555327e-06, 'epoch': 0.51} + 51%|█████ | 3580/7045 [11:38:46<11:17:17, 11.73s/it] 51%|█████ | 3581/7045 [11:38:58<11:05:34, 11.53s/it] {'loss': 1.1406, 'learning_rate': 2.5545930738223036e-06, 'epoch': 0.51} + 51%|█████ | 3581/7045 [11:38:58<11:05:34, 11.53s/it] 51%|█████ | 3582/7045 [11:39:09<11:10:39, 11.62s/it] {'loss': 1.1221, 'learning_rate': 2.553443922948844e-06, 'epoch': 0.51} + 51%|█████ | 3582/7045 [11:39:09<11:10:39, 11.62s/it] 51%|█████ | 3583/7045 [11:39:21<11:07:06, 11.56s/it] {'loss': 1.1387, 'learning_rate': 2.552294760778069e-06, 'epoch': 0.51} + 51%|█████ | 3583/7045 [11:39:21<11:07:06, 11.56s/it] 51%|█████ | 3584/7045 [11:39:32<11:07:32, 11.57s/it] {'loss': 1.1387, 'learning_rate': 2.5511455875528977e-06, 'epoch': 0.51} + 51%|█████ | 3584/7045 [11:39:32<11:07:32, 11.57s/it] 51%|█████ | 3585/7045 [11:39:44<11:00:11, 11.45s/it] {'loss': 1.1494, 'learning_rate': 2.5499964035162483e-06, 'epoch': 0.51} + 51%|█████ | 3585/7045 [11:39:44<11:00:11, 11.45s/it] 51%|█████ | 3586/7045 [11:39:54<10:47:41, 11.23s/it] {'loss': 1.1357, 'learning_rate': 2.5488472089110426e-06, 'epoch': 0.51} + 51%|█████ | 3586/7045 [11:39:54<10:47:41, 11.23s/it] 51%|█████ | 3587/7045 [11:40:06<10:52:48, 11.33s/it] {'loss': 1.1426, 'learning_rate': 2.5476980039802046e-06, 'epoch': 0.51} + 51%|█████ | 3587/7045 [11:40:06<10:52:48, 11.33s/it] 51%|█████ | 3588/7045 [11:40:18<11:06:38, 11.57s/it] {'loss': 1.125, 'learning_rate': 2.546548788966661e-06, 'epoch': 0.51} + 51%|█████ | 3588/7045 [11:40:18<11:06:38, 11.57s/it] 51%|█████ | 3589/7045 [11:40:29<11:02:34, 11.50s/it] {'loss': 1.1006, 'learning_rate': 2.5453995641133393e-06, 'epoch': 0.51} + 51%|█████ | 3589/7045 [11:40:29<11:02:34, 11.50s/it] 51%|█████ | 3590/7045 [11:40:41<11:10:12, 11.64s/it] {'loss': 1.123, 'learning_rate': 2.5442503296631705e-06, 'epoch': 0.51} + 51%|█████ | 3590/7045 [11:40:41<11:10:12, 11.64s/it] 51%|█████ | 3591/7045 [11:40:54<11:24:57, 11.90s/it] {'loss': 1.0645, 'learning_rate': 2.543101085859087e-06, 'epoch': 0.51} + 51%|█████ | 3591/7045 [11:40:54<11:24:57, 11.90s/it] 51%|█████ | 3592/7045 [11:41:05<11:15:09, 11.73s/it] {'loss': 1.145, 'learning_rate': 2.541951832944024e-06, 'epoch': 0.51} + 51%|█████ | 3592/7045 [11:41:05<11:15:09, 11.73s/it] 51%|█████ | 3593/7045 [11:41:16<11:01:25, 11.50s/it] {'loss': 1.105, 'learning_rate': 2.540802571160916e-06, 'epoch': 0.51} + 51%|█████ | 3593/7045 [11:41:16<11:01:25, 11.50s/it] 51%|█████ | 3594/7045 [11:41:27<10:59:37, 11.47s/it] {'loss': 1.1426, 'learning_rate': 2.5396533007527023e-06, 'epoch': 0.51} + 51%|█████ | 3594/7045 [11:41:27<10:59:37, 11.47s/it] 51%|█████ | 3595/7045 [11:41:39<11:03:59, 11.55s/it] {'loss': 1.0786, 'learning_rate': 2.5385040219623235e-06, 'epoch': 0.51} + 51%|█████ | 3595/7045 [11:41:39<11:03:59, 11.55s/it] 51%|█████ | 3596/7045 [11:41:50<10:55:57, 11.41s/it] {'loss': 1.1387, 'learning_rate': 2.5373547350327203e-06, 'epoch': 0.51} + 51%|█████ | 3596/7045 [11:41:50<10:55:57, 11.41s/it] 51%|█████ | 3597/7045 [11:42:04<11:29:33, 12.00s/it] {'loss': 1.084, 'learning_rate': 2.536205440206837e-06, 'epoch': 0.51} + 51%|█████ | 3597/7045 [11:42:04<11:29:33, 12.00s/it] 51%|█████ | 3598/7045 [11:42:15<11:16:57, 11.78s/it] {'loss': 1.1387, 'learning_rate': 2.5350561377276188e-06, 'epoch': 0.51} + 51%|█████ | 3598/7045 [11:42:15<11:16:57, 11.78s/it] 51%|█████ | 3599/7045 [11:42:26<11:03:34, 11.55s/it] {'loss': 1.1221, 'learning_rate': 2.533906827838013e-06, 'epoch': 0.51} + 51%|█████ | 3599/7045 [11:42:26<11:03:34, 11.55s/it] 51%|█████ | 3600/7045 [11:42:37<11:02:06, 11.53s/it] {'loss': 1.1362, 'learning_rate': 2.5327575107809664e-06, 'epoch': 0.51} + 51%|█████ | 3600/7045 [11:42:37<11:02:06, 11.53s/it] 51%|█████ | 3601/7045 [11:42:50<11:18:35, 11.82s/it] {'loss': 1.1055, 'learning_rate': 2.5316081867994304e-06, 'epoch': 0.51} + 51%|█████ | 3601/7045 [11:42:50<11:18:35, 11.82s/it] 51%|█████ | 3602/7045 [11:43:02<11:23:48, 11.92s/it] {'loss': 1.1289, 'learning_rate': 2.5304588561363545e-06, 'epoch': 0.51} + 51%|█████ | 3602/7045 [11:43:02<11:23:48, 11.92s/it] 51%|█████ | 3603/7045 [11:43:15<11:33:12, 12.08s/it] {'loss': 1.0928, 'learning_rate': 2.5293095190346945e-06, 'epoch': 0.51} + 51%|█████ | 3603/7045 [11:43:15<11:33:12, 12.08s/it] 51%|█████ | 3604/7045 [11:43:26<11:22:02, 11.89s/it] {'loss': 1.1221, 'learning_rate': 2.5281601757374017e-06, 'epoch': 0.51} + 51%|█████ | 3604/7045 [11:43:26<11:22:02, 11.89s/it] 51%|█████ | 3605/7045 [11:43:40<11:50:22, 12.39s/it] {'loss': 1.0298, 'learning_rate': 2.5270108264874334e-06, 'epoch': 0.51} + 51%|█████ | 3605/7045 [11:43:40<11:50:22, 12.39s/it] 51%|█████ | 3606/7045 [11:43:51<11:41:02, 12.23s/it] {'loss': 1.123, 'learning_rate': 2.525861471527745e-06, 'epoch': 0.51} + 51%|█████ | 3606/7045 [11:43:51<11:41:02, 12.23s/it] 51%|█████ | 3607/7045 [11:44:04<11:42:28, 12.26s/it] {'loss': 1.127, 'learning_rate': 2.5247121111012964e-06, 'epoch': 0.51} + 51%|█████ | 3607/7045 [11:44:04<11:42:28, 12.26s/it] 51%|█████ | 3608/7045 [11:44:15<11:25:59, 11.98s/it] {'loss': 1.085, 'learning_rate': 2.5235627454510435e-06, 'epoch': 0.51} + 51%|█████ | 3608/7045 [11:44:15<11:25:59, 11.98s/it] 51%|█████ | 3609/7045 [11:44:26<11:13:08, 11.75s/it] {'loss': 1.127, 'learning_rate': 2.5224133748199497e-06, 'epoch': 0.51} + 51%|█████ | 3609/7045 [11:44:26<11:13:08, 11.75s/it] 51%|█████ | 3610/7045 [11:44:39<11:21:41, 11.91s/it] {'loss': 1.1055, 'learning_rate': 2.5212639994509746e-06, 'epoch': 0.51} + 51%|█████ | 3610/7045 [11:44:39<11:21:41, 11.91s/it] 51%|█████▏ | 3611/7045 [11:44:50<11:09:25, 11.70s/it] {'loss': 1.1035, 'learning_rate': 2.5201146195870807e-06, 'epoch': 0.51} + 51%|█████▏ | 3611/7045 [11:44:50<11:09:25, 11.70s/it] 51%|█████▏ | 3612/7045 [11:45:01<10:56:40, 11.48s/it] {'loss': 1.127, 'learning_rate': 2.5189652354712313e-06, 'epoch': 0.51} + 51%|█████▏ | 3612/7045 [11:45:01<10:56:40, 11.48s/it] 51%|█████▏ | 3613/7045 [11:45:12<10:51:20, 11.39s/it] {'loss': 1.1143, 'learning_rate': 2.5178158473463906e-06, 'epoch': 0.51} + 51%|█████▏ | 3613/7045 [11:45:12<10:51:20, 11.39s/it] 51%|█████▏ | 3614/7045 [11:45:25<11:21:43, 11.92s/it] {'loss': 1.1357, 'learning_rate': 2.5166664554555224e-06, 'epoch': 0.51} + 51%|█████▏ | 3614/7045 [11:45:25<11:21:43, 11.92s/it] 51%|█████▏ | 3615/7045 [11:45:36<11:06:30, 11.66s/it] {'loss': 1.1182, 'learning_rate': 2.515517060041594e-06, 'epoch': 0.51} + 51%|█████▏ | 3615/7045 [11:45:36<11:06:30, 11.66s/it] 51%|█████▏ | 3616/7045 [11:45:49<11:32:17, 12.11s/it] {'loss': 1.0557, 'learning_rate': 2.514367661347571e-06, 'epoch': 0.51} + 51%|█████▏ | 3616/7045 [11:45:49<11:32:17, 12.11s/it] 51%|█████▏ | 3617/7045 [11:46:01<11:18:43, 11.88s/it] {'loss': 1.1123, 'learning_rate': 2.5132182596164212e-06, 'epoch': 0.51} + 51%|█████▏ | 3617/7045 [11:46:01<11:18:43, 11.88s/it] 51%|█████▏ | 3618/7045 [11:46:13<11:23:34, 11.97s/it] {'loss': 1.1021, 'learning_rate': 2.512068855091112e-06, 'epoch': 0.51} + 51%|█████▏ | 3618/7045 [11:46:13<11:23:34, 11.97s/it] 51%|█████▏ | 3619/7045 [11:46:24<11:12:56, 11.79s/it] {'loss': 1.1182, 'learning_rate': 2.510919448014612e-06, 'epoch': 0.51} + 51%|█████▏ | 3619/7045 [11:46:24<11:12:56, 11.79s/it] 51%|█████▏ | 3620/7045 [11:46:37<11:28:08, 12.05s/it] {'loss': 1.1279, 'learning_rate': 2.509770038629889e-06, 'epoch': 0.51} + 51%|█████▏ | 3620/7045 [11:46:37<11:28:08, 12.05s/it] 51%|█████▏ | 3621/7045 [11:46:50<11:44:34, 12.35s/it] {'loss': 1.124, 'learning_rate': 2.5086206271799144e-06, 'epoch': 0.51} + 51%|█████▏ | 3621/7045 [11:46:50<11:44:34, 12.35s/it] 51%|█████▏ | 3622/7045 [11:47:01<11:28:32, 12.07s/it] {'loss': 1.1758, 'learning_rate': 2.507471213907657e-06, 'epoch': 0.51} + 51%|█████▏ | 3622/7045 [11:47:01<11:28:32, 12.07s/it] 51%|█████▏ | 3623/7045 [11:47:14<11:38:44, 12.25s/it] {'loss': 1.123, 'learning_rate': 2.5063217990560863e-06, 'epoch': 0.51} + 51%|█████▏ | 3623/7045 [11:47:14<11:38:44, 12.25s/it] 51%|█████▏ | 3624/7045 [11:47:25<11:17:51, 11.89s/it] {'loss': 1.1123, 'learning_rate': 2.5051723828681746e-06, 'epoch': 0.51} + 51%|█████▏ | 3624/7045 [11:47:25<11:17:51, 11.89s/it] 51%|█████▏ | 3625/7045 [11:47:36<11:10:10, 11.76s/it] {'loss': 1.1328, 'learning_rate': 2.504022965586891e-06, 'epoch': 0.51} + 51%|█████▏ | 3625/7045 [11:47:36<11:10:10, 11.76s/it] 51%|█████▏ | 3626/7045 [11:47:48<10:58:17, 11.55s/it] {'loss': 1.1123, 'learning_rate': 2.502873547455208e-06, 'epoch': 0.51} + 51%|█████▏ | 3626/7045 [11:47:48<10:58:17, 11.55s/it] 51%|█████▏ | 3627/7045 [11:47:59<10:59:10, 11.57s/it] {'loss': 1.1172, 'learning_rate': 2.5017241287160965e-06, 'epoch': 0.51} + 51%|█████▏ | 3627/7045 [11:47:59<10:59:10, 11.57s/it] 51%|█████▏ | 3628/7045 [11:48:12<11:28:39, 12.09s/it] {'loss': 1.0576, 'learning_rate': 2.5005747096125275e-06, 'epoch': 0.51} + 51%|█████▏ | 3628/7045 [11:48:12<11:28:39, 12.09s/it] 52%|█████▏ | 3629/7045 [11:48:24<11:23:31, 12.01s/it] {'loss': 1.1274, 'learning_rate': 2.499425290387473e-06, 'epoch': 0.52} + 52%|█████▏ | 3629/7045 [11:48:24<11:23:31, 12.01s/it] 52%|█████▏ | 3630/7045 [11:48:35<11:09:32, 11.76s/it] {'loss': 1.1377, 'learning_rate': 2.4982758712839035e-06, 'epoch': 0.52} + 52%|█████▏ | 3630/7045 [11:48:35<11:09:32, 11.76s/it] 52%|█████▏ | 3631/7045 [11:48:47<10:57:57, 11.56s/it] {'loss': 1.1787, 'learning_rate': 2.4971264525447917e-06, 'epoch': 0.52} + 52%|█████▏ | 3631/7045 [11:48:47<10:57:57, 11.56s/it] 52%|█████▏ | 3632/7045 [11:48:58<10:51:06, 11.45s/it] {'loss': 1.1328, 'learning_rate': 2.4959770344131096e-06, 'epoch': 0.52} + 52%|█████▏ | 3632/7045 [11:48:58<10:51:06, 11.45s/it] 52%|█████▏ | 3633/7045 [11:49:10<11:06:49, 11.73s/it] {'loss': 1.0879, 'learning_rate': 2.4948276171318262e-06, 'epoch': 0.52} + 52%|█████▏ | 3633/7045 [11:49:10<11:06:49, 11.73s/it] 52%|█████▏ | 3634/7045 [11:49:21<10:55:26, 11.53s/it] {'loss': 1.126, 'learning_rate': 2.493678200943914e-06, 'epoch': 0.52} + 52%|█████▏ | 3634/7045 [11:49:21<10:55:26, 11.53s/it] 52%|█████▏ | 3635/7045 [11:49:32<10:46:51, 11.38s/it] {'loss': 1.083, 'learning_rate': 2.4925287860923443e-06, 'epoch': 0.52} + 52%|█████▏ | 3635/7045 [11:49:32<10:46:51, 11.38s/it] 52%|█████▏ | 3636/7045 [11:49:47<11:41:58, 12.36s/it] {'loss': 1.1152, 'learning_rate': 2.4913793728200864e-06, 'epoch': 0.52} + 52%|█████▏ | 3636/7045 [11:49:47<11:41:58, 12.36s/it] 52%|█████▏ | 3637/7045 [11:49:59<11:45:50, 12.43s/it] {'loss': 1.1172, 'learning_rate': 2.4902299613701115e-06, 'epoch': 0.52} + 52%|█████▏ | 3637/7045 [11:49:59<11:45:50, 12.43s/it] 52%|█████▏ | 3638/7045 [11:50:12<11:48:33, 12.48s/it] {'loss': 1.1094, 'learning_rate': 2.4890805519853893e-06, 'epoch': 0.52} + 52%|█████▏ | 3638/7045 [11:50:12<11:48:33, 12.48s/it] 52%|█████▏ | 3639/7045 [11:50:25<11:51:43, 12.54s/it] {'loss': 1.0693, 'learning_rate': 2.487931144908889e-06, 'epoch': 0.52} + 52%|█████▏ | 3639/7045 [11:50:25<11:51:43, 12.54s/it] 52%|█████▏ | 3640/7045 [11:50:36<11:34:03, 12.23s/it] {'loss': 1.1128, 'learning_rate': 2.4867817403835796e-06, 'epoch': 0.52} + 52%|█████▏ | 3640/7045 [11:50:36<11:34:03, 12.23s/it] 52%|█████▏ | 3641/7045 [11:50:48<11:20:59, 12.00s/it] {'loss': 1.125, 'learning_rate': 2.485632338652429e-06, 'epoch': 0.52} + 52%|█████▏ | 3641/7045 [11:50:48<11:20:59, 12.00s/it] 52%|█████▏ | 3642/7045 [11:50:59<11:03:15, 11.69s/it] {'loss': 1.1016, 'learning_rate': 2.484482939958406e-06, 'epoch': 0.52} + 52%|█████▏ | 3642/7045 [11:50:59<11:03:15, 11.69s/it] 52%|█████▏ | 3643/7045 [11:51:11<11:15:49, 11.92s/it] {'loss': 1.0825, 'learning_rate': 2.483333544544478e-06, 'epoch': 0.52} + 52%|█████▏ | 3643/7045 [11:51:11<11:15:49, 11.92s/it] 52%|█████▏ | 3644/7045 [11:51:22<11:03:35, 11.71s/it] {'loss': 1.1113, 'learning_rate': 2.4821841526536107e-06, 'epoch': 0.52} + 52%|█████▏ | 3644/7045 [11:51:22<11:03:35, 11.71s/it] 52%|█████▏ | 3645/7045 [11:51:35<11:16:38, 11.94s/it] {'loss': 1.0796, 'learning_rate': 2.4810347645287696e-06, 'epoch': 0.52} + 52%|█████▏ | 3645/7045 [11:51:35<11:16:38, 11.94s/it] 52%|█████▏ | 3646/7045 [11:51:45<10:54:50, 11.56s/it] {'loss': 1.1152, 'learning_rate': 2.4798853804129198e-06, 'epoch': 0.52} + 52%|█████▏ | 3646/7045 [11:51:45<10:54:50, 11.56s/it] 52%|█████▏ | 3647/7045 [11:51:57<10:51:46, 11.51s/it] {'loss': 1.1396, 'learning_rate': 2.478736000549026e-06, 'epoch': 0.52} + 52%|█████▏ | 3647/7045 [11:51:57<10:51:46, 11.51s/it] 52%|█████▏ | 3648/7045 [11:52:08<10:50:21, 11.49s/it] {'loss': 1.1045, 'learning_rate': 2.477586625180051e-06, 'epoch': 0.52} + 52%|█████▏ | 3648/7045 [11:52:08<10:50:21, 11.49s/it] 52%|█████▏ | 3649/7045 [11:52:21<11:08:34, 11.81s/it] {'loss': 1.1455, 'learning_rate': 2.476437254548957e-06, 'epoch': 0.52} + 52%|█████▏ | 3649/7045 [11:52:21<11:08:34, 11.81s/it] 52%|█████▏ | 3650/7045 [11:52:33<11:16:40, 11.96s/it] {'loss': 1.1177, 'learning_rate': 2.475287888898705e-06, 'epoch': 0.52} + 52%|█████▏ | 3650/7045 [11:52:33<11:16:40, 11.96s/it] 52%|█████▏ | 3651/7045 [11:52:44<11:03:20, 11.73s/it] {'loss': 1.1123, 'learning_rate': 2.4741385284722553e-06, 'epoch': 0.52} + 52%|█████▏ | 3651/7045 [11:52:44<11:03:20, 11.73s/it] 52%|█████▏ | 3652/7045 [11:52:57<11:23:04, 12.08s/it] {'loss': 1.1367, 'learning_rate': 2.472989173512568e-06, 'epoch': 0.52} + 52%|█████▏ | 3652/7045 [11:52:57<11:23:04, 12.08s/it] 52%|█████▏ | 3653/7045 [11:53:10<11:30:08, 12.21s/it] {'loss': 1.1045, 'learning_rate': 2.4718398242625982e-06, 'epoch': 0.52} + 52%|█████▏ | 3653/7045 [11:53:10<11:30:08, 12.21s/it] 52%|█████▏ | 3654/7045 [11:53:22<11:28:35, 12.18s/it] {'loss': 1.1025, 'learning_rate': 2.4706904809653064e-06, 'epoch': 0.52} + 52%|█████▏ | 3654/7045 [11:53:22<11:28:35, 12.18s/it] 52%|█████▏ | 3655/7045 [11:53:34<11:23:09, 12.09s/it] {'loss': 1.0947, 'learning_rate': 2.469541143863646e-06, 'epoch': 0.52} + 52%|█████▏ | 3655/7045 [11:53:34<11:23:09, 12.09s/it] 52%|█████▏ | 3656/7045 [11:53:45<11:09:54, 11.86s/it] {'loss': 1.1846, 'learning_rate': 2.468391813200571e-06, 'epoch': 0.52} + 52%|█████▏ | 3656/7045 [11:53:45<11:09:54, 11.86s/it] 52%|█████▏ | 3657/7045 [11:53:57<11:08:27, 11.84s/it] {'loss': 1.1289, 'learning_rate': 2.4672424892190344e-06, 'epoch': 0.52} + 52%|█████▏ | 3657/7045 [11:53:57<11:08:27, 11.84s/it] 52%|█████▏ | 3658/7045 [11:54:09<11:13:52, 11.94s/it] {'loss': 1.1396, 'learning_rate': 2.466093172161988e-06, 'epoch': 0.52} + 52%|█████▏ | 3658/7045 [11:54:09<11:13:52, 11.94s/it] 52%|█████▏ | 3659/7045 [11:54:20<10:55:56, 11.62s/it] {'loss': 1.1123, 'learning_rate': 2.4649438622723816e-06, 'epoch': 0.52} + 52%|█████▏ | 3659/7045 [11:54:20<10:55:56, 11.62s/it] 52%|█████▏ | 3660/7045 [11:54:31<10:43:55, 11.41s/it] {'loss': 1.1553, 'learning_rate': 2.4637945597931632e-06, 'epoch': 0.52} + 52%|█████▏ | 3660/7045 [11:54:31<10:43:55, 11.41s/it] 52%|█████▏ | 3661/7045 [11:54:42<10:42:13, 11.39s/it] {'loss': 1.1201, 'learning_rate': 2.46264526496728e-06, 'epoch': 0.52} + 52%|█████▏ | 3661/7045 [11:54:42<10:42:13, 11.39s/it] 52%|█████▏ | 3662/7045 [11:54:54<10:55:15, 11.62s/it] {'loss': 1.0889, 'learning_rate': 2.4614959780376774e-06, 'epoch': 0.52} + 52%|█████▏ | 3662/7045 [11:54:54<10:55:15, 11.62s/it] 52%|█████▏ | 3663/7045 [11:55:06<10:59:28, 11.70s/it] {'loss': 1.127, 'learning_rate': 2.460346699247299e-06, 'epoch': 0.52} + 52%|█████▏ | 3663/7045 [11:55:06<10:59:28, 11.70s/it] 52%|█████▏ | 3664/7045 [11:55:17<10:48:41, 11.51s/it] {'loss': 1.0938, 'learning_rate': 2.4591974288390853e-06, 'epoch': 0.52} + 52%|█████▏ | 3664/7045 [11:55:17<10:48:41, 11.51s/it] 52%|█████▏ | 3665/7045 [11:55:28<10:41:33, 11.39s/it] {'loss': 1.083, 'learning_rate': 2.4580481670559774e-06, 'epoch': 0.52} + 52%|█████▏ | 3665/7045 [11:55:28<10:41:33, 11.39s/it] 52%|█████▏ | 3666/7045 [11:55:41<10:56:13, 11.65s/it] {'loss': 1.1396, 'learning_rate': 2.4568989141409135e-06, 'epoch': 0.52} + 52%|█████▏ | 3666/7045 [11:55:41<10:56:13, 11.65s/it] 52%|█████▏ | 3667/7045 [11:55:53<11:10:13, 11.90s/it] {'loss': 1.083, 'learning_rate': 2.45574967033683e-06, 'epoch': 0.52} + 52%|█████▏ | 3667/7045 [11:55:53<11:10:13, 11.90s/it] 52%|█████▏ | 3668/7045 [11:56:04<10:55:56, 11.65s/it] {'loss': 1.0601, 'learning_rate': 2.454600435886661e-06, 'epoch': 0.52} + 52%|█████▏ | 3668/7045 [11:56:04<10:55:56, 11.65s/it] 52%|█████▏ | 3669/7045 [11:56:17<11:10:29, 11.92s/it] {'loss': 1.106, 'learning_rate': 2.4534512110333393e-06, 'epoch': 0.52} + 52%|█████▏ | 3669/7045 [11:56:17<11:10:29, 11.92s/it] 52%|█████▏ | 3670/7045 [11:56:28<10:59:41, 11.73s/it] {'loss': 1.1133, 'learning_rate': 2.4523019960197963e-06, 'epoch': 0.52} + 52%|█████▏ | 3670/7045 [11:56:28<10:59:41, 11.73s/it] 52%|█████▏ | 3671/7045 [11:56:39<10:42:45, 11.43s/it] {'loss': 1.0977, 'learning_rate': 2.4511527910889582e-06, 'epoch': 0.52} + 52%|█████▏ | 3671/7045 [11:56:39<10:42:45, 11.43s/it] 52%|█████▏ | 3672/7045 [11:56:50<10:39:50, 11.38s/it] {'loss': 1.0889, 'learning_rate': 2.4500035964837525e-06, 'epoch': 0.52} + 52%|█████▏ | 3672/7045 [11:56:50<10:39:50, 11.38s/it] 52%|█████▏ | 3673/7045 [11:57:01<10:35:21, 11.31s/it] {'loss': 1.1572, 'learning_rate': 2.4488544124471027e-06, 'epoch': 0.52} + 52%|█████▏ | 3673/7045 [11:57:01<10:35:21, 11.31s/it] 52%|█████▏ | 3674/7045 [11:57:12<10:32:01, 11.25s/it] {'loss': 1.1182, 'learning_rate': 2.4477052392219315e-06, 'epoch': 0.52} + 52%|█████▏ | 3674/7045 [11:57:12<10:32:01, 11.25s/it] 52%|█████▏ | 3675/7045 [11:57:23<10:30:55, 11.23s/it] {'loss': 1.1338, 'learning_rate': 2.4465560770511572e-06, 'epoch': 0.52} + 52%|█████▏ | 3675/7045 [11:57:23<10:30:55, 11.23s/it] 52%|█████▏ | 3676/7045 [11:57:35<10:29:37, 11.21s/it] {'loss': 1.0703, 'learning_rate': 2.445406926177698e-06, 'epoch': 0.52} + 52%|█████▏ | 3676/7045 [11:57:35<10:29:37, 11.21s/it] 52%|█████▏ | 3677/7045 [11:57:46<10:31:41, 11.25s/it] {'loss': 1.1006, 'learning_rate': 2.4442577868444677e-06, 'epoch': 0.52} + 52%|█████▏ | 3677/7045 [11:57:46<10:31:41, 11.25s/it] 52%|█████▏ | 3678/7045 [11:57:58<10:37:25, 11.36s/it] {'loss': 1.0996, 'learning_rate': 2.4431086592943792e-06, 'epoch': 0.52} + 52%|█████▏ | 3678/7045 [11:57:58<10:37:25, 11.36s/it] 52%|█████▏ | 3679/7045 [11:58:09<10:34:39, 11.31s/it] {'loss': 1.1553, 'learning_rate': 2.4419595437703426e-06, 'epoch': 0.52} + 52%|█████▏ | 3679/7045 [11:58:09<10:34:39, 11.31s/it] 52%|█████▏ | 3680/7045 [11:58:20<10:26:02, 11.16s/it] {'loss': 1.124, 'learning_rate': 2.4408104405152656e-06, 'epoch': 0.52} + 52%|█████▏ | 3680/7045 [11:58:20<10:26:02, 11.16s/it] 52%|█████▏ | 3681/7045 [11:58:31<10:30:55, 11.25s/it] {'loss': 1.0986, 'learning_rate': 2.4396613497720515e-06, 'epoch': 0.52} + 52%|█████▏ | 3681/7045 [11:58:31<10:30:55, 11.25s/it] 52%|█████▏ | 3682/7045 [11:58:42<10:26:43, 11.18s/it] {'loss': 1.0996, 'learning_rate': 2.4385122717836035e-06, 'epoch': 0.52} + 52%|█████▏ | 3682/7045 [11:58:42<10:26:43, 11.18s/it] 52%|█████▏ | 3683/7045 [11:58:56<11:05:04, 11.87s/it] {'loss': 1.0894, 'learning_rate': 2.4373632067928206e-06, 'epoch': 0.52} + 52%|█████▏ | 3683/7045 [11:58:56<11:05:04, 11.87s/it] 52%|█████▏ | 3684/7045 [11:59:07<10:55:53, 11.71s/it] {'loss': 1.1553, 'learning_rate': 2.4362141550425993e-06, 'epoch': 0.52} + 52%|█████▏ | 3684/7045 [11:59:07<10:55:53, 11.71s/it] 52%|█████▏ | 3685/7045 [11:59:19<11:08:17, 11.93s/it] {'loss': 1.0986, 'learning_rate': 2.435065116775834e-06, 'epoch': 0.52} + 52%|█████▏ | 3685/7045 [11:59:19<11:08:17, 11.93s/it] 52%|█████▏ | 3686/7045 [11:59:30<10:54:05, 11.68s/it] {'loss': 1.166, 'learning_rate': 2.433916092235415e-06, 'epoch': 0.52} + 52%|█████▏ | 3686/7045 [11:59:30<10:54:05, 11.68s/it] 52%|█████▏ | 3687/7045 [11:59:42<10:58:22, 11.76s/it] {'loss': 1.0889, 'learning_rate': 2.4327670816642303e-06, 'epoch': 0.52} + 52%|█████▏ | 3687/7045 [11:59:42<10:58:22, 11.76s/it] 52%|█████▏ | 3688/7045 [11:59:55<11:16:56, 12.10s/it] {'loss': 1.124, 'learning_rate': 2.4316180853051656e-06, 'epoch': 0.52} + 52%|█████▏ | 3688/7045 [11:59:55<11:16:56, 12.10s/it] 52%|█████▏ | 3689/7045 [12:00:06<11:00:54, 11.82s/it] {'loss': 1.1348, 'learning_rate': 2.4304691034011037e-06, 'epoch': 0.52} + 52%|█████▏ | 3689/7045 [12:00:06<11:00:54, 11.82s/it] 52%|█████▏ | 3690/7045 [12:00:19<11:14:15, 12.06s/it] {'loss': 1.0938, 'learning_rate': 2.4293201361949205e-06, 'epoch': 0.52} + 52%|█████▏ | 3690/7045 [12:00:19<11:14:15, 12.06s/it] 52%|█████▏ | 3691/7045 [12:00:31<11:07:54, 11.95s/it] {'loss': 1.1299, 'learning_rate': 2.4281711839294963e-06, 'epoch': 0.52} + 52%|█████▏ | 3691/7045 [12:00:31<11:07:54, 11.95s/it] 52%|█████▏ | 3692/7045 [12:00:42<10:58:44, 11.79s/it] {'loss': 1.1318, 'learning_rate': 2.427022246847701e-06, 'epoch': 0.52} + 52%|█████▏ | 3692/7045 [12:00:42<10:58:44, 11.79s/it] 52%|█████▏ | 3693/7045 [12:00:55<11:15:36, 12.09s/it] {'loss': 1.1177, 'learning_rate': 2.4258733251924053e-06, 'epoch': 0.52} + 52%|█████▏ | 3693/7045 [12:00:55<11:15:36, 12.09s/it] 52%|█████▏ | 3694/7045 [12:01:06<10:58:19, 11.79s/it] {'loss': 1.1523, 'learning_rate': 2.424724419206475e-06, 'epoch': 0.52} + 52%|█████▏ | 3694/7045 [12:01:06<10:58:19, 11.79s/it] 52%|█████▏ | 3695/7045 [12:01:17<10:49:03, 11.62s/it] {'loss': 1.1289, 'learning_rate': 2.4235755291327737e-06, 'epoch': 0.52} + 52%|█████▏ | 3695/7045 [12:01:17<10:49:03, 11.62s/it] 52%|█████▏ | 3696/7045 [12:01:29<10:49:57, 11.64s/it] {'loss': 1.1353, 'learning_rate': 2.422426655214161e-06, 'epoch': 0.52} + 52%|█████▏ | 3696/7045 [12:01:29<10:49:57, 11.64s/it] 52%|█████▏ | 3697/7045 [12:01:41<10:57:35, 11.78s/it] {'loss': 1.103, 'learning_rate': 2.4212777976934924e-06, 'epoch': 0.52} + 52%|█████▏ | 3697/7045 [12:01:41<10:57:35, 11.78s/it] 52%|█████▏ | 3698/7045 [12:01:53<11:03:26, 11.89s/it] {'loss': 1.0859, 'learning_rate': 2.4201289568136212e-06, 'epoch': 0.52} + 52%|█████▏ | 3698/7045 [12:01:53<11:03:26, 11.89s/it] 53%|█████▎ | 3699/7045 [12:02:06<11:12:34, 12.06s/it] {'loss': 1.1113, 'learning_rate': 2.4189801328173965e-06, 'epoch': 0.53} + 53%|█████▎ | 3699/7045 [12:02:06<11:12:34, 12.06s/it] 53%|█████▎ | 3700/7045 [12:02:17<10:53:36, 11.72s/it] {'loss': 1.0977, 'learning_rate': 2.4178313259476647e-06, 'epoch': 0.53} + 53%|█████▎ | 3700/7045 [12:02:17<10:53:36, 11.72s/it] 53%|█████▎ | 3701/7045 [12:02:28<10:42:30, 11.53s/it] {'loss': 1.082, 'learning_rate': 2.4166825364472667e-06, 'epoch': 0.53} + 53%|█████▎ | 3701/7045 [12:02:28<10:42:30, 11.53s/it] 53%|█████▎ | 3702/7045 [12:02:40<11:00:56, 11.86s/it] {'loss': 1.1543, 'learning_rate': 2.4155337645590406e-06, 'epoch': 0.53} + 53%|█████▎ | 3702/7045 [12:02:40<11:00:56, 11.86s/it] 53%|█████▎ | 3703/7045 [12:02:52<10:51:51, 11.70s/it] {'loss': 1.1104, 'learning_rate': 2.414385010525823e-06, 'epoch': 0.53} + 53%|█████▎ | 3703/7045 [12:02:52<10:51:51, 11.70s/it] 53%|█████▎ | 3704/7045 [12:03:03<10:41:35, 11.52s/it] {'loss': 1.0576, 'learning_rate': 2.4132362745904435e-06, 'epoch': 0.53} + 53%|█████▎ | 3704/7045 [12:03:03<10:41:35, 11.52s/it] 53%|█████▎ | 3705/7045 [12:03:14<10:33:12, 11.37s/it] {'loss': 1.0908, 'learning_rate': 2.412087556995729e-06, 'epoch': 0.53} + 53%|█████▎ | 3705/7045 [12:03:14<10:33:12, 11.37s/it] 53%|█████▎ | 3706/7045 [12:03:25<10:33:49, 11.39s/it] {'loss': 1.1318, 'learning_rate': 2.4109388579845037e-06, 'epoch': 0.53} + 53%|█████▎ | 3706/7045 [12:03:25<10:33:49, 11.39s/it] 53%|█████▎ | 3707/7045 [12:03:36<10:22:44, 11.19s/it] {'loss': 1.0815, 'learning_rate': 2.4097901777995864e-06, 'epoch': 0.53} + 53%|█████▎ | 3707/7045 [12:03:36<10:22:44, 11.19s/it] 53%|█████▎ | 3708/7045 [12:03:50<11:07:40, 12.01s/it] {'loss': 1.0698, 'learning_rate': 2.408641516683792e-06, 'epoch': 0.53} + 53%|█████▎ | 3708/7045 [12:03:50<11:07:40, 12.01s/it] 53%|█████▎ | 3709/7045 [12:04:03<11:31:28, 12.44s/it] {'loss': 1.0947, 'learning_rate': 2.4074928748799313e-06, 'epoch': 0.53} + 53%|█████▎ | 3709/7045 [12:04:03<11:31:28, 12.44s/it] 53%|█████▎ | 3710/7045 [12:04:15<11:13:03, 12.11s/it] {'loss': 1.166, 'learning_rate': 2.4063442526308127e-06, 'epoch': 0.53} + 53%|█████▎ | 3710/7045 [12:04:15<11:13:03, 12.11s/it] 53%|█████▎ | 3711/7045 [12:04:26<10:52:31, 11.74s/it] {'loss': 1.1162, 'learning_rate': 2.4051956501792393e-06, 'epoch': 0.53} + 53%|█████▎ | 3711/7045 [12:04:26<10:52:31, 11.74s/it] 53%|█████▎ | 3712/7045 [12:04:37<10:40:27, 11.53s/it] {'loss': 1.1406, 'learning_rate': 2.404047067768009e-06, 'epoch': 0.53} + 53%|█████▎ | 3712/7045 [12:04:37<10:40:27, 11.53s/it] 53%|█████▎ | 3713/7045 [12:04:49<10:51:58, 11.74s/it] {'loss': 1.1572, 'learning_rate': 2.4028985056399163e-06, 'epoch': 0.53} + 53%|█████▎ | 3713/7045 [12:04:49<10:51:58, 11.74s/it] 53%|█████▎ | 3714/7045 [12:05:00<10:40:43, 11.54s/it] {'loss': 1.1387, 'learning_rate': 2.4017499640377516e-06, 'epoch': 0.53} + 53%|█████▎ | 3714/7045 [12:05:00<10:40:43, 11.54s/it] 53%|█████▎ | 3715/7045 [12:05:11<10:28:55, 11.33s/it] {'loss': 1.1387, 'learning_rate': 2.400601443204302e-06, 'epoch': 0.53} + 53%|█████▎ | 3715/7045 [12:05:11<10:28:55, 11.33s/it] 53%|█████▎ | 3716/7045 [12:05:24<11:04:52, 11.98s/it] {'loss': 1.1011, 'learning_rate': 2.3994529433823483e-06, 'epoch': 0.53} + 53%|█████▎ | 3716/7045 [12:05:24<11:04:52, 11.98s/it] 53%|█████▎ | 3717/7045 [12:05:36<11:06:13, 12.01s/it] {'loss': 1.0928, 'learning_rate': 2.398304464814668e-06, 'epoch': 0.53} + 53%|█████▎ | 3717/7045 [12:05:36<11:06:13, 12.01s/it] 53%|█████▎ | 3718/7045 [12:05:47<10:45:13, 11.64s/it] {'loss': 1.1211, 'learning_rate': 2.3971560077440336e-06, 'epoch': 0.53} + 53%|█████▎ | 3718/7045 [12:05:47<10:45:13, 11.64s/it] 53%|█████▎ | 3719/7045 [12:06:00<11:07:13, 12.04s/it] {'loss': 1.0981, 'learning_rate': 2.396007572413213e-06, 'epoch': 0.53} + 53%|█████▎ | 3719/7045 [12:06:00<11:07:13, 12.04s/it] 53%|█████▎ | 3720/7045 [12:06:11<10:51:42, 11.76s/it] {'loss': 1.1328, 'learning_rate': 2.39485915906497e-06, 'epoch': 0.53} + 53%|█████▎ | 3720/7045 [12:06:11<10:51:42, 11.76s/it] 53%|█████▎ | 3721/7045 [12:06:22<10:41:07, 11.57s/it] {'loss': 1.1152, 'learning_rate': 2.393710767942063e-06, 'epoch': 0.53} + 53%|█████▎ | 3721/7045 [12:06:22<10:41:07, 11.57s/it] 53%|█████▎ | 3722/7045 [12:06:33<10:31:33, 11.40s/it] {'loss': 1.1309, 'learning_rate': 2.392562399287247e-06, 'epoch': 0.53} + 53%|█████▎ | 3722/7045 [12:06:33<10:31:33, 11.40s/it] 53%|█████▎ | 3723/7045 [12:06:45<10:36:06, 11.49s/it] {'loss': 1.1553, 'learning_rate': 2.391414053343271e-06, 'epoch': 0.53} + 53%|█████▎ | 3723/7045 [12:06:45<10:36:06, 11.49s/it] 53%|█████▎ | 3724/7045 [12:06:57<10:42:41, 11.61s/it] {'loss': 1.1553, 'learning_rate': 2.3902657303528792e-06, 'epoch': 0.53} + 53%|█████▎ | 3724/7045 [12:06:57<10:42:41, 11.61s/it] 53%|█████▎ | 3725/7045 [12:07:08<10:34:52, 11.47s/it] {'loss': 1.0903, 'learning_rate': 2.389117430558812e-06, 'epoch': 0.53} + 53%|█████▎ | 3725/7045 [12:07:08<10:34:52, 11.47s/it] 53%|█████▎ | 3726/7045 [12:07:20<10:38:04, 11.53s/it] {'loss': 1.1182, 'learning_rate': 2.3879691542038033e-06, 'epoch': 0.53} + 53%|█████▎ | 3726/7045 [12:07:20<10:38:04, 11.53s/it] 53%|█████▎ | 3727/7045 [12:07:33<11:10:33, 12.13s/it] {'loss': 1.0967, 'learning_rate': 2.3868209015305845e-06, 'epoch': 0.53} + 53%|█████▎ | 3727/7045 [12:07:33<11:10:33, 12.13s/it] 53%|█████▎ | 3728/7045 [12:07:44<10:53:19, 11.82s/it] {'loss': 1.1465, 'learning_rate': 2.38567267278188e-06, 'epoch': 0.53} + 53%|█████▎ | 3728/7045 [12:07:44<10:53:19, 11.82s/it] 53%|█████▎ | 3729/7045 [12:07:56<10:43:26, 11.64s/it] {'loss': 1.0854, 'learning_rate': 2.3845244682004097e-06, 'epoch': 0.53} + 53%|█████▎ | 3729/7045 [12:07:56<10:43:26, 11.64s/it] 53%|█████▎ | 3730/7045 [12:08:07<10:35:52, 11.51s/it] {'loss': 1.1084, 'learning_rate': 2.3833762880288875e-06, 'epoch': 0.53} + 53%|█████▎ | 3730/7045 [12:08:07<10:35:52, 11.51s/it] 53%|█████▎ | 3731/7045 [12:08:18<10:34:50, 11.49s/it] {'loss': 1.085, 'learning_rate': 2.382228132510024e-06, 'epoch': 0.53} + 53%|█████▎ | 3731/7045 [12:08:18<10:34:50, 11.49s/it] 53%|█████▎ | 3732/7045 [12:08:29<10:29:50, 11.41s/it] {'loss': 1.0693, 'learning_rate': 2.381080001886523e-06, 'epoch': 0.53} + 53%|█████▎ | 3732/7045 [12:08:29<10:29:50, 11.41s/it] 53%|█████▎ | 3733/7045 [12:08:41<10:28:03, 11.38s/it] {'loss': 1.1396, 'learning_rate': 2.379931896401085e-06, 'epoch': 0.53} + 53%|█████▎ | 3733/7045 [12:08:41<10:28:03, 11.38s/it] 53%|█████▎ | 3734/7045 [12:08:52<10:26:31, 11.35s/it] {'loss': 1.1191, 'learning_rate': 2.378783816296401e-06, 'epoch': 0.53} + 53%|█████▎ | 3734/7045 [12:08:52<10:26:31, 11.35s/it] 53%|█████▎ | 3735/7045 [12:09:03<10:21:08, 11.26s/it] {'loss': 1.1543, 'learning_rate': 2.377635761815162e-06, 'epoch': 0.53} + 53%|█████▎ | 3735/7045 [12:09:03<10:21:08, 11.26s/it] 53%|█████▎ | 3736/7045 [12:09:14<10:13:43, 11.13s/it] {'loss': 1.0996, 'learning_rate': 2.376487733200049e-06, 'epoch': 0.53} + 53%|█████▎ | 3736/7045 [12:09:14<10:13:43, 11.13s/it] 53%|█████▎ | 3737/7045 [12:09:25<10:18:47, 11.22s/it] {'loss': 1.1338, 'learning_rate': 2.3753397306937423e-06, 'epoch': 0.53} + 53%|█████▎ | 3737/7045 [12:09:25<10:18:47, 11.22s/it] 53%|█████▎ | 3738/7045 [12:09:37<10:18:17, 11.22s/it] {'loss': 1.1357, 'learning_rate': 2.374191754538911e-06, 'epoch': 0.53} + 53%|█████▎ | 3738/7045 [12:09:37<10:18:17, 11.22s/it] 53%|█████▎ | 3739/7045 [12:09:49<10:35:16, 11.53s/it] {'loss': 1.1172, 'learning_rate': 2.3730438049782223e-06, 'epoch': 0.53} + 53%|█████▎ | 3739/7045 [12:09:49<10:35:16, 11.53s/it] 53%|█████▎ | 3740/7045 [12:10:00<10:33:17, 11.50s/it] {'loss': 1.1094, 'learning_rate': 2.371895882254339e-06, 'epoch': 0.53} + 53%|█████▎ | 3740/7045 [12:10:00<10:33:17, 11.50s/it] 53%|█████▎ | 3741/7045 [12:10:11<10:28:28, 11.41s/it] {'loss': 1.1357, 'learning_rate': 2.3707479866099144e-06, 'epoch': 0.53} + 53%|█████▎ | 3741/7045 [12:10:11<10:28:28, 11.41s/it] 53%|█████▎ | 3742/7045 [12:10:23<10:23:54, 11.33s/it] {'loss': 1.1221, 'learning_rate': 2.369600118287598e-06, 'epoch': 0.53} + 53%|█████▎ | 3742/7045 [12:10:23<10:23:54, 11.33s/it] 53%|█████▎ | 3743/7045 [12:10:35<10:43:48, 11.70s/it] {'loss': 1.1406, 'learning_rate': 2.368452277530034e-06, 'epoch': 0.53} + 53%|█████▎ | 3743/7045 [12:10:35<10:43:48, 11.70s/it] 53%|█████▎ | 3744/7045 [12:10:46<10:30:13, 11.46s/it] {'loss': 1.124, 'learning_rate': 2.367304464579861e-06, 'epoch': 0.53} + 53%|█████▎ | 3744/7045 [12:10:46<10:30:13, 11.46s/it] 53%|█████▎ | 3745/7045 [12:10:59<11:02:58, 12.05s/it] {'loss': 1.0698, 'learning_rate': 2.366156679679709e-06, 'epoch': 0.53} + 53%|█████▎ | 3745/7045 [12:10:59<11:02:58, 12.05s/it] 53%|█████▎ | 3746/7045 [12:11:10<10:43:16, 11.70s/it] {'loss': 1.1445, 'learning_rate': 2.365008923072206e-06, 'epoch': 0.53} + 53%|█████▎ | 3746/7045 [12:11:10<10:43:16, 11.70s/it] 53%|█████▎ | 3747/7045 [12:11:21<10:33:55, 11.53s/it] {'loss': 1.0732, 'learning_rate': 2.3638611949999702e-06, 'epoch': 0.53} + 53%|█████▎ | 3747/7045 [12:11:21<10:33:55, 11.53s/it] 53%|█████▎ | 3748/7045 [12:11:33<10:25:42, 11.39s/it] {'loss': 1.084, 'learning_rate': 2.3627134957056176e-06, 'epoch': 0.53} + 53%|█████▎ | 3748/7045 [12:11:33<10:25:42, 11.39s/it] 53%|█████▎ | 3749/7045 [12:11:45<10:46:58, 11.78s/it] {'loss': 1.1162, 'learning_rate': 2.3615658254317543e-06, 'epoch': 0.53} + 53%|█████▎ | 3749/7045 [12:11:45<10:46:58, 11.78s/it] 53%|█████▎ | 3750/7045 [12:11:56<10:30:50, 11.49s/it] {'loss': 1.1104, 'learning_rate': 2.360418184420983e-06, 'epoch': 0.53} + 53%|█████▎ | 3750/7045 [12:11:56<10:30:50, 11.49s/it] 53%|█████▎ | 3751/7045 [12:12:07<10:24:17, 11.37s/it] {'loss': 1.1406, 'learning_rate': 2.3592705729158993e-06, 'epoch': 0.53} + 53%|█████▎ | 3751/7045 [12:12:07<10:24:17, 11.37s/it] 53%|█████▎ | 3752/7045 [12:12:18<10:14:27, 11.20s/it] {'loss': 1.1055, 'learning_rate': 2.3581229911590927e-06, 'epoch': 0.53} + 53%|█████▎ | 3752/7045 [12:12:18<10:14:27, 11.20s/it] 53%|█████▎ | 3753/7045 [12:12:29<10:11:37, 11.15s/it] {'loss': 1.1152, 'learning_rate': 2.356975439393146e-06, 'epoch': 0.53} + 53%|█████▎ | 3753/7045 [12:12:29<10:11:37, 11.15s/it] 53%|█████▎ | 3754/7045 [12:12:42<10:36:33, 11.61s/it] {'loss': 1.1152, 'learning_rate': 2.355827917860636e-06, 'epoch': 0.53} + 53%|█████▎ | 3754/7045 [12:12:42<10:36:33, 11.61s/it] 53%|█████▎ | 3755/7045 [12:12:53<10:31:56, 11.52s/it] {'loss': 1.1201, 'learning_rate': 2.354680426804134e-06, 'epoch': 0.53} + 53%|█████▎ | 3755/7045 [12:12:53<10:31:56, 11.52s/it] 53%|█████▎ | 3756/7045 [12:13:04<10:24:38, 11.40s/it] {'loss': 1.1914, 'learning_rate': 2.3535329664662022e-06, 'epoch': 0.53} + 53%|█████▎ | 3756/7045 [12:13:04<10:24:38, 11.40s/it] 53%|█████▎ | 3757/7045 [12:13:15<10:18:58, 11.30s/it] {'loss': 1.1211, 'learning_rate': 2.3523855370893993e-06, 'epoch': 0.53} + 53%|█████▎ | 3757/7045 [12:13:15<10:18:58, 11.30s/it] 53%|█████▎ | 3758/7045 [12:13:28<10:49:00, 11.85s/it] {'loss': 1.1079, 'learning_rate': 2.351238138916276e-06, 'epoch': 0.53} + 53%|█████▎ | 3758/7045 [12:13:28<10:49:00, 11.85s/it] 53%|█████▎ | 3759/7045 [12:13:40<10:38:58, 11.67s/it] {'loss': 1.042, 'learning_rate': 2.3500907721893764e-06, 'epoch': 0.53} + 53%|█████▎ | 3759/7045 [12:13:40<10:38:58, 11.67s/it] 53%|█████▎ | 3760/7045 [12:13:51<10:28:52, 11.49s/it] {'loss': 1.0947, 'learning_rate': 2.3489434371512377e-06, 'epoch': 0.53} + 53%|█████▎ | 3760/7045 [12:13:51<10:28:52, 11.49s/it] 53%|█████▎ | 3761/7045 [12:14:04<10:53:03, 11.93s/it] {'loss': 1.1309, 'learning_rate': 2.3477961340443912e-06, 'epoch': 0.53} + 53%|█████▎ | 3761/7045 [12:14:04<10:53:03, 11.93s/it] 53%|█████▎ | 3762/7045 [12:14:14<10:33:39, 11.58s/it] {'loss': 1.1216, 'learning_rate': 2.346648863111361e-06, 'epoch': 0.53} + 53%|█████▎ | 3762/7045 [12:14:14<10:33:39, 11.58s/it] 53%|█████▎ | 3763/7045 [12:14:25<10:26:12, 11.45s/it] {'loss': 1.1348, 'learning_rate': 2.3455016245946648e-06, 'epoch': 0.53} + 53%|█████▎ | 3763/7045 [12:14:25<10:26:12, 11.45s/it] 53%|█████▎ | 3764/7045 [12:14:37<10:27:04, 11.47s/it] {'loss': 1.1729, 'learning_rate': 2.3443544187368128e-06, 'epoch': 0.53} + 53%|█████▎ | 3764/7045 [12:14:37<10:27:04, 11.47s/it] 53%|█████▎ | 3765/7045 [12:14:48<10:20:52, 11.36s/it] {'loss': 1.1035, 'learning_rate': 2.343207245780309e-06, 'epoch': 0.53} + 53%|█████▎ | 3765/7045 [12:14:48<10:20:52, 11.36s/it] 53%|█████▎ | 3766/7045 [12:14:59<10:12:27, 11.21s/it] {'loss': 1.1289, 'learning_rate': 2.3420601059676504e-06, 'epoch': 0.53} + 53%|█████▎ | 3766/7045 [12:14:59<10:12:27, 11.21s/it] 53%|█████▎ | 3767/7045 [12:15:13<10:54:53, 11.99s/it] {'loss': 1.1484, 'learning_rate': 2.340912999541326e-06, 'epoch': 0.53} + 53%|█████▎ | 3767/7045 [12:15:13<10:54:53, 11.99s/it] 53%|█████▎ | 3768/7045 [12:15:24<10:40:58, 11.74s/it] {'loss': 1.1104, 'learning_rate': 2.3397659267438185e-06, 'epoch': 0.53} + 53%|█████▎ | 3768/7045 [12:15:24<10:40:58, 11.74s/it] 53%|█████▎ | 3769/7045 [12:15:35<10:37:40, 11.68s/it] {'loss': 1.1318, 'learning_rate': 2.338618887817603e-06, 'epoch': 0.53} + 53%|█████▎ | 3769/7045 [12:15:35<10:37:40, 11.68s/it] 54%|█████▎ | 3770/7045 [12:15:47<10:29:27, 11.53s/it] {'loss': 1.0996, 'learning_rate': 2.3374718830051494e-06, 'epoch': 0.54} + 54%|█████▎ | 3770/7045 [12:15:47<10:29:27, 11.53s/it] 54%|█████▎ | 3771/7045 [12:15:59<10:36:21, 11.66s/it] {'loss': 1.0752, 'learning_rate': 2.336324912548917e-06, 'epoch': 0.54} + 54%|█████▎ | 3771/7045 [12:15:59<10:36:21, 11.66s/it] 54%|█████▎ | 3772/7045 [12:16:11<10:49:23, 11.90s/it] {'loss': 1.1104, 'learning_rate': 2.3351779766913604e-06, 'epoch': 0.54} + 54%|█████▎ | 3772/7045 [12:16:11<10:49:23, 11.90s/it] 54%|█████▎ | 3773/7045 [12:16:22<10:39:53, 11.73s/it] {'loss': 1.1172, 'learning_rate': 2.334031075674926e-06, 'epoch': 0.54} + 54%|█████▎ | 3773/7045 [12:16:22<10:39:53, 11.73s/it] 54%|█████▎ | 3774/7045 [12:16:36<11:02:34, 12.15s/it] {'loss': 1.1421, 'learning_rate': 2.3328842097420546e-06, 'epoch': 0.54} + 54%|█████▎ | 3774/7045 [12:16:36<11:02:34, 12.15s/it] 54%|█████▎ | 3775/7045 [12:16:47<10:48:14, 11.89s/it] {'loss': 1.124, 'learning_rate': 2.331737379135174e-06, 'epoch': 0.54} + 54%|█████▎ | 3775/7045 [12:16:47<10:48:14, 11.89s/it] 54%|█████▎ | 3776/7045 [12:16:59<10:45:46, 11.85s/it] {'loss': 1.0967, 'learning_rate': 2.330590584096713e-06, 'epoch': 0.54} + 54%|█████▎ | 3776/7045 [12:16:59<10:45:46, 11.85s/it] 54%|█████▎ | 3777/7045 [12:17:10<10:34:34, 11.65s/it] {'loss': 1.1157, 'learning_rate': 2.3294438248690865e-06, 'epoch': 0.54} + 54%|█████▎ | 3777/7045 [12:17:10<10:34:34, 11.65s/it] 54%|█████▎ | 3778/7045 [12:17:22<10:38:25, 11.72s/it] {'loss': 1.1162, 'learning_rate': 2.328297101694703e-06, 'epoch': 0.54} + 54%|█████▎ | 3778/7045 [12:17:22<10:38:25, 11.72s/it] 54%|█████▎ | 3779/7045 [12:17:33<10:29:40, 11.57s/it] {'loss': 1.0874, 'learning_rate': 2.3271504148159653e-06, 'epoch': 0.54} + 54%|█████▎ | 3779/7045 [12:17:33<10:29:40, 11.57s/it] 54%|█████▎ | 3780/7045 [12:17:44<10:21:49, 11.43s/it] {'loss': 1.1323, 'learning_rate': 2.3260037644752667e-06, 'epoch': 0.54} + 54%|█████▎ | 3780/7045 [12:17:44<10:21:49, 11.43s/it] 54%|█████▎ | 3781/7045 [12:17:57<10:51:45, 11.98s/it] {'loss': 1.0562, 'learning_rate': 2.324857150914994e-06, 'epoch': 0.54} + 54%|█████▎ | 3781/7045 [12:17:57<10:51:45, 11.98s/it] 54%|█████▎ | 3782/7045 [12:18:09<10:42:33, 11.82s/it] {'loss': 1.1289, 'learning_rate': 2.3237105743775246e-06, 'epoch': 0.54} + 54%|█████▎ | 3782/7045 [12:18:09<10:42:33, 11.82s/it] 54%|█████▎ | 3783/7045 [12:18:21<10:50:43, 11.97s/it] {'loss': 1.0835, 'learning_rate': 2.32256403510523e-06, 'epoch': 0.54} + 54%|█████▎ | 3783/7045 [12:18:21<10:50:43, 11.97s/it] 54%|█████▎ | 3784/7045 [12:18:32<10:39:57, 11.77s/it] {'loss': 1.0898, 'learning_rate': 2.3214175333404724e-06, 'epoch': 0.54} + 54%|█████▎ | 3784/7045 [12:18:32<10:39:57, 11.77s/it] 54%|█████▎ | 3785/7045 [12:18:43<10:28:01, 11.56s/it] {'loss': 1.1289, 'learning_rate': 2.320271069325607e-06, 'epoch': 0.54} + 54%|█████▎ | 3785/7045 [12:18:43<10:28:01, 11.56s/it] 54%|█████▎ | 3786/7045 [12:18:56<10:53:16, 12.03s/it] {'loss': 1.0869, 'learning_rate': 2.3191246433029804e-06, 'epoch': 0.54} + 54%|█████▎ | 3786/7045 [12:18:56<10:53:16, 12.03s/it] 54%|█████▍ | 3787/7045 [12:19:09<10:54:57, 12.06s/it] {'loss': 1.1396, 'learning_rate': 2.31797825551493e-06, 'epoch': 0.54} + 54%|█████▍ | 3787/7045 [12:19:09<10:54:57, 12.06s/it] 54%|█████▍ | 3788/7045 [12:19:20<10:49:48, 11.97s/it] {'loss': 1.1299, 'learning_rate': 2.31683190620379e-06, 'epoch': 0.54} + 54%|█████▍ | 3788/7045 [12:19:20<10:49:48, 11.97s/it] 54%|█████▍ | 3789/7045 [12:19:34<11:13:27, 12.41s/it] {'loss': 1.085, 'learning_rate': 2.31568559561188e-06, 'epoch': 0.54} + 54%|█████▍ | 3789/7045 [12:19:34<11:13:27, 12.41s/it] 54%|█████▍ | 3790/7045 [12:19:46<11:08:59, 12.33s/it] {'loss': 1.0835, 'learning_rate': 2.314539323981515e-06, 'epoch': 0.54} + 54%|█████▍ | 3790/7045 [12:19:46<11:08:59, 12.33s/it] 54%|█████▍ | 3791/7045 [12:19:57<10:47:51, 11.95s/it] {'loss': 1.1206, 'learning_rate': 2.313393091555002e-06, 'epoch': 0.54} + 54%|█████▍ | 3791/7045 [12:19:57<10:47:51, 11.95s/it] 54%|█████▍ | 3792/7045 [12:20:08<10:30:01, 11.62s/it] {'loss': 1.1455, 'learning_rate': 2.3122468985746387e-06, 'epoch': 0.54} + 54%|█████▍ | 3792/7045 [12:20:08<10:30:01, 11.62s/it] 54%|█████▍ | 3793/7045 [12:20:19<10:23:33, 11.50s/it] {'loss': 1.1348, 'learning_rate': 2.3111007452827136e-06, 'epoch': 0.54} + 54%|█████▍ | 3793/7045 [12:20:19<10:23:33, 11.50s/it] 54%|█████▍ | 3794/7045 [12:20:30<10:17:22, 11.39s/it] {'loss': 1.0942, 'learning_rate': 2.3099546319215085e-06, 'epoch': 0.54} + 54%|█████▍ | 3794/7045 [12:20:30<10:17:22, 11.39s/it] 54%|█████▍ | 3795/7045 [12:20:42<10:26:10, 11.56s/it] {'loss': 1.1338, 'learning_rate': 2.308808558733296e-06, 'epoch': 0.54} + 54%|█████▍ | 3795/7045 [12:20:42<10:26:10, 11.56s/it] 54%|█████▍ | 3796/7045 [12:20:54<10:30:09, 11.64s/it] {'loss': 1.1265, 'learning_rate': 2.3076625259603412e-06, 'epoch': 0.54} + 54%|█████▍ | 3796/7045 [12:20:54<10:30:09, 11.64s/it] 54%|█████▍ | 3797/7045 [12:21:06<10:36:08, 11.75s/it] {'loss': 1.1338, 'learning_rate': 2.306516533844898e-06, 'epoch': 0.54} + 54%|█████▍ | 3797/7045 [12:21:06<10:36:08, 11.75s/it] 54%|█████▍ | 3798/7045 [12:21:17<10:25:15, 11.55s/it] {'loss': 1.0898, 'learning_rate': 2.305370582629215e-06, 'epoch': 0.54} + 54%|█████▍ | 3798/7045 [12:21:17<10:25:15, 11.55s/it] 54%|█████▍ | 3799/7045 [12:21:31<10:55:36, 12.12s/it] {'loss': 1.0806, 'learning_rate': 2.304224672555529e-06, 'epoch': 0.54} + 54%|█████▍ | 3799/7045 [12:21:31<10:55:36, 12.12s/it] 54%|█████▍ | 3800/7045 [12:21:42<10:38:14, 11.80s/it] {'loss': 1.1118, 'learning_rate': 2.3030788038660708e-06, 'epoch': 0.54} + 54%|█████▍ | 3800/7045 [12:21:42<10:38:14, 11.80s/it] 54%|█████▍ | 3801/7045 [12:21:55<10:57:19, 12.16s/it] {'loss': 1.0845, 'learning_rate': 2.301932976803062e-06, 'epoch': 0.54} + 54%|█████▍ | 3801/7045 [12:21:55<10:57:19, 12.16s/it] 54%|█████▍ | 3802/7045 [12:22:07<10:55:41, 12.13s/it] {'loss': 1.1123, 'learning_rate': 2.3007871916087137e-06, 'epoch': 0.54} + 54%|█████▍ | 3802/7045 [12:22:07<10:55:41, 12.13s/it] 54%|█████▍ | 3803/7045 [12:22:19<11:03:30, 12.28s/it] {'loss': 1.1475, 'learning_rate': 2.2996414485252307e-06, 'epoch': 0.54} + 54%|█████▍ | 3803/7045 [12:22:19<11:03:30, 12.28s/it] 54%|█████▍ | 3804/7045 [12:22:30<10:41:38, 11.88s/it] {'loss': 1.1094, 'learning_rate': 2.298495747794805e-06, 'epoch': 0.54} + 54%|█████▍ | 3804/7045 [12:22:30<10:41:38, 11.88s/it] 54%|█████▍ | 3805/7045 [12:22:41<10:26:46, 11.61s/it] {'loss': 1.1172, 'learning_rate': 2.297350089659624e-06, 'epoch': 0.54} + 54%|█████▍ | 3805/7045 [12:22:41<10:26:46, 11.61s/it] 54%|█████▍ | 3806/7045 [12:22:54<10:49:27, 12.03s/it] {'loss': 1.1328, 'learning_rate': 2.296204474361863e-06, 'epoch': 0.54} + 54%|█████▍ | 3806/7045 [12:22:54<10:49:27, 12.03s/it] 54%|█████▍ | 3807/7045 [12:23:05<10:33:04, 11.73s/it] {'loss': 1.1084, 'learning_rate': 2.2950589021436916e-06, 'epoch': 0.54} + 54%|█████▍ | 3807/7045 [12:23:05<10:33:04, 11.73s/it] 54%|█████▍ | 3808/7045 [12:23:16<10:14:36, 11.39s/it] {'loss': 1.0957, 'learning_rate': 2.2939133732472653e-06, 'epoch': 0.54} + 54%|█████▍ | 3808/7045 [12:23:16<10:14:36, 11.39s/it] 54%|█████▍ | 3809/7045 [12:23:27<10:11:21, 11.34s/it] {'loss': 1.1553, 'learning_rate': 2.292767887914734e-06, 'epoch': 0.54} + 54%|█████▍ | 3809/7045 [12:23:27<10:11:21, 11.34s/it] 54%|█████▍ | 3810/7045 [12:23:40<10:34:53, 11.78s/it] {'loss': 1.1191, 'learning_rate': 2.2916224463882385e-06, 'epoch': 0.54} + 54%|█████▍ | 3810/7045 [12:23:40<10:34:53, 11.78s/it] 54%|█████▍ | 3811/7045 [12:23:51<10:25:01, 11.60s/it] {'loss': 1.1299, 'learning_rate': 2.29047704890991e-06, 'epoch': 0.54} + 54%|█████▍ | 3811/7045 [12:23:51<10:25:01, 11.60s/it] 54%|█████▍ | 3812/7045 [12:24:02<10:17:47, 11.47s/it] {'loss': 1.0884, 'learning_rate': 2.2893316957218666e-06, 'epoch': 0.54} + 54%|█████▍ | 3812/7045 [12:24:02<10:17:47, 11.47s/it] 54%|█████▍ | 3813/7045 [12:24:13<10:12:08, 11.36s/it] {'loss': 1.1045, 'learning_rate': 2.2881863870662242e-06, 'epoch': 0.54} + 54%|█████▍ | 3813/7045 [12:24:13<10:12:08, 11.36s/it] 54%|█████▍ | 3814/7045 [12:24:25<10:10:43, 11.34s/it] {'loss': 1.1108, 'learning_rate': 2.2870411231850844e-06, 'epoch': 0.54} + 54%|█████▍ | 3814/7045 [12:24:25<10:10:43, 11.34s/it] 54%|█████▍ | 3815/7045 [12:24:38<10:39:10, 11.87s/it] {'loss': 1.0811, 'learning_rate': 2.285895904320539e-06, 'epoch': 0.54} + 54%|█████▍ | 3815/7045 [12:24:38<10:39:10, 11.87s/it] 54%|█████▍ | 3816/7045 [12:24:49<10:22:43, 11.57s/it] {'loss': 1.126, 'learning_rate': 2.284750730714672e-06, 'epoch': 0.54} + 54%|█████▍ | 3816/7045 [12:24:49<10:22:43, 11.57s/it] 54%|█████▍ | 3817/7045 [12:25:01<10:29:53, 11.71s/it] {'loss': 1.126, 'learning_rate': 2.2836056026095586e-06, 'epoch': 0.54} + 54%|█████▍ | 3817/7045 [12:25:01<10:29:53, 11.71s/it] 54%|█████▍ | 3818/7045 [12:25:12<10:17:13, 11.48s/it] {'loss': 1.083, 'learning_rate': 2.2824605202472626e-06, 'epoch': 0.54} + 54%|█████▍ | 3818/7045 [12:25:12<10:17:13, 11.48s/it] 54%|█████▍ | 3819/7045 [12:25:24<10:33:42, 11.79s/it] {'loss': 1.0527, 'learning_rate': 2.281315483869838e-06, 'epoch': 0.54} + 54%|█████▍ | 3819/7045 [12:25:24<10:33:42, 11.79s/it] 54%|█████▍ | 3820/7045 [12:25:36<10:30:56, 11.74s/it] {'loss': 1.126, 'learning_rate': 2.2801704937193302e-06, 'epoch': 0.54} + 54%|█████▍ | 3820/7045 [12:25:36<10:30:56, 11.74s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 2 bytes but only got 0. + warnings.warn(str(msg)) + 54%|█████▍ | 3821/7045 [12:25:47<10:24:25, 11.62s/it] {'loss': 1.1064, 'learning_rate': 2.279025550037775e-06, 'epoch': 0.54} + 54%|█████▍ | 3821/7045 [12:25:47<10:24:25, 11.62s/it] 54%|█████▍ | 3822/7045 [12:25:58<10:11:59, 11.39s/it] {'loss': 1.1006, 'learning_rate': 2.277880653067198e-06, 'epoch': 0.54} + 54%|█████▍ | 3822/7045 [12:25:58<10:11:59, 11.39s/it] 54%|█████▍ | 3823/7045 [12:26:09<10:13:31, 11.42s/it] {'loss': 1.1562, 'learning_rate': 2.276735803049614e-06, 'epoch': 0.54} + 54%|█████▍ | 3823/7045 [12:26:09<10:13:31, 11.42s/it] 54%|█████▍ | 3824/7045 [12:26:22<10:31:36, 11.77s/it] {'loss': 1.1318, 'learning_rate': 2.2755910002270275e-06, 'epoch': 0.54} + 54%|█████▍ | 3824/7045 [12:26:22<10:31:36, 11.77s/it] 54%|█████▍ | 3825/7045 [12:26:34<10:28:18, 11.71s/it] {'loss': 1.1172, 'learning_rate': 2.2744462448414373e-06, 'epoch': 0.54} + 54%|█████▍ | 3825/7045 [12:26:34<10:28:18, 11.71s/it] 54%|█████▍ | 3826/7045 [12:26:45<10:17:09, 11.50s/it] {'loss': 1.1367, 'learning_rate': 2.2733015371348268e-06, 'epoch': 0.54} + 54%|█████▍ | 3826/7045 [12:26:45<10:17:09, 11.50s/it] 54%|█████▍ | 3827/7045 [12:26:56<10:10:04, 11.37s/it] {'loss': 1.1084, 'learning_rate': 2.272156877349172e-06, 'epoch': 0.54} + 54%|█████▍ | 3827/7045 [12:26:56<10:10:04, 11.37s/it] 54%|█████▍ | 3828/7045 [12:27:07<10:05:11, 11.29s/it] {'loss': 1.1016, 'learning_rate': 2.271012265726439e-06, 'epoch': 0.54} + 54%|█████▍ | 3828/7045 [12:27:07<10:05:11, 11.29s/it] 54%|█████▍ | 3829/7045 [12:27:20<10:32:05, 11.79s/it] {'loss': 1.125, 'learning_rate': 2.2698677025085823e-06, 'epoch': 0.54} + 54%|█████▍ | 3829/7045 [12:27:20<10:32:05, 11.79s/it] 54%|█████▍ | 3830/7045 [12:27:31<10:22:59, 11.63s/it] {'loss': 1.1396, 'learning_rate': 2.2687231879375472e-06, 'epoch': 0.54} + 54%|█████▍ | 3830/7045 [12:27:31<10:22:59, 11.63s/it] 54%|█████▍ | 3831/7045 [12:27:44<10:44:06, 12.02s/it] {'loss': 1.1104, 'learning_rate': 2.267578722255268e-06, 'epoch': 0.54} + 54%|█████▍ | 3831/7045 [12:27:44<10:44:06, 12.02s/it] 54%|█████▍ | 3832/7045 [12:27:55<10:35:57, 11.88s/it] {'loss': 1.1348, 'learning_rate': 2.2664343057036695e-06, 'epoch': 0.54} + 54%|█████▍ | 3832/7045 [12:27:55<10:35:57, 11.88s/it] 54%|█████▍ | 3833/7045 [12:28:07<10:28:23, 11.74s/it] {'loss': 1.1338, 'learning_rate': 2.2652899385246665e-06, 'epoch': 0.54} + 54%|█████▍ | 3833/7045 [12:28:07<10:28:23, 11.74s/it] 54%|█████▍ | 3834/7045 [12:28:18<10:22:05, 11.62s/it] {'loss': 1.1074, 'learning_rate': 2.2641456209601605e-06, 'epoch': 0.54} + 54%|█████▍ | 3834/7045 [12:28:18<10:22:05, 11.62s/it] 54%|█████▍ | 3835/7045 [12:28:30<10:25:16, 11.69s/it] {'loss': 1.1094, 'learning_rate': 2.263001353252046e-06, 'epoch': 0.54} + 54%|█████▍ | 3835/7045 [12:28:30<10:25:16, 11.69s/it] 54%|█████▍ | 3836/7045 [12:28:42<10:36:12, 11.90s/it] {'loss': 1.0791, 'learning_rate': 2.261857135642205e-06, 'epoch': 0.54} + 54%|█████▍ | 3836/7045 [12:28:42<10:36:12, 11.90s/it] 54%|█████▍ | 3837/7045 [12:28:53<10:19:10, 11.58s/it] {'loss': 1.1094, 'learning_rate': 2.2607129683725105e-06, 'epoch': 0.54} + 54%|█████▍ | 3837/7045 [12:28:53<10:19:10, 11.58s/it] 54%|█████▍ | 3838/7045 [12:29:05<10:15:18, 11.51s/it] {'loss': 1.1338, 'learning_rate': 2.259568851684823e-06, 'epoch': 0.54} + 54%|█████▍ | 3838/7045 [12:29:05<10:15:18, 11.51s/it] 54%|█████▍ | 3839/7045 [12:29:16<10:08:37, 11.39s/it] {'loss': 1.1416, 'learning_rate': 2.2584247858209933e-06, 'epoch': 0.54} + 54%|█████▍ | 3839/7045 [12:29:16<10:08:37, 11.39s/it] 55%|█████▍ | 3840/7045 [12:29:27<10:03:18, 11.29s/it] {'loss': 1.125, 'learning_rate': 2.2572807710228616e-06, 'epoch': 0.55} + 55%|█████▍ | 3840/7045 [12:29:27<10:03:18, 11.29s/it] 55%|█████▍ | 3841/7045 [12:29:38<9:59:37, 11.23s/it] {'loss': 1.103, 'learning_rate': 2.256136807532256e-06, 'epoch': 0.55} + 55%|█████▍ | 3841/7045 [12:29:38<9:59:37, 11.23s/it] 55%|█████▍ | 3842/7045 [12:29:49<9:58:09, 11.20s/it] {'loss': 1.1104, 'learning_rate': 2.2549928955909962e-06, 'epoch': 0.55} + 55%|█████▍ | 3842/7045 [12:29:49<9:58:09, 11.20s/it] 55%|█████▍ | 3843/7045 [12:30:00<10:00:29, 11.25s/it] {'loss': 1.1514, 'learning_rate': 2.253849035440888e-06, 'epoch': 0.55} + 55%|█████▍ | 3843/7045 [12:30:00<10:00:29, 11.25s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 4 bytes but only got 2. + warnings.warn(str(msg)) + 55%|█████▍ | 3844/7045 [12:30:11<9:56:30, 11.18s/it] {'loss': 1.1367, 'learning_rate': 2.2527052273237293e-06, 'epoch': 0.55} + 55%|█████▍ | 3844/7045 [12:30:11<9:56:30, 11.18s/it] 55%|█████▍ | 3845/7045 [12:30:23<9:56:50, 11.19s/it] {'loss': 1.1318, 'learning_rate': 2.251561471481304e-06, 'epoch': 0.55} + 55%|█████▍ | 3845/7045 [12:30:23<9:56:50, 11.19s/it] 55%|█████▍ | 3846/7045 [12:30:34<9:57:43, 11.21s/it] {'loss': 1.145, 'learning_rate': 2.2504177681553875e-06, 'epoch': 0.55} + 55%|█████▍ | 3846/7045 [12:30:34<9:57:43, 11.21s/it] 55%|█████▍ | 3847/7045 [12:30:45<9:57:22, 11.21s/it] {'loss': 1.1045, 'learning_rate': 2.2492741175877424e-06, 'epoch': 0.55} + 55%|█████▍ | 3847/7045 [12:30:45<9:57:22, 11.21s/it] 55%|█████▍ | 3848/7045 [12:30:56<10:00:21, 11.27s/it] {'loss': 1.124, 'learning_rate': 2.2481305200201207e-06, 'epoch': 0.55} + 55%|█████▍ | 3848/7045 [12:30:56<10:00:21, 11.27s/it] 55%|█████▍ | 3849/7045 [12:31:08<10:00:23, 11.27s/it] {'loss': 1.1094, 'learning_rate': 2.2469869756942643e-06, 'epoch': 0.55} + 55%|█████▍ | 3849/7045 [12:31:08<10:00:23, 11.27s/it] 55%|█████▍ | 3850/7045 [12:31:19<10:00:38, 11.28s/it] {'loss': 1.1279, 'learning_rate': 2.245843484851902e-06, 'epoch': 0.55} + 55%|█████▍ | 3850/7045 [12:31:19<10:00:38, 11.28s/it] 55%|█████▍ | 3851/7045 [12:31:32<10:26:41, 11.77s/it] {'loss': 1.0903, 'learning_rate': 2.244700047734753e-06, 'epoch': 0.55} + 55%|█████▍ | 3851/7045 [12:31:32<10:26:41, 11.77s/it] 55%|█████▍ | 3852/7045 [12:31:43<10:14:21, 11.54s/it] {'loss': 1.0986, 'learning_rate': 2.2435566645845234e-06, 'epoch': 0.55} + 55%|█████▍ | 3852/7045 [12:31:43<10:14:21, 11.54s/it] 55%|█████▍ | 3853/7045 [12:31:54<10:12:58, 11.52s/it] {'loss': 1.123, 'learning_rate': 2.2424133356429087e-06, 'epoch': 0.55} + 55%|█████▍ | 3853/7045 [12:31:54<10:12:58, 11.52s/it] 55%|█████▍ | 3854/7045 [12:32:06<10:16:13, 11.59s/it] {'loss': 1.1338, 'learning_rate': 2.2412700611515932e-06, 'epoch': 0.55} + 55%|█████▍ | 3854/7045 [12:32:06<10:16:13, 11.59s/it] 55%|█████▍ | 3855/7045 [12:32:18<10:13:23, 11.54s/it] {'loss': 1.1123, 'learning_rate': 2.2401268413522507e-06, 'epoch': 0.55} + 55%|█████▍ | 3855/7045 [12:32:18<10:13:23, 11.54s/it] 55%|█████▍ | 3856/7045 [12:32:29<10:04:04, 11.37s/it] {'loss': 1.1299, 'learning_rate': 2.23898367648654e-06, 'epoch': 0.55} + 55%|█████▍ | 3856/7045 [12:32:29<10:04:04, 11.37s/it] 55%|█████▍ | 3857/7045 [12:32:40<10:02:49, 11.35s/it] {'loss': 1.1094, 'learning_rate': 2.2378405667961123e-06, 'epoch': 0.55} + 55%|█████▍ | 3857/7045 [12:32:40<10:02:49, 11.35s/it] 55%|█████▍ | 3858/7045 [12:32:52<10:20:25, 11.68s/it] {'loss': 1.064, 'learning_rate': 2.236697512522604e-06, 'epoch': 0.55} + 55%|█████▍ | 3858/7045 [12:32:52<10:20:25, 11.68s/it] 55%|█████▍ | 3859/7045 [12:33:03<10:09:14, 11.47s/it] {'loss': 1.1416, 'learning_rate': 2.2355545139076427e-06, 'epoch': 0.55} + 55%|█████▍ | 3859/7045 [12:33:03<10:09:14, 11.47s/it] 55%|█████▍ | 3860/7045 [12:33:15<10:13:04, 11.55s/it] {'loss': 1.082, 'learning_rate': 2.234411571192841e-06, 'epoch': 0.55} + 55%|█████▍ | 3860/7045 [12:33:15<10:13:04, 11.55s/it] 55%|█████▍ | 3861/7045 [12:33:26<10:03:11, 11.37s/it] {'loss': 1.1377, 'learning_rate': 2.2332686846198026e-06, 'epoch': 0.55} + 55%|█████▍ | 3861/7045 [12:33:26<10:03:11, 11.37s/it] 55%|█████▍ | 3862/7045 [12:33:37<9:54:56, 11.21s/it] {'loss': 1.1455, 'learning_rate': 2.2321258544301184e-06, 'epoch': 0.55} + 55%|█████▍ | 3862/7045 [12:33:37<9:54:56, 11.21s/it] 55%|█████▍ | 3863/7045 [12:33:48<9:52:01, 11.16s/it] {'loss': 1.0786, 'learning_rate': 2.230983080865366e-06, 'epoch': 0.55} + 55%|█████▍ | 3863/7045 [12:33:48<9:52:01, 11.16s/it] 55%|█████▍ | 3864/7045 [12:34:01<10:15:18, 11.61s/it] {'loss': 1.1069, 'learning_rate': 2.2298403641671125e-06, 'epoch': 0.55} + 55%|█████▍ | 3864/7045 [12:34:01<10:15:18, 11.61s/it] 55%|█████▍ | 3865/7045 [12:34:12<10:14:34, 11.60s/it] {'loss': 1.0859, 'learning_rate': 2.228697704576912e-06, 'epoch': 0.55} + 55%|█████▍ | 3865/7045 [12:34:12<10:14:34, 11.60s/it] 55%|█████▍ | 3866/7045 [12:34:23<10:08:42, 11.49s/it] {'loss': 1.1348, 'learning_rate': 2.227555102336309e-06, 'epoch': 0.55} + 55%|█████▍ | 3866/7045 [12:34:23<10:08:42, 11.49s/it] 55%|█████▍ | 3867/7045 [12:34:35<10:03:48, 11.40s/it] {'loss': 1.0674, 'learning_rate': 2.226412557686832e-06, 'epoch': 0.55} + 55%|█████▍ | 3867/7045 [12:34:35<10:03:48, 11.40s/it] 55%|█████▍ | 3868/7045 [12:34:46<9:57:15, 11.28s/it] {'loss': 1.1221, 'learning_rate': 2.22527007087e-06, 'epoch': 0.55} + 55%|█████▍ | 3868/7045 [12:34:46<9:57:15, 11.28s/it] 55%|█████▍ | 3869/7045 [12:34:57<10:00:38, 11.35s/it] {'loss': 1.0693, 'learning_rate': 2.224127642127319e-06, 'epoch': 0.55} + 55%|█████▍ | 3869/7045 [12:34:57<10:00:38, 11.35s/it] 55%|█████▍ | 3870/7045 [12:35:10<10:21:14, 11.74s/it] {'loss': 1.0996, 'learning_rate': 2.222985271700283e-06, 'epoch': 0.55} + 55%|█████▍ | 3870/7045 [12:35:10<10:21:14, 11.74s/it] 55%|█████▍ | 3871/7045 [12:35:21<10:10:06, 11.53s/it] {'loss': 1.0879, 'learning_rate': 2.221842959830374e-06, 'epoch': 0.55} + 55%|█████▍ | 3871/7045 [12:35:21<10:10:06, 11.53s/it] 55%|█████▍ | 3872/7045 [12:35:32<10:11:07, 11.56s/it] {'loss': 1.1426, 'learning_rate': 2.22070070675906e-06, 'epoch': 0.55} + 55%|█████▍ | 3872/7045 [12:35:32<10:11:07, 11.56s/it] 55%|█████▍ | 3873/7045 [12:35:44<10:17:20, 11.68s/it] {'loss': 1.0933, 'learning_rate': 2.2195585127277976e-06, 'epoch': 0.55} + 55%|█████▍ | 3873/7045 [12:35:44<10:17:20, 11.68s/it] 55%|█████▍ | 3874/7045 [12:35:55<10:08:39, 11.52s/it] {'loss': 1.1621, 'learning_rate': 2.2184163779780323e-06, 'epoch': 0.55} + 55%|█████▍ | 3874/7045 [12:35:55<10:08:39, 11.52s/it] 55%|█████▌ | 3875/7045 [12:36:07<10:01:40, 11.39s/it] {'loss': 1.0957, 'learning_rate': 2.2172743027511955e-06, 'epoch': 0.55} + 55%|█████▌ | 3875/7045 [12:36:07<10:01:40, 11.39s/it] 55%|█████▌ | 3876/7045 [12:36:18<10:10:29, 11.56s/it] {'loss': 1.1484, 'learning_rate': 2.2161322872887054e-06, 'epoch': 0.55} + 55%|█████▌ | 3876/7045 [12:36:18<10:10:29, 11.56s/it] 55%|█████▌ | 3877/7045 [12:36:29<10:00:57, 11.38s/it] {'loss': 1.1328, 'learning_rate': 2.2149903318319695e-06, 'epoch': 0.55} + 55%|█████▌ | 3877/7045 [12:36:29<10:00:57, 11.38s/it] 55%|█████▌ | 3878/7045 [12:36:42<10:22:23, 11.79s/it] {'loss': 1.0845, 'learning_rate': 2.2138484366223806e-06, 'epoch': 0.55} + 55%|█████▌ | 3878/7045 [12:36:42<10:22:23, 11.79s/it] 55%|█████▌ | 3879/7045 [12:36:53<10:04:44, 11.46s/it] {'loss': 1.0771, 'learning_rate': 2.21270660190132e-06, 'epoch': 0.55} + 55%|█████▌ | 3879/7045 [12:36:53<10:04:44, 11.46s/it] 55%|█████▌ | 3880/7045 [12:37:05<10:08:07, 11.53s/it] {'loss': 1.0854, 'learning_rate': 2.211564827910156e-06, 'epoch': 0.55} + 55%|█████▌ | 3880/7045 [12:37:05<10:08:07, 11.53s/it] 55%|█████▌ | 3881/7045 [12:37:16<10:02:39, 11.43s/it] {'loss': 1.1191, 'learning_rate': 2.210423114890245e-06, 'epoch': 0.55} + 55%|█████▌ | 3881/7045 [12:37:16<10:02:39, 11.43s/it] 55%|█████▌ | 3882/7045 [12:37:29<10:28:41, 11.93s/it] {'loss': 1.1113, 'learning_rate': 2.2092814630829283e-06, 'epoch': 0.55} + 55%|█████▌ | 3882/7045 [12:37:29<10:28:41, 11.93s/it] 55%|█████▌ | 3883/7045 [12:37:40<10:13:32, 11.64s/it] {'loss': 1.1035, 'learning_rate': 2.2081398727295354e-06, 'epoch': 0.55} + 55%|█████▌ | 3883/7045 [12:37:40<10:13:32, 11.64s/it] 55%|█████▌ | 3884/7045 [12:37:53<10:34:01, 12.03s/it] {'loss': 1.0981, 'learning_rate': 2.2069983440713834e-06, 'epoch': 0.55} + 55%|█████▌ | 3884/7045 [12:37:53<10:34:01, 12.03s/it] 55%|█████▌ | 3885/7045 [12:38:04<10:17:25, 11.72s/it] {'loss': 1.1133, 'learning_rate': 2.205856877349775e-06, 'epoch': 0.55} + 55%|█████▌ | 3885/7045 [12:38:04<10:17:25, 11.72s/it] 55%|█████▌ | 3886/7045 [12:38:17<10:37:53, 12.12s/it] {'loss': 1.0781, 'learning_rate': 2.2047154728060023e-06, 'epoch': 0.55} + 55%|█████▌ | 3886/7045 [12:38:17<10:37:53, 12.12s/it] 55%|█████▌ | 3887/7045 [12:38:28<10:18:43, 11.76s/it] {'loss': 1.0957, 'learning_rate': 2.203574130681342e-06, 'epoch': 0.55} + 55%|█████▌ | 3887/7045 [12:38:28<10:18:43, 11.76s/it] 55%|█████▌ | 3888/7045 [12:38:40<10:25:59, 11.90s/it] {'loss': 1.0977, 'learning_rate': 2.2024328512170583e-06, 'epoch': 0.55} + 55%|█████▌ | 3888/7045 [12:38:40<10:25:59, 11.90s/it] 55%|█████▌ | 3889/7045 [12:38:52<10:28:37, 11.95s/it] {'loss': 1.1152, 'learning_rate': 2.201291634654401e-06, 'epoch': 0.55} + 55%|█████▌ | 3889/7045 [12:38:52<10:28:37, 11.95s/it] 55%|█████▌ | 3890/7045 [12:39:03<10:15:06, 11.70s/it] {'loss': 1.1104, 'learning_rate': 2.2001504812346086e-06, 'epoch': 0.55} + 55%|█████▌ | 3890/7045 [12:39:03<10:15:06, 11.70s/it] 55%|█████▌ | 3891/7045 [12:39:14<10:02:02, 11.45s/it] {'loss': 1.0713, 'learning_rate': 2.199009391198905e-06, 'epoch': 0.55} + 55%|█████▌ | 3891/7045 [12:39:14<10:02:02, 11.45s/it] 55%|█████▌ | 3892/7045 [12:39:25<10:01:41, 11.45s/it] {'loss': 1.0703, 'learning_rate': 2.197868364788502e-06, 'epoch': 0.55} + 55%|█████▌ | 3892/7045 [12:39:25<10:01:41, 11.45s/it] 55%|█████▌ | 3893/7045 [12:39:37<9:56:28, 11.35s/it] {'loss': 1.1279, 'learning_rate': 2.1967274022445953e-06, 'epoch': 0.55} + 55%|█████▌ | 3893/7045 [12:39:37<9:56:28, 11.35s/it] 55%|█████▌ | 3894/7045 [12:39:48<9:56:06, 11.35s/it] {'loss': 1.1406, 'learning_rate': 2.1955865038083698e-06, 'epoch': 0.55} + 55%|█████▌ | 3894/7045 [12:39:48<9:56:06, 11.35s/it] 55%|█████▌ | 3895/7045 [12:39:59<9:50:48, 11.25s/it] {'loss': 1.0474, 'learning_rate': 2.194445669720996e-06, 'epoch': 0.55} + 55%|█████▌ | 3895/7045 [12:39:59<9:50:48, 11.25s/it] 55%|█████▌ | 3896/7045 [12:40:11<10:03:29, 11.50s/it] {'loss': 1.106, 'learning_rate': 2.19330490022363e-06, 'epoch': 0.55} + 55%|█████▌ | 3896/7045 [12:40:11<10:03:29, 11.50s/it] 55%|█████▌ | 3897/7045 [12:40:23<10:04:37, 11.52s/it] {'loss': 1.1006, 'learning_rate': 2.1921641955574148e-06, 'epoch': 0.55} + 55%|█████▌ | 3897/7045 [12:40:23<10:04:37, 11.52s/it] 55%|█████▌ | 3898/7045 [12:40:34<9:58:15, 11.41s/it] {'loss': 1.1338, 'learning_rate': 2.1910235559634804e-06, 'epoch': 0.55} + 55%|█████▌ | 3898/7045 [12:40:34<9:58:15, 11.41s/it] 55%|█████▌ | 3899/7045 [12:40:45<10:01:58, 11.48s/it] {'loss': 1.0732, 'learning_rate': 2.189882981682943e-06, 'epoch': 0.55} + 55%|█████▌ | 3899/7045 [12:40:45<10:01:58, 11.48s/it] 55%|█████▌ | 3900/7045 [12:40:59<10:29:56, 12.02s/it] {'loss': 1.0986, 'learning_rate': 2.188742472956903e-06, 'epoch': 0.55} + 55%|█████▌ | 3900/7045 [12:40:59<10:29:56, 12.02s/it] 55%|█████▌ | 3901/7045 [12:41:10<10:19:05, 11.81s/it] {'loss': 1.0957, 'learning_rate': 2.187602030026449e-06, 'epoch': 0.55} + 55%|█████▌ | 3901/7045 [12:41:10<10:19:05, 11.81s/it] 55%|█████▌ | 3902/7045 [12:41:23<10:37:26, 12.17s/it] {'loss': 1.0967, 'learning_rate': 2.186461653132655e-06, 'epoch': 0.55} + 55%|█████▌ | 3902/7045 [12:41:23<10:37:26, 12.17s/it] 55%|█████▌ | 3903/7045 [12:41:34<10:19:08, 11.82s/it] {'loss': 1.0786, 'learning_rate': 2.185321342516582e-06, 'epoch': 0.55} + 55%|█████▌ | 3903/7045 [12:41:34<10:19:08, 11.82s/it] 55%|█████▌ | 3904/7045 [12:41:45<10:06:23, 11.58s/it] {'loss': 1.1191, 'learning_rate': 2.1841810984192737e-06, 'epoch': 0.55} + 55%|█████▌ | 3904/7045 [12:41:45<10:06:23, 11.58s/it] 55%|█████▌ | 3905/7045 [12:41:56<9:57:03, 11.41s/it] {'loss': 1.1382, 'learning_rate': 2.1830409210817643e-06, 'epoch': 0.55} + 55%|█████▌ | 3905/7045 [12:41:56<9:57:03, 11.41s/it] 55%|█████▌ | 3906/7045 [12:42:08<10:07:29, 11.61s/it] {'loss': 1.0957, 'learning_rate': 2.1819008107450705e-06, 'epoch': 0.55} + 55%|█████▌ | 3906/7045 [12:42:08<10:07:29, 11.61s/it] 55%|█████▌ | 3907/7045 [12:42:21<10:24:19, 11.94s/it] {'loss': 1.0835, 'learning_rate': 2.180760767650197e-06, 'epoch': 0.55} + 55%|█████▌ | 3907/7045 [12:42:21<10:24:19, 11.94s/it] 55%|█████▌ | 3908/7045 [12:42:32<10:12:32, 11.72s/it] {'loss': 1.1465, 'learning_rate': 2.179620792038133e-06, 'epoch': 0.55} + 55%|█████▌ | 3908/7045 [12:42:32<10:12:32, 11.72s/it] 55%|█████▌ | 3909/7045 [12:42:44<10:19:00, 11.84s/it] {'loss': 1.1279, 'learning_rate': 2.178480884149852e-06, 'epoch': 0.55} + 55%|█████▌ | 3909/7045 [12:42:44<10:19:00, 11.84s/it] 56%|█████▌ | 3910/7045 [12:42:55<10:05:27, 11.59s/it] {'loss': 1.1396, 'learning_rate': 2.177341044226318e-06, 'epoch': 0.56} + 56%|█████▌ | 3910/7045 [12:42:55<10:05:27, 11.59s/it] 56%|█████▌ | 3911/7045 [12:43:06<9:54:23, 11.38s/it] {'loss': 1.0635, 'learning_rate': 2.176201272508476e-06, 'epoch': 0.56} + 56%|█████▌ | 3911/7045 [12:43:06<9:54:23, 11.38s/it] 56%|█████▌ | 3912/7045 [12:43:19<10:17:38, 11.83s/it] {'loss': 1.0669, 'learning_rate': 2.175061569237258e-06, 'epoch': 0.56} + 56%|█████▌ | 3912/7045 [12:43:19<10:17:38, 11.83s/it] 56%|█████▌ | 3913/7045 [12:43:32<10:32:08, 12.11s/it] {'loss': 1.0972, 'learning_rate': 2.1739219346535823e-06, 'epoch': 0.56} + 56%|█████▌ | 3913/7045 [12:43:32<10:32:08, 12.11s/it] 56%|█████▌ | 3914/7045 [12:43:45<10:43:38, 12.33s/it] {'loss': 1.1309, 'learning_rate': 2.1727823689983517e-06, 'epoch': 0.56} + 56%|█████▌ | 3914/7045 [12:43:45<10:43:38, 12.33s/it] 56%|█████▌ | 3915/7045 [12:43:56<10:27:38, 12.03s/it] {'loss': 1.1245, 'learning_rate': 2.171642872512455e-06, 'epoch': 0.56} + 56%|█████▌ | 3915/7045 [12:43:56<10:27:38, 12.03s/it] 56%|█████▌ | 3916/7045 [12:44:07<10:17:35, 11.84s/it] {'loss': 1.1816, 'learning_rate': 2.1705034454367653e-06, 'epoch': 0.56} + 56%|█████▌ | 3916/7045 [12:44:07<10:17:35, 11.84s/it] 56%|█████▌ | 3917/7045 [12:44:18<10:05:04, 11.61s/it] {'loss': 1.1055, 'learning_rate': 2.169364088012143e-06, 'epoch': 0.56} + 56%|█████▌ | 3917/7045 [12:44:18<10:05:04, 11.61s/it] 56%|█████▌ | 3918/7045 [12:44:29<9:55:39, 11.43s/it] {'loss': 1.1353, 'learning_rate': 2.1682248004794325e-06, 'epoch': 0.56} + 56%|█████▌ | 3918/7045 [12:44:29<9:55:39, 11.43s/it] 56%|█████▌ | 3919/7045 [12:44:41<9:54:05, 11.40s/it] {'loss': 1.1147, 'learning_rate': 2.1670855830794634e-06, 'epoch': 0.56} + 56%|█████▌ | 3919/7045 [12:44:41<9:54:05, 11.40s/it] 56%|█████▌ | 3920/7045 [12:44:52<9:48:44, 11.30s/it] {'loss': 1.0977, 'learning_rate': 2.1659464360530503e-06, 'epoch': 0.56} + 56%|█████▌ | 3920/7045 [12:44:52<9:48:44, 11.30s/it] 56%|█████▌ | 3921/7045 [12:45:03<9:50:49, 11.35s/it] {'loss': 1.1689, 'learning_rate': 2.1648073596409936e-06, 'epoch': 0.56} + 56%|█████▌ | 3921/7045 [12:45:03<9:50:49, 11.35s/it] 56%|█████▌ | 3922/7045 [12:45:16<10:08:56, 11.70s/it] {'loss': 1.103, 'learning_rate': 2.163668354084079e-06, 'epoch': 0.56} + 56%|█████▌ | 3922/7045 [12:45:16<10:08:56, 11.70s/it] 56%|█████▌ | 3923/7045 [12:45:28<10:11:30, 11.75s/it] {'loss': 1.124, 'learning_rate': 2.162529419623076e-06, 'epoch': 0.56} + 56%|█████▌ | 3923/7045 [12:45:28<10:11:30, 11.75s/it] 56%|█████▌ | 3924/7045 [12:45:39<10:07:05, 11.67s/it] {'loss': 1.1289, 'learning_rate': 2.16139055649874e-06, 'epoch': 0.56} + 56%|█████▌ | 3924/7045 [12:45:39<10:07:05, 11.67s/it] 56%|█████▌ | 3925/7045 [12:45:52<10:21:30, 11.95s/it] {'loss': 1.1304, 'learning_rate': 2.1602517649518116e-06, 'epoch': 0.56} + 56%|█████▌ | 3925/7045 [12:45:52<10:21:30, 11.95s/it] 56%|█████▌ | 3926/7045 [12:46:03<10:11:27, 11.76s/it] {'loss': 1.1172, 'learning_rate': 2.1591130452230153e-06, 'epoch': 0.56} + 56%|█████▌ | 3926/7045 [12:46:03<10:11:27, 11.76s/it] 56%|█████▌ | 3927/7045 [12:46:14<10:02:53, 11.60s/it] {'loss': 1.1055, 'learning_rate': 2.1579743975530605e-06, 'epoch': 0.56} + 56%|█████▌ | 3927/7045 [12:46:14<10:02:53, 11.60s/it] 56%|█████▌ | 3928/7045 [12:46:25<9:52:28, 11.40s/it] {'loss': 1.1152, 'learning_rate': 2.1568358221826423e-06, 'epoch': 0.56} + 56%|█████▌ | 3928/7045 [12:46:25<9:52:28, 11.40s/it] 56%|█████▌ | 3929/7045 [12:46:37<9:58:33, 11.53s/it] {'loss': 1.1006, 'learning_rate': 2.15569731935244e-06, 'epoch': 0.56} + 56%|█████▌ | 3929/7045 [12:46:37<9:58:33, 11.53s/it] 56%|█████▌ | 3930/7045 [12:46:48<9:54:44, 11.46s/it] {'loss': 1.0898, 'learning_rate': 2.154558889303118e-06, 'epoch': 0.56} + 56%|█████▌ | 3930/7045 [12:46:48<9:54:44, 11.46s/it] 56%|█████▌ | 3931/7045 [12:46:59<9:47:30, 11.32s/it] {'loss': 1.1348, 'learning_rate': 2.1534205322753233e-06, 'epoch': 0.56} + 56%|█████▌ | 3931/7045 [12:46:59<9:47:30, 11.32s/it] 56%|█████▌ | 3932/7045 [12:47:11<9:49:23, 11.36s/it] {'loss': 1.1133, 'learning_rate': 2.15228224850969e-06, 'epoch': 0.56} + 56%|█████▌ | 3932/7045 [12:47:11<9:49:23, 11.36s/it] 56%|█████▌ | 3933/7045 [12:47:24<10:15:19, 11.86s/it] {'loss': 1.0952, 'learning_rate': 2.1511440382468355e-06, 'epoch': 0.56} + 56%|█████▌ | 3933/7045 [12:47:24<10:15:19, 11.86s/it] 56%|█████▌ | 3934/7045 [12:47:35<10:03:47, 11.65s/it] {'loss': 1.1172, 'learning_rate': 2.150005901727362e-06, 'epoch': 0.56} + 56%|█████▌ | 3934/7045 [12:47:35<10:03:47, 11.65s/it] 56%|█████▌ | 3935/7045 [12:47:46<9:53:16, 11.45s/it] {'loss': 1.1328, 'learning_rate': 2.1488678391918557e-06, 'epoch': 0.56} + 56%|█████▌ | 3935/7045 [12:47:46<9:53:16, 11.45s/it] 56%|█████▌ | 3936/7045 [12:47:57<9:55:08, 11.49s/it] {'loss': 1.0957, 'learning_rate': 2.1477298508808887e-06, 'epoch': 0.56} + 56%|█████▌ | 3936/7045 [12:47:57<9:55:08, 11.49s/it] 56%|█████▌ | 3937/7045 [12:48:09<9:49:10, 11.37s/it] {'loss': 1.1631, 'learning_rate': 2.146591937035015e-06, 'epoch': 0.56} + 56%|█████▌ | 3937/7045 [12:48:09<9:49:10, 11.37s/it] 56%|█████▌ | 3938/7045 [12:48:21<10:02:39, 11.64s/it] {'loss': 1.1006, 'learning_rate': 2.145454097894774e-06, 'epoch': 0.56} + 56%|█████▌ | 3938/7045 [12:48:21<10:02:39, 11.64s/it] 56%|█████▌ | 3939/7045 [12:48:34<10:20:22, 11.98s/it] {'loss': 1.0923, 'learning_rate': 2.14431633370069e-06, 'epoch': 0.56} + 56%|█████▌ | 3939/7045 [12:48:34<10:20:22, 11.98s/it] 56%|█████▌ | 3940/7045 [12:48:45<10:05:49, 11.71s/it] {'loss': 1.125, 'learning_rate': 2.14317864469327e-06, 'epoch': 0.56} + 56%|█████▌ | 3940/7045 [12:48:45<10:05:49, 11.71s/it] 56%|█████▌ | 3941/7045 [12:48:56<10:00:43, 11.61s/it] {'loss': 1.1392, 'learning_rate': 2.142041031113007e-06, 'epoch': 0.56} + 56%|█████▌ | 3941/7045 [12:48:56<10:00:43, 11.61s/it] 56%|█████▌ | 3942/7045 [12:49:07<9:49:28, 11.40s/it] {'loss': 1.1045, 'learning_rate': 2.140903493200376e-06, 'epoch': 0.56} + 56%|█████▌ | 3942/7045 [12:49:07<9:49:28, 11.40s/it] 56%|█████▌ | 3943/7045 [12:49:19<9:52:44, 11.47s/it] {'loss': 1.1069, 'learning_rate': 2.139766031195837e-06, 'epoch': 0.56} + 56%|█████▌ | 3943/7045 [12:49:19<9:52:44, 11.47s/it] 56%|█████▌ | 3944/7045 [12:49:31<10:03:52, 11.68s/it] {'loss': 1.1069, 'learning_rate': 2.1386286453398345e-06, 'epoch': 0.56} + 56%|█████▌ | 3944/7045 [12:49:31<10:03:52, 11.68s/it] 56%|█████▌ | 3945/7045 [12:49:42<9:59:58, 11.61s/it] {'loss': 1.1172, 'learning_rate': 2.137491335872796e-06, 'epoch': 0.56} + 56%|█████▌ | 3945/7045 [12:49:42<9:59:58, 11.61s/it] 56%|█████▌ | 3946/7045 [12:49:53<9:52:25, 11.47s/it] {'loss': 1.1338, 'learning_rate': 2.136354103035132e-06, 'epoch': 0.56} + 56%|█████▌ | 3946/7045 [12:49:53<9:52:25, 11.47s/it] 56%|█████▌ | 3947/7045 [12:50:04<9:44:48, 11.33s/it] {'loss': 1.0854, 'learning_rate': 2.1352169470672405e-06, 'epoch': 0.56} + 56%|█████▌ | 3947/7045 [12:50:04<9:44:48, 11.33s/it] 56%|█████▌ | 3948/7045 [12:50:16<9:44:39, 11.33s/it] {'loss': 1.1367, 'learning_rate': 2.134079868209499e-06, 'epoch': 0.56} + 56%|█████▌ | 3948/7045 [12:50:16<9:44:39, 11.33s/it] 56%|█████▌ | 3949/7045 [12:50:27<9:43:20, 11.31s/it] {'loss': 1.1064, 'learning_rate': 2.1329428667022706e-06, 'epoch': 0.56} + 56%|█████▌ | 3949/7045 [12:50:27<9:43:20, 11.31s/it] 56%|█████▌ | 3950/7045 [12:50:38<9:42:39, 11.30s/it] {'loss': 1.0938, 'learning_rate': 2.1318059427859024e-06, 'epoch': 0.56} + 56%|█████▌ | 3950/7045 [12:50:38<9:42:39, 11.30s/it] 56%|█████▌ | 3951/7045 [12:50:49<9:40:18, 11.25s/it] {'loss': 1.1201, 'learning_rate': 2.1306690967007236e-06, 'epoch': 0.56} + 56%|█████▌ | 3951/7045 [12:50:49<9:40:18, 11.25s/it] 56%|█████▌ | 3952/7045 [12:51:01<9:46:25, 11.38s/it] {'loss': 1.1738, 'learning_rate': 2.1295323286870497e-06, 'epoch': 0.56} + 56%|█████▌ | 3952/7045 [12:51:01<9:46:25, 11.38s/it] 56%|█████▌ | 3953/7045 [12:51:13<9:53:39, 11.52s/it] {'loss': 1.0879, 'learning_rate': 2.128395638985176e-06, 'epoch': 0.56} + 56%|█████▌ | 3953/7045 [12:51:13<9:53:39, 11.52s/it] 56%|█████▌ | 3954/7045 [12:51:24<9:50:29, 11.46s/it] {'loss': 1.1328, 'learning_rate': 2.1272590278353838e-06, 'epoch': 0.56} + 56%|█████▌ | 3954/7045 [12:51:24<9:50:29, 11.46s/it] 56%|█████▌ | 3955/7045 [12:51:36<9:47:54, 11.42s/it] {'loss': 1.1494, 'learning_rate': 2.1261224954779373e-06, 'epoch': 0.56} + 56%|█████▌ | 3955/7045 [12:51:36<9:47:54, 11.42s/it] 56%|█████▌ | 3956/7045 [12:51:47<9:40:48, 11.28s/it] {'loss': 1.1133, 'learning_rate': 2.124986042153085e-06, 'epoch': 0.56} + 56%|█████▌ | 3956/7045 [12:51:47<9:40:48, 11.28s/it] 56%|█████▌ | 3957/7045 [12:51:58<9:45:09, 11.37s/it] {'loss': 1.1133, 'learning_rate': 2.1238496681010553e-06, 'epoch': 0.56} + 56%|█████▌ | 3957/7045 [12:51:58<9:45:09, 11.37s/it] 56%|█████▌ | 3958/7045 [12:52:10<9:45:27, 11.38s/it] {'loss': 1.1631, 'learning_rate': 2.1227133735620633e-06, 'epoch': 0.56} + 56%|█████▌ | 3958/7045 [12:52:10<9:45:27, 11.38s/it] 56%|█████▌ | 3959/7045 [12:52:22<10:03:46, 11.74s/it] {'loss': 1.0977, 'learning_rate': 2.121577158776306e-06, 'epoch': 0.56} + 56%|█████▌ | 3959/7045 [12:52:22<10:03:46, 11.74s/it] 56%|█████▌ | 3960/7045 [12:52:33<9:50:34, 11.49s/it] {'loss': 1.1064, 'learning_rate': 2.1204410239839646e-06, 'epoch': 0.56} + 56%|█████▌ | 3960/7045 [12:52:33<9:50:34, 11.49s/it] 56%|█████▌ | 3961/7045 [12:52:44<9:44:19, 11.37s/it] {'loss': 1.1465, 'learning_rate': 2.119304969425201e-06, 'epoch': 0.56} + 56%|█████▌ | 3961/7045 [12:52:44<9:44:19, 11.37s/it] 56%|█████▌ | 3962/7045 [12:52:56<9:45:59, 11.40s/it] {'loss': 1.1182, 'learning_rate': 2.1181689953401635e-06, 'epoch': 0.56} + 56%|█████▌ | 3962/7045 [12:52:56<9:45:59, 11.40s/it] 56%|█████▋ | 3963/7045 [12:53:07<9:41:29, 11.32s/it] {'loss': 1.1299, 'learning_rate': 2.1170331019689798e-06, 'epoch': 0.56} + 56%|█████▋ | 3963/7045 [12:53:07<9:41:29, 11.32s/it] 56%|█████▋ | 3964/7045 [12:53:18<9:41:56, 11.33s/it] {'loss': 1.1553, 'learning_rate': 2.115897289551762e-06, 'epoch': 0.56} + 56%|█████▋ | 3964/7045 [12:53:18<9:41:56, 11.33s/it] 56%|█████▋ | 3965/7045 [12:53:29<9:39:27, 11.29s/it] {'loss': 1.0923, 'learning_rate': 2.1147615583286066e-06, 'epoch': 0.56} + 56%|█████▋ | 3965/7045 [12:53:29<9:39:27, 11.29s/it] 56%|█████▋ | 3966/7045 [12:53:42<9:58:13, 11.66s/it] {'loss': 1.1084, 'learning_rate': 2.113625908539591e-06, 'epoch': 0.56} + 56%|█████▋ | 3966/7045 [12:53:42<9:58:13, 11.66s/it] 56%|█████▋ | 3967/7045 [12:53:53<9:52:10, 11.54s/it] {'loss': 1.1357, 'learning_rate': 2.1124903404247766e-06, 'epoch': 0.56} + 56%|█████▋ | 3967/7045 [12:53:53<9:52:10, 11.54s/it] 56%|█████▋ | 3968/7045 [12:54:05<10:03:44, 11.77s/it] {'loss': 1.103, 'learning_rate': 2.1113548542242055e-06, 'epoch': 0.56} + 56%|█████▋ | 3968/7045 [12:54:05<10:03:44, 11.77s/it] 56%|█████▋ | 3969/7045 [12:54:17<9:54:56, 11.60s/it] {'loss': 1.1475, 'learning_rate': 2.110219450177906e-06, 'epoch': 0.56} + 56%|█████▋ | 3969/7045 [12:54:17<9:54:56, 11.60s/it] 56%|█████▋ | 3970/7045 [12:54:29<10:10:06, 11.90s/it] {'loss': 1.0898, 'learning_rate': 2.1090841285258845e-06, 'epoch': 0.56} + 56%|█████▋ | 3970/7045 [12:54:29<10:10:06, 11.90s/it] 56%|█████▋ | 3971/7045 [12:54:40<9:57:48, 11.67s/it] {'loss': 1.1133, 'learning_rate': 2.107948889508135e-06, 'epoch': 0.56} + 56%|█████▋ | 3971/7045 [12:54:40<9:57:48, 11.67s/it] 56%|█████▋ | 3972/7045 [12:54:54<10:26:53, 12.24s/it] {'loss': 1.1294, 'learning_rate': 2.1068137333646304e-06, 'epoch': 0.56} + 56%|█████▋ | 3972/7045 [12:54:54<10:26:53, 12.24s/it] 56%|█████▋ | 3973/7045 [12:55:07<10:33:24, 12.37s/it] {'loss': 1.0835, 'learning_rate': 2.1056786603353273e-06, 'epoch': 0.56} + 56%|█████▋ | 3973/7045 [12:55:07<10:33:24, 12.37s/it] 56%|█████▋ | 3974/7045 [12:55:19<10:42:03, 12.54s/it] {'loss': 1.084, 'learning_rate': 2.1045436706601645e-06, 'epoch': 0.56} + 56%|█████▋ | 3974/7045 [12:55:19<10:42:03, 12.54s/it] 56%|█████▋ | 3975/7045 [12:55:32<10:36:11, 12.43s/it] {'loss': 1.0718, 'learning_rate': 2.103408764579063e-06, 'epoch': 0.56} + 56%|█████▋ | 3975/7045 [12:55:32<10:36:11, 12.43s/it] 56%|█████▋ | 3976/7045 [12:55:43<10:14:24, 12.01s/it] {'loss': 1.1152, 'learning_rate': 2.102273942331927e-06, 'epoch': 0.56} + 56%|█████▋ | 3976/7045 [12:55:43<10:14:24, 12.01s/it] 56%|█████▋ | 3977/7045 [12:55:54<10:09:04, 11.91s/it] {'loss': 1.1191, 'learning_rate': 2.1011392041586425e-06, 'epoch': 0.56} + 56%|█████▋ | 3977/7045 [12:55:54<10:09:04, 11.91s/it] 56%|█████▋ | 3978/7045 [12:56:05<9:55:38, 11.65s/it] {'loss': 1.1543, 'learning_rate': 2.1000045502990776e-06, 'epoch': 0.56} + 56%|█████▋ | 3978/7045 [12:56:05<9:55:38, 11.65s/it] 56%|█████▋ | 3979/7045 [12:56:17<9:47:37, 11.50s/it] {'loss': 1.1367, 'learning_rate': 2.0988699809930816e-06, 'epoch': 0.56} + 56%|█████▋ | 3979/7045 [12:56:17<9:47:37, 11.50s/it] 56%|█████▋ | 3980/7045 [12:56:29<9:54:52, 11.65s/it] {'loss': 1.1543, 'learning_rate': 2.097735496480488e-06, 'epoch': 0.56} + 56%|█████▋ | 3980/7045 [12:56:29<9:54:52, 11.65s/it] 57%|█████▋ | 3981/7045 [12:56:42<10:23:29, 12.21s/it] {'loss': 1.1299, 'learning_rate': 2.096601097001111e-06, 'epoch': 0.57} + 57%|█████▋ | 3981/7045 [12:56:42<10:23:29, 12.21s/it] 57%|█████▋ | 3982/7045 [12:56:53<10:05:43, 11.87s/it] {'loss': 1.0928, 'learning_rate': 2.095466782794746e-06, 'epoch': 0.57} + 57%|█████▋ | 3982/7045 [12:56:53<10:05:43, 11.87s/it] 57%|█████▋ | 3983/7045 [12:57:06<10:16:43, 12.08s/it] {'loss': 1.0801, 'learning_rate': 2.094332554101174e-06, 'epoch': 0.57} + 57%|█████▋ | 3983/7045 [12:57:06<10:16:43, 12.08s/it] 57%|█████▋ | 3984/7045 [12:57:19<10:32:35, 12.40s/it] {'loss': 1.0547, 'learning_rate': 2.093198411160154e-06, 'epoch': 0.57} + 57%|█████▋ | 3984/7045 [12:57:19<10:32:35, 12.40s/it] 57%|█████▋ | 3985/7045 [12:57:30<10:19:21, 12.14s/it] {'loss': 1.1572, 'learning_rate': 2.0920643542114282e-06, 'epoch': 0.57} + 57%|█████▋ | 3985/7045 [12:57:30<10:19:21, 12.14s/it] 57%|█████▋ | 3986/7045 [12:57:42<10:14:24, 12.05s/it] {'loss': 1.1216, 'learning_rate': 2.090930383494721e-06, 'epoch': 0.57} + 57%|█████▋ | 3986/7045 [12:57:42<10:14:24, 12.05s/it] 57%|█████▋ | 3987/7045 [12:57:54<10:05:12, 11.87s/it] {'loss': 1.0806, 'learning_rate': 2.0897964992497383e-06, 'epoch': 0.57} + 57%|█████▋ | 3987/7045 [12:57:54<10:05:12, 11.87s/it] 57%|█████▋ | 3988/7045 [12:58:05<9:55:57, 11.70s/it] {'loss': 1.1387, 'learning_rate': 2.0886627017161678e-06, 'epoch': 0.57} + 57%|█████▋ | 3988/7045 [12:58:05<9:55:57, 11.70s/it] 57%|█████▋ | 3989/7045 [12:58:17<9:59:11, 11.76s/it] {'loss': 1.1191, 'learning_rate': 2.0875289911336785e-06, 'epoch': 0.57} + 57%|█████▋ | 3989/7045 [12:58:17<9:59:11, 11.76s/it] 57%|█████▋ | 3990/7045 [12:58:28<9:48:05, 11.55s/it] {'loss': 1.1113, 'learning_rate': 2.0863953677419215e-06, 'epoch': 0.57} + 57%|█████▋ | 3990/7045 [12:58:28<9:48:05, 11.55s/it] 57%|█████▋ | 3991/7045 [12:58:39<9:46:55, 11.53s/it] {'loss': 1.1484, 'learning_rate': 2.0852618317805293e-06, 'epoch': 0.57} + 57%|█████▋ | 3991/7045 [12:58:39<9:46:55, 11.53s/it] 57%|█████▋ | 3992/7045 [12:58:51<9:46:08, 11.52s/it] {'loss': 1.0601, 'learning_rate': 2.0841283834891156e-06, 'epoch': 0.57} + 57%|█████▋ | 3992/7045 [12:58:51<9:46:08, 11.52s/it] 57%|█████▋ | 3993/7045 [12:59:04<10:03:31, 11.86s/it] {'loss': 1.1035, 'learning_rate': 2.082995023107277e-06, 'epoch': 0.57} + 57%|█████▋ | 3993/7045 [12:59:04<10:03:31, 11.86s/it] 57%|█████▋ | 3994/7045 [12:59:15<9:50:07, 11.61s/it] {'loss': 1.1387, 'learning_rate': 2.0818617508745876e-06, 'epoch': 0.57} + 57%|█████▋ | 3994/7045 [12:59:15<9:50:07, 11.61s/it] 57%|█████▋ | 3995/7045 [12:59:26<9:52:40, 11.66s/it] {'loss': 1.0991, 'learning_rate': 2.0807285670306095e-06, 'epoch': 0.57} + 57%|█████▋ | 3995/7045 [12:59:26<9:52:40, 11.66s/it] 57%|█████▋ | 3996/7045 [12:59:38<9:52:03, 11.65s/it] {'loss': 1.1323, 'learning_rate': 2.0795954718148797e-06, 'epoch': 0.57} + 57%|█████▋ | 3996/7045 [12:59:38<9:52:03, 11.65s/it] 57%|█████▋ | 3997/7045 [12:59:49<9:44:02, 11.50s/it] {'loss': 1.1265, 'learning_rate': 2.07846246546692e-06, 'epoch': 0.57} + 57%|█████▋ | 3997/7045 [12:59:49<9:44:02, 11.50s/it] 57%|█████▋ | 3998/7045 [13:00:00<9:38:53, 11.40s/it] {'loss': 1.1299, 'learning_rate': 2.077329548226232e-06, 'epoch': 0.57} + 57%|█████▋ | 3998/7045 [13:00:00<9:38:53, 11.40s/it] 57%|█████▋ | 3999/7045 [13:00:14<10:18:24, 12.18s/it] {'loss': 1.0518, 'learning_rate': 2.076196720332299e-06, 'epoch': 0.57} + 57%|█████▋ | 3999/7045 [13:00:14<10:18:24, 12.18s/it] 57%|█████▋ | 4000/7045 [13:00:26<10:15:34, 12.13s/it] {'loss': 1.0684, 'learning_rate': 2.0750639820245862e-06, 'epoch': 0.57} + 57%|█████▋ | 4000/7045 [13:00:26<10:15:34, 12.13s/it]/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 57%|█████▋ | 4001/7045 [13:01:04<16:45:20, 19.82s/it] {'loss': 1.1084, 'learning_rate': 2.0739313335425375e-06, 'epoch': 0.57} + 57%|█████▋ | 4001/7045 [13:01:04<16:45:20, 19.82s/it] 57%|█████▋ | 4002/7045 [13:01:15<14:34:04, 17.23s/it] {'loss': 1.0811, 'learning_rate': 2.07279877512558e-06, 'epoch': 0.57} + 57%|█████▋ | 4002/7045 [13:01:15<14:34:04, 17.23s/it] 57%|█████▋ | 4003/7045 [13:01:26<13:00:17, 15.39s/it] {'loss': 1.1523, 'learning_rate': 2.071666307013121e-06, 'epoch': 0.57} + 57%|█████▋ | 4003/7045 [13:01:26<13:00:17, 15.39s/it] 57%|█████▋ | 4004/7045 [13:01:39<12:12:55, 14.46s/it] {'loss': 1.1357, 'learning_rate': 2.0705339294445496e-06, 'epoch': 0.57} + 57%|█████▋ | 4004/7045 [13:01:39<12:12:55, 14.46s/it] 57%|█████▋ | 4005/7045 [13:01:50<11:21:05, 13.44s/it] {'loss': 1.0879, 'learning_rate': 2.0694016426592333e-06, 'epoch': 0.57} + 57%|█████▋ | 4005/7045 [13:01:50<11:21:05, 13.44s/it] 57%|█████▋ | 4006/7045 [13:02:01<10:52:50, 12.89s/it] {'loss': 1.0752, 'learning_rate': 2.068269446896523e-06, 'epoch': 0.57} + 57%|█████▋ | 4006/7045 [13:02:01<10:52:50, 12.89s/it] 57%|█████▋ | 4007/7045 [13:02:12<10:25:00, 12.34s/it] {'loss': 1.125, 'learning_rate': 2.0671373423957493e-06, 'epoch': 0.57} + 57%|█████▋ | 4007/7045 [13:02:12<10:25:00, 12.34s/it] 57%|█████▋ | 4008/7045 [13:02:24<10:19:18, 12.24s/it] {'loss': 1.0938, 'learning_rate': 2.0660053293962238e-06, 'epoch': 0.57} + 57%|█████▋ | 4008/7045 [13:02:24<10:19:18, 12.24s/it] 57%|█████▋ | 4009/7045 [13:02:38<10:42:18, 12.69s/it] {'loss': 1.1592, 'learning_rate': 2.0648734081372384e-06, 'epoch': 0.57} + 57%|█████▋ | 4009/7045 [13:02:38<10:42:18, 12.69s/it] 57%|█████▋ | 4010/7045 [13:02:51<10:46:37, 12.78s/it] {'loss': 1.0908, 'learning_rate': 2.063741578858066e-06, 'epoch': 0.57} + 57%|█████▋ | 4010/7045 [13:02:51<10:46:37, 12.78s/it] 57%|█████▋ | 4011/7045 [13:03:03<10:25:17, 12.37s/it] {'loss': 1.1309, 'learning_rate': 2.06260984179796e-06, 'epoch': 0.57} + 57%|█████▋ | 4011/7045 [13:03:03<10:25:17, 12.37s/it] 57%|█████▋ | 4012/7045 [13:03:16<10:37:26, 12.61s/it] {'loss': 1.0815, 'learning_rate': 2.0614781971961534e-06, 'epoch': 0.57} + 57%|█████▋ | 4012/7045 [13:03:16<10:37:26, 12.61s/it] 57%|█████▋ | 4013/7045 [13:03:28<10:33:51, 12.54s/it] {'loss': 1.0967, 'learning_rate': 2.060346645291861e-06, 'epoch': 0.57} + 57%|█████▋ | 4013/7045 [13:03:28<10:33:51, 12.54s/it] 57%|█████▋ | 4014/7045 [13:03:39<10:10:00, 12.08s/it] {'loss': 1.1025, 'learning_rate': 2.0592151863242768e-06, 'epoch': 0.57} + 57%|█████▋ | 4014/7045 [13:03:39<10:10:00, 12.08s/it] 57%|█████▋ | 4015/7045 [13:03:51<10:00:16, 11.89s/it] {'loss': 1.1431, 'learning_rate': 2.0580838205325766e-06, 'epoch': 0.57} + 57%|█████▋ | 4015/7045 [13:03:51<10:00:16, 11.89s/it] 57%|█████▋ | 4016/7045 [13:04:01<9:45:09, 11.59s/it] {'loss': 1.0972, 'learning_rate': 2.0569525481559152e-06, 'epoch': 0.57} + 57%|█████▋ | 4016/7045 [13:04:01<9:45:09, 11.59s/it] 57%|█████▋ | 4017/7045 [13:04:12<9:35:01, 11.39s/it] {'loss': 1.1396, 'learning_rate': 2.055821369433428e-06, 'epoch': 0.57} + 57%|█████▋ | 4017/7045 [13:04:12<9:35:01, 11.39s/it] 57%|█████▋ | 4018/7045 [13:04:25<9:47:20, 11.64s/it] {'loss': 1.106, 'learning_rate': 2.0546902846042304e-06, 'epoch': 0.57} + 57%|█████▋ | 4018/7045 [13:04:25<9:47:20, 11.64s/it] 57%|█████▋ | 4019/7045 [13:04:36<9:48:24, 11.67s/it] {'loss': 1.126, 'learning_rate': 2.0535592939074185e-06, 'epoch': 0.57} + 57%|█████▋ | 4019/7045 [13:04:36<9:48:24, 11.67s/it] 57%|█████▋ | 4020/7045 [13:04:48<9:42:45, 11.56s/it] {'loss': 1.085, 'learning_rate': 2.0524283975820688e-06, 'epoch': 0.57} + 57%|█████▋ | 4020/7045 [13:04:48<9:42:45, 11.56s/it] 57%|█████▋ | 4021/7045 [13:04:59<9:37:59, 11.47s/it] {'loss': 1.1729, 'learning_rate': 2.0512975958672372e-06, 'epoch': 0.57} + 57%|█████▋ | 4021/7045 [13:04:59<9:37:59, 11.47s/it] 57%|█████▋ | 4022/7045 [13:05:11<9:52:08, 11.75s/it] {'loss': 1.0957, 'learning_rate': 2.0501668890019604e-06, 'epoch': 0.57} + 57%|█████▋ | 4022/7045 [13:05:11<9:52:08, 11.75s/it] 57%|█████▋ | 4023/7045 [13:05:22<9:42:45, 11.57s/it] {'loss': 1.1543, 'learning_rate': 2.0490362772252523e-06, 'epoch': 0.57} + 57%|█████▋ | 4023/7045 [13:05:22<9:42:45, 11.57s/it] 57%|█████▋ | 4024/7045 [13:05:34<9:41:54, 11.56s/it] {'loss': 1.1304, 'learning_rate': 2.04790576077611e-06, 'epoch': 0.57} + 57%|█████▋ | 4024/7045 [13:05:34<9:41:54, 11.56s/it] 57%|█████▋ | 4025/7045 [13:05:45<9:36:44, 11.46s/it] {'loss': 1.1348, 'learning_rate': 2.0467753398935097e-06, 'epoch': 0.57} + 57%|█████▋ | 4025/7045 [13:05:45<9:36:44, 11.46s/it] 57%|█████▋ | 4026/7045 [13:05:56<9:32:56, 11.39s/it] {'loss': 1.1021, 'learning_rate': 2.0456450148164067e-06, 'epoch': 0.57} + 57%|█████▋ | 4026/7045 [13:05:56<9:32:56, 11.39s/it] 57%|█████▋ | 4027/7045 [13:06:07<9:27:33, 11.28s/it] {'loss': 1.0957, 'learning_rate': 2.0445147857837357e-06, 'epoch': 0.57} + 57%|█████▋ | 4027/7045 [13:06:07<9:27:33, 11.28s/it] 57%|█████▋ | 4028/7045 [13:06:19<9:25:01, 11.24s/it] {'loss': 1.1436, 'learning_rate': 2.0433846530344115e-06, 'epoch': 0.57} + 57%|█████▋ | 4028/7045 [13:06:19<9:25:01, 11.24s/it] 57%|█████▋ | 4029/7045 [13:06:31<9:40:15, 11.54s/it] {'loss': 1.1055, 'learning_rate': 2.042254616807329e-06, 'epoch': 0.57} + 57%|█████▋ | 4029/7045 [13:06:31<9:40:15, 11.54s/it] 57%|█████▋ | 4030/7045 [13:06:43<9:50:26, 11.75s/it] {'loss': 1.1084, 'learning_rate': 2.041124677341364e-06, 'epoch': 0.57} + 57%|█████▋ | 4030/7045 [13:06:43<9:50:26, 11.75s/it] 57%|█████▋ | 4031/7045 [13:06:54<9:39:45, 11.54s/it] {'loss': 1.0884, 'learning_rate': 2.0399948348753667e-06, 'epoch': 0.57} + 57%|█████▋ | 4031/7045 [13:06:54<9:39:45, 11.54s/it] 57%|█████▋ | 4032/7045 [13:07:06<9:37:04, 11.49s/it] {'loss': 1.1406, 'learning_rate': 2.0388650896481733e-06, 'epoch': 0.57} + 57%|█████▋ | 4032/7045 [13:07:06<9:37:04, 11.49s/it] 57%|█████▋ | 4033/7045 [13:07:17<9:30:51, 11.37s/it] {'loss': 1.1143, 'learning_rate': 2.0377354418985966e-06, 'epoch': 0.57} + 57%|█████▋ | 4033/7045 [13:07:17<9:30:51, 11.37s/it] 57%|█████▋ | 4034/7045 [13:07:28<9:25:36, 11.27s/it] {'loss': 1.1543, 'learning_rate': 2.036605891865427e-06, 'epoch': 0.57} + 57%|█████▋ | 4034/7045 [13:07:28<9:25:36, 11.27s/it] 57%|█████▋ | 4035/7045 [13:07:39<9:22:53, 11.22s/it] {'loss': 1.0859, 'learning_rate': 2.035476439787437e-06, 'epoch': 0.57} + 57%|█████▋ | 4035/7045 [13:07:39<9:22:53, 11.22s/it] 57%|█████▋ | 4036/7045 [13:07:50<9:22:14, 11.21s/it] {'loss': 1.1021, 'learning_rate': 2.034347085903377e-06, 'epoch': 0.57} + 57%|█████▋ | 4036/7045 [13:07:50<9:22:14, 11.21s/it] 57%|█████▋ | 4037/7045 [13:08:04<10:02:00, 12.01s/it] {'loss': 1.0957, 'learning_rate': 2.033217830451977e-06, 'epoch': 0.57} + 57%|█████▋ | 4037/7045 [13:08:04<10:02:00, 12.01s/it] 57%|█████▋ | 4038/7045 [13:08:16<10:03:06, 12.03s/it] {'loss': 1.1172, 'learning_rate': 2.032088673671946e-06, 'epoch': 0.57} + 57%|█████▋ | 4038/7045 [13:08:16<10:03:06, 12.03s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 2 bytes but only got 0. + warnings.warn(str(msg)) + 57%|█████▋ | 4039/7045 [13:08:27<9:48:27, 11.75s/it] {'loss': 1.0649, 'learning_rate': 2.0309596158019728e-06, 'epoch': 0.57} + 57%|█████▋ | 4039/7045 [13:08:27<9:48:27, 11.75s/it] 57%|█████▋ | 4040/7045 [13:08:39<9:46:04, 11.70s/it] {'loss': 1.1445, 'learning_rate': 2.0298306570807237e-06, 'epoch': 0.57} + 57%|█████▋ | 4040/7045 [13:08:39<9:46:04, 11.70s/it] 57%|█████▋ | 4041/7045 [13:08:50<9:37:15, 11.53s/it] {'loss': 1.1519, 'learning_rate': 2.028701797746847e-06, 'epoch': 0.57} + 57%|█████▋ | 4041/7045 [13:08:50<9:37:15, 11.53s/it] 57%|█████▋ | 4042/7045 [13:09:01<9:32:28, 11.44s/it] {'loss': 1.1094, 'learning_rate': 2.027573038038966e-06, 'epoch': 0.57} + 57%|█████▋ | 4042/7045 [13:09:01<9:32:28, 11.44s/it] 57%|█████▋ | 4043/7045 [13:09:12<9:25:53, 11.31s/it] {'loss': 1.1055, 'learning_rate': 2.0264443781956862e-06, 'epoch': 0.57} + 57%|█████▋ | 4043/7045 [13:09:12<9:25:53, 11.31s/it] 57%|█████▋ | 4044/7045 [13:09:23<9:25:23, 11.30s/it] {'loss': 1.1201, 'learning_rate': 2.0253158184555914e-06, 'epoch': 0.57} + 57%|█████▋ | 4044/7045 [13:09:23<9:25:23, 11.30s/it] 57%|█████▋ | 4045/7045 [13:09:35<9:37:49, 11.56s/it] {'loss': 1.0791, 'learning_rate': 2.0241873590572425e-06, 'epoch': 0.57} + 57%|█████▋ | 4045/7045 [13:09:35<9:37:49, 11.56s/it] 57%|█████▋ | 4046/7045 [13:09:46<9:27:21, 11.35s/it] {'loss': 1.0977, 'learning_rate': 2.0230590002391814e-06, 'epoch': 0.57} + 57%|█████▋ | 4046/7045 [13:09:46<9:27:21, 11.35s/it] 57%|█████▋ | 4047/7045 [13:09:57<9:23:13, 11.27s/it] {'loss': 1.0947, 'learning_rate': 2.021930742239927e-06, 'epoch': 0.57} + 57%|█████▋ | 4047/7045 [13:09:57<9:23:13, 11.27s/it] 57%|█████▋ | 4048/7045 [13:10:10<9:45:20, 11.72s/it] {'loss': 1.125, 'learning_rate': 2.0208025852979783e-06, 'epoch': 0.57} + 57%|█████▋ | 4048/7045 [13:10:10<9:45:20, 11.72s/it] 57%|█████▋ | 4049/7045 [13:10:22<9:42:00, 11.66s/it] {'loss': 1.126, 'learning_rate': 2.0196745296518115e-06, 'epoch': 0.57} + 57%|█████▋ | 4049/7045 [13:10:22<9:42:00, 11.66s/it] 57%|█████▋ | 4050/7045 [13:10:34<9:46:52, 11.76s/it] {'loss': 1.0957, 'learning_rate': 2.018546575539883e-06, 'epoch': 0.57} + 57%|█████▋ | 4050/7045 [13:10:34<9:46:52, 11.76s/it] 58%|█████▊ | 4051/7045 [13:10:45<9:36:28, 11.55s/it] {'loss': 1.1016, 'learning_rate': 2.0174187232006255e-06, 'epoch': 0.58} + 58%|█████▊ | 4051/7045 [13:10:45<9:36:28, 11.55s/it] 58%|█████▊ | 4052/7045 [13:10:56<9:26:53, 11.36s/it] {'loss': 1.0591, 'learning_rate': 2.0162909728724536e-06, 'epoch': 0.58} + 58%|█████▊ | 4052/7045 [13:10:56<9:26:53, 11.36s/it] 58%|█████▊ | 4053/7045 [13:11:07<9:23:08, 11.29s/it] {'loss': 1.1377, 'learning_rate': 2.0151633247937564e-06, 'epoch': 0.58} + 58%|█████▊ | 4053/7045 [13:11:07<9:23:08, 11.29s/it] 58%|█████▊ | 4054/7045 [13:11:18<9:24:06, 11.32s/it] {'loss': 1.0859, 'learning_rate': 2.014035779202904e-06, 'epoch': 0.58} + 58%|█████▊ | 4054/7045 [13:11:18<9:24:06, 11.32s/it] 58%|█████▊ | 4055/7045 [13:11:29<9:25:23, 11.35s/it] {'loss': 1.124, 'learning_rate': 2.0129083363382435e-06, 'epoch': 0.58} + 58%|█████▊ | 4055/7045 [13:11:29<9:25:23, 11.35s/it] 58%|█████▊ | 4056/7045 [13:11:40<9:20:03, 11.24s/it] {'loss': 1.0908, 'learning_rate': 2.0117809964381024e-06, 'epoch': 0.58} + 58%|█████▊ | 4056/7045 [13:11:40<9:20:03, 11.24s/it] 58%|█████▊ | 4057/7045 [13:11:52<9:25:32, 11.36s/it] {'loss': 1.1455, 'learning_rate': 2.0106537597407843e-06, 'epoch': 0.58} + 58%|█████▊ | 4057/7045 [13:11:52<9:25:32, 11.36s/it] 58%|█████▊ | 4058/7045 [13:12:03<9:22:25, 11.30s/it] {'loss': 1.0801, 'learning_rate': 2.009526626484571e-06, 'epoch': 0.58} + 58%|█████▊ | 4058/7045 [13:12:03<9:22:25, 11.30s/it] 58%|█████▊ | 4059/7045 [13:12:15<9:21:43, 11.29s/it] {'loss': 1.1289, 'learning_rate': 2.008399596907724e-06, 'epoch': 0.58} + 58%|█████▊ | 4059/7045 [13:12:15<9:21:43, 11.29s/it] 58%|█████▊ | 4060/7045 [13:12:27<9:38:13, 11.62s/it] {'loss': 1.1602, 'learning_rate': 2.0072726712484812e-06, 'epoch': 0.58} + 58%|█████▊ | 4060/7045 [13:12:27<9:38:13, 11.62s/it] 58%|█████▊ | 4061/7045 [13:12:40<9:55:52, 11.98s/it] {'loss': 1.0737, 'learning_rate': 2.0061458497450594e-06, 'epoch': 0.58} + 58%|█████▊ | 4061/7045 [13:12:40<9:55:52, 11.98s/it] 58%|█████▊ | 4062/7045 [13:12:51<9:39:25, 11.65s/it] {'loss': 1.1367, 'learning_rate': 2.005019132635653e-06, 'epoch': 0.58} + 58%|█████▊ | 4062/7045 [13:12:51<9:39:25, 11.65s/it] 58%|█████▊ | 4063/7045 [13:13:02<9:31:07, 11.49s/it] {'loss': 1.0947, 'learning_rate': 2.003892520158436e-06, 'epoch': 0.58} + 58%|█████▊ | 4063/7045 [13:13:02<9:31:07, 11.49s/it] 58%|█████▊ | 4064/7045 [13:13:13<9:33:56, 11.55s/it] {'loss': 1.1226, 'learning_rate': 2.002766012551557e-06, 'epoch': 0.58} + 58%|█████▊ | 4064/7045 [13:13:13<9:33:56, 11.55s/it] 58%|█████▊ | 4065/7045 [13:13:26<9:42:06, 11.72s/it] {'loss': 1.1387, 'learning_rate': 2.0016396100531453e-06, 'epoch': 0.58} + 58%|█████▊ | 4065/7045 [13:13:26<9:42:06, 11.72s/it] 58%|█████▊ | 4066/7045 [13:13:37<9:36:52, 11.62s/it] {'loss': 1.1172, 'learning_rate': 2.000513312901307e-06, 'epoch': 0.58} + 58%|█████▊ | 4066/7045 [13:13:37<9:36:52, 11.62s/it] 58%|█████▊ | 4067/7045 [13:13:48<9:27:30, 11.43s/it] {'loss': 1.0933, 'learning_rate': 1.999387121334125e-06, 'epoch': 0.58} + 58%|█████▊ | 4067/7045 [13:13:48<9:27:30, 11.43s/it] 58%|█████▊ | 4068/7045 [13:13:59<9:24:17, 11.37s/it] {'loss': 1.1289, 'learning_rate': 1.998261035589662e-06, 'epoch': 0.58} + 58%|█████▊ | 4068/7045 [13:13:59<9:24:17, 11.37s/it] 58%|█████▊ | 4069/7045 [13:14:10<9:14:31, 11.18s/it] {'loss': 1.1182, 'learning_rate': 1.9971350559059573e-06, 'epoch': 0.58} + 58%|█████▊ | 4069/7045 [13:14:10<9:14:31, 11.18s/it] 58%|█████▊ | 4070/7045 [13:14:21<9:12:11, 11.14s/it] {'loss': 1.1221, 'learning_rate': 1.996009182521027e-06, 'epoch': 0.58} + 58%|█████▊ | 4070/7045 [13:14:21<9:12:11, 11.14s/it] 58%|█████▊ | 4071/7045 [13:14:32<9:08:54, 11.07s/it] {'loss': 1.0938, 'learning_rate': 1.994883415672865e-06, 'epoch': 0.58} + 58%|█████▊ | 4071/7045 [13:14:32<9:08:54, 11.07s/it] 58%|█████▊ | 4072/7045 [13:14:43<9:11:12, 11.12s/it] {'loss': 1.1152, 'learning_rate': 1.993757755599443e-06, 'epoch': 0.58} + 58%|█████▊ | 4072/7045 [13:14:43<9:11:12, 11.12s/it] 58%|█████▊ | 4073/7045 [13:14:54<9:10:31, 11.11s/it] {'loss': 1.1895, 'learning_rate': 1.992632202538711e-06, 'epoch': 0.58} + 58%|█████▊ | 4073/7045 [13:14:54<9:10:31, 11.11s/it] 58%|█████▊ | 4074/7045 [13:15:06<9:25:02, 11.41s/it] {'loss': 1.1025, 'learning_rate': 1.9915067567285956e-06, 'epoch': 0.58} + 58%|█████▊ | 4074/7045 [13:15:06<9:25:02, 11.41s/it] 58%|█████▊ | 4075/7045 [13:15:17<9:19:44, 11.31s/it] {'loss': 1.1123, 'learning_rate': 1.9903814184069995e-06, 'epoch': 0.58} + 58%|█████▊ | 4075/7045 [13:15:17<9:19:44, 11.31s/it] 58%|█████▊ | 4076/7045 [13:15:31<9:50:05, 11.93s/it] {'loss': 1.1035, 'learning_rate': 1.9892561878118046e-06, 'epoch': 0.58} + 58%|█████▊ | 4076/7045 [13:15:31<9:50:05, 11.93s/it] 58%|█████▊ | 4077/7045 [13:15:43<9:58:02, 12.09s/it] {'loss': 1.084, 'learning_rate': 1.9881310651808686e-06, 'epoch': 0.58} + 58%|█████▊ | 4077/7045 [13:15:43<9:58:02, 12.09s/it] 58%|█████▊ | 4078/7045 [13:15:55<9:54:00, 12.01s/it] {'loss': 1.1289, 'learning_rate': 1.9870060507520284e-06, 'epoch': 0.58} + 58%|█████▊ | 4078/7045 [13:15:55<9:54:00, 12.01s/it] 58%|█████▊ | 4079/7045 [13:16:09<10:19:06, 12.52s/it] {'loss': 1.1025, 'learning_rate': 1.985881144763095e-06, 'epoch': 0.58} + 58%|█████▊ | 4079/7045 [13:16:09<10:19:06, 12.52s/it] 58%|█████▊ | 4080/7045 [13:16:20<10:02:19, 12.19s/it] {'loss': 1.0967, 'learning_rate': 1.9847563474518584e-06, 'epoch': 0.58} + 58%|█████▊ | 4080/7045 [13:16:20<10:02:19, 12.19s/it] 58%|█████▊ | 4081/7045 [13:16:33<10:11:19, 12.38s/it] {'loss': 1.104, 'learning_rate': 1.983631659056087e-06, 'epoch': 0.58} + 58%|█████▊ | 4081/7045 [13:16:33<10:11:19, 12.38s/it] 58%|█████▊ | 4082/7045 [13:16:46<10:23:54, 12.63s/it] {'loss': 1.1064, 'learning_rate': 1.982507079813523e-06, 'epoch': 0.58} + 58%|█████▊ | 4082/7045 [13:16:46<10:23:54, 12.63s/it] 58%|█████▊ | 4083/7045 [13:16:57<10:02:51, 12.21s/it] {'loss': 1.1572, 'learning_rate': 1.981382609961888e-06, 'epoch': 0.58} + 58%|█████▊ | 4083/7045 [13:16:57<10:02:51, 12.21s/it] 58%|█████▊ | 4084/7045 [13:17:08<9:44:40, 11.85s/it] {'loss': 1.1289, 'learning_rate': 1.980258249738879e-06, 'epoch': 0.58} + 58%|█████▊ | 4084/7045 [13:17:08<9:44:40, 11.85s/it] 58%|█████▊ | 4085/7045 [13:17:19<9:29:23, 11.54s/it] {'loss': 1.0986, 'learning_rate': 1.979133999382171e-06, 'epoch': 0.58} + 58%|█████▊ | 4085/7045 [13:17:19<9:29:23, 11.54s/it] 58%|█████▊ | 4086/7045 [13:17:30<9:21:28, 11.39s/it] {'loss': 1.1436, 'learning_rate': 1.978009859129414e-06, 'epoch': 0.58} + 58%|█████▊ | 4086/7045 [13:17:30<9:21:28, 11.39s/it] 58%|█████▊ | 4087/7045 [13:17:41<9:15:46, 11.27s/it] {'loss': 1.1133, 'learning_rate': 1.976885829218237e-06, 'epoch': 0.58} + 58%|█████▊ | 4087/7045 [13:17:41<9:15:46, 11.27s/it] 58%|█████▊ | 4088/7045 [13:17:52<9:13:22, 11.23s/it] {'loss': 1.1162, 'learning_rate': 1.9757619098862434e-06, 'epoch': 0.58} + 58%|█████▊ | 4088/7045 [13:17:52<9:13:22, 11.23s/it] 58%|█████▊ | 4089/7045 [13:18:04<9:21:02, 11.39s/it] {'loss': 1.123, 'learning_rate': 1.9746381013710163e-06, 'epoch': 0.58} + 58%|█████▊ | 4089/7045 [13:18:04<9:21:02, 11.39s/it] 58%|█████▊ | 4090/7045 [13:18:15<9:15:06, 11.27s/it] {'loss': 1.1064, 'learning_rate': 1.973514403910111e-06, 'epoch': 0.58} + 58%|█████▊ | 4090/7045 [13:18:15<9:15:06, 11.27s/it] 58%|█████▊ | 4091/7045 [13:18:27<9:28:02, 11.54s/it] {'loss': 1.0996, 'learning_rate': 1.972390817741063e-06, 'epoch': 0.58} + 58%|█████▊ | 4091/7045 [13:18:27<9:28:02, 11.54s/it] 58%|█████▊ | 4092/7045 [13:18:38<9:21:11, 11.40s/it] {'loss': 1.1182, 'learning_rate': 1.9712673431013834e-06, 'epoch': 0.58} + 58%|█████▊ | 4092/7045 [13:18:38<9:21:11, 11.40s/it] 58%|█████▊ | 4093/7045 [13:18:51<9:33:51, 11.66s/it] {'loss': 1.1133, 'learning_rate': 1.970143980228559e-06, 'epoch': 0.58} + 58%|█████▊ | 4093/7045 [13:18:51<9:33:51, 11.66s/it] 58%|█████▊ | 4094/7045 [13:19:02<9:24:48, 11.48s/it] {'loss': 1.0894, 'learning_rate': 1.9690207293600536e-06, 'epoch': 0.58} + 58%|█████▊ | 4094/7045 [13:19:02<9:24:48, 11.48s/it] 58%|█████▊ | 4095/7045 [13:19:19<10:42:36, 13.07s/it] {'loss': 1.1401, 'learning_rate': 1.9678975907333067e-06, 'epoch': 0.58} + 58%|█████▊ | 4095/7045 [13:19:19<10:42:36, 13.07s/it] 58%|█████▊ | 4096/7045 [13:19:30<10:16:35, 12.55s/it] {'loss': 1.0767, 'learning_rate': 1.966774564585735e-06, 'epoch': 0.58} + 58%|█████▊ | 4096/7045 [13:19:30<10:16:35, 12.55s/it] 58%|█████▊ | 4097/7045 [13:19:41<10:01:37, 12.24s/it] {'loss': 1.1387, 'learning_rate': 1.9656516511547306e-06, 'epoch': 0.58} + 58%|█████▊ | 4097/7045 [13:19:41<10:01:37, 12.24s/it] 58%|█████▊ | 4098/7045 [13:19:53<9:44:46, 11.91s/it] {'loss': 1.1416, 'learning_rate': 1.9645288506776618e-06, 'epoch': 0.58} + 58%|█████▊ | 4098/7045 [13:19:53<9:44:46, 11.91s/it] 58%|█████▊ | 4099/7045 [13:20:04<9:34:36, 11.70s/it] {'loss': 1.0879, 'learning_rate': 1.963406163391873e-06, 'epoch': 0.58} + 58%|█████▊ | 4099/7045 [13:20:04<9:34:36, 11.70s/it] 58%|█████▊ | 4100/7045 [13:20:15<9:27:13, 11.56s/it] {'loss': 1.0967, 'learning_rate': 1.962283589534686e-06, 'epoch': 0.58} + 58%|█████▊ | 4100/7045 [13:20:15<9:27:13, 11.56s/it] 58%|█████▊ | 4101/7045 [13:20:27<9:38:32, 11.79s/it] {'loss': 1.124, 'learning_rate': 1.9611611293433968e-06, 'epoch': 0.58} + 58%|█████▊ | 4101/7045 [13:20:27<9:38:32, 11.79s/it] 58%|█████▊ | 4102/7045 [13:20:39<9:44:02, 11.91s/it] {'loss': 1.1719, 'learning_rate': 1.960038783055278e-06, 'epoch': 0.58} + 58%|█████▊ | 4102/7045 [13:20:39<9:44:02, 11.91s/it] 58%|█████▊ | 4103/7045 [13:20:50<9:29:21, 11.61s/it] {'loss': 1.0747, 'learning_rate': 1.958916550907578e-06, 'epoch': 0.58} + 58%|█████▊ | 4103/7045 [13:20:50<9:29:21, 11.61s/it] 58%|█████▊ | 4104/7045 [13:21:02<9:31:26, 11.66s/it] {'loss': 1.0977, 'learning_rate': 1.957794433137522e-06, 'epoch': 0.58} + 58%|█████▊ | 4104/7045 [13:21:02<9:31:26, 11.66s/it] 58%|█████▊ | 4105/7045 [13:21:13<9:20:58, 11.45s/it] {'loss': 1.1533, 'learning_rate': 1.956672429982311e-06, 'epoch': 0.58} + 58%|█████▊ | 4105/7045 [13:21:13<9:20:58, 11.45s/it] 58%|█████▊ | 4106/7045 [13:21:24<9:14:46, 11.33s/it] {'loss': 1.1377, 'learning_rate': 1.9555505416791197e-06, 'epoch': 0.58} + 58%|█████▊ | 4106/7045 [13:21:24<9:14:46, 11.33s/it] 58%|█████▊ | 4107/7045 [13:21:37<9:30:26, 11.65s/it] {'loss': 1.1211, 'learning_rate': 1.9544287684651016e-06, 'epoch': 0.58} + 58%|█████▊ | 4107/7045 [13:21:37<9:30:26, 11.65s/it] 58%|█████▊ | 4108/7045 [13:21:47<9:19:14, 11.42s/it] {'loss': 1.127, 'learning_rate': 1.9533071105773823e-06, 'epoch': 0.58} + 58%|█████▊ | 4108/7045 [13:21:47<9:19:14, 11.42s/it] 58%|█████▊ | 4109/7045 [13:21:59<9:17:41, 11.40s/it] {'loss': 1.0967, 'learning_rate': 1.952185568253066e-06, 'epoch': 0.58} + 58%|█████▊ | 4109/7045 [13:21:59<9:17:41, 11.40s/it] 58%|█████▊ | 4110/7045 [13:22:10<9:15:39, 11.36s/it] {'loss': 1.1562, 'learning_rate': 1.9510641417292315e-06, 'epoch': 0.58} + 58%|█████▊ | 4110/7045 [13:22:10<9:15:39, 11.36s/it] 58%|█████▊ | 4111/7045 [13:22:22<9:16:59, 11.39s/it] {'loss': 1.1768, 'learning_rate': 1.9499428312429335e-06, 'epoch': 0.58} + 58%|█████▊ | 4111/7045 [13:22:22<9:16:59, 11.39s/it] 58%|█████▊ | 4112/7045 [13:22:33<9:13:56, 11.33s/it] {'loss': 1.105, 'learning_rate': 1.9488216370312004e-06, 'epoch': 0.58} + 58%|█████▊ | 4112/7045 [13:22:33<9:13:56, 11.33s/it] 58%|█████▊ | 4113/7045 [13:22:46<9:42:58, 11.93s/it] {'loss': 1.1123, 'learning_rate': 1.9477005593310374e-06, 'epoch': 0.58} + 58%|█████▊ | 4113/7045 [13:22:46<9:42:58, 11.93s/it] 58%|█████▊ | 4114/7045 [13:22:57<9:34:09, 11.75s/it] {'loss': 1.0791, 'learning_rate': 1.9465795983794263e-06, 'epoch': 0.58} + 58%|█████▊ | 4114/7045 [13:22:57<9:34:09, 11.75s/it] 58%|█████▊ | 4115/7045 [13:23:09<9:28:53, 11.65s/it] {'loss': 1.1387, 'learning_rate': 1.945458754413322e-06, 'epoch': 0.58} + 58%|█████▊ | 4115/7045 [13:23:09<9:28:53, 11.65s/it] 58%|█████▊ | 4116/7045 [13:23:20<9:22:32, 11.52s/it] {'loss': 1.1348, 'learning_rate': 1.9443380276696542e-06, 'epoch': 0.58} + 58%|█████▊ | 4116/7045 [13:23:20<9:22:32, 11.52s/it] 58%|█████▊ | 4117/7045 [13:23:31<9:15:19, 11.38s/it] {'loss': 1.1279, 'learning_rate': 1.9432174183853324e-06, 'epoch': 0.58} + 58%|█████▊ | 4117/7045 [13:23:31<9:15:19, 11.38s/it] 58%|█████▊ | 4118/7045 [13:23:43<9:20:03, 11.48s/it] {'loss': 1.1123, 'learning_rate': 1.942096926797237e-06, 'epoch': 0.58} + 58%|█████▊ | 4118/7045 [13:23:43<9:20:03, 11.48s/it] 58%|█████▊ | 4119/7045 [13:23:54<9:14:52, 11.38s/it] {'loss': 1.0894, 'learning_rate': 1.9409765531422233e-06, 'epoch': 0.58} + 58%|█████▊ | 4119/7045 [13:23:54<9:14:52, 11.38s/it] 58%|█████▊ | 4120/7045 [13:24:06<9:30:07, 11.69s/it] {'loss': 1.0669, 'learning_rate': 1.9398562976571235e-06, 'epoch': 0.58} + 58%|█████▊ | 4120/7045 [13:24:06<9:30:07, 11.69s/it] 58%|█████▊ | 4121/7045 [13:24:19<9:49:50, 12.10s/it] {'loss': 1.0972, 'learning_rate': 1.9387361605787446e-06, 'epoch': 0.58} + 58%|█████▊ | 4121/7045 [13:24:19<9:49:50, 12.10s/it] 59%|█████▊ | 4122/7045 [13:24:31<9:35:51, 11.82s/it] {'loss': 1.1055, 'learning_rate': 1.937616142143869e-06, 'epoch': 0.59} + 59%|█████▊ | 4122/7045 [13:24:31<9:35:51, 11.82s/it] 59%|█████▊ | 4123/7045 [13:24:43<9:43:57, 11.99s/it] {'loss': 1.103, 'learning_rate': 1.9364962425892517e-06, 'epoch': 0.59} + 59%|█████▊ | 4123/7045 [13:24:43<9:43:57, 11.99s/it] 59%|█████▊ | 4124/7045 [13:24:54<9:31:38, 11.74s/it] {'loss': 1.1172, 'learning_rate': 1.9353764621516253e-06, 'epoch': 0.59} + 59%|█████▊ | 4124/7045 [13:24:54<9:31:38, 11.74s/it] 59%|█████▊ | 4125/7045 [13:25:05<9:22:23, 11.56s/it] {'loss': 1.1562, 'learning_rate': 1.9342568010676955e-06, 'epoch': 0.59} + 59%|█████▊ | 4125/7045 [13:25:05<9:22:23, 11.56s/it] 59%|█████▊ | 4126/7045 [13:25:17<9:18:01, 11.47s/it] {'loss': 1.1553, 'learning_rate': 1.9331372595741443e-06, 'epoch': 0.59} + 59%|█████▊ | 4126/7045 [13:25:17<9:18:01, 11.47s/it] 59%|█████▊ | 4127/7045 [13:25:29<9:34:41, 11.82s/it] {'loss': 1.0776, 'learning_rate': 1.9320178379076256e-06, 'epoch': 0.59} + 59%|█████▊ | 4127/7045 [13:25:29<9:34:41, 11.82s/it] 59%|█████▊ | 4128/7045 [13:25:42<9:43:15, 12.00s/it] {'loss': 1.1719, 'learning_rate': 1.9308985363047703e-06, 'epoch': 0.59} + 59%|█████▊ | 4128/7045 [13:25:42<9:43:15, 12.00s/it] 59%|█████▊ | 4129/7045 [13:25:53<9:37:37, 11.89s/it] {'loss': 1.1211, 'learning_rate': 1.9297793550021857e-06, 'epoch': 0.59} + 59%|█████▊ | 4129/7045 [13:25:53<9:37:37, 11.89s/it] 59%|█████▊ | 4130/7045 [13:26:04<9:25:29, 11.64s/it] {'loss': 1.0693, 'learning_rate': 1.9286602942364493e-06, 'epoch': 0.59} + 59%|█████▊ | 4130/7045 [13:26:04<9:25:29, 11.64s/it] 59%|█████▊ | 4131/7045 [13:26:16<9:32:31, 11.79s/it] {'loss': 1.0889, 'learning_rate': 1.927541354244116e-06, 'epoch': 0.59} + 59%|█████▊ | 4131/7045 [13:26:16<9:32:31, 11.79s/it] 59%|█████▊ | 4132/7045 [13:26:28<9:24:15, 11.62s/it] {'loss': 1.0977, 'learning_rate': 1.9264225352617137e-06, 'epoch': 0.59} + 59%|█████▊ | 4132/7045 [13:26:28<9:24:15, 11.62s/it] 59%|█████▊ | 4133/7045 [13:26:40<9:32:23, 11.79s/it] {'loss': 1.1143, 'learning_rate': 1.925303837525747e-06, 'epoch': 0.59} + 59%|█████▊ | 4133/7045 [13:26:40<9:32:23, 11.79s/it] 59%|█████▊ | 4134/7045 [13:26:51<9:29:59, 11.75s/it] {'loss': 1.1089, 'learning_rate': 1.924185261272691e-06, 'epoch': 0.59} + 59%|█████▊ | 4134/7045 [13:26:51<9:29:59, 11.75s/it] 59%|█████▊ | 4135/7045 [13:27:04<9:35:49, 11.87s/it] {'loss': 1.0679, 'learning_rate': 1.9230668067389993e-06, 'epoch': 0.59} + 59%|█████▊ | 4135/7045 [13:27:04<9:35:49, 11.87s/it] 59%|█████▊ | 4136/7045 [13:27:15<9:26:48, 11.69s/it] {'loss': 1.082, 'learning_rate': 1.9219484741610972e-06, 'epoch': 0.59} + 59%|█████▊ | 4136/7045 [13:27:15<9:26:48, 11.69s/it] 59%|█████▊ | 4137/7045 [13:27:26<9:19:19, 11.54s/it] {'loss': 1.167, 'learning_rate': 1.9208302637753855e-06, 'epoch': 0.59} + 59%|█████▊ | 4137/7045 [13:27:26<9:19:19, 11.54s/it] 59%|█████▊ | 4138/7045 [13:27:37<9:09:33, 11.34s/it] {'loss': 1.123, 'learning_rate': 1.9197121758182375e-06, 'epoch': 0.59} + 59%|█████▊ | 4138/7045 [13:27:37<9:09:33, 11.34s/it] 59%|█████▉ | 4139/7045 [13:27:49<9:19:58, 11.56s/it] {'loss': 1.1025, 'learning_rate': 1.9185942105260025e-06, 'epoch': 0.59} + 59%|█████▉ | 4139/7045 [13:27:49<9:19:58, 11.56s/it] 59%|█████▉ | 4140/7045 [13:28:00<9:17:20, 11.51s/it] {'loss': 1.0938, 'learning_rate': 1.9174763681350023e-06, 'epoch': 0.59} + 59%|█████▉ | 4140/7045 [13:28:00<9:17:20, 11.51s/it] 59%|█████▉ | 4141/7045 [13:28:14<9:42:35, 12.04s/it] {'loss': 1.106, 'learning_rate': 1.916358648881534e-06, 'epoch': 0.59} + 59%|█████▉ | 4141/7045 [13:28:14<9:42:35, 12.04s/it] 59%|█████▉ | 4142/7045 [13:28:25<9:27:54, 11.74s/it] {'loss': 1.1445, 'learning_rate': 1.915241053001869e-06, 'epoch': 0.59} + 59%|█████▉ | 4142/7045 [13:28:25<9:27:54, 11.74s/it] 59%|█████▉ | 4143/7045 [13:28:38<9:44:21, 12.08s/it] {'loss': 1.0801, 'learning_rate': 1.9141235807322507e-06, 'epoch': 0.59} + 59%|█████▉ | 4143/7045 [13:28:38<9:44:21, 12.08s/it] 59%|█████▉ | 4144/7045 [13:28:49<9:31:59, 11.83s/it] {'loss': 1.1035, 'learning_rate': 1.9130062323088984e-06, 'epoch': 0.59} + 59%|█████▉ | 4144/7045 [13:28:49<9:31:59, 11.83s/it] 59%|█████▉ | 4145/7045 [13:29:00<9:23:31, 11.66s/it] {'loss': 1.1006, 'learning_rate': 1.911889007968003e-06, 'epoch': 0.59} + 59%|█████▉ | 4145/7045 [13:29:00<9:23:31, 11.66s/it] 59%|█████▉ | 4146/7045 [13:29:11<9:14:06, 11.47s/it] {'loss': 1.1152, 'learning_rate': 1.910771907945731e-06, 'epoch': 0.59} + 59%|█████▉ | 4146/7045 [13:29:11<9:14:06, 11.47s/it] 59%|█████▉ | 4147/7045 [13:29:22<9:07:44, 11.34s/it] {'loss': 1.1211, 'learning_rate': 1.9096549324782226e-06, 'epoch': 0.59} + 59%|█████▉ | 4147/7045 [13:29:22<9:07:44, 11.34s/it] 59%|█████▉ | 4148/7045 [13:29:34<9:19:05, 11.58s/it] {'loss': 1.126, 'learning_rate': 1.9085380818015913e-06, 'epoch': 0.59} + 59%|█████▉ | 4148/7045 [13:29:34<9:19:05, 11.58s/it] 59%|█████▉ | 4149/7045 [13:29:46<9:18:10, 11.56s/it] {'loss': 1.123, 'learning_rate': 1.9074213561519227e-06, 'epoch': 0.59} + 59%|█████▉ | 4149/7045 [13:29:46<9:18:10, 11.56s/it] 59%|█████▉ | 4150/7045 [13:29:59<9:34:33, 11.91s/it] {'loss': 1.1025, 'learning_rate': 1.906304755765278e-06, 'epoch': 0.59} + 59%|█████▉ | 4150/7045 [13:29:59<9:34:33, 11.91s/it] 59%|█████▉ | 4151/7045 [13:30:10<9:29:46, 11.81s/it] {'loss': 1.126, 'learning_rate': 1.9051882808776918e-06, 'epoch': 0.59} + 59%|█████▉ | 4151/7045 [13:30:10<9:29:46, 11.81s/it] 59%|█████▉ | 4152/7045 [13:30:23<9:49:16, 12.22s/it] {'loss': 1.1138, 'learning_rate': 1.904071931725171e-06, 'epoch': 0.59} + 59%|█████▉ | 4152/7045 [13:30:23<9:49:16, 12.22s/it] 59%|█████▉ | 4153/7045 [13:30:36<9:59:25, 12.44s/it] {'loss': 1.0703, 'learning_rate': 1.9029557085436964e-06, 'epoch': 0.59} + 59%|█████▉ | 4153/7045 [13:30:36<9:59:25, 12.44s/it] 59%|█████▉ | 4154/7045 [13:30:48<9:42:26, 12.09s/it] {'loss': 1.0928, 'learning_rate': 1.901839611569223e-06, 'epoch': 0.59} + 59%|█████▉ | 4154/7045 [13:30:48<9:42:26, 12.09s/it] 59%|█████▉ | 4155/7045 [13:31:00<9:42:57, 12.10s/it] {'loss': 1.0908, 'learning_rate': 1.900723641037679e-06, 'epoch': 0.59} + 59%|█████▉ | 4155/7045 [13:31:00<9:42:57, 12.10s/it] 59%|█████▉ | 4156/7045 [13:31:11<9:25:56, 11.75s/it] {'loss': 1.0947, 'learning_rate': 1.8996077971849636e-06, 'epoch': 0.59} + 59%|█████▉ | 4156/7045 [13:31:11<9:25:56, 11.75s/it] 59%|█████▉ | 4157/7045 [13:31:22<9:20:49, 11.65s/it] {'loss': 1.1475, 'learning_rate': 1.898492080246952e-06, 'epoch': 0.59} + 59%|█████▉ | 4157/7045 [13:31:22<9:20:49, 11.65s/it] 59%|█████▉ | 4158/7045 [13:31:33<9:14:50, 11.53s/it] {'loss': 1.1377, 'learning_rate': 1.8973764904594911e-06, 'epoch': 0.59} + 59%|█████▉ | 4158/7045 [13:31:33<9:14:50, 11.53s/it] 59%|█████▉ | 4159/7045 [13:31:44<9:04:10, 11.31s/it] {'loss': 1.1201, 'learning_rate': 1.8962610280584021e-06, 'epoch': 0.59} + 59%|█████▉ | 4159/7045 [13:31:44<9:04:10, 11.31s/it] 59%|█████▉ | 4160/7045 [13:31:55<9:00:39, 11.24s/it] {'loss': 1.0879, 'learning_rate': 1.8951456932794774e-06, 'epoch': 0.59} + 59%|█████▉ | 4160/7045 [13:31:55<9:00:39, 11.24s/it] 59%|█████▉ | 4161/7045 [13:32:07<9:08:20, 11.41s/it] {'loss': 1.1064, 'learning_rate': 1.894030486358484e-06, 'epoch': 0.59} + 59%|█████▉ | 4161/7045 [13:32:07<9:08:20, 11.41s/it] 59%|█████▉ | 4162/7045 [13:32:18<9:03:10, 11.30s/it] {'loss': 1.1621, 'learning_rate': 1.8929154075311619e-06, 'epoch': 0.59} + 59%|█████▉ | 4162/7045 [13:32:18<9:03:10, 11.30s/it] 59%|█████▉ | 4163/7045 [13:32:30<9:11:21, 11.48s/it] {'loss': 1.1064, 'learning_rate': 1.8918004570332229e-06, 'epoch': 0.59} + 59%|█████▉ | 4163/7045 [13:32:30<9:11:21, 11.48s/it] 59%|█████▉ | 4164/7045 [13:32:41<9:04:51, 11.35s/it] {'loss': 1.0996, 'learning_rate': 1.8906856351003523e-06, 'epoch': 0.59} + 59%|█████▉ | 4164/7045 [13:32:41<9:04:51, 11.35s/it] 59%|█████▉ | 4165/7045 [13:32:53<9:10:25, 11.47s/it] {'loss': 1.0645, 'learning_rate': 1.8895709419682074e-06, 'epoch': 0.59} + 59%|█████▉ | 4165/7045 [13:32:53<9:10:25, 11.47s/it] 59%|█████▉ | 4166/7045 [13:33:04<9:10:21, 11.47s/it] {'loss': 1.1504, 'learning_rate': 1.8884563778724213e-06, 'epoch': 0.59} + 59%|█████▉ | 4166/7045 [13:33:04<9:10:21, 11.47s/it] 59%|█████▉ | 4167/7045 [13:33:15<9:05:26, 11.37s/it] {'loss': 1.1279, 'learning_rate': 1.887341943048596e-06, 'epoch': 0.59} + 59%|█████▉ | 4167/7045 [13:33:15<9:05:26, 11.37s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 12 bytes but only got 10. + warnings.warn(str(msg)) + 59%|█████▉ | 4168/7045 [13:33:26<8:58:14, 11.22s/it] {'loss': 1.1221, 'learning_rate': 1.8862276377323083e-06, 'epoch': 0.59} + 59%|█████▉ | 4168/7045 [13:33:26<8:58:14, 11.22s/it] 59%|█████▉ | 4169/7045 [13:33:39<9:27:38, 11.84s/it] {'loss': 1.127, 'learning_rate': 1.885113462159107e-06, 'epoch': 0.59} + 59%|█████▉ | 4169/7045 [13:33:40<9:27:38, 11.84s/it] 59%|█████▉ | 4170/7045 [13:33:51<9:16:19, 11.61s/it] {'loss': 1.1045, 'learning_rate': 1.8839994165645139e-06, 'epoch': 0.59} + 59%|█████▉ | 4170/7045 [13:33:51<9:16:19, 11.61s/it] 59%|█████▉ | 4171/7045 [13:34:02<9:14:57, 11.59s/it] {'loss': 1.1123, 'learning_rate': 1.8828855011840222e-06, 'epoch': 0.59} + 59%|█████▉ | 4171/7045 [13:34:02<9:14:57, 11.59s/it] 59%|█████▉ | 4172/7045 [13:34:13<9:11:45, 11.52s/it] {'loss': 1.1162, 'learning_rate': 1.881771716253099e-06, 'epoch': 0.59} + 59%|█████▉ | 4172/7045 [13:34:13<9:11:45, 11.52s/it] 59%|█████▉ | 4173/7045 [13:34:25<9:05:01, 11.39s/it] {'loss': 1.0898, 'learning_rate': 1.880658062007183e-06, 'epoch': 0.59} + 59%|█████▉ | 4173/7045 [13:34:25<9:05:01, 11.39s/it] 59%|█████▉ | 4174/7045 [13:34:36<9:03:22, 11.36s/it] {'loss': 1.1094, 'learning_rate': 1.8795445386816857e-06, 'epoch': 0.59} + 59%|█████▉ | 4174/7045 [13:34:36<9:03:22, 11.36s/it] 59%|█████▉ | 4175/7045 [13:34:47<8:59:22, 11.28s/it] {'loss': 1.1216, 'learning_rate': 1.8784311465119903e-06, 'epoch': 0.59} + 59%|█████▉ | 4175/7045 [13:34:47<8:59:22, 11.28s/it] 59%|█████▉ | 4176/7045 [13:34:58<8:59:09, 11.28s/it] {'loss': 1.1162, 'learning_rate': 1.8773178857334528e-06, 'epoch': 0.59} + 59%|█████▉ | 4176/7045 [13:34:58<8:59:09, 11.28s/it] 59%|█████▉ | 4177/7045 [13:35:12<9:28:43, 11.90s/it] {'loss': 1.0928, 'learning_rate': 1.8762047565814013e-06, 'epoch': 0.59} + 59%|█████▉ | 4177/7045 [13:35:12<9:28:43, 11.90s/it] 59%|█████▉ | 4178/7045 [13:35:23<9:19:21, 11.71s/it] {'loss': 1.1016, 'learning_rate': 1.8750917592911364e-06, 'epoch': 0.59} + 59%|█████▉ | 4178/7045 [13:35:23<9:19:21, 11.71s/it] 59%|█████▉ | 4179/7045 [13:35:34<9:08:31, 11.48s/it] {'loss': 1.1318, 'learning_rate': 1.8739788940979304e-06, 'epoch': 0.59} + 59%|█████▉ | 4179/7045 [13:35:34<9:08:31, 11.48s/it] 59%|█████▉ | 4180/7045 [13:35:44<8:57:30, 11.26s/it] {'loss': 1.1123, 'learning_rate': 1.8728661612370278e-06, 'epoch': 0.59} + 59%|█████▉ | 4180/7045 [13:35:44<8:57:30, 11.26s/it] 59%|█████▉ | 4181/7045 [13:35:55<8:53:04, 11.17s/it] {'loss': 1.1299, 'learning_rate': 1.8717535609436454e-06, 'epoch': 0.59} + 59%|█████▉ | 4181/7045 [13:35:55<8:53:04, 11.17s/it] 59%|█████▉ | 4182/7045 [13:36:08<9:17:41, 11.69s/it] {'loss': 1.0996, 'learning_rate': 1.8706410934529712e-06, 'epoch': 0.59} + 59%|█████▉ | 4182/7045 [13:36:08<9:17:41, 11.69s/it] 59%|█████▉ | 4183/7045 [13:36:20<9:15:17, 11.64s/it] {'loss': 1.1426, 'learning_rate': 1.8695287590001655e-06, 'epoch': 0.59} + 59%|█████▉ | 4183/7045 [13:36:20<9:15:17, 11.64s/it] 59%|█████▉ | 4184/7045 [13:36:31<9:03:37, 11.40s/it] {'loss': 1.0664, 'learning_rate': 1.8684165578203605e-06, 'epoch': 0.59} + 59%|█████▉ | 4184/7045 [13:36:31<9:03:37, 11.40s/it] 59%|█████▉ | 4185/7045 [13:36:42<8:59:31, 11.32s/it] {'loss': 1.1455, 'learning_rate': 1.8673044901486614e-06, 'epoch': 0.59} + 59%|█████▉ | 4185/7045 [13:36:42<8:59:31, 11.32s/it] 59%|█████▉ | 4186/7045 [13:36:53<9:03:47, 11.41s/it] {'loss': 1.1689, 'learning_rate': 1.866192556220143e-06, 'epoch': 0.59} + 59%|█████▉ | 4186/7045 [13:36:53<9:03:47, 11.41s/it] 59%|█████▉ | 4187/7045 [13:37:05<9:00:30, 11.35s/it] {'loss': 1.0977, 'learning_rate': 1.865080756269853e-06, 'epoch': 0.59} + 59%|█████▉ | 4187/7045 [13:37:05<9:00:30, 11.35s/it] 59%|█████▉ | 4188/7045 [13:37:16<8:56:43, 11.27s/it] {'loss': 1.0947, 'learning_rate': 1.8639690905328114e-06, 'epoch': 0.59} + 59%|█████▉ | 4188/7045 [13:37:16<8:56:43, 11.27s/it] 59%|█████▉ | 4189/7045 [13:37:27<8:50:43, 11.15s/it] {'loss': 1.082, 'learning_rate': 1.8628575592440078e-06, 'epoch': 0.59} + 59%|█████▉ | 4189/7045 [13:37:27<8:50:43, 11.15s/it] 59%|█████▉ | 4190/7045 [13:37:39<9:13:19, 11.63s/it] {'loss': 1.1089, 'learning_rate': 1.8617461626384067e-06, 'epoch': 0.59} + 59%|█████▉ | 4190/7045 [13:37:39<9:13:19, 11.63s/it] 59%|█████▉ | 4191/7045 [13:37:51<9:11:33, 11.60s/it] {'loss': 1.1455, 'learning_rate': 1.8606349009509408e-06, 'epoch': 0.59} + 59%|█████▉ | 4191/7045 [13:37:51<9:11:33, 11.60s/it] 60%|█████▉ | 4192/7045 [13:38:02<9:08:41, 11.54s/it] {'loss': 1.1514, 'learning_rate': 1.8595237744165168e-06, 'epoch': 0.6} + 60%|█████▉ | 4192/7045 [13:38:02<9:08:41, 11.54s/it] 60%|█████▉ | 4193/7045 [13:38:15<9:27:07, 11.93s/it] {'loss': 1.1118, 'learning_rate': 1.85841278327001e-06, 'epoch': 0.6} + 60%|█████▉ | 4193/7045 [13:38:15<9:27:07, 11.93s/it] 60%|█████▉ | 4194/7045 [13:38:28<9:33:55, 12.08s/it] {'loss': 1.0771, 'learning_rate': 1.8573019277462696e-06, 'epoch': 0.6} + 60%|█████▉ | 4194/7045 [13:38:28<9:33:55, 12.08s/it] 60%|█████▉ | 4195/7045 [13:38:39<9:19:40, 11.78s/it] {'loss': 1.1396, 'learning_rate': 1.8561912080801147e-06, 'epoch': 0.6} + 60%|█████▉ | 4195/7045 [13:38:39<9:19:40, 11.78s/it] 60%|█████▉ | 4196/7045 [13:38:50<9:09:05, 11.56s/it] {'loss': 1.1279, 'learning_rate': 1.855080624506338e-06, 'epoch': 0.6} + 60%|█████▉ | 4196/7045 [13:38:50<9:09:05, 11.56s/it] 60%|█████▉ | 4197/7045 [13:39:01<9:07:51, 11.54s/it] {'loss': 1.166, 'learning_rate': 1.8539701772596995e-06, 'epoch': 0.6} + 60%|█████▉ | 4197/7045 [13:39:01<9:07:51, 11.54s/it] 60%|█████▉ | 4198/7045 [13:39:13<9:05:40, 11.50s/it] {'loss': 1.1338, 'learning_rate': 1.8528598665749338e-06, 'epoch': 0.6} + 60%|█████▉ | 4198/7045 [13:39:13<9:05:40, 11.50s/it] 60%|█████▉ | 4199/7045 [13:39:24<9:03:40, 11.46s/it] {'loss': 1.1133, 'learning_rate': 1.8517496926867451e-06, 'epoch': 0.6} + 60%|█████▉ | 4199/7045 [13:39:24<9:03:40, 11.46s/it] 60%|█████▉ | 4200/7045 [13:39:36<9:10:51, 11.62s/it] {'loss': 1.1162, 'learning_rate': 1.850639655829809e-06, 'epoch': 0.6} + 60%|█████▉ | 4200/7045 [13:39:36<9:10:51, 11.62s/it] 60%|█████▉ | 4201/7045 [13:39:47<9:07:01, 11.54s/it] {'loss': 1.0972, 'learning_rate': 1.8495297562387712e-06, 'epoch': 0.6} + 60%|██���██▉ | 4201/7045 [13:39:47<9:07:01, 11.54s/it] 60%|█████▉ | 4202/7045 [13:39:58<9:01:22, 11.43s/it] {'loss': 1.1152, 'learning_rate': 1.8484199941482512e-06, 'epoch': 0.6} + 60%|█████▉ | 4202/7045 [13:39:58<9:01:22, 11.43s/it] 60%|█████▉ | 4203/7045 [13:40:10<9:05:57, 11.53s/it] {'loss': 1.0938, 'learning_rate': 1.847310369792837e-06, 'epoch': 0.6} + 60%|█████▉ | 4203/7045 [13:40:10<9:05:57, 11.53s/it] 60%|█████▉ | 4204/7045 [13:40:24<9:34:53, 12.14s/it] {'loss': 1.0957, 'learning_rate': 1.8462008834070865e-06, 'epoch': 0.6} + 60%|█████▉ | 4204/7045 [13:40:24<9:34:53, 12.14s/it] 60%|█████▉ | 4205/7045 [13:40:35<9:23:25, 11.90s/it] {'loss': 1.0947, 'learning_rate': 1.8450915352255317e-06, 'epoch': 0.6} + 60%|█████▉ | 4205/7045 [13:40:35<9:23:25, 11.90s/it] 60%|█████▉ | 4206/7045 [13:40:46<9:09:29, 11.61s/it] {'loss': 1.124, 'learning_rate': 1.8439823254826728e-06, 'epoch': 0.6} + 60%|█████▉ | 4206/7045 [13:40:46<9:09:29, 11.61s/it] 60%|█████▉ | 4207/7045 [13:41:00<9:36:20, 12.18s/it] {'loss': 1.1357, 'learning_rate': 1.842873254412982e-06, 'epoch': 0.6} + 60%|█████▉ | 4207/7045 [13:41:00<9:36:20, 12.18s/it] 60%|█████▉ | 4208/7045 [13:41:13<9:49:37, 12.47s/it] {'loss': 1.0762, 'learning_rate': 1.8417643222509014e-06, 'epoch': 0.6} + 60%|█████▉ | 4208/7045 [13:41:13<9:49:37, 12.47s/it] 60%|█████▉ | 4209/7045 [13:41:24<9:26:55, 11.99s/it] {'loss': 1.1201, 'learning_rate': 1.8406555292308437e-06, 'epoch': 0.6} + 60%|█████▉ | 4209/7045 [13:41:24<9:26:55, 11.99s/it] 60%|█████▉ | 4210/7045 [13:41:35<9:19:45, 11.85s/it] {'loss': 1.1465, 'learning_rate': 1.8395468755871938e-06, 'epoch': 0.6} + 60%|█████▉ | 4210/7045 [13:41:35<9:19:45, 11.85s/it] 60%|█████▉ | 4211/7045 [13:41:47<9:13:28, 11.72s/it] {'loss': 1.1582, 'learning_rate': 1.8384383615543045e-06, 'epoch': 0.6} + 60%|█████▉ | 4211/7045 [13:41:47<9:13:28, 11.72s/it] 60%|█████▉ | 4212/7045 [13:41:57<9:01:16, 11.46s/it] {'loss': 1.1348, 'learning_rate': 1.8373299873665018e-06, 'epoch': 0.6} + 60%|█████▉ | 4212/7045 [13:41:57<9:01:16, 11.46s/it] 60%|█████▉ | 4213/7045 [13:42:10<9:18:24, 11.83s/it] {'loss': 1.126, 'learning_rate': 1.83622175325808e-06, 'epoch': 0.6} + 60%|█████▉ | 4213/7045 [13:42:10<9:18:24, 11.83s/it] 60%|█████▉ | 4214/7045 [13:42:22<9:13:06, 11.72s/it] {'loss': 1.1426, 'learning_rate': 1.8351136594633045e-06, 'epoch': 0.6} + 60%|█████▉ | 4214/7045 [13:42:22<9:13:06, 11.72s/it] 60%|█████▉ | 4215/7045 [13:42:35<9:30:36, 12.10s/it] {'loss': 1.062, 'learning_rate': 1.834005706216412e-06, 'epoch': 0.6} + 60%|█████▉ | 4215/7045 [13:42:35<9:30:36, 12.10s/it] 60%|█████▉ | 4216/7045 [13:42:49<10:02:25, 12.78s/it] {'loss': 1.0786, 'learning_rate': 1.8328978937516078e-06, 'epoch': 0.6} + 60%|█████▉ | 4216/7045 [13:42:49<10:02:25, 12.78s/it] 60%|█████▉ | 4217/7045 [13:43:00<9:40:45, 12.32s/it] {'loss': 1.1069, 'learning_rate': 1.8317902223030693e-06, 'epoch': 0.6} + 60%|█████▉ | 4217/7045 [13:43:00<9:40:45, 12.32s/it] 60%|█████▉ | 4218/7045 [13:43:12<9:38:36, 12.28s/it] {'loss': 1.0869, 'learning_rate': 1.8306826921049432e-06, 'epoch': 0.6} + 60%|█████▉ | 4218/7045 [13:43:12<9:38:36, 12.28s/it] 60%|█████▉ | 4219/7045 [13:43:24<9:22:09, 11.94s/it] {'loss': 1.0889, 'learning_rate': 1.8295753033913446e-06, 'epoch': 0.6} + 60%|█████▉ | 4219/7045 [13:43:24<9:22:09, 11.94s/it] 60%|█████▉ | 4220/7045 [13:43:35<9:15:12, 11.79s/it] {'loss': 1.1484, 'learning_rate': 1.8284680563963616e-06, 'epoch': 0.6} + 60%|█████▉ | 4220/7045 [13:43:35<9:15:12, 11.79s/it] 60%|█████▉ | 4221/7045 [13:43:46<9:03:49, 11.55s/it] {'loss': 1.1211, 'learning_rate': 1.8273609513540509e-06, 'epoch': 0.6} + 60%|█████▉ | 4221/7045 [13:43:46<9:03:49, 11.55s/it] 60%|█████▉ | 4222/7045 [13:43:58<9:04:47, 11.58s/it] {'loss': 1.0996, 'learning_rate': 1.8262539884984392e-06, 'epoch': 0.6} + 60%|█████▉ | 4222/7045 [13:43:58<9:04:47, 11.58s/it] 60%|█████▉ | 4223/7045 [13:44:09<8:59:28, 11.47s/it] {'loss': 1.1602, 'learning_rate': 1.8251471680635238e-06, 'epoch': 0.6} + 60%|█████▉ | 4223/7045 [13:44:09<8:59:28, 11.47s/it] 60%|█████▉ | 4224/7045 [13:44:20<9:01:38, 11.52s/it] {'loss': 1.1436, 'learning_rate': 1.8240404902832706e-06, 'epoch': 0.6} + 60%|█████▉ | 4224/7045 [13:44:20<9:01:38, 11.52s/it] 60%|█████▉ | 4225/7045 [13:44:33<9:15:16, 11.81s/it] {'loss': 1.1025, 'learning_rate': 1.8229339553916168e-06, 'epoch': 0.6} + 60%|█████▉ | 4225/7045 [13:44:33<9:15:16, 11.81s/it] 60%|█████▉ | 4226/7045 [13:44:44<9:05:19, 11.61s/it] {'loss': 1.0811, 'learning_rate': 1.8218275636224676e-06, 'epoch': 0.6} + 60%|█████▉ | 4226/7045 [13:44:44<9:05:19, 11.61s/it] 60%|██████ | 4227/7045 [13:44:57<9:22:43, 11.98s/it] {'loss': 1.0605, 'learning_rate': 1.820721315209701e-06, 'epoch': 0.6} + 60%|██████ | 4227/7045 [13:44:57<9:22:43, 11.98s/it] 60%|██████ | 4228/7045 [13:45:08<9:12:15, 11.76s/it] {'loss': 1.1279, 'learning_rate': 1.8196152103871612e-06, 'epoch': 0.6} + 60%|██████ | 4228/7045 [13:45:08<9:12:15, 11.76s/it] 60%|██████ | 4229/7045 [13:45:19<9:02:52, 11.57s/it] {'loss': 1.0898, 'learning_rate': 1.8185092493886653e-06, 'epoch': 0.6} + 60%|██████ | 4229/7045 [13:45:19<9:02:52, 11.57s/it] 60%|██████ | 4230/7045 [13:45:30<8:56:07, 11.43s/it] {'loss': 1.1079, 'learning_rate': 1.8174034324479965e-06, 'epoch': 0.6} + 60%|██████ | 4230/7045 [13:45:30<8:56:07, 11.43s/it] 60%|██████ | 4231/7045 [13:45:41<8:51:02, 11.32s/it] {'loss': 1.1338, 'learning_rate': 1.8162977597989101e-06, 'epoch': 0.6} + 60%|██████ | 4231/7045 [13:45:41<8:51:02, 11.32s/it] 60%|██████ | 4232/7045 [13:45:53<8:49:48, 11.30s/it] {'loss': 1.1348, 'learning_rate': 1.8151922316751302e-06, 'epoch': 0.6} + 60%|██████ | 4232/7045 [13:45:53<8:49:48, 11.30s/it] 60%|██████ | 4233/7045 [13:46:04<8:43:29, 11.17s/it] {'loss': 1.0454, 'learning_rate': 1.8140868483103504e-06, 'epoch': 0.6} + 60%|██████ | 4233/7045 [13:46:04<8:43:29, 11.17s/it] 60%|██████ | 4234/7045 [13:46:15<8:42:12, 11.15s/it] {'loss': 1.1094, 'learning_rate': 1.8129816099382344e-06, 'epoch': 0.6} + 60%|██████ | 4234/7045 [13:46:15<8:42:12, 11.15s/it] 60%|██████ | 4235/7045 [13:46:27<8:57:10, 11.47s/it] {'loss': 1.0947, 'learning_rate': 1.811876516792413e-06, 'epoch': 0.6} + 60%|██████ | 4235/7045 [13:46:27<8:57:10, 11.47s/it] 60%|██████ | 4236/7045 [13:46:38<8:54:44, 11.42s/it] {'loss': 1.0908, 'learning_rate': 1.8107715691064883e-06, 'epoch': 0.6} + 60%|██████ | 4236/7045 [13:46:38<8:54:44, 11.42s/it] 60%|██████ | 4237/7045 [13:46:49<8:50:14, 11.33s/it] {'loss': 1.1299, 'learning_rate': 1.8096667671140317e-06, 'epoch': 0.6} + 60%|██████ | 4237/7045 [13:46:49<8:50:14, 11.33s/it] 60%|██████ | 4238/7045 [13:47:01<8:48:54, 11.31s/it] {'loss': 1.0938, 'learning_rate': 1.8085621110485818e-06, 'epoch': 0.6} + 60%|██████ | 4238/7045 [13:47:01<8:48:54, 11.31s/it] 60%|██████ | 4239/7045 [13:47:12<8:44:36, 11.22s/it] {'loss': 1.0908, 'learning_rate': 1.80745760114365e-06, 'epoch': 0.6} + 60%|██████ | 4239/7045 [13:47:12<8:44:36, 11.22s/it] 60%|██████ | 4240/7045 [13:47:24<9:00:16, 11.56s/it] {'loss': 1.1338, 'learning_rate': 1.8063532376327134e-06, 'epoch': 0.6} + 60%|██████ | 4240/7045 [13:47:24<9:00:16, 11.56s/it] 60%|██████ | 4241/7045 [13:47:35<8:54:29, 11.44s/it] {'loss': 1.126, 'learning_rate': 1.805249020749219e-06, 'epoch': 0.6} + 60%|██████ | 4241/7045 [13:47:35<8:54:29, 11.44s/it] 60%|██████ | 4242/7045 [13:47:48<9:09:58, 11.77s/it] {'loss': 1.0874, 'learning_rate': 1.8041449507265834e-06, 'epoch': 0.6} + 60%|██████ | 4242/7045 [13:47:48<9:09:58, 11.77s/it] 60%|██████ | 4243/7045 [13:47:59<9:01:50, 11.60s/it] {'loss': 1.0615, 'learning_rate': 1.8030410277981924e-06, 'epoch': 0.6} + 60%|██████ | 4243/7045 [13:47:59<9:01:50, 11.60s/it] 60%|██████ | 4244/7045 [13:48:10<8:53:00, 11.42s/it] {'loss': 1.0859, 'learning_rate': 1.8019372521973994e-06, 'epoch': 0.6} + 60%|██████ | 4244/7045 [13:48:10<8:53:00, 11.42s/it] 60%|██████ | 4245/7045 [13:48:22<9:07:25, 11.73s/it] {'loss': 1.1094, 'learning_rate': 1.8008336241575286e-06, 'epoch': 0.6} + 60%|██████ | 4245/7045 [13:48:22<9:07:25, 11.73s/it] 60%|██████ | 4246/7045 [13:48:34<9:04:27, 11.67s/it] {'loss': 1.127, 'learning_rate': 1.7997301439118709e-06, 'epoch': 0.6} + 60%|██████ | 4246/7045 [13:48:34<9:04:27, 11.67s/it] 60%|██████ | 4247/7045 [13:48:46<9:17:17, 11.95s/it] {'loss': 1.125, 'learning_rate': 1.7986268116936874e-06, 'epoch': 0.6} + 60%|██████ | 4247/7045 [13:48:46<9:17:17, 11.95s/it] 60%|██████ | 4248/7045 [13:49:00<9:37:46, 12.39s/it] {'loss': 1.0444, 'learning_rate': 1.7975236277362067e-06, 'epoch': 0.6} + 60%|██████ | 4248/7045 [13:49:00<9:37:46, 12.39s/it] 60%|██████ | 4249/7045 [13:49:11<9:20:11, 12.02s/it] {'loss': 1.0923, 'learning_rate': 1.7964205922726284e-06, 'epoch': 0.6} + 60%|██████ | 4249/7045 [13:49:11<9:20:11, 12.02s/it] 60%|██████ | 4250/7045 [13:49:22<9:05:26, 11.71s/it] {'loss': 1.1279, 'learning_rate': 1.7953177055361163e-06, 'epoch': 0.6} + 60%|██████ | 4250/7045 [13:49:22<9:05:26, 11.71s/it] 60%|██████ | 4251/7045 [13:49:36<9:34:17, 12.33s/it] {'loss': 1.0596, 'learning_rate': 1.794214967759809e-06, 'epoch': 0.6} + 60%|██████ | 4251/7045 [13:49:36<9:34:17, 12.33s/it] 60%|██████ | 4252/7045 [13:49:47<9:14:29, 11.91s/it] {'loss': 1.0918, 'learning_rate': 1.793112379176808e-06, 'epoch': 0.6} + 60%|██████ | 4252/7045 [13:49:47<9:14:29, 11.91s/it] 60%|██████ | 4253/7045 [13:49:58<8:59:28, 11.59s/it] {'loss': 1.1123, 'learning_rate': 1.7920099400201859e-06, 'epoch': 0.6} + 60%|██████ | 4253/7045 [13:49:58<8:59:28, 11.59s/it] 60%|██████ | 4254/7045 [13:50:09<9:02:30, 11.66s/it] {'loss': 1.126, 'learning_rate': 1.7909076505229839e-06, 'epoch': 0.6} + 60%|██████ | 4254/7045 [13:50:09<9:02:30, 11.66s/it] 60%|██████ | 4255/7045 [13:50:21<8:59:06, 11.59s/it] {'loss': 1.1016, 'learning_rate': 1.7898055109182105e-06, 'epoch': 0.6} + 60%|██████ | 4255/7045 [13:50:21<8:59:06, 11.59s/it] 60%|██████ | 4256/7045 [13:50:32<8:54:07, 11.49s/it] {'loss': 1.1562, 'learning_rate': 1.7887035214388424e-06, 'epoch': 0.6} + 60%|██████ | 4256/7045 [13:50:32<8:54:07, 11.49s/it] 60%|██████ | 4257/7045 [13:50:43<8:47:55, 11.36s/it] {'loss': 1.0928, 'learning_rate': 1.7876016823178254e-06, 'epoch': 0.6} + 60%|██████ | 4257/7045 [13:50:43<8:47:55, 11.36s/it] 60%|██████ | 4258/7045 [13:50:55<8:48:18, 11.37s/it] {'loss': 1.123, 'learning_rate': 1.786499993788074e-06, 'epoch': 0.6} + 60%|██████ | 4258/7045 [13:50:55<8:48:18, 11.37s/it] 60%|██████ | 4259/7045 [13:51:07<9:03:07, 11.70s/it] {'loss': 1.1221, 'learning_rate': 1.7853984560824692e-06, 'epoch': 0.6} + 60%|██████ | 4259/7045 [13:51:07<9:03:07, 11.70s/it] 60%|██████ | 4260/7045 [13:51:18<8:55:03, 11.53s/it] {'loss': 1.1748, 'learning_rate': 1.7842970694338618e-06, 'epoch': 0.6} + 60%|██████ | 4260/7045 [13:51:18<8:55:03, 11.53s/it] 60%|██████ | 4261/7045 [13:51:30<8:58:11, 11.60s/it] {'loss': 1.1162, 'learning_rate': 1.7831958340750688e-06, 'epoch': 0.6} + 60%|██████ | 4261/7045 [13:51:30<8:58:11, 11.60s/it] 60%|██████ | 4262/7045 [13:51:41<8:54:25, 11.52s/it] {'loss': 1.0957, 'learning_rate': 1.7820947502388763e-06, 'epoch': 0.6} + 60%|██████ | 4262/7045 [13:51:41<8:54:25, 11.52s/it] 61%|██████ | 4263/7045 [13:51:55<9:24:31, 12.18s/it] {'loss': 1.1128, 'learning_rate': 1.78099381815804e-06, 'epoch': 0.61} + 61%|██████ | 4263/7045 [13:51:55<9:24:31, 12.18s/it] 61%|██████ | 4264/7045 [13:52:06<9:10:46, 11.88s/it] {'loss': 1.085, 'learning_rate': 1.77989303806528e-06, 'epoch': 0.61} + 61%|██████ | 4264/7045 [13:52:06<9:10:46, 11.88s/it] 61%|██████ | 4265/7045 [13:52:17<9:00:45, 11.67s/it] {'loss': 1.1387, 'learning_rate': 1.778792410193288e-06, 'epoch': 0.61} + 61%|██████ | 4265/7045 [13:52:17<9:00:45, 11.67s/it] 61%|██████ | 4266/7045 [13:52:30<9:07:55, 11.83s/it] {'loss': 1.0942, 'learning_rate': 1.7776919347747206e-06, 'epoch': 0.61} + 61%|██████ | 4266/7045 [13:52:30<9:07:55, 11.83s/it] 61%|██████ | 4267/7045 [13:52:43<9:29:30, 12.30s/it] {'loss': 1.1113, 'learning_rate': 1.7765916120422031e-06, 'epoch': 0.61} + 61%|██████ | 4267/7045 [13:52:43<9:29:30, 12.30s/it] 61%|██████ | 4268/7045 [13:52:56<9:36:20, 12.45s/it] {'loss': 1.0513, 'learning_rate': 1.7754914422283286e-06, 'epoch': 0.61} + 61%|██████ | 4268/7045 [13:52:56<9:36:20, 12.45s/it] 61%|██████ | 4269/7045 [13:53:07<9:19:36, 12.10s/it] {'loss': 1.124, 'learning_rate': 1.7743914255656586e-06, 'epoch': 0.61} + 61%|██████ | 4269/7045 [13:53:07<9:19:36, 12.10s/it] 61%|██████ | 4270/7045 [13:53:20<9:37:33, 12.49s/it] {'loss': 1.1201, 'learning_rate': 1.7732915622867214e-06, 'epoch': 0.61} + 61%|██████ | 4270/7045 [13:53:20<9:37:33, 12.49s/it] 61%|██████ | 4271/7045 [13:53:31<9:17:32, 12.06s/it] {'loss': 1.0713, 'learning_rate': 1.772191852624013e-06, 'epoch': 0.61} + 61%|██████ | 4271/7045 [13:53:31<9:17:32, 12.06s/it] 61%|██████ | 4272/7045 [13:53:43<9:06:41, 11.83s/it] {'loss': 1.0972, 'learning_rate': 1.771092296809997e-06, 'epoch': 0.61} + 61%|██████ | 4272/7045 [13:53:43<9:06:41, 11.83s/it] 61%|██████ | 4273/7045 [13:53:55<9:16:33, 12.05s/it] {'loss': 1.0737, 'learning_rate': 1.7699928950771039e-06, 'epoch': 0.61} + 61%|██████ | 4273/7045 [13:53:55<9:16:33, 12.05s/it] 61%|██████ | 4274/7045 [13:54:06<9:01:16, 11.72s/it] {'loss': 1.126, 'learning_rate': 1.7688936476577328e-06, 'epoch': 0.61} + 61%|██████ | 4274/7045 [13:54:06<9:01:16, 11.72s/it] 61%|██████ | 4275/7045 [13:54:18<8:54:59, 11.59s/it] {'loss': 1.1367, 'learning_rate': 1.7677945547842494e-06, 'epoch': 0.61} + 61%|██████ | 4275/7045 [13:54:18<8:54:59, 11.59s/it] 61%|██████ | 4276/7045 [13:54:30<9:10:14, 11.92s/it] {'loss': 1.0884, 'learning_rate': 1.7666956166889871e-06, 'epoch': 0.61} + 61%|██████ | 4276/7045 [13:54:30<9:10:14, 11.92s/it] 61%|██████ | 4277/7045 [13:54:41<8:58:57, 11.68s/it] {'loss': 1.1367, 'learning_rate': 1.7655968336042468e-06, 'epoch': 0.61} + 61%|██████ | 4277/7045 [13:54:41<8:58:57, 11.68s/it] 61%|██████ | 4278/7045 [13:54:54<9:08:36, 11.90s/it] {'loss': 1.1191, 'learning_rate': 1.764498205762295e-06, 'epoch': 0.61} + 61%|██████ | 4278/7045 [13:54:54<9:08:36, 11.90s/it] 61%|██████ | 4279/7045 [13:55:06<9:10:08, 11.93s/it] {'loss': 1.0879, 'learning_rate': 1.7633997333953678e-06, 'epoch': 0.61} + 61%|██████ | 4279/7045 [13:55:06<9:10:08, 11.93s/it] 61%|██████ | 4280/7045 [13:55:17<9:03:14, 11.79s/it] {'loss': 1.0781, 'learning_rate': 1.7623014167356664e-06, 'epoch': 0.61} + 61%|██████ | 4280/7045 [13:55:17<9:03:14, 11.79s/it] 61%|██████ | 4281/7045 [13:55:31<9:26:09, 12.29s/it] {'loss': 1.1025, 'learning_rate': 1.7612032560153602e-06, 'epoch': 0.61} + 61%|██████ | 4281/7045 [13:55:31<9:26:09, 12.29s/it] 61%|██████ | 4282/7045 [13:55:43<9:27:27, 12.32s/it] {'loss': 1.1123, 'learning_rate': 1.7601052514665862e-06, 'epoch': 0.61} + 61%|██████ | 4282/7045 [13:55:43<9:27:27, 12.32s/it] 61%|██████ | 4283/7045 [13:55:54<9:14:02, 12.04s/it] {'loss': 1.1113, 'learning_rate': 1.7590074033214461e-06, 'epoch': 0.61} + 61%|██████ | 4283/7045 [13:55:54<9:14:02, 12.04s/it] 61%|██████ | 4284/7045 [13:56:06<9:02:09, 11.78s/it] {'loss': 1.123, 'learning_rate': 1.7579097118120108e-06, 'epoch': 0.61} + 61%|██████ | 4284/7045 [13:56:06<9:02:09, 11.78s/it] 61%|██████ | 4285/7045 [13:56:17<8:53:34, 11.60s/it] {'loss': 1.1455, 'learning_rate': 1.7568121771703172e-06, 'epoch': 0.61} + 61%|██████ | 4285/7045 [13:56:17<8:53:34, 11.60s/it] 61%|██████ | 4286/7045 [13:56:30<9:16:30, 12.10s/it] {'loss': 1.1055, 'learning_rate': 1.7557147996283695e-06, 'epoch': 0.61} + 61%|██████ | 4286/7045 [13:56:30<9:16:30, 12.10s/it] 61%|██████ | 4287/7045 [13:56:41<9:01:45, 11.79s/it] {'loss': 1.1123, 'learning_rate': 1.7546175794181362e-06, 'epoch': 0.61} + 61%|██████ | 4287/7045 [13:56:41<9:01:45, 11.79s/it] 61%|██████ | 4288/7045 [13:56:52<8:53:12, 11.60s/it] {'loss': 1.1357, 'learning_rate': 1.753520516771558e-06, 'epoch': 0.61} + 61%|██████ | 4288/7045 [13:56:52<8:53:12, 11.60s/it] 61%|██████ | 4289/7045 [13:57:04<8:55:44, 11.66s/it] {'loss': 1.1104, 'learning_rate': 1.7524236119205367e-06, 'epoch': 0.61} + 61%|██████ | 4289/7045 [13:57:04<8:55:44, 11.66s/it] 61%|██████ | 4290/7045 [13:57:15<8:48:51, 11.52s/it] {'loss': 1.0977, 'learning_rate': 1.7513268650969434e-06, 'epoch': 0.61} + 61%|██████ | 4290/7045 [13:57:15<8:48:51, 11.52s/it] 61%|██████ | 4291/7045 [13:57:28<8:58:45, 11.74s/it] {'loss': 1.0835, 'learning_rate': 1.7502302765326157e-06, 'epoch': 0.61} + 61%|██████ | 4291/7045 [13:57:28<8:58:45, 11.74s/it] 61%|██████ | 4292/7045 [13:57:39<8:52:48, 11.61s/it] {'loss': 1.085, 'learning_rate': 1.7491338464593577e-06, 'epoch': 0.61} + 61%|██████ | 4292/7045 [13:57:39<8:52:48, 11.61s/it] 61%|██████ | 4293/7045 [13:57:50<8:44:45, 11.44s/it] {'loss': 1.125, 'learning_rate': 1.7480375751089393e-06, 'epoch': 0.61} + 61%|██████ | 4293/7045 [13:57:50<8:44:45, 11.44s/it] 61%|██████ | 4294/7045 [13:58:03<9:01:02, 11.80s/it] {'loss': 1.1157, 'learning_rate': 1.7469414627130971e-06, 'epoch': 0.61} + 61%|██████ | 4294/7045 [13:58:03<9:01:02, 11.80s/it] 61%|██████ | 4295/7045 [13:58:14<9:01:27, 11.81s/it] {'loss': 1.0771, 'learning_rate': 1.745845509503534e-06, 'epoch': 0.61} + 61%|██████ | 4295/7045 [13:58:14<9:01:27, 11.81s/it] 61%|██████ | 4296/7045 [13:58:27<9:13:58, 12.09s/it] {'loss': 1.1221, 'learning_rate': 1.7447497157119202e-06, 'epoch': 0.61} + 61%|██████ | 4296/7045 [13:58:27<9:13:58, 12.09s/it] 61%|██████ | 4297/7045 [13:58:38<8:58:22, 11.76s/it] {'loss': 1.1406, 'learning_rate': 1.7436540815698921e-06, 'epoch': 0.61} + 61%|██████ | 4297/7045 [13:58:38<8:58:22, 11.76s/it] 61%|██████ | 4298/7045 [13:58:49<8:47:54, 11.53s/it] {'loss': 1.0864, 'learning_rate': 1.74255860730905e-06, 'epoch': 0.61} + 61%|██████ | 4298/7045 [13:58:49<8:47:54, 11.53s/it] 61%|██████ | 4299/7045 [13:59:00<8:44:46, 11.47s/it] {'loss': 1.1318, 'learning_rate': 1.7414632931609633e-06, 'epoch': 0.61} + 61%|██████ | 4299/7045 [13:59:00<8:44:46, 11.47s/it] 61%|██████ | 4300/7045 [13:59:11<8:39:24, 11.35s/it] {'loss': 1.1299, 'learning_rate': 1.740368139357166e-06, 'epoch': 0.61} + 61%|██████ | 4300/7045 [13:59:11<8:39:24, 11.35s/it] 61%|██████ | 4301/7045 [13:59:23<8:37:43, 11.32s/it] {'loss': 1.1108, 'learning_rate': 1.7392731461291596e-06, 'epoch': 0.61} + 61%|██████ | 4301/7045 [13:59:23<8:37:43, 11.32s/it] 61%|██████ | 4302/7045 [13:59:34<8:40:09, 11.38s/it] {'loss': 1.084, 'learning_rate': 1.7381783137084098e-06, 'epoch': 0.61} + 61%|██████ | 4302/7045 [13:59:34<8:40:09, 11.38s/it] 61%|██████ | 4303/7045 [13:59:45<8:32:30, 11.21s/it] {'loss': 1.1206, 'learning_rate': 1.7370836423263495e-06, 'epoch': 0.61} + 61%|██████ | 4303/7045 [13:59:45<8:32:30, 11.21s/it] 61%|██████ | 4304/7045 [13:59:57<8:41:21, 11.41s/it] {'loss': 1.0845, 'learning_rate': 1.7359891322143778e-06, 'epoch': 0.61} + 61%|██████ | 4304/7045 [13:59:57<8:41:21, 11.41s/it] 61%|██████ | 4305/7045 [14:00:10<9:04:53, 11.93s/it] {'loss': 1.0898, 'learning_rate': 1.7348947836038582e-06, 'epoch': 0.61} + 61%|██████ | 4305/7045 [14:00:10<9:04:53, 11.93s/it] 61%|██████ | 4306/7045 [14:00:21<8:55:18, 11.73s/it] {'loss': 1.1045, 'learning_rate': 1.7338005967261212e-06, 'epoch': 0.61} + 61%|██████ | 4306/7045 [14:00:21<8:55:18, 11.73s/it] 61%|██████ | 4307/7045 [14:00:33<8:56:27, 11.76s/it] {'loss': 1.0889, 'learning_rate': 1.732706571812463e-06, 'epoch': 0.61} + 61%|██████ | 4307/7045 [14:00:33<8:56:27, 11.76s/it] 61%|██████ | 4308/7045 [14:00:45<8:59:28, 11.83s/it] {'loss': 1.1172, 'learning_rate': 1.7316127090941466e-06, 'epoch': 0.61} + 61%|██████ | 4308/7045 [14:00:45<8:59:28, 11.83s/it] 61%|██████ | 4309/7045 [14:00:56<8:50:26, 11.63s/it] {'loss': 1.124, 'learning_rate': 1.730519008802398e-06, 'epoch': 0.61} + 61%|██████ | 4309/7045 [14:00:56<8:50:26, 11.63s/it] 61%|██████ | 4310/7045 [14:01:08<8:45:41, 11.53s/it] {'loss': 1.1055, 'learning_rate': 1.729425471168411e-06, 'epoch': 0.61} + 61%|██████ | 4310/7045 [14:01:08<8:45:41, 11.53s/it] 61%|██████ | 4311/7045 [14:01:19<8:41:13, 11.44s/it] {'loss': 1.1113, 'learning_rate': 1.7283320964233447e-06, 'epoch': 0.61} + 61%|██████ | 4311/7045 [14:01:19<8:41:13, 11.44s/it] 61%|██████ | 4312/7045 [14:01:30<8:31:41, 11.23s/it] {'loss': 1.1006, 'learning_rate': 1.7272388847983237e-06, 'epoch': 0.61} + 61%|██████ | 4312/7045 [14:01:30<8:31:41, 11.23s/it] 61%|██████ | 4313/7045 [14:01:41<8:31:30, 11.23s/it] {'loss': 1.0439, 'learning_rate': 1.726145836524438e-06, 'epoch': 0.61} + 61%|██████ | 4313/7045 [14:01:41<8:31:30, 11.23s/it] 61%|██████ | 4314/7045 [14:01:53<8:39:44, 11.42s/it] {'loss': 1.0947, 'learning_rate': 1.725052951832743e-06, 'epoch': 0.61} + 61%|██████ | 4314/7045 [14:01:53<8:39:44, 11.42s/it] 61%|██████ | 4315/7045 [14:02:04<8:40:48, 11.45s/it] {'loss': 1.0947, 'learning_rate': 1.7239602309542598e-06, 'epoch': 0.61} + 61%|██████ | 4315/7045 [14:02:04<8:40:48, 11.45s/it] 61%|██████▏ | 4316/7045 [14:02:17<8:54:56, 11.76s/it] {'loss': 1.127, 'learning_rate': 1.722867674119974e-06, 'epoch': 0.61} + 61%|██████▏ | 4316/7045 [14:02:17<8:54:56, 11.76s/it] 61%|██████▏ | 4317/7045 [14:02:28<8:47:49, 11.61s/it] {'loss': 1.1367, 'learning_rate': 1.7217752815608377e-06, 'epoch': 0.61} + 61%|██████▏ | 4317/7045 [14:02:28<8:47:49, 11.61s/it] 61%|██████▏ | 4318/7045 [14:02:39<8:39:46, 11.44s/it] {'loss': 1.0879, 'learning_rate': 1.7206830535077674e-06, 'epoch': 0.61} + 61%|██████▏ | 4318/7045 [14:02:39<8:39:46, 11.44s/it] 61%|██████▏ | 4319/7045 [14:02:52<8:59:23, 11.87s/it] {'loss': 1.1196, 'learning_rate': 1.719590990191646e-06, 'epoch': 0.61} + 61%|██████▏ | 4319/7045 [14:02:52<8:59:23, 11.87s/it] 61%|██████▏ | 4320/7045 [14:03:03<8:47:57, 11.62s/it] {'loss': 1.0996, 'learning_rate': 1.7184990918433197e-06, 'epoch': 0.61} + 61%|██████▏ | 4320/7045 [14:03:03<8:47:57, 11.62s/it] 61%|██████▏ | 4321/7045 [14:03:14<8:43:45, 11.54s/it] {'loss': 1.126, 'learning_rate': 1.717407358693601e-06, 'epoch': 0.61} + 61%|██████▏ | 4321/7045 [14:03:14<8:43:45, 11.54s/it] 61%|██████▏ | 4322/7045 [14:03:26<8:49:28, 11.67s/it] {'loss': 1.1001, 'learning_rate': 1.716315790973268e-06, 'epoch': 0.61} + 61%|██████▏ | 4322/7045 [14:03:26<8:49:28, 11.67s/it] 61%|██████▏ | 4323/7045 [14:03:37<8:40:28, 11.47s/it] {'loss': 1.127, 'learning_rate': 1.715224388913062e-06, 'epoch': 0.61} + 61%|██████▏ | 4323/7045 [14:03:37<8:40:28, 11.47s/it] 61%|██████▏ | 4324/7045 [14:03:49<8:40:58, 11.49s/it] {'loss': 1.1338, 'learning_rate': 1.7141331527436922e-06, 'epoch': 0.61} + 61%|██████▏ | 4324/7045 [14:03:49<8:40:58, 11.49s/it] 61%|██████▏ | 4325/7045 [14:04:00<8:33:55, 11.34s/it] {'loss': 1.1152, 'learning_rate': 1.7130420826958303e-06, 'epoch': 0.61} + 61%|██████▏ | 4325/7045 [14:04:00<8:33:55, 11.34s/it] 61%|██████▏ | 4326/7045 [14:04:11<8:29:53, 11.25s/it] {'loss': 1.124, 'learning_rate': 1.7119511790001136e-06, 'epoch': 0.61} + 61%|██████▏ | 4326/7045 [14:04:11<8:29:53, 11.25s/it] 61%|██████▏ | 4327/7045 [14:04:22<8:27:48, 11.21s/it] {'loss': 1.1094, 'learning_rate': 1.7108604418871437e-06, 'epoch': 0.61} + 61%|██████▏ | 4327/7045 [14:04:22<8:27:48, 11.21s/it] 61%|██████▏ | 4328/7045 [14:04:33<8:26:32, 11.19s/it] {'loss': 1.1333, 'learning_rate': 1.7097698715874877e-06, 'epoch': 0.61} + 61%|██████▏ | 4328/7045 [14:04:33<8:26:32, 11.19s/it] 61%|██████▏ | 4329/7045 [14:04:46<8:44:04, 11.58s/it] {'loss': 1.0747, 'learning_rate': 1.7086794683316777e-06, 'epoch': 0.61} + 61%|██████▏ | 4329/7045 [14:04:46<8:44:04, 11.58s/it] 61%|██████▏ | 4330/7045 [14:04:59<9:04:45, 12.04s/it] {'loss': 1.0908, 'learning_rate': 1.7075892323502103e-06, 'epoch': 0.61} + 61%|██████▏ | 4330/7045 [14:04:59<9:04:45, 12.04s/it] 61%|██████▏ | 4331/7045 [14:05:10<8:58:08, 11.90s/it] {'loss': 1.1533, 'learning_rate': 1.7064991638735454e-06, 'epoch': 0.61} + 61%|██████▏ | 4331/7045 [14:05:10<8:58:08, 11.90s/it] 61%|██████▏ | 4332/7045 [14:05:22<8:53:18, 11.79s/it] {'loss': 1.1045, 'learning_rate': 1.705409263132109e-06, 'epoch': 0.61} + 61%|██████▏ | 4332/7045 [14:05:22<8:53:18, 11.79s/it] 62%|██████▏ | 4333/7045 [14:05:33<8:45:29, 11.63s/it] {'loss': 1.1045, 'learning_rate': 1.7043195303562915e-06, 'epoch': 0.62} + 62%|██████▏ | 4333/7045 [14:05:33<8:45:29, 11.63s/it] 62%|██████▏ | 4334/7045 [14:05:44<8:37:17, 11.45s/it] {'loss': 1.1055, 'learning_rate': 1.7032299657764478e-06, 'epoch': 0.62} + 62%|██████▏ | 4334/7045 [14:05:44<8:37:17, 11.45s/it] 62%|██████▏ | 4335/7045 [14:05:57<8:56:27, 11.88s/it] {'loss': 1.082, 'learning_rate': 1.7021405696228952e-06, 'epoch': 0.62} + 62%|██████▏ | 4335/7045 [14:05:57<8:56:27, 11.88s/it] 62%|██████▏ | 4336/7045 [14:06:08<8:44:54, 11.63s/it] {'loss': 1.0923, 'learning_rate': 1.7010513421259201e-06, 'epoch': 0.62} + 62%|██████▏ | 4336/7045 [14:06:08<8:44:54, 11.63s/it] 62%|██████▏ | 4337/7045 [14:06:19<8:39:49, 11.52s/it] {'loss': 1.1118, 'learning_rate': 1.6999622835157676e-06, 'epoch': 0.62} + 62%|██████▏ | 4337/7045 [14:06:19<8:39:49, 11.52s/it] 62%|██████▏ | 4338/7045 [14:06:31<8:39:50, 11.52s/it] {'loss': 1.1333, 'learning_rate': 1.6988733940226511e-06, 'epoch': 0.62} + 62%|██████▏ | 4338/7045 [14:06:31<8:39:50, 11.52s/it] 62%|██████▏ | 4339/7045 [14:06:42<8:36:03, 11.44s/it] {'loss': 1.0977, 'learning_rate': 1.697784673876747e-06, 'epoch': 0.62} + 62%|██████▏ | 4339/7045 [14:06:42<8:36:03, 11.44s/it] 62%|██████▏ | 4340/7045 [14:06:53<8:31:31, 11.35s/it] {'loss': 1.0742, 'learning_rate': 1.6966961233081953e-06, 'epoch': 0.62} + 62%|██████▏ | 4340/7045 [14:06:53<8:31:31, 11.35s/it] 62%|██████▏ | 4341/7045 [14:07:04<8:30:32, 11.33s/it] {'loss': 1.0801, 'learning_rate': 1.6956077425471018e-06, 'epoch': 0.62} + 62%|██████▏ | 4341/7045 [14:07:04<8:30:32, 11.33s/it] 62%|██████▏ | 4342/7045 [14:07:15<8:25:51, 11.23s/it] {'loss': 1.1162, 'learning_rate': 1.6945195318235336e-06, 'epoch': 0.62} + 62%|██████▏ | 4342/7045 [14:07:15<8:25:51, 11.23s/it] 62%|██████▏ | 4343/7045 [14:07:27<8:28:13, 11.29s/it] {'loss': 1.1011, 'learning_rate': 1.6934314913675248e-06, 'epoch': 0.62} + 62%|██████▏ | 4343/7045 [14:07:27<8:28:13, 11.29s/it] 62%|██████▏ | 4344/7045 [14:07:39<8:36:57, 11.48s/it] {'loss': 1.0488, 'learning_rate': 1.6923436214090718e-06, 'epoch': 0.62} + 62%|██████▏ | 4344/7045 [14:07:39<8:36:57, 11.48s/it] 62%|██████▏ | 4345/7045 [14:07:51<8:48:17, 11.74s/it] {'loss': 1.0977, 'learning_rate': 1.6912559221781363e-06, 'epoch': 0.62} + 62%|██████▏ | 4345/7045 [14:07:51<8:48:17, 11.74s/it] 62%|██████▏ | 4346/7045 [14:08:02<8:39:44, 11.55s/it] {'loss': 1.0977, 'learning_rate': 1.6901683939046415e-06, 'epoch': 0.62} + 62%|██████▏ | 4346/7045 [14:08:02<8:39:44, 11.55s/it] 62%|██████▏ | 4347/7045 [14:08:14<8:43:18, 11.64s/it] {'loss': 1.1162, 'learning_rate': 1.6890810368184774e-06, 'epoch': 0.62} + 62%|██████▏ | 4347/7045 [14:08:14<8:43:18, 11.64s/it] 62%|██████▏ | 4348/7045 [14:08:25<8:38:33, 11.54s/it] {'loss': 1.1289, 'learning_rate': 1.6879938511494948e-06, 'epoch': 0.62} + 62%|██████▏ | 4348/7045 [14:08:25<8:38:33, 11.54s/it] 62%|██████▏ | 4349/7045 [14:08:36<8:30:08, 11.35s/it] {'loss': 1.0933, 'learning_rate': 1.6869068371275115e-06, 'epoch': 0.62} + 62%|██████▏ | 4349/7045 [14:08:36<8:30:08, 11.35s/it] 62%|██████▏ | 4350/7045 [14:08:47<8:26:52, 11.28s/it] {'loss': 1.1465, 'learning_rate': 1.6858199949823072e-06, 'epoch': 0.62} + 62%|██████▏ | 4350/7045 [14:08:47<8:26:52, 11.28s/it] 62%|██████▏ | 4351/7045 [14:08:58<8:23:17, 11.21s/it] {'loss': 1.1416, 'learning_rate': 1.6847333249436249e-06, 'epoch': 0.62} + 62%|██████▏ | 4351/7045 [14:08:58<8:23:17, 11.21s/it] 62%|██████▏ | 4352/7045 [14:09:10<8:32:03, 11.41s/it] {'loss': 1.1016, 'learning_rate': 1.6836468272411727e-06, 'epoch': 0.62} + 62%|██████▏ | 4352/7045 [14:09:10<8:32:03, 11.41s/it] 62%|██████▏ | 4353/7045 [14:09:24<8:57:59, 11.99s/it] {'loss': 1.041, 'learning_rate': 1.6825605021046199e-06, 'epoch': 0.62} + 62%|██████▏ | 4353/7045 [14:09:24<8:57:59, 11.99s/it] 62%|██████▏ | 4354/7045 [14:09:35<8:44:14, 11.69s/it] {'loss': 1.1172, 'learning_rate': 1.6814743497636018e-06, 'epoch': 0.62} + 62%|██████▏ | 4354/7045 [14:09:35<8:44:14, 11.69s/it] 62%|██████▏ | 4355/7045 [14:09:47<8:50:02, 11.82s/it] {'loss': 1.123, 'learning_rate': 1.6803883704477158e-06, 'epoch': 0.62} + 62%|██████▏ | 4355/7045 [14:09:47<8:50:02, 11.82s/it] 62%|██████▏ | 4356/7045 [14:09:58<8:42:44, 11.66s/it] {'loss': 1.1182, 'learning_rate': 1.6793025643865242e-06, 'epoch': 0.62} + 62%|██████▏ | 4356/7045 [14:09:58<8:42:44, 11.66s/it] 62%|██████▏ | 4357/7045 [14:10:10<8:47:19, 11.77s/it] {'loss': 1.1021, 'learning_rate': 1.6782169318095492e-06, 'epoch': 0.62} + 62%|██████▏ | 4357/7045 [14:10:10<8:47:19, 11.77s/it] 62%|██████▏ | 4358/7045 [14:10:22<8:42:59, 11.68s/it] {'loss': 1.1162, 'learning_rate': 1.6771314729462807e-06, 'epoch': 0.62} + 62%|██████▏ | 4358/7045 [14:10:22<8:42:59, 11.68s/it] 62%|██████▏ | 4359/7045 [14:10:35<9:01:07, 12.09s/it] {'loss': 1.0986, 'learning_rate': 1.6760461880261688e-06, 'epoch': 0.62} + 62%|██████▏ | 4359/7045 [14:10:35<9:01:07, 12.09s/it] 62%|██████▏ | 4360/7045 [14:10:47<9:11:18, 12.32s/it] {'loss': 1.0879, 'learning_rate': 1.6749610772786279e-06, 'epoch': 0.62} + 62%|██████▏ | 4360/7045 [14:10:47<9:11:18, 12.32s/it] 62%|██████▏ | 4361/7045 [14:10:59<8:56:44, 12.00s/it] {'loss': 1.1143, 'learning_rate': 1.6738761409330367e-06, 'epoch': 0.62} + 62%|██████▏ | 4361/7045 [14:10:59<8:56:44, 12.00s/it] 62%|██████▏ | 4362/7045 [14:11:10<8:49:22, 11.84s/it] {'loss': 1.0918, 'learning_rate': 1.672791379218735e-06, 'epoch': 0.62} + 62%|██████▏ | 4362/7045 [14:11:10<8:49:22, 11.84s/it] 62%|██████▏ | 4363/7045 [14:11:21<8:32:44, 11.47s/it] {'loss': 1.1064, 'learning_rate': 1.6717067923650275e-06, 'epoch': 0.62} + 62%|██████▏ | 4363/7045 [14:11:21<8:32:44, 11.47s/it] 62%|██████▏ | 4364/7045 [14:11:32<8:28:53, 11.39s/it] {'loss': 1.1416, 'learning_rate': 1.6706223806011795e-06, 'epoch': 0.62} + 62%|██████▏ | 4364/7045 [14:11:32<8:28:53, 11.39s/it] 62%|██████▏ | 4365/7045 [14:11:45<8:47:17, 11.81s/it] {'loss': 1.0908, 'learning_rate': 1.6695381441564217e-06, 'epoch': 0.62} + 62%|██████▏ | 4365/7045 [14:11:45<8:47:17, 11.81s/it] 62%|██████▏ | 4366/7045 [14:11:56<8:33:49, 11.51s/it] {'loss': 1.1133, 'learning_rate': 1.6684540832599472e-06, 'epoch': 0.62} + 62%|██████▏ | 4366/7045 [14:11:56<8:33:49, 11.51s/it] 62%|██████▏ | 4367/7045 [14:12:07<8:27:31, 11.37s/it] {'loss': 1.126, 'learning_rate': 1.6673701981409114e-06, 'epoch': 0.62} + 62%|██████▏ | 4367/7045 [14:12:07<8:27:31, 11.37s/it] 62%|██████▏ | 4368/7045 [14:12:18<8:23:36, 11.29s/it] {'loss': 1.0908, 'learning_rate': 1.666286489028432e-06, 'epoch': 0.62} + 62%|██████▏ | 4368/7045 [14:12:18<8:23:36, 11.29s/it] 62%|██████▏ | 4369/7045 [14:12:29<8:20:07, 11.21s/it] {'loss': 1.165, 'learning_rate': 1.6652029561515915e-06, 'epoch': 0.62} + 62%|██████▏ | 4369/7045 [14:12:29<8:20:07, 11.21s/it] 62%|██████▏ | 4370/7045 [14:12:40<8:19:23, 11.20s/it] {'loss': 1.1357, 'learning_rate': 1.6641195997394328e-06, 'epoch': 0.62} + 62%|██████▏ | 4370/7045 [14:12:40<8:19:23, 11.20s/it] 62%|██████▏ | 4371/7045 [14:12:51<8:18:33, 11.19s/it] {'loss': 1.1143, 'learning_rate': 1.6630364200209643e-06, 'epoch': 0.62} + 62%|██████▏ | 4371/7045 [14:12:51<8:18:33, 11.19s/it] 62%|█████���▏ | 4372/7045 [14:13:02<8:18:06, 11.18s/it] {'loss': 1.1572, 'learning_rate': 1.6619534172251522e-06, 'epoch': 0.62} + 62%|██████▏ | 4372/7045 [14:13:02<8:18:06, 11.18s/it] 62%|██████▏ | 4373/7045 [14:13:13<8:13:05, 11.07s/it] {'loss': 1.1094, 'learning_rate': 1.660870591580932e-06, 'epoch': 0.62} + 62%|██████▏ | 4373/7045 [14:13:13<8:13:05, 11.07s/it] 62%|██████▏ | 4374/7045 [14:13:24<8:16:43, 11.16s/it] {'loss': 1.105, 'learning_rate': 1.659787943317197e-06, 'epoch': 0.62} + 62%|██████▏ | 4374/7045 [14:13:24<8:16:43, 11.16s/it] 62%|██████▏ | 4375/7045 [14:13:36<8:15:54, 11.14s/it] {'loss': 1.1309, 'learning_rate': 1.6587054726628038e-06, 'epoch': 0.62} + 62%|██████▏ | 4375/7045 [14:13:36<8:15:54, 11.14s/it] 62%|██████▏ | 4376/7045 [14:13:47<8:16:41, 11.17s/it] {'loss': 1.1494, 'learning_rate': 1.6576231798465719e-06, 'epoch': 0.62} + 62%|██████▏ | 4376/7045 [14:13:47<8:16:41, 11.17s/it] 62%|██████▏ | 4377/7045 [14:14:00<8:41:37, 11.73s/it] {'loss': 1.1118, 'learning_rate': 1.6565410650972838e-06, 'epoch': 0.62} + 62%|██████▏ | 4377/7045 [14:14:00<8:41:37, 11.73s/it] 62%|██████▏ | 4378/7045 [14:14:13<8:57:51, 12.10s/it] {'loss': 1.125, 'learning_rate': 1.6554591286436839e-06, 'epoch': 0.62} + 62%|██████▏ | 4378/7045 [14:14:13<8:57:51, 12.10s/it] 62%|██████▏ | 4379/7045 [14:14:24<8:46:46, 11.86s/it] {'loss': 1.104, 'learning_rate': 1.6543773707144779e-06, 'epoch': 0.62} + 62%|██████▏ | 4379/7045 [14:14:24<8:46:46, 11.86s/it] 62%|██████▏ | 4380/7045 [14:14:35<8:34:22, 11.58s/it] {'loss': 1.0928, 'learning_rate': 1.6532957915383345e-06, 'epoch': 0.62} + 62%|██████▏ | 4380/7045 [14:14:35<8:34:22, 11.58s/it] 62%|██████▏ | 4381/7045 [14:14:46<8:28:50, 11.46s/it] {'loss': 1.1045, 'learning_rate': 1.6522143913438859e-06, 'epoch': 0.62} + 62%|██████▏ | 4381/7045 [14:14:46<8:28:50, 11.46s/it] 62%|██████▏ | 4382/7045 [14:14:58<8:29:47, 11.49s/it] {'loss': 1.0938, 'learning_rate': 1.651133170359725e-06, 'epoch': 0.62} + 62%|██████▏ | 4382/7045 [14:14:58<8:29:47, 11.49s/it] 62%|██████▏ | 4383/7045 [14:15:09<8:25:47, 11.40s/it] {'loss': 1.1045, 'learning_rate': 1.6500521288144062e-06, 'epoch': 0.62} + 62%|██████▏ | 4383/7045 [14:15:09<8:25:47, 11.40s/it]/usr/local/lib/python3.9/dist-packages/PIL/Image.py:3074: DecompressionBombWarning: Image size (97200000 pixels) exceeds limit of 89478485 pixels, could be decompression bomb DOS attack. + warnings.warn( + 62%|██████▏ | 4384/7045 [14:15:20<8:23:18, 11.35s/it] {'loss': 1.1094, 'learning_rate': 1.6489712669364466e-06, 'epoch': 0.62} + 62%|██████▏ | 4384/7045 [14:15:20<8:23:18, 11.35s/it] 62%|██████▏ | 4385/7045 [14:15:33<8:41:22, 11.76s/it] {'loss': 1.0718, 'learning_rate': 1.6478905849543282e-06, 'epoch': 0.62} + 62%|██████▏ | 4385/7045 [14:15:33<8:41:22, 11.76s/it] 62%|██████▏ | 4386/7045 [14:15:44<8:32:15, 11.56s/it] {'loss': 1.1191, 'learning_rate': 1.64681008309649e-06, 'epoch': 0.62} + 62%|██████▏ | 4386/7045 [14:15:44<8:32:15, 11.56s/it] 62%|██████▏ | 4387/7045 [14:15:57<8:48:12, 11.92s/it] {'loss': 1.085, 'learning_rate': 1.6457297615913364e-06, 'epoch': 0.62} + 62%|██████▏ | 4387/7045 [14:15:57<8:48:12, 11.92s/it] 62%|██████▏ | 4388/7045 [14:16:08<8:38:07, 11.70s/it] {'loss': 1.1064, 'learning_rate': 1.6446496206672325e-06, 'epoch': 0.62} + 62%|██████▏ | 4388/7045 [14:16:08<8:38:07, 11.70s/it] 62%|██████▏ | 4389/7045 [14:16:20<8:36:31, 11.67s/it] {'loss': 1.0693, 'learning_rate': 1.6435696605525053e-06, 'epoch': 0.62} + 62%|██████▏ | 4389/7045 [14:16:20<8:36:31, 11.67s/it] 62%|██���███▏ | 4390/7045 [14:16:34<9:12:17, 12.48s/it] {'loss': 1.1016, 'learning_rate': 1.6424898814754427e-06, 'epoch': 0.62} + 62%|██████▏ | 4390/7045 [14:16:34<9:12:17, 12.48s/it] 62%|██████▏ | 4391/7045 [14:16:45<8:59:34, 12.20s/it] {'loss': 1.1162, 'learning_rate': 1.6414102836642964e-06, 'epoch': 0.62} + 62%|██████▏ | 4391/7045 [14:16:45<8:59:34, 12.20s/it] 62%|██████▏ | 4392/7045 [14:16:57<8:52:49, 12.05s/it] {'loss': 1.1162, 'learning_rate': 1.6403308673472778e-06, 'epoch': 0.62} + 62%|██████▏ | 4392/7045 [14:16:57<8:52:49, 12.05s/it] 62%|██████▏ | 4393/7045 [14:17:09<8:48:56, 11.97s/it] {'loss': 1.1094, 'learning_rate': 1.6392516327525617e-06, 'epoch': 0.62} + 62%|██████▏ | 4393/7045 [14:17:09<8:48:56, 11.97s/it] 62%|██████▏ | 4394/7045 [14:17:21<8:56:39, 12.15s/it] {'loss': 1.0908, 'learning_rate': 1.6381725801082827e-06, 'epoch': 0.62} + 62%|██████▏ | 4394/7045 [14:17:21<8:56:39, 12.15s/it] 62%|██████▏ | 4395/7045 [14:17:33<8:45:53, 11.91s/it] {'loss': 1.1309, 'learning_rate': 1.6370937096425377e-06, 'epoch': 0.62} + 62%|██████▏ | 4395/7045 [14:17:33<8:45:53, 11.91s/it] 62%|██████▏ | 4396/7045 [14:17:44<8:36:03, 11.69s/it] {'loss': 1.1064, 'learning_rate': 1.6360150215833853e-06, 'epoch': 0.62} + 62%|██████▏ | 4396/7045 [14:17:44<8:36:03, 11.69s/it] 62%|██████▏ | 4397/7045 [14:17:55<8:26:58, 11.49s/it] {'loss': 1.1348, 'learning_rate': 1.634936516158846e-06, 'epoch': 0.62} + 62%|██████▏ | 4397/7045 [14:17:55<8:26:58, 11.49s/it] 62%|██████▏ | 4398/7045 [14:18:06<8:23:15, 11.41s/it] {'loss': 1.1016, 'learning_rate': 1.6338581935969005e-06, 'epoch': 0.62} + 62%|██████▏ | 4398/7045 [14:18:06<8:23:15, 11.41s/it] 62%|██████▏ | 4399/7045 [14:18:18<8:20:58, 11.36s/it] {'loss': 1.1011, 'learning_rate': 1.6327800541254923e-06, 'epoch': 0.62} + 62%|██████▏ | 4399/7045 [14:18:18<8:20:58, 11.36s/it] 62%|██████▏ | 4400/7045 [14:18:29<8:17:01, 11.27s/it] {'loss': 1.1074, 'learning_rate': 1.6317020979725245e-06, 'epoch': 0.62} + 62%|██████▏ | 4400/7045 [14:18:29<8:17:01, 11.27s/it] 62%|██████▏ | 4401/7045 [14:18:40<8:13:25, 11.20s/it] {'loss': 1.1445, 'learning_rate': 1.6306243253658627e-06, 'epoch': 0.62} + 62%|██████▏ | 4401/7045 [14:18:40<8:13:25, 11.20s/it] 62%|██████▏ | 4402/7045 [14:18:53<8:37:27, 11.75s/it] {'loss': 1.1162, 'learning_rate': 1.6295467365333323e-06, 'epoch': 0.62} + 62%|██████▏ | 4402/7045 [14:18:53<8:37:27, 11.75s/it] 62%|██████▏ | 4403/7045 [14:19:06<8:53:25, 12.11s/it] {'loss': 1.0566, 'learning_rate': 1.6284693317027222e-06, 'epoch': 0.62} + 62%|██████▏ | 4403/7045 [14:19:06<8:53:25, 12.11s/it] 63%|██████▎ | 4404/7045 [14:19:17<8:40:37, 11.83s/it] {'loss': 1.1377, 'learning_rate': 1.627392111101781e-06, 'epoch': 0.63} + 63%|██████▎ | 4404/7045 [14:19:17<8:40:37, 11.83s/it] 63%|██████▎ | 4405/7045 [14:19:28<8:32:20, 11.64s/it] {'loss': 1.0776, 'learning_rate': 1.626315074958217e-06, 'epoch': 0.63} + 63%|██████▎ | 4405/7045 [14:19:28<8:32:20, 11.64s/it] 63%|██████▎ | 4406/7045 [14:19:39<8:24:37, 11.47s/it] {'loss': 1.0933, 'learning_rate': 1.6252382234997022e-06, 'epoch': 0.63} + 63%|██████▎ | 4406/7045 [14:19:39<8:24:37, 11.47s/it] 63%|██████▎ | 4407/7045 [14:19:52<8:45:50, 11.96s/it] {'loss': 1.0981, 'learning_rate': 1.6241615569538677e-06, 'epoch': 0.63} + 63%|██████▎ | 4407/7045 [14:19:52<8:45:50, 11.96s/it] 63%|██████▎ | 4408/7045 [14:20:03<8:33:57, 11.69s/it] {'loss': 1.1416, 'learning_rate': 1.6230850755483068e-06, 'epoch': 0.63} + 63%|██████▎ | 4408/7045 [14:20:03<8:33:57, 11.69s/it] 63%|██████▎ | 4409/7045 [14:20:15<8:28:51, 11.58s/it] {'loss': 1.1094, 'learning_rate': 1.622008779510571e-06, 'epoch': 0.63} + 63%|██████▎ | 4409/7045 [14:20:15<8:28:51, 11.58s/it] 63%|██████▎ | 4410/7045 [14:20:27<8:33:39, 11.70s/it] {'loss': 1.125, 'learning_rate': 1.620932669068177e-06, 'epoch': 0.63} + 63%|██████▎ | 4410/7045 [14:20:27<8:33:39, 11.70s/it] 63%|██████▎ | 4411/7045 [14:20:38<8:31:01, 11.64s/it] {'loss': 1.1309, 'learning_rate': 1.6198567444485996e-06, 'epoch': 0.63} + 63%|██████▎ | 4411/7045 [14:20:38<8:31:01, 11.64s/it] 63%|██████▎ | 4412/7045 [14:20:49<8:25:28, 11.52s/it] {'loss': 1.0786, 'learning_rate': 1.618781005879273e-06, 'epoch': 0.63} + 63%|██████▎ | 4412/7045 [14:20:49<8:25:28, 11.52s/it] 63%|██████▎ | 4413/7045 [14:21:00<8:19:03, 11.38s/it] {'loss': 1.1045, 'learning_rate': 1.6177054535875943e-06, 'epoch': 0.63} + 63%|██████▎ | 4413/7045 [14:21:00<8:19:03, 11.38s/it] 63%|██████▎ | 4414/7045 [14:21:12<8:21:20, 11.43s/it] {'loss': 1.1299, 'learning_rate': 1.6166300878009212e-06, 'epoch': 0.63} + 63%|██████▎ | 4414/7045 [14:21:12<8:21:20, 11.43s/it] 63%|██████▎ | 4415/7045 [14:21:24<8:35:14, 11.75s/it] {'loss': 1.1025, 'learning_rate': 1.6155549087465708e-06, 'epoch': 0.63} + 63%|██████▎ | 4415/7045 [14:21:24<8:35:14, 11.75s/it] 63%|██████▎ | 4416/7045 [14:21:36<8:28:33, 11.61s/it] {'loss': 1.1396, 'learning_rate': 1.614479916651821e-06, 'epoch': 0.63} + 63%|██████▎ | 4416/7045 [14:21:36<8:28:33, 11.61s/it] 63%|██████▎ | 4417/7045 [14:21:47<8:31:19, 11.67s/it] {'loss': 1.0781, 'learning_rate': 1.6134051117439104e-06, 'epoch': 0.63} + 63%|██████▎ | 4417/7045 [14:21:47<8:31:19, 11.67s/it] 63%|██████▎ | 4418/7045 [14:21:58<8:21:56, 11.46s/it] {'loss': 1.1216, 'learning_rate': 1.6123304942500385e-06, 'epoch': 0.63} + 63%|██████▎ | 4418/7045 [14:21:58<8:21:56, 11.46s/it] 63%|██████▎ | 4419/7045 [14:22:10<8:21:06, 11.45s/it] {'loss': 1.1348, 'learning_rate': 1.6112560643973652e-06, 'epoch': 0.63} + 63%|██████▎ | 4419/7045 [14:22:10<8:21:06, 11.45s/it] 63%|██████▎ | 4420/7045 [14:22:21<8:18:36, 11.40s/it] {'loss': 1.1025, 'learning_rate': 1.6101818224130092e-06, 'epoch': 0.63} + 63%|██████▎ | 4420/7045 [14:22:21<8:18:36, 11.40s/it] 63%|██████▎ | 4421/7045 [14:22:33<8:20:46, 11.45s/it] {'loss': 1.0806, 'learning_rate': 1.6091077685240497e-06, 'epoch': 0.63} + 63%|██████▎ | 4421/7045 [14:22:33<8:20:46, 11.45s/it] 63%|██████▎ | 4422/7045 [14:22:44<8:22:18, 11.49s/it] {'loss': 1.1143, 'learning_rate': 1.6080339029575304e-06, 'epoch': 0.63} + 63%|██████▎ | 4422/7045 [14:22:44<8:22:18, 11.49s/it] 63%|██████▎ | 4423/7045 [14:22:56<8:30:16, 11.68s/it] {'loss': 1.0903, 'learning_rate': 1.6069602259404487e-06, 'epoch': 0.63} + 63%|██████▎ | 4423/7045 [14:22:56<8:30:16, 11.68s/it] 63%|██████▎ | 4424/7045 [14:23:10<8:49:10, 12.11s/it] {'loss': 1.1377, 'learning_rate': 1.6058867376997668e-06, 'epoch': 0.63} + 63%|██████▎ | 4424/7045 [14:23:10<8:49:10, 12.11s/it] 63%|██████▎ | 4425/7045 [14:23:21<8:36:14, 11.82s/it] {'loss': 1.0996, 'learning_rate': 1.6048134384624047e-06, 'epoch': 0.63} + 63%|██████▎ | 4425/7045 [14:23:21<8:36:14, 11.82s/it] 63%|██████▎ | 4426/7045 [14:23:32<8:34:33, 11.79s/it] {'loss': 1.1084, 'learning_rate': 1.603740328455244e-06, 'epoch': 0.63} + 63%|██████▎ | 4426/7045 [14:23:32<8:34:33, 11.79s/it] 63%|██████▎ | 4427/7045 [14:23:44<8:36:58, 11.85s/it] {'loss': 1.1143, 'learning_rate': 1.6026674079051243e-06, 'epoch': 0.63} + 63%|██████▎ | 4427/7045 [14:23:44<8:36:58, 11.85s/it] 63%|██████▎ | 4428/7045 [14:23:57<8:48:56, 12.13s/it] {'loss': 1.0767, 'learning_rate': 1.601594677038847e-06, 'epoch': 0.63} + 63%|██████▎ | 4428/7045 [14:23:57<8:48:56, 12.13s/it] 63%|██████▎ | 4429/7045 [14:24:08<8:34:30, 11.80s/it] {'loss': 1.1221, 'learning_rate': 1.6005221360831727e-06, 'epoch': 0.63} + 63%|██████▎ | 4429/7045 [14:24:08<8:34:30, 11.80s/it] 63%|██████▎ | 4430/7045 [14:24:21<8:41:33, 11.97s/it] {'loss': 1.0728, 'learning_rate': 1.5994497852648223e-06, 'epoch': 0.63} + 63%|██████▎ | 4430/7045 [14:24:21<8:41:33, 11.97s/it] 63%|██████▎ | 4431/7045 [14:24:31<8:28:08, 11.66s/it] {'loss': 1.1221, 'learning_rate': 1.5983776248104754e-06, 'epoch': 0.63} + 63%|██████▎ | 4431/7045 [14:24:32<8:28:08, 11.66s/it] 63%|██████▎ | 4432/7045 [14:24:43<8:25:51, 11.62s/it] {'loss': 1.123, 'learning_rate': 1.5973056549467718e-06, 'epoch': 0.63} + 63%|██████▎ | 4432/7045 [14:24:43<8:25:51, 11.62s/it] 63%|██████▎ | 4433/7045 [14:24:54<8:16:57, 11.42s/it] {'loss': 1.0967, 'learning_rate': 1.5962338759003122e-06, 'epoch': 0.63} + 63%|██████▎ | 4433/7045 [14:24:54<8:16:57, 11.42s/it] 63%|██████▎ | 4434/7045 [14:25:05<8:12:13, 11.31s/it] {'loss': 1.1309, 'learning_rate': 1.5951622878976555e-06, 'epoch': 0.63} + 63%|██████▎ | 4434/7045 [14:25:05<8:12:13, 11.31s/it] 63%|██████▎ | 4435/7045 [14:25:17<8:19:53, 11.49s/it] {'loss': 1.1323, 'learning_rate': 1.5940908911653211e-06, 'epoch': 0.63} + 63%|██████▎ | 4435/7045 [14:25:17<8:19:53, 11.49s/it] 63%|██████▎ | 4436/7045 [14:25:28<8:19:53, 11.50s/it] {'loss': 1.124, 'learning_rate': 1.5930196859297874e-06, 'epoch': 0.63} + 63%|██████▎ | 4436/7045 [14:25:28<8:19:53, 11.50s/it] 63%|██████▎ | 4437/7045 [14:25:39<8:13:41, 11.36s/it] {'loss': 1.1396, 'learning_rate': 1.5919486724174931e-06, 'epoch': 0.63} + 63%|██████▎ | 4437/7045 [14:25:39<8:13:41, 11.36s/it] 63%|██████▎ | 4438/7045 [14:25:51<8:11:31, 11.31s/it] {'loss': 1.1455, 'learning_rate': 1.5908778508548348e-06, 'epoch': 0.63} + 63%|██████▎ | 4438/7045 [14:25:51<8:11:31, 11.31s/it] 63%|██████▎ | 4439/7045 [14:26:02<8:09:51, 11.28s/it] {'loss': 1.0938, 'learning_rate': 1.5898072214681698e-06, 'epoch': 0.63} + 63%|██████▎ | 4439/7045 [14:26:02<8:09:51, 11.28s/it] 63%|██████▎ | 4440/7045 [14:26:13<8:11:21, 11.32s/it] {'loss': 1.0693, 'learning_rate': 1.588736784483815e-06, 'epoch': 0.63} + 63%|██████▎ | 4440/7045 [14:26:13<8:11:21, 11.32s/it] 63%|██████▎ | 4441/7045 [14:26:25<8:11:09, 11.32s/it] {'loss': 1.1201, 'learning_rate': 1.5876665401280464e-06, 'epoch': 0.63} + 63%|██████▎ | 4441/7045 [14:26:25<8:11:09, 11.32s/it] 63%|██████▎ | 4442/7045 [14:26:37<8:20:10, 11.53s/it] {'loss': 1.1338, 'learning_rate': 1.5865964886270978e-06, 'epoch': 0.63} + 63%|██████▎ | 4442/7045 [14:26:37<8:20:10, 11.53s/it] 63%|██████▎ | 4443/7045 [14:26:47<8:10:57, 11.32s/it] {'loss': 1.084, 'learning_rate': 1.5855266302071645e-06, 'epoch': 0.63} + 63%|██████▎ | 4443/7045 [14:26:47<8:10:57, 11.32s/it] 63%|██████▎ | 4444/7045 [14:26:59<8:08:05, 11.26s/it] {'loss': 1.1289, 'learning_rate': 1.5844569650943994e-06, 'epoch': 0.63} + 63%|██████▎ | 4444/7045 [14:26:59<8:08:05, 11.26s/it] 63%|██████▎ | 4445/7045 [14:27:09<8:02:53, 11.14s/it] {'loss': 1.0977, 'learning_rate': 1.5833874935149145e-06, 'epoch': 0.63} + 63%|██████▎ | 4445/7045 [14:27:09<8:02:53, 11.14s/it] 63%|██████▎ | 4446/7045 [14:27:21<8:03:40, 11.17s/it] {'loss': 1.1104, 'learning_rate': 1.5823182156947834e-06, 'epoch': 0.63} + 63%|██████▎ | 4446/7045 [14:27:21<8:03:40, 11.17s/it] 63%|██████▎ | 4447/7045 [14:27:32<8:02:59, 11.15s/it] {'loss': 1.0957, 'learning_rate': 1.5812491318600354e-06, 'epoch': 0.63} + 63%|██████▎ | 4447/7045 [14:27:32<8:02:59, 11.15s/it] 63%|██████▎ | 4448/7045 [14:27:45<8:23:23, 11.63s/it] {'loss': 1.0942, 'learning_rate': 1.5801802422366608e-06, 'epoch': 0.63} + 63%|██████▎ | 4448/7045 [14:27:45<8:23:23, 11.63s/it] 63%|██████▎ | 4449/7045 [14:27:56<8:21:18, 11.59s/it] {'loss': 1.0986, 'learning_rate': 1.5791115470506078e-06, 'epoch': 0.63} + 63%|██████▎ | 4449/7045 [14:27:56<8:21:18, 11.59s/it] 63%|██████▎ | 4450/7045 [14:28:07<8:15:25, 11.45s/it] {'loss': 1.1514, 'learning_rate': 1.5780430465277837e-06, 'epoch': 0.63} + 63%|██████▎ | 4450/7045 [14:28:07<8:15:25, 11.45s/it] 63%|██████▎ | 4451/7045 [14:28:18<8:09:12, 11.32s/it] {'loss': 1.1035, 'learning_rate': 1.5769747408940555e-06, 'epoch': 0.63} + 63%|██████▎ | 4451/7045 [14:28:18<8:09:12, 11.32s/it] 63%|██████▎ | 4452/7045 [14:28:29<8:07:30, 11.28s/it] {'loss': 1.126, 'learning_rate': 1.5759066303752489e-06, 'epoch': 0.63} + 63%|██████▎ | 4452/7045 [14:28:29<8:07:30, 11.28s/it] 63%|██████▎ | 4453/7045 [14:28:43<8:39:14, 12.02s/it] {'loss': 1.1123, 'learning_rate': 1.5748387151971464e-06, 'epoch': 0.63} + 63%|██████▎ | 4453/7045 [14:28:43<8:39:14, 12.02s/it] 63%|██████▎ | 4454/7045 [14:28:54<8:23:53, 11.67s/it] {'loss': 1.123, 'learning_rate': 1.5737709955854912e-06, 'epoch': 0.63} + 63%|██████▎ | 4454/7045 [14:28:54<8:23:53, 11.67s/it] 63%|██████▎ | 4455/7045 [14:29:05<8:13:41, 11.44s/it] {'loss': 1.0684, 'learning_rate': 1.5727034717659856e-06, 'epoch': 0.63} + 63%|██████▎ | 4455/7045 [14:29:05<8:13:41, 11.44s/it] 63%|██████▎ | 4456/7045 [14:29:18<8:35:01, 11.94s/it] {'loss': 1.064, 'learning_rate': 1.5716361439642886e-06, 'epoch': 0.63} + 63%|██████▎ | 4456/7045 [14:29:18<8:35:01, 11.94s/it] 63%|██████▎ | 4457/7045 [14:29:29<8:23:12, 11.67s/it] {'loss': 1.1348, 'learning_rate': 1.5705690124060177e-06, 'epoch': 0.63} + 63%|██████▎ | 4457/7045 [14:29:29<8:23:12, 11.67s/it] 63%|██████▎ | 4458/7045 [14:29:40<8:16:20, 11.51s/it] {'loss': 1.0957, 'learning_rate': 1.5695020773167518e-06, 'epoch': 0.63} + 63%|██████▎ | 4458/7045 [14:29:40<8:16:20, 11.51s/it] 63%|██████▎ | 4459/7045 [14:29:54<8:44:06, 12.16s/it] {'loss': 1.1021, 'learning_rate': 1.5684353389220269e-06, 'epoch': 0.63} + 63%|██████▎ | 4459/7045 [14:29:54<8:44:06, 12.16s/it] 63%|██████▎ | 4460/7045 [14:30:05<8:35:18, 11.96s/it] {'loss': 1.0952, 'learning_rate': 1.5673687974473344e-06, 'epoch': 0.63} + 63%|██████▎ | 4460/7045 [14:30:05<8:35:18, 11.96s/it] 63%|██████▎ | 4461/7045 [14:30:16<8:21:39, 11.65s/it] {'loss': 1.0894, 'learning_rate': 1.5663024531181287e-06, 'epoch': 0.63} + 63%|██████▎ | 4461/7045 [14:30:16<8:21:39, 11.65s/it] 63%|██████▎ | 4462/7045 [14:30:30<8:43:56, 12.17s/it] {'loss': 1.0361, 'learning_rate': 1.565236306159819e-06, 'epoch': 0.63} + 63%|██████▎ | 4462/7045 [14:30:30<8:43:56, 12.17s/it] 63%|██████▎ | 4463/7045 [14:30:43<8:54:12, 12.41s/it] {'loss': 1.0894, 'learning_rate': 1.564170356797776e-06, 'epoch': 0.63} + 63%|██████▎ | 4463/7045 [14:30:43<8:54:12, 12.41s/it] 63%|██████▎ | 4464/7045 [14:30:54<8:39:50, 12.08s/it] {'loss': 1.0977, 'learning_rate': 1.5631046052573246e-06, 'epoch': 0.63} + 63%|██████▎ | 4464/7045 [14:30:54<8:39:50, 12.08s/it] 63%|██████▎ | 4465/7045 [14:31:06<8:33:26, 11.94s/it] {'loss': 1.0972, 'learning_rate': 1.5620390517637513e-06, 'epoch': 0.63} + 63%|██████▎ | 4465/7045 [14:31:06<8:33:26, 11.94s/it] 63%|██████▎ | 4466/7045 [14:31:18<8:44:17, 12.20s/it] {'loss': 1.1172, 'learning_rate': 1.5609736965422996e-06, 'epoch': 0.63} + 63%|██████▎ | 4466/7045 [14:31:18<8:44:17, 12.20s/it] 63%|██████▎ | 4467/7045 [14:31:31<8:44:46, 12.21s/it] {'loss': 1.0601, 'learning_rate': 1.559908539818171e-06, 'epoch': 0.63} + 63%|██████▎ | 4467/7045 [14:31:31<8:44:46, 12.21s/it] 63%|██████▎ | 4468/7045 [14:31:42<8:31:32, 11.91s/it] {'loss': 1.1357, 'learning_rate': 1.5588435818165245e-06, 'epoch': 0.63} + 63%|██████▎ | 4468/7045 [14:31:42<8:31:32, 11.91s/it] 63%|██████▎ | 4469/7045 [14:31:55<8:43:24, 12.19s/it] {'loss': 1.0879, 'learning_rate': 1.557778822762477e-06, 'epoch': 0.63} + 63%|██████▎ | 4469/7045 [14:31:55<8:43:24, 12.19s/it] 63%|██████▎ | 4470/7045 [14:32:07<8:41:04, 12.14s/it] {'loss': 1.085, 'learning_rate': 1.5567142628811067e-06, 'epoch': 0.63} + 63%|██████▎ | 4470/7045 [14:32:07<8:41:04, 12.14s/it] 63%|██████▎ | 4471/7045 [14:32:18<8:26:43, 11.81s/it] {'loss': 1.126, 'learning_rate': 1.5556499023974447e-06, 'epoch': 0.63} + 63%|██████▎ | 4471/7045 [14:32:18<8:26:43, 11.81s/it] 63%|██████▎ | 4472/7045 [14:32:29<8:23:55, 11.75s/it] {'loss': 1.0552, 'learning_rate': 1.554585741536483e-06, 'epoch': 0.63} + 63%|██████▎ | 4472/7045 [14:32:29<8:23:55, 11.75s/it] 63%|██████▎ | 4473/7045 [14:32:40<8:13:45, 11.52s/it] {'loss': 1.0791, 'learning_rate': 1.5535217805231706e-06, 'epoch': 0.63} + 63%|██████▎ | 4473/7045 [14:32:40<8:13:45, 11.52s/it] 64%|██████▎ | 4474/7045 [14:32:51<8:09:16, 11.42s/it] {'loss': 1.0986, 'learning_rate': 1.5524580195824152e-06, 'epoch': 0.64} + 64%|██████▎ | 4474/7045 [14:32:51<8:09:16, 11.42s/it] 64%|██████▎ | 4475/7045 [14:33:02<8:03:08, 11.28s/it] {'loss': 1.0962, 'learning_rate': 1.5513944589390795e-06, 'epoch': 0.64} + 64%|██████▎ | 4475/7045 [14:33:02<8:03:08, 11.28s/it] 64%|██████▎ | 4476/7045 [14:33:14<8:02:19, 11.26s/it] {'loss': 1.1445, 'learning_rate': 1.5503310988179864e-06, 'epoch': 0.64} + 64%|██████▎ | 4476/7045 [14:33:14<8:02:19, 11.26s/it] 64%|██████▎ | 4477/7045 [14:33:25<8:03:04, 11.29s/it] {'loss': 1.1138, 'learning_rate': 1.5492679394439162e-06, 'epoch': 0.64} + 64%|██████▎ | 4477/7045 [14:33:25<8:03:04, 11.29s/it] 64%|██████▎ | 4478/7045 [14:33:36<8:02:58, 11.29s/it] {'loss': 1.0996, 'learning_rate': 1.548204981041606e-06, 'epoch': 0.64} + 64%|██████▎ | 4478/7045 [14:33:36<8:02:58, 11.29s/it] 64%|██████▎ | 4479/7045 [14:33:47<7:57:48, 11.17s/it] {'loss': 1.1289, 'learning_rate': 1.5471422238357508e-06, 'epoch': 0.64} + 64%|██████▎ | 4479/7045 [14:33:47<7:57:48, 11.17s/it] 64%|██████▎ | 4480/7045 [14:34:01<8:26:51, 11.86s/it] {'loss': 1.1079, 'learning_rate': 1.5460796680510024e-06, 'epoch': 0.64} + 64%|██████▎ | 4480/7045 [14:34:01<8:26:51, 11.86s/it] 64%|██████▎ | 4481/7045 [14:34:12<8:20:28, 11.71s/it] {'loss': 1.1377, 'learning_rate': 1.545017313911971e-06, 'epoch': 0.64} + 64%|██████▎ | 4481/7045 [14:34:12<8:20:28, 11.71s/it] 64%|██████▎ | 4482/7045 [14:34:24<8:30:05, 11.94s/it] {'loss': 1.1074, 'learning_rate': 1.543955161643223e-06, 'epoch': 0.64} + 64%|██████▎ | 4482/7045 [14:34:24<8:30:05, 11.94s/it] 64%|██████▎ | 4483/7045 [14:34:37<8:37:49, 12.13s/it] {'loss': 1.084, 'learning_rate': 1.5428932114692841e-06, 'epoch': 0.64} + 64%|██████▎ | 4483/7045 [14:34:37<8:37:49, 12.13s/it] 64%|██████▎ | 4484/7045 [14:34:48<8:26:53, 11.88s/it] {'loss': 1.1211, 'learning_rate': 1.5418314636146356e-06, 'epoch': 0.64} + 64%|██████▎ | 4484/7045 [14:34:48<8:26:53, 11.88s/it] 64%|██████▎ | 4485/7045 [14:35:00<8:24:03, 11.81s/it] {'loss': 1.1299, 'learning_rate': 1.5407699183037167e-06, 'epoch': 0.64} + 64%|██████▎ | 4485/7045 [14:35:00<8:24:03, 11.81s/it] 64%|██████▎ | 4486/7045 [14:35:12<8:20:02, 11.72s/it] {'loss': 1.0879, 'learning_rate': 1.5397085757609229e-06, 'epoch': 0.64} + 64%|██████▎ | 4486/7045 [14:35:12<8:20:02, 11.72s/it] 64%|██████▎ | 4487/7045 [14:35:24<8:31:40, 12.00s/it] {'loss': 1.1089, 'learning_rate': 1.538647436210608e-06, 'epoch': 0.64} + 64%|██████▎ | 4487/7045 [14:35:24<8:31:40, 12.00s/it] 64%|██████▎ | 4488/7045 [14:35:35<8:21:25, 11.77s/it] {'loss': 1.0645, 'learning_rate': 1.5375864998770818e-06, 'epoch': 0.64} + 64%|██████▎ | 4488/7045 [14:35:35<8:21:25, 11.77s/it] 64%|██████▎ | 4489/7045 [14:35:46<8:08:52, 11.48s/it] {'loss': 1.0962, 'learning_rate': 1.5365257669846128e-06, 'epoch': 0.64} + 64%|██████▎ | 4489/7045 [14:35:46<8:08:52, 11.48s/it] 64%|██████▎ | 4490/7045 [14:35:59<8:29:13, 11.96s/it] {'loss': 1.1045, 'learning_rate': 1.5354652377574244e-06, 'epoch': 0.64} + 64%|██████▎ | 4490/7045 [14:35:59<8:29:13, 11.96s/it] 64%|██████▎ | 4491/7045 [14:36:10<8:17:01, 11.68s/it] {'loss': 1.0967, 'learning_rate': 1.5344049124196984e-06, 'epoch': 0.64} + 64%|██████▎ | 4491/7045 [14:36:10<8:17:01, 11.68s/it] 64%|██████▍ | 4492/7045 [14:36:22<8:12:12, 11.57s/it] {'loss': 1.1465, 'learning_rate': 1.5333447911955729e-06, 'epoch': 0.64} + 64%|██████▍ | 4492/7045 [14:36:22<8:12:12, 11.57s/it] 64%|██████▍ | 4493/7045 [14:36:33<8:05:18, 11.41s/it] {'loss': 1.1553, 'learning_rate': 1.5322848743091434e-06, 'epoch': 0.64} + 64%|██████▍ | 4493/7045 [14:36:33<8:05:18, 11.41s/it] 64%|██████▍ | 4494/7045 [14:36:45<8:12:07, 11.57s/it] {'loss': 1.1021, 'learning_rate': 1.5312251619844608e-06, 'epoch': 0.64} + 64%|██████▍ | 4494/7045 [14:36:45<8:12:07, 11.57s/it] 64%|██████▍ | 4495/7045 [14:36:56<8:04:57, 11.41s/it] {'loss': 1.125, 'learning_rate': 1.5301656544455356e-06, 'epoch': 0.64} + 64%|██████▍ | 4495/7045 [14:36:56<8:04:57, 11.41s/it] 64%|██████▍ | 4496/7045 [14:37:08<8:13:22, 11.61s/it] {'loss': 1.1445, 'learning_rate': 1.5291063519163325e-06, 'epoch': 0.64} + 64%|██████▍ | 4496/7045 [14:37:08<8:13:22, 11.61s/it] 64%|██████▍ | 4497/7045 [14:37:19<8:05:55, 11.44s/it] {'loss': 1.0869, 'learning_rate': 1.5280472546207728e-06, 'epoch': 0.64} + 64%|██████▍ | 4497/7045 [14:37:19<8:05:55, 11.44s/it] 64%|██████▍ | 4498/7045 [14:37:30<8:05:09, 11.43s/it] {'loss': 1.1758, 'learning_rate': 1.5269883627827358e-06, 'epoch': 0.64} + 64%|██████▍ | 4498/7045 [14:37:30<8:05:09, 11.43s/it] 64%|██████▍ | 4499/7045 [14:37:41<7:58:35, 11.28s/it] {'loss': 1.1094, 'learning_rate': 1.5259296766260564e-06, 'epoch': 0.64} + 64%|██████▍ | 4499/7045 [14:37:41<7:58:35, 11.28s/it] 64%|██████▍ | 4500/7045 [14:37:52<7:55:29, 11.21s/it] {'loss': 1.1123, 'learning_rate': 1.5248711963745272e-06, 'epoch': 0.64} + 64%|██████▍ | 4500/7045 [14:37:52<7:55:29, 11.21s/it] 64%|██████▍ | 4501/7045 [14:38:03<7:53:23, 11.16s/it] {'loss': 1.0928, 'learning_rate': 1.5238129222518955e-06, 'epoch': 0.64} + 64%|██████▍ | 4501/7045 [14:38:03<7:53:23, 11.16s/it] 64%|██████▍ | 4502/7045 [14:38:14<7:53:07, 11.16s/it] {'loss': 1.1631, 'learning_rate': 1.5227548544818666e-06, 'epoch': 0.64} + 64%|██████▍ | 4502/7045 [14:38:14<7:53:07, 11.16s/it] 64%|██████▍ | 4503/7045 [14:38:26<7:58:57, 11.31s/it] {'loss': 1.1162, 'learning_rate': 1.5216969932881009e-06, 'epoch': 0.64} + 64%|██████▍ | 4503/7045 [14:38:26<7:58:57, 11.31s/it] 64%|██████▍ | 4504/7045 [14:38:37<8:00:44, 11.35s/it] {'loss': 1.1104, 'learning_rate': 1.5206393388942164e-06, 'epoch': 0.64} + 64%|██████▍ | 4504/7045 [14:38:37<8:00:44, 11.35s/it] 64%|██████▍ | 4505/7045 [14:38:49<8:01:02, 11.36s/it] {'loss': 1.1123, 'learning_rate': 1.5195818915237875e-06, 'epoch': 0.64} + 64%|██████▍ | 4505/7045 [14:38:49<8:01:02, 11.36s/it] 64%|██████▍ | 4506/7045 [14:39:01<8:17:11, 11.75s/it] {'loss': 1.1152, 'learning_rate': 1.5185246514003413e-06, 'epoch': 0.64} + 64%|██████▍ | 4506/7045 [14:39:01<8:17:11, 11.75s/it] 64%|██████▍ | 4507/7045 [14:39:13<8:07:59, 11.54s/it] {'loss': 1.1089, 'learning_rate': 1.5174676187473674e-06, 'epoch': 0.64} + 64%|██████▍ | 4507/7045 [14:39:13<8:07:59, 11.54s/it] 64%|██████▍ | 4508/7045 [14:39:24<8:02:00, 11.40s/it] {'loss': 1.0928, 'learning_rate': 1.5164107937883061e-06, 'epoch': 0.64} + 64%|██████▍ | 4508/7045 [14:39:24<8:02:00, 11.40s/it] 64%|██████▍ | 4509/7045 [14:39:36<8:15:32, 11.72s/it] {'loss': 1.1216, 'learning_rate': 1.515354176746556e-06, 'epoch': 0.64} + 64%|██████▍ | 4509/7045 [14:39:36<8:15:32, 11.72s/it] 64%|██████▍ | 4510/7045 [14:39:47<8:11:12, 11.63s/it] {'loss': 1.0752, 'learning_rate': 1.5142977678454718e-06, 'epoch': 0.64} + 64%|██████▍ | 4510/7045 [14:39:47<8:11:12, 11.63s/it] 64%|██████▍ | 4511/7045 [14:39:59<8:11:15, 11.63s/it] {'loss': 1.1309, 'learning_rate': 1.513241567308364e-06, 'epoch': 0.64} + 64%|██████▍ | 4511/7045 [14:39:59<8:11:15, 11.63s/it] 64%|██████▍ | 4512/7045 [14:40:11<8:17:23, 11.78s/it] {'loss': 1.0713, 'learning_rate': 1.5121855753584982e-06, 'epoch': 0.64} + 64%|██████▍ | 4512/7045 [14:40:11<8:17:23, 11.78s/it] 64%|██████▍ | 4513/7045 [14:40:22<8:08:45, 11.58s/it] {'loss': 1.1279, 'learning_rate': 1.5111297922190972e-06, 'epoch': 0.64} + 64%|██████▍ | 4513/7045 [14:40:22<8:08:45, 11.58s/it] 64%|██████▍ | 4514/7045 [14:40:34<8:05:56, 11.52s/it] {'loss': 1.1221, 'learning_rate': 1.5100742181133394e-06, 'epoch': 0.64} + 64%|██████▍ | 4514/7045 [14:40:34<8:05:56, 11.52s/it] 64%|██████▍ | 4515/7045 [14:40:45<8:02:08, 11.43s/it] {'loss': 1.0986, 'learning_rate': 1.5090188532643579e-06, 'epoch': 0.64} + 64%|██████▍ | 4515/7045 [14:40:45<8:02:08, 11.43s/it] 64%|██████▍ | 4516/7045 [14:40:57<8:05:13, 11.51s/it] {'loss': 1.124, 'learning_rate': 1.5079636978952439e-06, 'epoch': 0.64} + 64%|██████▍ | 4516/7045 [14:40:57<8:05:13, 11.51s/it] 64%|██████▍ | 4517/7045 [14:41:08<8:00:04, 11.39s/it] {'loss': 1.084, 'learning_rate': 1.506908752229041e-06, 'epoch': 0.64} + 64%|██████▍ | 4517/7045 [14:41:08<8:00:04, 11.39s/it] 64%|██████▍ | 4518/7045 [14:41:20<8:07:49, 11.58s/it] {'loss': 1.1396, 'learning_rate': 1.5058540164887506e-06, 'epoch': 0.64} + 64%|██████▍ | 4518/7045 [14:41:20<8:07:49, 11.58s/it] 64%|██████▍ | 4519/7045 [14:41:31<8:03:09, 11.48s/it] {'loss': 1.1543, 'learning_rate': 1.5047994908973304e-06, 'epoch': 0.64} + 64%|██████▍ | 4519/7045 [14:41:31<8:03:09, 11.48s/it] 64%|██████▍ | 4520/7045 [14:41:43<8:04:23, 11.51s/it] {'loss': 1.124, 'learning_rate': 1.5037451756776924e-06, 'epoch': 0.64} + 64%|██████▍ | 4520/7045 [14:41:43<8:04:23, 11.51s/it] 64%|██████▍ | 4521/7045 [14:41:54<7:57:57, 11.36s/it] {'loss': 1.1221, 'learning_rate': 1.5026910710527042e-06, 'epoch': 0.64} + 64%|██████▍ | 4521/7045 [14:41:54<7:57:57, 11.36s/it] 64%|██████▍ | 4522/7045 [14:42:05<7:56:51, 11.34s/it] {'loss': 1.1396, 'learning_rate': 1.5016371772451896e-06, 'epoch': 0.64} + 64%|██████▍ | 4522/7045 [14:42:05<7:56:51, 11.34s/it] 64%|██████▍ | 4523/7045 [14:42:16<7:56:33, 11.34s/it] {'loss': 1.1016, 'learning_rate': 1.5005834944779266e-06, 'epoch': 0.64} + 64%|██████▍ | 4523/7045 [14:42:16<7:56:33, 11.34s/it] 64%|██████▍ | 4524/7045 [14:42:28<7:59:36, 11.41s/it] {'loss': 1.1426, 'learning_rate': 1.4995300229736492e-06, 'epoch': 0.64} + 64%|██████▍ | 4524/7045 [14:42:28<7:59:36, 11.41s/it] 64%|██████▍ | 4525/7045 [14:42:39<7:52:40, 11.25s/it] {'loss': 1.0869, 'learning_rate': 1.4984767629550478e-06, 'epoch': 0.64} + 64%|██████▍ | 4525/7045 [14:42:39<7:52:40, 11.25s/it] 64%|██████▍ | 4526/7045 [14:42:50<7:57:20, 11.37s/it] {'loss': 1.1074, 'learning_rate': 1.4974237146447665e-06, 'epoch': 0.64} + 64%|██████▍ | 4526/7045 [14:42:50<7:57:20, 11.37s/it] 64%|██████▍ | 4527/7045 [14:43:01<7:53:39, 11.29s/it] {'loss': 1.1035, 'learning_rate': 1.4963708782654057e-06, 'epoch': 0.64} + 64%|██████▍ | 4527/7045 [14:43:01<7:53:39, 11.29s/it] 64%|██████▍ | 4528/7045 [14:43:13<7:50:58, 11.23s/it] {'loss': 1.127, 'learning_rate': 1.4953182540395203e-06, 'epoch': 0.64} + 64%|██████▍ | 4528/7045 [14:43:13<7:50:58, 11.23s/it] 64%|██████▍ | 4529/7045 [14:43:25<8:01:20, 11.48s/it] {'loss': 1.0908, 'learning_rate': 1.4942658421896206e-06, 'epoch': 0.64} + 64%|██████▍ | 4529/7045 [14:43:25<8:01:20, 11.48s/it] 64%|██████▍ | 4530/7045 [14:43:38<8:20:36, 11.94s/it] {'loss': 1.0498, 'learning_rate': 1.4932136429381719e-06, 'epoch': 0.64} + 64%|██████▍ | 4530/7045 [14:43:38<8:20:36, 11.94s/it] 64%|██████▍ | 4531/7045 [14:43:49<8:08:23, 11.66s/it] {'loss': 1.0962, 'learning_rate': 1.4921616565075956e-06, 'epoch': 0.64} + 64%|██████▍ | 4531/7045 [14:43:49<8:08:23, 11.66s/it] 64%|██████▍ | 4532/7045 [14:44:00<8:05:06, 11.58s/it] {'loss': 1.1074, 'learning_rate': 1.4911098831202664e-06, 'epoch': 0.64} + 64%|██████▍ | 4532/7045 [14:44:00<8:05:06, 11.58s/it] 64%|██████▍ | 4533/7045 [14:44:12<8:07:17, 11.64s/it] {'loss': 1.0864, 'learning_rate': 1.490058322998516e-06, 'epoch': 0.64} + 64%|██████▍ | 4533/7045 [14:44:12<8:07:17, 11.64s/it] 64%|██████▍ | 4534/7045 [14:44:23<7:58:03, 11.42s/it] {'loss': 1.1064, 'learning_rate': 1.4890069763646284e-06, 'epoch': 0.64} + 64%|██████▍ | 4534/7045 [14:44:23<7:58:03, 11.42s/it] 64%|██████▍ | 4535/7045 [14:44:34<7:53:28, 11.32s/it] {'loss': 1.1406, 'learning_rate': 1.4879558434408444e-06, 'epoch': 0.64} + 64%|██████▍ | 4535/7045 [14:44:34<7:53:28, 11.32s/it] 64%|██████▍ | 4536/7045 [14:44:45<7:56:18, 11.39s/it] {'loss': 1.1377, 'learning_rate': 1.4869049244493594e-06, 'epoch': 0.64} + 64%|██████▍ | 4536/7045 [14:44:45<7:56:18, 11.39s/it] 64%|██████▍ | 4537/7045 [14:44:57<7:53:17, 11.32s/it] {'loss': 1.0908, 'learning_rate': 1.4858542196123236e-06, 'epoch': 0.64} + 64%|██████▍ | 4537/7045 [14:44:57<7:53:17, 11.32s/it] 64%|██████▍ | 4538/7045 [14:45:08<7:56:37, 11.41s/it] {'loss': 1.1191, 'learning_rate': 1.4848037291518404e-06, 'epoch': 0.64} + 64%|██████▍ | 4538/7045 [14:45:08<7:56:37, 11.41s/it] 64%|██████▍ | 4539/7045 [14:45:19<7:52:39, 11.32s/it] {'loss': 1.1123, 'learning_rate': 1.4837534532899702e-06, 'epoch': 0.64} + 64%|██████▍ | 4539/7045 [14:45:19<7:52:39, 11.32s/it] 64%|██████▍ | 4540/7045 [14:45:31<8:03:28, 11.58s/it] {'loss': 1.0781, 'learning_rate': 1.4827033922487266e-06, 'epoch': 0.64} + 64%|██████▍ | 4540/7045 [14:45:31<8:03:28, 11.58s/it] 64%|██████▍ | 4541/7045 [14:45:44<8:12:13, 11.79s/it] {'loss': 1.0728, 'learning_rate': 1.481653546250078e-06, 'epoch': 0.64} + 64%|██████▍ | 4541/7045 [14:45:44<8:12:13, 11.79s/it] 64%|██████▍ | 4542/7045 [14:45:55<8:05:11, 11.63s/it] {'loss': 1.1455, 'learning_rate': 1.4806039155159485e-06, 'epoch': 0.64} + 64%|██████▍ | 4542/7045 [14:45:55<8:05:11, 11.63s/it] 64%|██████▍ | 4543/7045 [14:46:06<8:00:52, 11.53s/it] {'loss': 1.1074, 'learning_rate': 1.4795545002682133e-06, 'epoch': 0.64} + 64%|██████▍ | 4543/7045 [14:46:06<8:00:52, 11.53s/it] 64%|██████▍ | 4544/7045 [14:46:20<8:23:15, 12.07s/it] {'loss': 1.1108, 'learning_rate': 1.4785053007287076e-06, 'epoch': 0.64} + 64%|██████▍ | 4544/7045 [14:46:20<8:23:15, 12.07s/it] 65%|██████▍ | 4545/7045 [14:46:31<8:13:22, 11.84s/it] {'loss': 1.1279, 'learning_rate': 1.4774563171192153e-06, 'epoch': 0.65} + 65%|██████▍ | 4545/7045 [14:46:31<8:13:22, 11.84s/it] 65%|██████▍ | 4546/7045 [14:46:43<8:12:08, 11.82s/it] {'loss': 1.1523, 'learning_rate': 1.4764075496614784e-06, 'epoch': 0.65} + 65%|██████▍ | 4546/7045 [14:46:43<8:12:08, 11.82s/it] 65%|██████▍ | 4547/7045 [14:46:54<8:07:23, 11.71s/it] {'loss': 1.1436, 'learning_rate': 1.4753589985771922e-06, 'epoch': 0.65} + 65%|██████▍ | 4547/7045 [14:46:54<8:07:23, 11.71s/it] 65%|██████▍ | 4548/7045 [14:47:05<7:57:17, 11.47s/it] {'loss': 1.1089, 'learning_rate': 1.4743106640880056e-06, 'epoch': 0.65} + 65%|██████▍ | 4548/7045 [14:47:05<7:57:17, 11.47s/it] 65%|██████▍ | 4549/7045 [14:47:16<7:55:32, 11.43s/it] {'loss': 1.1367, 'learning_rate': 1.4732625464155218e-06, 'epoch': 0.65} + 65%|██████▍ | 4549/7045 [14:47:16<7:55:32, 11.43s/it] 65%|██████▍ | 4550/7045 [14:47:27<7:51:04, 11.33s/it] {'loss': 1.123, 'learning_rate': 1.4722146457812989e-06, 'epoch': 0.65} + 65%|██████▍ | 4550/7045 [14:47:27<7:51:04, 11.33s/it] 65%|██████▍ | 4551/7045 [14:47:40<8:10:41, 11.80s/it] {'loss': 1.104, 'learning_rate': 1.4711669624068486e-06, 'epoch': 0.65} + 65%|██████▍ | 4551/7045 [14:47:40<8:10:41, 11.80s/it] 65%|██████▍ | 4552/7045 [14:47:52<8:02:06, 11.60s/it] {'loss': 1.124, 'learning_rate': 1.4701194965136375e-06, 'epoch': 0.65} + 65%|██████▍ | 4552/7045 [14:47:52<8:02:06, 11.60s/it] 65%|██████▍ | 4553/7045 [14:48:03<8:01:25, 11.59s/it] {'loss': 1.1167, 'learning_rate': 1.4690722483230856e-06, 'epoch': 0.65} + 65%|██████▍ | 4553/7045 [14:48:03<8:01:25, 11.59s/it] 65%|██████▍ | 4554/7045 [14:48:15<8:01:20, 11.59s/it] {'loss': 1.1094, 'learning_rate': 1.4680252180565657e-06, 'epoch': 0.65} + 65%|██████▍ | 4554/7045 [14:48:15<8:01:20, 11.59s/it] 65%|██████▍ | 4555/7045 [14:48:26<8:00:16, 11.57s/it] {'loss': 1.0957, 'learning_rate': 1.4669784059354058e-06, 'epoch': 0.65} + 65%|██████▍ | 4555/7045 [14:48:26<8:00:16, 11.57s/it] 65%|██████▍ | 4556/7045 [14:48:39<8:16:41, 11.97s/it] {'loss': 1.0684, 'learning_rate': 1.4659318121808888e-06, 'epoch': 0.65} + 65%|██████▍ | 4556/7045 [14:48:39<8:16:41, 11.97s/it] 65%|██████▍ | 4557/7045 [14:48:52<8:25:16, 12.19s/it] {'loss': 1.0459, 'learning_rate': 1.4648854370142496e-06, 'epoch': 0.65} + 65%|██████▍ | 4557/7045 [14:48:52<8:25:16, 12.19s/it] 65%|██████▍ | 4558/7045 [14:49:04<8:25:39, 12.20s/it] {'loss': 1.1001, 'learning_rate': 1.4638392806566782e-06, 'epoch': 0.65} + 65%|██████▍ | 4558/7045 [14:49:04<8:25:39, 12.20s/it] 65%|██████▍ | 4559/7045 [14:49:15<8:11:23, 11.86s/it] {'loss': 1.1133, 'learning_rate': 1.4627933433293174e-06, 'epoch': 0.65} + 65%|██████▍ | 4559/7045 [14:49:15<8:11:23, 11.86s/it] 65%|██████▍ | 4560/7045 [14:49:26<8:03:52, 11.68s/it] {'loss': 1.1182, 'learning_rate': 1.4617476252532637e-06, 'epoch': 0.65} + 65%|██████▍ | 4560/7045 [14:49:26<8:03:52, 11.68s/it] 65%|██████▍ | 4561/7045 [14:49:40<8:21:55, 12.12s/it] {'loss': 1.124, 'learning_rate': 1.4607021266495675e-06, 'epoch': 0.65} + 65%|██████▍ | 4561/7045 [14:49:40<8:21:55, 12.12s/it] 65%|██████▍ | 4562/7045 [14:49:50<8:06:57, 11.77s/it] {'loss': 1.1475, 'learning_rate': 1.4596568477392334e-06, 'epoch': 0.65} + 65%|██████▍ | 4562/7045 [14:49:50<8:06:57, 11.77s/it] 65%|██████▍ | 4563/7045 [14:50:01<7:56:42, 11.52s/it] {'loss': 1.083, 'learning_rate': 1.4586117887432188e-06, 'epoch': 0.65} + 65%|██████▍ | 4563/7045 [14:50:01<7:56:42, 11.52s/it] 65%|██████▍ | 4564/7045 [14:50:15<8:25:05, 12.21s/it] {'loss': 1.1172, 'learning_rate': 1.4575669498824358e-06, 'epoch': 0.65} + 65%|██████▍ | 4564/7045 [14:50:15<8:25:05, 12.21s/it] 65%|██████▍ | 4565/7045 [14:50:28<8:31:18, 12.37s/it] {'loss': 1.1113, 'learning_rate': 1.4565223313777472e-06, 'epoch': 0.65} + 65%|██████▍ | 4565/7045 [14:50:28<8:31:18, 12.37s/it] 65%|██████▍ | 4566/7045 [14:50:40<8:24:01, 12.20s/it] {'loss': 1.1211, 'learning_rate': 1.455477933449973e-06, 'epoch': 0.65} + 65%|██████▍ | 4566/7045 [14:50:40<8:24:01, 12.20s/it] 65%|██████▍ | 4567/7045 [14:50:52<8:22:45, 12.17s/it] {'loss': 1.0796, 'learning_rate': 1.4544337563198829e-06, 'epoch': 0.65} + 65%|██████▍ | 4567/7045 [14:50:52<8:22:45, 12.17s/it] 65%|██████▍ | 4568/7045 [14:51:04<8:15:41, 12.01s/it] {'loss': 1.0547, 'learning_rate': 1.4533898002082036e-06, 'epoch': 0.65} + 65%|██████▍ | 4568/7045 [14:51:04<8:15:41, 12.01s/it] 65%|██████▍ | 4569/7045 [14:51:15<8:05:22, 11.76s/it] {'loss': 1.0566, 'learning_rate': 1.4523460653356114e-06, 'epoch': 0.65} + 65%|██████▍ | 4569/7045 [14:51:15<8:05:22, 11.76s/it] 65%|██████▍ | 4570/7045 [14:51:26<7:57:03, 11.56s/it] {'loss': 1.1191, 'learning_rate': 1.4513025519227387e-06, 'epoch': 0.65} + 65%|██████▍ | 4570/7045 [14:51:26<7:57:03, 11.56s/it] 65%|██████▍ | 4571/7045 [14:51:37<7:48:56, 11.37s/it] {'loss': 1.0928, 'learning_rate': 1.4502592601901688e-06, 'epoch': 0.65} + 65%|██████▍ | 4571/7045 [14:51:37<7:48:56, 11.37s/it] 65%|██████▍ | 4572/7045 [14:51:48<7:45:31, 11.29s/it] {'loss': 1.1035, 'learning_rate': 1.4492161903584412e-06, 'epoch': 0.65} + 65%|██████▍ | 4572/7045 [14:51:48<7:45:31, 11.29s/it] 65%|██████▍ | 4573/7045 [14:52:01<8:07:38, 11.84s/it] {'loss': 1.0674, 'learning_rate': 1.4481733426480455e-06, 'epoch': 0.65} + 65%|██████▍ | 4573/7045 [14:52:01<8:07:38, 11.84s/it] 65%|██████▍ | 4574/7045 [14:52:14<8:18:47, 12.11s/it] {'loss': 1.1309, 'learning_rate': 1.4471307172794241e-06, 'epoch': 0.65} + 65%|██████▍ | 4574/7045 [14:52:14<8:18:47, 12.11s/it] 65%|██████▍ | 4575/7045 [14:52:27<8:35:06, 12.51s/it] {'loss': 1.0703, 'learning_rate': 1.4460883144729764e-06, 'epoch': 0.65} + 65%|██████▍ | 4575/7045 [14:52:27<8:35:06, 12.51s/it] 65%|██████▍ | 4576/7045 [14:52:38<8:18:33, 12.12s/it] {'loss': 1.1045, 'learning_rate': 1.4450461344490496e-06, 'epoch': 0.65} + 65%|██████▍ | 4576/7045 [14:52:38<8:18:33, 12.12s/it] 65%|████��█▍ | 4577/7045 [14:52:50<8:08:19, 11.87s/it] {'loss': 1.125, 'learning_rate': 1.4440041774279484e-06, 'epoch': 0.65} + 65%|██████▍ | 4577/7045 [14:52:50<8:08:19, 11.87s/it] 65%|██████▍ | 4578/7045 [14:53:01<8:07:35, 11.86s/it] {'loss': 1.0962, 'learning_rate': 1.4429624436299274e-06, 'epoch': 0.65} + 65%|██████▍ | 4578/7045 [14:53:01<8:07:35, 11.86s/it] 65%|██████▍ | 4579/7045 [14:53:15<8:29:50, 12.40s/it] {'loss': 1.085, 'learning_rate': 1.4419209332751937e-06, 'epoch': 0.65} + 65%|██████▍ | 4579/7045 [14:53:15<8:29:50, 12.40s/it] 65%|██████▌ | 4580/7045 [14:53:27<8:17:49, 12.12s/it] {'loss': 1.1807, 'learning_rate': 1.4408796465839097e-06, 'epoch': 0.65} + 65%|██████▌ | 4580/7045 [14:53:27<8:17:49, 12.12s/it] 65%|██████▌ | 4581/7045 [14:53:40<8:31:46, 12.46s/it] {'loss': 1.063, 'learning_rate': 1.4398385837761901e-06, 'epoch': 0.65} + 65%|██████▌ | 4581/7045 [14:53:40<8:31:46, 12.46s/it] 65%|██████▌ | 4582/7045 [14:53:51<8:10:54, 11.96s/it] {'loss': 1.1641, 'learning_rate': 1.438797745072099e-06, 'epoch': 0.65} + 65%|██████▌ | 4582/7045 [14:53:51<8:10:54, 11.96s/it] 65%|██████▌ | 4583/7045 [14:54:02<7:58:50, 11.67s/it] {'loss': 1.1084, 'learning_rate': 1.4377571306916576e-06, 'epoch': 0.65} + 65%|██████▌ | 4583/7045 [14:54:02<7:58:50, 11.67s/it] 65%|██████▌ | 4584/7045 [14:54:14<8:07:11, 11.88s/it] {'loss': 1.0894, 'learning_rate': 1.4367167408548366e-06, 'epoch': 0.65} + 65%|██████▌ | 4584/7045 [14:54:14<8:07:11, 11.88s/it] 65%|██████▌ | 4585/7045 [14:54:25<7:56:57, 11.63s/it] {'loss': 1.0591, 'learning_rate': 1.4356765757815593e-06, 'epoch': 0.65} + 65%|██████▌ | 4585/7045 [14:54:25<7:56:57, 11.63s/it] 65%|██████▌ | 4586/7045 [14:54:36<7:48:33, 11.43s/it] {'loss': 1.0947, 'learning_rate': 1.4346366356917047e-06, 'epoch': 0.65} + 65%|██████▌ | 4586/7045 [14:54:36<7:48:33, 11.43s/it] 65%|██████▌ | 4587/7045 [14:54:48<7:49:45, 11.47s/it] {'loss': 1.0977, 'learning_rate': 1.4335969208050986e-06, 'epoch': 0.65} + 65%|██████▌ | 4587/7045 [14:54:48<7:49:45, 11.47s/it] 65%|██████▌ | 4588/7045 [14:55:00<8:03:35, 11.81s/it] {'loss': 1.1152, 'learning_rate': 1.4325574313415263e-06, 'epoch': 0.65} + 65%|██████▌ | 4588/7045 [14:55:00<8:03:35, 11.81s/it] 65%|██████▌ | 4589/7045 [14:55:11<7:54:19, 11.59s/it] {'loss': 1.1357, 'learning_rate': 1.4315181675207193e-06, 'epoch': 0.65} + 65%|██████▌ | 4589/7045 [14:55:11<7:54:19, 11.59s/it] 65%|██████▌ | 4590/7045 [14:55:23<7:53:53, 11.58s/it] {'loss': 1.0908, 'learning_rate': 1.4304791295623633e-06, 'epoch': 0.65} + 65%|██████▌ | 4590/7045 [14:55:23<7:53:53, 11.58s/it] 65%|██████▌ | 4591/7045 [14:55:35<8:04:35, 11.85s/it] {'loss': 1.0869, 'learning_rate': 1.4294403176860978e-06, 'epoch': 0.65} + 65%|██████▌ | 4591/7045 [14:55:35<8:04:35, 11.85s/it] 65%|██████▌ | 4592/7045 [14:55:46<7:54:08, 11.60s/it] {'loss': 1.1006, 'learning_rate': 1.428401732111514e-06, 'epoch': 0.65} + 65%|██████▌ | 4592/7045 [14:55:46<7:54:08, 11.60s/it] 65%|██████▌ | 4593/7045 [14:55:58<7:56:14, 11.65s/it] {'loss': 1.1035, 'learning_rate': 1.4273633730581532e-06, 'epoch': 0.65} + 65%|██████▌ | 4593/7045 [14:55:58<7:56:14, 11.65s/it] 65%|██████▌ | 4594/7045 [14:56:09<7:49:25, 11.49s/it] {'loss': 1.1504, 'learning_rate': 1.4263252407455125e-06, 'epoch': 0.65} + 65%|██████▌ | 4594/7045 [14:56:09<7:49:25, 11.49s/it] 65%|██████▌ | 4595/7045 [14:56:20<7:45:53, 11.41s/it] {'loss': 1.1172, 'learning_rate': 1.4252873353930369e-06, 'epoch': 0.65} + 65%|█���████▌ | 4595/7045 [14:56:20<7:45:53, 11.41s/it] 65%|██████▌ | 4596/7045 [14:56:32<7:45:58, 11.42s/it] {'loss': 1.1172, 'learning_rate': 1.4242496572201251e-06, 'epoch': 0.65} + 65%|██████▌ | 4596/7045 [14:56:32<7:45:58, 11.42s/it] 65%|██████▌ | 4597/7045 [14:56:43<7:46:29, 11.43s/it] {'loss': 1.1055, 'learning_rate': 1.4232122064461301e-06, 'epoch': 0.65} + 65%|██████▌ | 4597/7045 [14:56:43<7:46:29, 11.43s/it] 65%|██████▌ | 4598/7045 [14:56:55<7:54:37, 11.64s/it] {'loss': 1.1084, 'learning_rate': 1.4221749832903526e-06, 'epoch': 0.65} + 65%|██████▌ | 4598/7045 [14:56:55<7:54:37, 11.64s/it] 65%|██████▌ | 4599/7045 [14:57:07<7:58:58, 11.75s/it] {'loss': 1.1191, 'learning_rate': 1.4211379879720494e-06, 'epoch': 0.65} + 65%|██████▌ | 4599/7045 [14:57:07<7:58:58, 11.75s/it] 65%|██████▌ | 4600/7045 [14:57:18<7:48:09, 11.49s/it] {'loss': 1.085, 'learning_rate': 1.420101220710426e-06, 'epoch': 0.65} + 65%|██████▌ | 4600/7045 [14:57:18<7:48:09, 11.49s/it] 65%|██████▌ | 4601/7045 [14:57:31<8:03:11, 11.86s/it] {'loss': 1.1084, 'learning_rate': 1.41906468172464e-06, 'epoch': 0.65} + 65%|██████▌ | 4601/7045 [14:57:31<8:03:11, 11.86s/it] 65%|██████▌ | 4602/7045 [14:57:42<7:51:28, 11.58s/it] {'loss': 1.1094, 'learning_rate': 1.4180283712338039e-06, 'epoch': 0.65} + 65%|██████▌ | 4602/7045 [14:57:42<7:51:28, 11.58s/it] 65%|██████▌ | 4603/7045 [14:57:55<8:05:44, 11.93s/it] {'loss': 1.125, 'learning_rate': 1.4169922894569773e-06, 'epoch': 0.65} + 65%|██████▌ | 4603/7045 [14:57:55<8:05:44, 11.93s/it] 65%|██████▌ | 4604/7045 [14:58:06<7:53:56, 11.65s/it] {'loss': 1.0977, 'learning_rate': 1.4159564366131746e-06, 'epoch': 0.65} + 65%|██████▌ | 4604/7045 [14:58:06<7:53:56, 11.65s/it] 65%|██████▌ | 4605/7045 [14:58:17<7:46:23, 11.47s/it] {'loss': 1.1191, 'learning_rate': 1.414920812921362e-06, 'epoch': 0.65} + 65%|██████▌ | 4605/7045 [14:58:17<7:46:23, 11.47s/it] 65%|██████▌ | 4606/7045 [14:58:28<7:44:23, 11.42s/it] {'loss': 1.1089, 'learning_rate': 1.4138854186004553e-06, 'epoch': 0.65} + 65%|██████▌ | 4606/7045 [14:58:28<7:44:23, 11.42s/it] 65%|██████▌ | 4607/7045 [14:58:40<7:44:24, 11.43s/it] {'loss': 1.0806, 'learning_rate': 1.4128502538693213e-06, 'epoch': 0.65} + 65%|██████▌ | 4607/7045 [14:58:40<7:44:24, 11.43s/it] 65%|██████▌ | 4608/7045 [14:58:50<7:38:27, 11.29s/it] {'loss': 1.0889, 'learning_rate': 1.411815318946782e-06, 'epoch': 0.65} + 65%|██████▌ | 4608/7045 [14:58:51<7:38:27, 11.29s/it] 65%|██████▌ | 4609/7045 [14:59:02<7:36:34, 11.25s/it] {'loss': 1.127, 'learning_rate': 1.4107806140516066e-06, 'epoch': 0.65} + 65%|██████▌ | 4609/7045 [14:59:02<7:36:34, 11.25s/it] 65%|██████▌ | 4610/7045 [14:59:13<7:39:06, 11.31s/it] {'loss': 1.1089, 'learning_rate': 1.4097461394025198e-06, 'epoch': 0.65} + 65%|██████▌ | 4610/7045 [14:59:13<7:39:06, 11.31s/it] 65%|██████▌ | 4611/7045 [14:59:25<7:40:53, 11.36s/it] {'loss': 1.1538, 'learning_rate': 1.4087118952181934e-06, 'epoch': 0.65} + 65%|██████▌ | 4611/7045 [14:59:25<7:40:53, 11.36s/it] 65%|██████▌ | 4612/7045 [14:59:39<8:19:21, 12.31s/it] {'loss': 1.0762, 'learning_rate': 1.4076778817172523e-06, 'epoch': 0.65} + 65%|██████▌ | 4612/7045 [14:59:39<8:19:21, 12.31s/it] 65%|██████▌ | 4613/7045 [14:59:51<8:16:24, 12.25s/it] {'loss': 1.1201, 'learning_rate': 1.406644099118275e-06, 'epoch': 0.65} + 65%|██████▌ | 4613/7045 [14:59:51<8:16:24, 12.25s/it] 65%|██████▌ | 4614/7045 [15:00:02<8:00:35, 11.86s/it] {'loss': 1.123, 'learning_rate': 1.4056105476397875e-06, 'epoch': 0.65} + 65%|██████▌ | 4614/7045 [15:00:02<8:00:35, 11.86s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 12 bytes but only got 10. + warnings.warn(str(msg)) + 66%|██████▌ | 4615/7045 [15:00:13<7:49:23, 11.59s/it] {'loss': 1.1152, 'learning_rate': 1.4045772275002673e-06, 'epoch': 0.66} + 66%|██████▌ | 4615/7045 [15:00:13<7:49:23, 11.59s/it] 66%|██████▌ | 4616/7045 [15:00:24<7:45:58, 11.51s/it] {'loss': 1.1426, 'learning_rate': 1.4035441389181456e-06, 'epoch': 0.66} + 66%|██████▌ | 4616/7045 [15:00:24<7:45:58, 11.51s/it] 66%|██████▌ | 4617/7045 [15:00:36<7:41:50, 11.41s/it] {'loss': 1.1396, 'learning_rate': 1.4025112821118043e-06, 'epoch': 0.66} + 66%|██████▌ | 4617/7045 [15:00:36<7:41:50, 11.41s/it] 66%|██████▌ | 4618/7045 [15:00:47<7:39:58, 11.37s/it] {'loss': 1.1348, 'learning_rate': 1.4014786572995725e-06, 'epoch': 0.66} + 66%|██████▌ | 4618/7045 [15:00:47<7:39:58, 11.37s/it] 66%|██████▌ | 4619/7045 [15:01:00<8:03:36, 11.96s/it] {'loss': 1.0713, 'learning_rate': 1.4004462646997357e-06, 'epoch': 0.66} + 66%|██████▌ | 4619/7045 [15:01:00<8:03:36, 11.96s/it] 66%|██████▌ | 4620/7045 [15:01:11<7:54:08, 11.73s/it] {'loss': 1.0996, 'learning_rate': 1.3994141045305253e-06, 'epoch': 0.66} + 66%|██████▌ | 4620/7045 [15:01:11<7:54:08, 11.73s/it] 66%|██████▌ | 4621/7045 [15:01:23<7:49:25, 11.62s/it] {'loss': 1.124, 'learning_rate': 1.398382177010128e-06, 'epoch': 0.66} + 66%|██████▌ | 4621/7045 [15:01:23<7:49:25, 11.62s/it] 66%|██████▌ | 4622/7045 [15:01:34<7:39:38, 11.38s/it] {'loss': 1.1055, 'learning_rate': 1.3973504823566778e-06, 'epoch': 0.66} + 66%|██████▌ | 4622/7045 [15:01:34<7:39:38, 11.38s/it] 66%|██████▌ | 4623/7045 [15:01:47<8:02:46, 11.96s/it] {'loss': 1.1108, 'learning_rate': 1.39631902078826e-06, 'epoch': 0.66} + 66%|██████▌ | 4623/7045 [15:01:47<8:02:46, 11.96s/it] 66%|██████▌ | 4624/7045 [15:01:58<7:49:45, 11.64s/it] {'loss': 1.082, 'learning_rate': 1.3952877925229137e-06, 'epoch': 0.66} + 66%|██████▌ | 4624/7045 [15:01:58<7:49:45, 11.64s/it] 66%|██████▌ | 4625/7045 [15:02:10<7:53:05, 11.73s/it] {'loss': 1.0938, 'learning_rate': 1.3942567977786253e-06, 'epoch': 0.66} + 66%|██████▌ | 4625/7045 [15:02:10<7:53:05, 11.73s/it] 66%|██████▌ | 4626/7045 [15:02:21<7:49:20, 11.64s/it] {'loss': 1.083, 'learning_rate': 1.3932260367733318e-06, 'epoch': 0.66} + 66%|██████▌ | 4626/7045 [15:02:21<7:49:20, 11.64s/it] 66%|██████▌ | 4627/7045 [15:02:33<7:45:50, 11.56s/it] {'loss': 1.1182, 'learning_rate': 1.3921955097249246e-06, 'epoch': 0.66} + 66%|██████▌ | 4627/7045 [15:02:33<7:45:50, 11.56s/it] 66%|██████▌ | 4628/7045 [15:02:46<8:03:07, 11.99s/it] {'loss': 1.0894, 'learning_rate': 1.3911652168512405e-06, 'epoch': 0.66} + 66%|██████▌ | 4628/7045 [15:02:46<8:03:07, 11.99s/it] 66%|██████▌ | 4629/7045 [15:02:57<8:00:48, 11.94s/it] {'loss': 1.0679, 'learning_rate': 1.3901351583700703e-06, 'epoch': 0.66} + 66%|██████▌ | 4629/7045 [15:02:57<8:00:48, 11.94s/it] 66%|██████▌ | 4630/7045 [15:03:09<7:53:10, 11.76s/it] {'loss': 1.1304, 'learning_rate': 1.3891053344991557e-06, 'epoch': 0.66} + 66%|██████▌ | 4630/7045 [15:03:09<7:53:10, 11.76s/it] 66%|██████▌ | 4631/7045 [15:03:20<7:42:43, 11.50s/it] {'loss': 1.1172, 'learning_rate': 1.3880757454561847e-06, 'epoch': 0.66} + 66%|██████▌ | 4631/7045 [15:03:20<7:42:43, 11.50s/it] 66%|██████▌ | 4632/7045 [15:03:33<8:00:02, 11.94s/it] {'loss': 1.1094, 'learning_rate': 1.387046391458801e-06, 'epoch': 0.66} + 66%|██████▌ | 4632/7045 [15:03:33<8:00:02, 11.94s/it] 66%|██████▌ | 4633/7045 [15:03:45<8:04:11, 12.04s/it] {'loss': 1.0879, 'learning_rate': 1.3860172727245947e-06, 'epoch': 0.66} + 66%|██████▌ | 4633/7045 [15:03:45<8:04:11, 12.04s/it] 66%|██████▌ | 4634/7045 [15:03:57<7:59:52, 11.94s/it] {'loss': 1.0947, 'learning_rate': 1.3849883894711064e-06, 'epoch': 0.66} + 66%|██████▌ | 4634/7045 [15:03:57<7:59:52, 11.94s/it] 66%|██████▌ | 4635/7045 [15:04:08<7:57:40, 11.89s/it] {'loss': 1.1406, 'learning_rate': 1.3839597419158298e-06, 'epoch': 0.66} + 66%|██████▌ | 4635/7045 [15:04:08<7:57:40, 11.89s/it] 66%|██████▌ | 4636/7045 [15:04:20<7:55:41, 11.85s/it] {'loss': 1.1011, 'learning_rate': 1.3829313302762065e-06, 'epoch': 0.66} + 66%|██████▌ | 4636/7045 [15:04:20<7:55:41, 11.85s/it] 66%|██████▌ | 4637/7045 [15:04:31<7:44:41, 11.58s/it] {'loss': 1.1143, 'learning_rate': 1.3819031547696271e-06, 'epoch': 0.66} + 66%|██████▌ | 4637/7045 [15:04:31<7:44:41, 11.58s/it] 66%|██████▌ | 4638/7045 [15:04:42<7:35:29, 11.35s/it] {'loss': 1.1226, 'learning_rate': 1.380875215613436e-06, 'epoch': 0.66} + 66%|██████▌ | 4638/7045 [15:04:42<7:35:29, 11.35s/it] 66%|██████▌ | 4639/7045 [15:04:53<7:33:44, 11.32s/it] {'loss': 1.1006, 'learning_rate': 1.379847513024924e-06, 'epoch': 0.66} + 66%|██████▌ | 4639/7045 [15:04:53<7:33:44, 11.32s/it] 66%|██████▌ | 4640/7045 [15:05:04<7:33:34, 11.32s/it] {'loss': 1.1133, 'learning_rate': 1.3788200472213331e-06, 'epoch': 0.66} + 66%|██████▌ | 4640/7045 [15:05:04<7:33:34, 11.32s/it] 66%|██████▌ | 4641/7045 [15:05:15<7:28:43, 11.20s/it] {'loss': 1.1348, 'learning_rate': 1.377792818419858e-06, 'epoch': 0.66} + 66%|██████▌ | 4641/7045 [15:05:15<7:28:43, 11.20s/it] 66%|██████▌ | 4642/7045 [15:05:28<7:40:00, 11.49s/it] {'loss': 1.1001, 'learning_rate': 1.3767658268376377e-06, 'epoch': 0.66} + 66%|██████▌ | 4642/7045 [15:05:28<7:40:00, 11.49s/it] 66%|██████▌ | 4643/7045 [15:05:39<7:36:05, 11.39s/it] {'loss': 1.1064, 'learning_rate': 1.3757390726917668e-06, 'epoch': 0.66} + 66%|██████▌ | 4643/7045 [15:05:39<7:36:05, 11.39s/it] 66%|██████▌ | 4644/7045 [15:05:50<7:39:14, 11.48s/it] {'loss': 1.1172, 'learning_rate': 1.3747125561992863e-06, 'epoch': 0.66} + 66%|██████▌ | 4644/7045 [15:05:50<7:39:14, 11.48s/it] 66%|██████▌ | 4645/7045 [15:06:03<7:49:07, 11.73s/it] {'loss': 1.0977, 'learning_rate': 1.3736862775771858e-06, 'epoch': 0.66} + 66%|██████▌ | 4645/7045 [15:06:03<7:49:07, 11.73s/it] 66%|██████▌ | 4646/7045 [15:06:14<7:42:18, 11.56s/it] {'loss': 1.1133, 'learning_rate': 1.3726602370424093e-06, 'epoch': 0.66} + 66%|██████▌ | 4646/7045 [15:06:14<7:42:18, 11.56s/it] 66%|██████▌ | 4647/7045 [15:06:26<7:44:08, 11.61s/it] {'loss': 1.1006, 'learning_rate': 1.3716344348118469e-06, 'epoch': 0.66} + 66%|██████▌ | 4647/7045 [15:06:26<7:44:08, 11.61s/it] 66%|██████▌ | 4648/7045 [15:06:38<7:50:00, 11.77s/it] {'loss': 1.0718, 'learning_rate': 1.3706088711023374e-06, 'epoch': 0.66} + 66%|██████▌ | 4648/7045 [15:06:38<7:50:00, 11.77s/it] 66%|██████▌ | 4649/7045 [15:06:49<7:41:33, 11.56s/it] {'loss': 1.1465, 'learning_rate': 1.369583546130674e-06, 'epoch': 0.66} + 66%|██████▌ | 4649/7045 [15:06:49<7:41:33, 11.56s/it] 66%|██████▌ | 4650/7045 [15:07:00<7:35:33, 11.41s/it] {'loss': 1.1191, 'learning_rate': 1.3685584601135933e-06, 'epoch': 0.66} + 66%|██████▌ | 4650/7045 [15:07:00<7:35:33, 11.41s/it] 66%|██████▌ | 4651/7045 [15:07:11<7:37:47, 11.47s/it] {'loss': 1.0947, 'learning_rate': 1.3675336132677868e-06, 'epoch': 0.66} + 66%|██████▌ | 4651/7045 [15:07:11<7:37:47, 11.47s/it] 66%|██████▌ | 4652/7045 [15:07:23<7:35:38, 11.42s/it] {'loss': 1.1177, 'learning_rate': 1.3665090058098917e-06, 'epoch': 0.66} + 66%|██████▌ | 4652/7045 [15:07:23<7:35:38, 11.42s/it] 66%|██████▌ | 4653/7045 [15:07:35<7:47:54, 11.74s/it] {'loss': 1.085, 'learning_rate': 1.3654846379564965e-06, 'epoch': 0.66} + 66%|██████▌ | 4653/7045 [15:07:35<7:47:54, 11.74s/it] 66%|██████▌ | 4654/7045 [15:07:47<7:46:40, 11.71s/it] {'loss': 1.1465, 'learning_rate': 1.3644605099241394e-06, 'epoch': 0.66} + 66%|██████▌ | 4654/7045 [15:07:47<7:46:40, 11.71s/it] 66%|██████▌ | 4655/7045 [15:07:58<7:39:31, 11.54s/it] {'loss': 1.1006, 'learning_rate': 1.3634366219293066e-06, 'epoch': 0.66} + 66%|██████▌ | 4655/7045 [15:07:58<7:39:31, 11.54s/it] 66%|██████▌ | 4656/7045 [15:08:09<7:32:35, 11.37s/it] {'loss': 1.084, 'learning_rate': 1.3624129741884326e-06, 'epoch': 0.66} + 66%|██████▌ | 4656/7045 [15:08:09<7:32:35, 11.37s/it] 66%|██████▌ | 4657/7045 [15:08:20<7:32:15, 11.36s/it] {'loss': 1.1025, 'learning_rate': 1.3613895669179045e-06, 'epoch': 0.66} + 66%|██████▌ | 4657/7045 [15:08:20<7:32:15, 11.36s/it] 66%|██████▌ | 4658/7045 [15:08:34<7:56:27, 11.98s/it] {'loss': 1.0908, 'learning_rate': 1.360366400334056e-06, 'epoch': 0.66} + 66%|██████▌ | 4658/7045 [15:08:34<7:56:27, 11.98s/it] 66%|██████▌ | 4659/7045 [15:08:45<7:44:54, 11.69s/it] {'loss': 1.0947, 'learning_rate': 1.3593434746531691e-06, 'epoch': 0.66} + 66%|██████▌ | 4659/7045 [15:08:45<7:44:54, 11.69s/it] 66%|██████▌ | 4660/7045 [15:08:56<7:40:38, 11.59s/it] {'loss': 1.0781, 'learning_rate': 1.3583207900914787e-06, 'epoch': 0.66} + 66%|██████▌ | 4660/7045 [15:08:56<7:40:38, 11.59s/it] 66%|██████▌ | 4661/7045 [15:09:07<7:36:07, 11.48s/it] {'loss': 1.0786, 'learning_rate': 1.357298346865164e-06, 'epoch': 0.66} + 66%|██████▌ | 4661/7045 [15:09:07<7:36:07, 11.48s/it] 66%|██████▌ | 4662/7045 [15:09:18<7:31:43, 11.37s/it] {'loss': 1.1074, 'learning_rate': 1.3562761451903577e-06, 'epoch': 0.66} + 66%|██████▌ | 4662/7045 [15:09:18<7:31:43, 11.37s/it] 66%|██████▌ | 4663/7045 [15:09:29<7:26:00, 11.23s/it] {'loss': 1.0762, 'learning_rate': 1.3552541852831382e-06, 'epoch': 0.66} + 66%|██████▌ | 4663/7045 [15:09:29<7:26:00, 11.23s/it] 66%|██████▌ | 4664/7045 [15:09:43<7:48:09, 11.80s/it] {'loss': 1.0718, 'learning_rate': 1.3542324673595331e-06, 'epoch': 0.66} + 66%|██████▌ | 4664/7045 [15:09:43<7:48:09, 11.80s/it] 66%|██████▌ | 4665/7045 [15:09:54<7:40:37, 11.61s/it] {'loss': 1.1709, 'learning_rate': 1.3532109916355201e-06, 'epoch': 0.66} + 66%|██████▌ | 4665/7045 [15:09:54<7:40:37, 11.61s/it] 66%|██████▌ | 4666/7045 [15:10:05<7:34:58, 11.47s/it] {'loss': 1.1318, 'learning_rate': 1.3521897583270266e-06, 'epoch': 0.66} + 66%|██████▌ | 4666/7045 [15:10:05<7:34:58, 11.47s/it] 66%|██████▌ | 4667/7045 [15:10:16<7:30:28, 11.37s/it] {'loss': 1.0674, 'learning_rate': 1.3511687676499252e-06, 'epoch': 0.66} + 66%|██████▌ | 4667/7045 [15:10:16<7:30:28, 11.37s/it] 66%|██████▋ | 4668/7045 [15:10:27<7:29:51, 11.36s/it] {'loss': 1.1201, 'learning_rate': 1.3501480198200412e-06, 'epoch': 0.66} + 66%|██████▋ | 4668/7045 [15:10:27<7:29:51, 11.36s/it] 66%|██████▋ | 4669/7045 [15:10:38<7:24:58, 11.24s/it] {'loss': 1.124, 'learning_rate': 1.3491275150531463e-06, 'epoch': 0.66} + 66%|██████▋ | 4669/7045 [15:10:38<7:24:58, 11.24s/it] 66%|██████▋ | 4670/7045 [15:10:50<7:27:42, 11.31s/it] {'loss': 1.1133, 'learning_rate': 1.3481072535649598e-06, 'epoch': 0.66} + 66%|██████▋ | 4670/7045 [15:10:50<7:27:42, 11.31s/it] 66%|██████▋ | 4671/7045 [15:11:01<7:28:21, 11.33s/it] {'loss': 1.1187, 'learning_rate': 1.3470872355711528e-06, 'epoch': 0.66} + 66%|██████▋ | 4671/7045 [15:11:01<7:28:21, 11.33s/it] 66%|██████▋ | 4672/7045 [15:11:13<7:34:07, 11.48s/it] {'loss': 1.1104, 'learning_rate': 1.3460674612873417e-06, 'epoch': 0.66} + 66%|██████▋ | 4672/7045 [15:11:13<7:34:07, 11.48s/it] 66%|██████▋ | 4673/7045 [15:11:24<7:27:25, 11.32s/it] {'loss': 1.0996, 'learning_rate': 1.3450479309290948e-06, 'epoch': 0.66} + 66%|██████▋ | 4673/7045 [15:11:24<7:27:25, 11.32s/it] 66%|██████▋ | 4674/7045 [15:11:35<7:29:35, 11.38s/it] {'loss': 1.0801, 'learning_rate': 1.3440286447119256e-06, 'epoch': 0.66} + 66%|██████▋ | 4674/7045 [15:11:35<7:29:35, 11.38s/it] 66%|██████▋ | 4675/7045 [15:11:47<7:28:42, 11.36s/it] {'loss': 1.1196, 'learning_rate': 1.3430096028512963e-06, 'epoch': 0.66} + 66%|██████▋ | 4675/7045 [15:11:47<7:28:42, 11.36s/it] 66%|██████▋ | 4676/7045 [15:11:59<7:44:40, 11.77s/it] {'loss': 1.1338, 'learning_rate': 1.3419908055626194e-06, 'epoch': 0.66} + 66%|██████▋ | 4676/7045 [15:11:59<7:44:40, 11.77s/it] 66%|██████▋ | 4677/7045 [15:12:10<7:34:31, 11.52s/it] {'loss': 1.0942, 'learning_rate': 1.340972253061256e-06, 'epoch': 0.66} + 66%|██████▋ | 4677/7045 [15:12:10<7:34:31, 11.52s/it] 66%|██████▋ | 4678/7045 [15:12:23<7:46:16, 11.82s/it] {'loss': 1.0967, 'learning_rate': 1.3399539455625118e-06, 'epoch': 0.66} + 66%|██████▋ | 4678/7045 [15:12:23<7:46:16, 11.82s/it] 66%|██████▋ | 4679/7045 [15:12:34<7:38:33, 11.63s/it] {'loss': 1.0991, 'learning_rate': 1.3389358832816455e-06, 'epoch': 0.66} + 66%|██████▋ | 4679/7045 [15:12:34<7:38:33, 11.63s/it] 66%|██████▋ | 4680/7045 [15:12:45<7:27:36, 11.36s/it] {'loss': 1.0947, 'learning_rate': 1.3379180664338603e-06, 'epoch': 0.66} + 66%|██████▋ | 4680/7045 [15:12:45<7:27:36, 11.36s/it] 66%|██████▋ | 4681/7045 [15:12:56<7:28:33, 11.38s/it] {'loss': 1.123, 'learning_rate': 1.3369004952343076e-06, 'epoch': 0.66} + 66%|██████▋ | 4681/7045 [15:12:56<7:28:33, 11.38s/it] 66%|██████▋ | 4682/7045 [15:13:09<7:43:41, 11.77s/it] {'loss': 1.0591, 'learning_rate': 1.3358831698980904e-06, 'epoch': 0.66} + 66%|██████▋ | 4682/7045 [15:13:09<7:43:41, 11.77s/it] 66%|██████▋ | 4683/7045 [15:13:21<7:42:05, 11.74s/it] {'loss': 1.1279, 'learning_rate': 1.334866090640255e-06, 'epoch': 0.66} + 66%|██████▋ | 4683/7045 [15:13:21<7:42:05, 11.74s/it] 66%|██████▋ | 4684/7045 [15:13:32<7:35:55, 11.59s/it] {'loss': 1.0889, 'learning_rate': 1.3338492576758005e-06, 'epoch': 0.66} + 66%|██████▋ | 4684/7045 [15:13:32<7:35:55, 11.59s/it] 67%|██████▋ | 4685/7045 [15:13:43<7:30:00, 11.44s/it] {'loss': 1.1182, 'learning_rate': 1.33283267121967e-06, 'epoch': 0.67} + 67%|██████▋ | 4685/7045 [15:13:43<7:30:00, 11.44s/it] 67%|██████▋ | 4686/7045 [15:13:55<7:36:19, 11.61s/it] {'loss': 1.0737, 'learning_rate': 1.3318163314867555e-06, 'epoch': 0.67} + 67%|██████▋ | 4686/7045 [15:13:55<7:36:19, 11.61s/it] 67%|██████▋ | 4687/7045 [15:14:06<7:33:12, 11.53s/it] {'loss': 1.1289, 'learning_rate': 1.330800238691899e-06, 'epoch': 0.67} + 67%|██████▋ | 4687/7045 [15:14:06<7:33:12, 11.53s/it] 67%|██████▋ | 4688/7045 [15:14:17<7:29:03, 11.43s/it] {'loss': 1.1211, 'learning_rate': 1.329784393049887e-06, 'epoch': 0.67} + 67%|██████▋ | 4688/7045 [15:14:17<7:29:03, 11.43s/it] 67%|██████▋ | 4689/7045 [15:14:29<7:25:35, 11.35s/it] {'loss': 1.1338, 'learning_rate': 1.3287687947754548e-06, 'epoch': 0.67} + 67%|██████▋ | 4689/7045 [15:14:29<7:25:35, 11.35s/it] 67%|██████▋ | 4690/7045 [15:14:41<7:36:27, 11.63s/it] {'loss': 1.1099, 'learning_rate': 1.327753444083289e-06, 'epoch': 0.67} + 67%|██████▋ | 4690/7045 [15:14:41<7:36:27, 11.63s/it] 67%|██████▋ | 4691/7045 [15:14:54<7:48:21, 11.94s/it] {'loss': 1.1328, 'learning_rate': 1.326738341188019e-06, 'epoch': 0.67} + 67%|██████▋ | 4691/7045 [15:14:54<7:48:21, 11.94s/it] 67%|██████▋ | 4692/7045 [15:15:05<7:38:05, 11.68s/it] {'loss': 1.1074, 'learning_rate': 1.3257234863042228e-06, 'epoch': 0.67} + 67%|██████▋ | 4692/7045 [15:15:05<7:38:05, 11.68s/it] 67%|██████▋ | 4693/7045 [15:15:16<7:34:37, 11.60s/it] {'loss': 1.1104, 'learning_rate': 1.3247088796464292e-06, 'epoch': 0.67} + 67%|██████▋ | 4693/7045 [15:15:16<7:34:37, 11.60s/it] 67%|██████▋ | 4694/7045 [15:15:27<7:31:04, 11.51s/it] {'loss': 1.1064, 'learning_rate': 1.323694521429109e-06, 'epoch': 0.67} + 67%|██████▋ | 4694/7045 [15:15:27<7:31:04, 11.51s/it] 67%|██████▋ | 4695/7045 [15:15:38<7:23:49, 11.33s/it] {'loss': 1.1152, 'learning_rate': 1.3226804118666871e-06, 'epoch': 0.67} + 67%|██████▋ | 4695/7045 [15:15:38<7:23:49, 11.33s/it] 67%|██████▋ | 4696/7045 [15:15:50<7:24:56, 11.37s/it] {'loss': 1.1191, 'learning_rate': 1.3216665511735305e-06, 'epoch': 0.67} + 67%|██████▋ | 4696/7045 [15:15:50<7:24:56, 11.37s/it] 67%|██████▋ | 4697/7045 [15:16:01<7:27:13, 11.43s/it] {'loss': 1.1035, 'learning_rate': 1.320652939563955e-06, 'epoch': 0.67} + 67%|██████▋ | 4697/7045 [15:16:01<7:27:13, 11.43s/it] 67%|██████▋ | 4698/7045 [15:16:13<7:30:31, 11.52s/it] {'loss': 1.1465, 'learning_rate': 1.3196395772522258e-06, 'epoch': 0.67} + 67%|██████▋ | 4698/7045 [15:16:13<7:30:31, 11.52s/it] 67%|██████▋ | 4699/7045 [15:16:24<7:23:51, 11.35s/it] {'loss': 1.0977, 'learning_rate': 1.3186264644525528e-06, 'epoch': 0.67} + 67%|██████▋ | 4699/7045 [15:16:24<7:23:51, 11.35s/it] 67%|██████▋ | 4700/7045 [15:16:35<7:20:09, 11.26s/it] {'loss': 1.0811, 'learning_rate': 1.3176136013790936e-06, 'epoch': 0.67} + 67%|██████▋ | 4700/7045 [15:16:35<7:20:09, 11.26s/it] 67%|██████▋ | 4701/7045 [15:16:47<7:24:25, 11.38s/it] {'loss': 1.0947, 'learning_rate': 1.3166009882459547e-06, 'epoch': 0.67} + 67%|██████▋ | 4701/7045 [15:16:47<7:24:25, 11.38s/it] 67%|██████▋ | 4702/7045 [15:16:58<7:19:06, 11.24s/it] {'loss': 1.1387, 'learning_rate': 1.315588625267189e-06, 'epoch': 0.67} + 67%|██████▋ | 4702/7045 [15:16:58<7:19:06, 11.24s/it] 67%|██████▋ | 4703/7045 [15:17:10<7:32:02, 11.58s/it] {'loss': 1.0977, 'learning_rate': 1.3145765126567944e-06, 'epoch': 0.67} + 67%|██████▋ | 4703/7045 [15:17:10<7:32:02, 11.58s/it] 67%|██████▋ | 4704/7045 [15:17:21<7:27:55, 11.48s/it] {'loss': 1.127, 'learning_rate': 1.3135646506287203e-06, 'epoch': 0.67} + 67%|██████▋ | 4704/7045 [15:17:21<7:27:55, 11.48s/it] 67%|██████▋ | 4705/7045 [15:17:33<7:29:14, 11.52s/it] {'loss': 1.1367, 'learning_rate': 1.3125530393968577e-06, 'epoch': 0.67} + 67%|██████▋ | 4705/7045 [15:17:33<7:29:14, 11.52s/it] 67%|██████▋ | 4706/7045 [15:17:46<7:49:17, 12.04s/it] {'loss': 1.0972, 'learning_rate': 1.31154167917505e-06, 'epoch': 0.67} + 67%|██████▋ | 4706/7045 [15:17:46<7:49:17, 12.04s/it] 67%|██████▋ | 4707/7045 [15:17:57<7:40:32, 11.82s/it] {'loss': 1.1455, 'learning_rate': 1.3105305701770834e-06, 'epoch': 0.67} + 67%|██████▋ | 4707/7045 [15:17:57<7:40:32, 11.82s/it] 67%|██████▋ | 4708/7045 [15:18:09<7:36:28, 11.72s/it] {'loss': 1.1201, 'learning_rate': 1.309519712616692e-06, 'epoch': 0.67} + 67%|██████▋ | 4708/7045 [15:18:09<7:36:28, 11.72s/it] 67%|██████▋ | 4709/7045 [15:18:20<7:34:41, 11.68s/it] {'loss': 1.1279, 'learning_rate': 1.3085091067075589e-06, 'epoch': 0.67} + 67%|██████▋ | 4709/7045 [15:18:20<7:34:41, 11.68s/it] 67%|██████▋ | 4710/7045 [15:18:32<7:35:33, 11.71s/it] {'loss': 1.0801, 'learning_rate': 1.3074987526633114e-06, 'epoch': 0.67} + 67%|██████▋ | 4710/7045 [15:18:32<7:35:33, 11.71s/it] 67%|██████▋ | 4711/7045 [15:18:44<7:41:21, 11.86s/it] {'loss': 1.1748, 'learning_rate': 1.3064886506975238e-06, 'epoch': 0.67} + 67%|██████▋ | 4711/7045 [15:18:44<7:41:21, 11.86s/it] 67%|██████▋ | 4712/7045 [15:18:56<7:32:18, 11.63s/it] {'loss': 1.1279, 'learning_rate': 1.3054788010237202e-06, 'epoch': 0.67} + 67%|██████▋ | 4712/7045 [15:18:56<7:32:18, 11.63s/it] 67%|██████▋ | 4713/7045 [15:19:07<7:27:12, 11.51s/it] {'loss': 1.1055, 'learning_rate': 1.3044692038553658e-06, 'epoch': 0.67} + 67%|██████▋ | 4713/7045 [15:19:07<7:27:12, 11.51s/it] 67%|██████▋ | 4714/7045 [15:19:18<7:22:33, 11.39s/it] {'loss': 1.106, 'learning_rate': 1.3034598594058775e-06, 'epoch': 0.67} + 67%|██████▋ | 4714/7045 [15:19:18<7:22:33, 11.39s/it] 67%|██████▋ | 4715/7045 [15:19:29<7:19:18, 11.31s/it] {'loss': 1.0742, 'learning_rate': 1.3024507678886179e-06, 'epoch': 0.67} + 67%|██████▋ | 4715/7045 [15:19:29<7:19:18, 11.31s/it] 67%|██████▋ | 4716/7045 [15:19:40<7:14:37, 11.20s/it] {'loss': 1.1064, 'learning_rate': 1.3014419295168929e-06, 'epoch': 0.67} + 67%|██████▋ | 4716/7045 [15:19:40<7:14:37, 11.20s/it] 67%|██████▋ | 4717/7045 [15:19:52<7:18:47, 11.31s/it] {'loss': 1.1025, 'learning_rate': 1.3004333445039597e-06, 'epoch': 0.67} + 67%|██████▋ | 4717/7045 [15:19:52<7:18:47, 11.31s/it] 67%|██████▋ | 4718/7045 [15:20:02<7:13:38, 11.18s/it] {'loss': 1.126, 'learning_rate': 1.2994250130630173e-06, 'epoch': 0.67} + 67%|██████▋ | 4718/7045 [15:20:02<7:13:38, 11.18s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Truncated File Read + warnings.warn(str(msg)) + 67%|██████▋ | 4719/7045 [15:20:13<7:10:58, 11.12s/it] {'loss': 1.1455, 'learning_rate': 1.2984169354072133e-06, 'epoch': 0.67} + 67%|██████▋ | 4719/7045 [15:20:13<7:10:58, 11.12s/it] 67%|██████▋ | 4720/7045 [15:20:25<7:11:27, 11.13s/it] {'loss': 1.1016, 'learning_rate': 1.2974091117496426e-06, 'epoch': 0.67} + 67%|██████▋ | 4720/7045 [15:20:25<7:11:27, 11.13s/it] 67%|██████▋ | 4721/7045 [15:20:38<7:35:18, 11.75s/it] {'loss': 1.1294, 'learning_rate': 1.296401542303345e-06, 'epoch': 0.67} + 67%|██████▋ | 4721/7045 [15:20:38<7:35:18, 11.75s/it] 67%|██████▋ | 4722/7045 [15:20:49<7:31:05, 11.65s/it] {'loss': 1.123, 'learning_rate': 1.2953942272813058e-06, 'epoch': 0.67} + 67%|██████▋ | 4722/7045 [15:20:49<7:31:05, 11.65s/it] 67%|██████▋ | 4723/7045 [15:21:00<7:24:29, 11.49s/it] {'loss': 1.1104, 'learning_rate': 1.2943871668964597e-06, 'epoch': 0.67} + 67%|██████▋ | 4723/7045 [15:21:00<7:24:29, 11.49s/it] 67%|██████▋ | 4724/7045 [15:21:11<7:19:08, 11.35s/it] {'loss': 1.0791, 'learning_rate': 1.293380361361683e-06, 'epoch': 0.67} + 67%|██████▋ | 4724/7045 [15:21:11<7:19:08, 11.35s/it] 67%|██████▋ | 4725/7045 [15:21:23<7:28:22, 11.60s/it] {'loss': 1.1387, 'learning_rate': 1.2923738108898022e-06, 'epoch': 0.67} + 67%|██████▋ | 4725/7045 [15:21:23<7:28:22, 11.60s/it] 67%|██████▋ | 4726/7045 [15:21:37<7:45:39, 12.05s/it] {'loss': 1.1182, 'learning_rate': 1.291367515693589e-06, 'epoch': 0.67} + 67%|██████▋ | 4726/7045 [15:21:37<7:45:39, 12.05s/it] 67%|██████▋ | 4727/7045 [15:21:48<7:36:04, 11.81s/it] {'loss': 1.124, 'learning_rate': 1.2903614759857584e-06, 'epoch': 0.67} + 67%|██████▋ | 4727/7045 [15:21:48<7:36:04, 11.81s/it] 67%|██████▋ | 4728/7045 [15:21:59<7:32:13, 11.71s/it] {'loss': 1.1396, 'learning_rate': 1.2893556919789757e-06, 'epoch': 0.67} + 67%|██████▋ | 4728/7045 [15:21:59<7:32:13, 11.71s/it] 67%|██████▋ | 4729/7045 [15:22:10<7:22:04, 11.45s/it] {'loss': 1.0898, 'learning_rate': 1.2883501638858486e-06, 'epoch': 0.67} + 67%|██████▋ | 4729/7045 [15:22:10<7:22:04, 11.45s/it] 67%|██████▋ | 4730/7045 [15:22:22<7:24:53, 11.53s/it] {'loss': 1.1182, 'learning_rate': 1.287344891918931e-06, 'epoch': 0.67} + 67%|██████▋ | 4730/7045 [15:22:22<7:24:53, 11.53s/it] 67%|██████▋ | 4731/7045 [15:22:33<7:20:33, 11.42s/it] {'loss': 1.1094, 'learning_rate': 1.286339876290726e-06, 'epoch': 0.67} + 67%|██████▋ | 4731/7045 [15:22:33<7:20:33, 11.42s/it] 67%|██████▋ | 4732/7045 [15:22:44<7:19:26, 11.40s/it] {'loss': 1.1133, 'learning_rate': 1.2853351172136791e-06, 'epoch': 0.67} + 67%|██████▋ | 4732/7045 [15:22:44<7:19:26, 11.40s/it] 67%|██████▋ | 4733/7045 [15:22:56<7:17:22, 11.35s/it] {'loss': 1.0708, 'learning_rate': 1.284330614900181e-06, 'epoch': 0.67} + 67%|██████▋ | 4733/7045 [15:22:56<7:17:22, 11.35s/it] 67%|██████▋ | 4734/7045 [15:23:07<7:16:50, 11.34s/it] {'loss': 1.1152, 'learning_rate': 1.2833263695625725e-06, 'epoch': 0.67} + 67%|██████▋ | 4734/7045 [15:23:07<7:16:50, 11.34s/it] 67%|██████▋ | 4735/7045 [15:23:18<7:13:15, 11.25s/it] {'loss': 1.1123, 'learning_rate': 1.2823223814131352e-06, 'epoch': 0.67} + 67%|██████▋ | 4735/7045 [15:23:18<7:13:15, 11.25s/it] 67%|██████▋ | 4736/7045 [15:23:29<7:12:41, 11.24s/it] {'loss': 1.1318, 'learning_rate': 1.2813186506641e-06, 'epoch': 0.67} + 67%|██████▋ | 4736/7045 [15:23:29<7:12:41, 11.24s/it] 67%|██████▋ | 4737/7045 [15:23:42<7:26:10, 11.60s/it] {'loss': 1.0991, 'learning_rate': 1.2803151775276407e-06, 'epoch': 0.67} + 67%|██████▋ | 4737/7045 [15:23:42<7:26:10, 11.60s/it] 67%|██████▋ | 4738/7045 [15:23:54<7:34:41, 11.83s/it] {'loss': 1.0801, 'learning_rate': 1.2793119622158782e-06, 'epoch': 0.67} + 67%|██████▋ | 4738/7045 [15:23:54<7:34:41, 11.83s/it] 67%|██████▋ | 4739/7045 [15:24:06<7:32:22, 11.77s/it] {'loss': 1.1357, 'learning_rate': 1.2783090049408793e-06, 'epoch': 0.67} + 67%|██████▋ | 4739/7045 [15:24:06<7:32:22, 11.77s/it] 67%|██████▋ | 4740/7045 [15:24:17<7:24:05, 11.56s/it] {'loss': 1.1021, 'learning_rate': 1.2773063059146551e-06, 'epoch': 0.67} + 67%|██████▋ | 4740/7045 [15:24:17<7:24:05, 11.56s/it] 67%|██████▋ | 4741/7045 [15:24:28<7:18:40, 11.42s/it] {'loss': 1.1123, 'learning_rate': 1.2763038653491609e-06, 'epoch': 0.67} + 67%|██████▋ | 4741/7045 [15:24:28<7:18:40, 11.42s/it] 67%|██████▋ | 4742/7045 [15:24:40<7:31:02, 11.75s/it] {'loss': 1.1377, 'learning_rate': 1.2753016834563008e-06, 'epoch': 0.67} + 67%|██████▋ | 4742/7045 [15:24:40<7:31:02, 11.75s/it] 67%|██████▋ | 4743/7045 [15:24:52<7:24:34, 11.59s/it] {'loss': 1.1562, 'learning_rate': 1.2742997604479225e-06, 'epoch': 0.67} + 67%|██████▋ | 4743/7045 [15:24:52<7:24:34, 11.59s/it] 67%|██████▋ | 4744/7045 [15:25:03<7:19:32, 11.46s/it] {'loss': 1.1475, 'learning_rate': 1.2732980965358166e-06, 'epoch': 0.67} + 67%|██████▋ | 4744/7045 [15:25:03<7:19:32, 11.46s/it] 67%|██████▋ | 4745/7045 [15:25:14<7:17:35, 11.42s/it] {'loss': 1.0596, 'learning_rate': 1.2722966919317234e-06, 'epoch': 0.67} + 67%|██████▋ | 4745/7045 [15:25:14<7:17:35, 11.42s/it] 67%|██████▋ | 4746/7045 [15:25:25<7:14:59, 11.35s/it] {'loss': 1.0874, 'learning_rate': 1.2712955468473245e-06, 'epoch': 0.67} + 67%|██████▋ | 4746/7045 [15:25:25<7:14:59, 11.35s/it] 67%|██████▋ | 4747/7045 [15:25:39<7:37:24, 11.94s/it] {'loss': 1.0815, 'learning_rate': 1.2702946614942502e-06, 'epoch': 0.67} + 67%|██████▋ | 4747/7045 [15:25:39<7:37:24, 11.94s/it] 67%|██████▋ | 4748/7045 [15:25:50<7:37:34, 11.95s/it] {'loss': 1.1396, 'learning_rate': 1.2692940360840727e-06, 'epoch': 0.67} + 67%|██████▋ | 4748/7045 [15:25:50<7:37:34, 11.95s/it] 67%|██████▋ | 4749/7045 [15:26:02<7:30:05, 11.76s/it] {'loss': 1.1182, 'learning_rate': 1.26829367082831e-06, 'epoch': 0.67} + 67%|██████▋ | 4749/7045 [15:26:02<7:30:05, 11.76s/it] 67%|██████▋ | 4750/7045 [15:26:13<7:27:41, 11.70s/it] {'loss': 1.0874, 'learning_rate': 1.2672935659384263e-06, 'epoch': 0.67} + 67%|██████▋ | 4750/7045 [15:26:13<7:27:41, 11.70s/it] 67%|██████▋ | 4751/7045 [15:26:24<7:19:24, 11.49s/it] {'loss': 1.1172, 'learning_rate': 1.2662937216258312e-06, 'epoch': 0.67} + 67%|██████▋ | 4751/7045 [15:26:24<7:19:24, 11.49s/it] 67%|██████▋ | 4752/7045 [15:26:36<7:19:13, 11.49s/it] {'loss': 1.1064, 'learning_rate': 1.265294138101876e-06, 'epoch': 0.67} + 67%|██████▋ | 4752/7045 [15:26:36<7:19:13, 11.49s/it] 67%|██████▋ | 4753/7045 [15:26:49<7:39:31, 12.03s/it] {'loss': 1.062, 'learning_rate': 1.2642948155778612e-06, 'epoch': 0.67} + 67%|██████▋ | 4753/7045 [15:26:49<7:39:31, 12.03s/it] 67%|██████▋ | 4754/7045 [15:27:01<7:38:43, 12.01s/it] {'loss': 1.0864, 'learning_rate': 1.2632957542650287e-06, 'epoch': 0.67} + 67%|██████▋ | 4754/7045 [15:27:01<7:38:43, 12.01s/it] 67%|██████▋ | 4755/7045 [15:27:14<7:46:59, 12.24s/it] {'loss': 1.084, 'learning_rate': 1.2622969543745655e-06, 'epoch': 0.67} + 67%|██████▋ | 4755/7045 [15:27:14<7:46:59, 12.24s/it] 68%|██████▊ | 4756/7045 [15:27:27<7:52:52, 12.40s/it] {'loss': 1.0752, 'learning_rate': 1.2612984161176062e-06, 'epoch': 0.68} + 68%|██████▊ | 4756/7045 [15:27:27<7:52:52, 12.40s/it] 68%|██████▊ | 4757/7045 [15:27:39<7:53:41, 12.42s/it] {'loss': 1.1221, 'learning_rate': 1.260300139705226e-06, 'epoch': 0.68} + 68%|██████▊ | 4757/7045 [15:27:39<7:53:41, 12.42s/it] 68%|██████▊ | 4758/7045 [15:27:50<7:39:01, 12.04s/it] {'loss': 1.0889, 'learning_rate': 1.2593021253484494e-06, 'epoch': 0.68} + 68%|██████▊ | 4758/7045 [15:27:50<7:39:01, 12.04s/it] 68%|██████▊ | 4759/7045 [15:28:03<7:43:46, 12.17s/it] {'loss': 1.0806, 'learning_rate': 1.2583043732582417e-06, 'epoch': 0.68} + 68%|██████▊ | 4759/7045 [15:28:03<7:43:46, 12.17s/it] 68%|██████▊ | 4760/7045 [15:28:14<7:33:46, 11.92s/it] {'loss': 1.1309, 'learning_rate': 1.2573068836455127e-06, 'epoch': 0.68} + 68%|██████▊ | 4760/7045 [15:28:14<7:33:46, 11.92s/it] 68%|██████▊ | 4761/7045 [15:28:26<7:30:52, 11.84s/it] {'loss': 1.1025, 'learning_rate': 1.2563096567211209e-06, 'epoch': 0.68} + 68%|██████▊ | 4761/7045 [15:28:26<7:30:52, 11.84s/it] 68%|██████▊ | 4762/7045 [15:28:37<7:22:09, 11.62s/it] {'loss': 1.0889, 'learning_rate': 1.2553126926958631e-06, 'epoch': 0.68} + 68%|██████▊ | 4762/7045 [15:28:37<7:22:09, 11.62s/it] 68%|██████▊ | 4763/7045 [15:28:48<7:16:16, 11.47s/it] {'loss': 1.1035, 'learning_rate': 1.2543159917804865e-06, 'epoch': 0.68} + 68%|██████▊ | 4763/7045 [15:28:48<7:16:16, 11.47s/it] 68%|██████▊ | 4764/7045 [15:28:59<7:15:40, 11.46s/it] {'loss': 1.0859, 'learning_rate': 1.2533195541856797e-06, 'epoch': 0.68} + 68%|██████▊ | 4764/7045 [15:28:59<7:15:40, 11.46s/it] 68%|██████▊ | 4765/7045 [15:29:11<7:17:40, 11.52s/it] {'loss': 1.1328, 'learning_rate': 1.2523233801220758e-06, 'epoch': 0.68} + 68%|██████▊ | 4765/7045 [15:29:11<7:17:40, 11.52s/it] 68%|██████▊ | 4766/7045 [15:29:23<7:19:21, 11.57s/it] {'loss': 1.1357, 'learning_rate': 1.2513274698002509e-06, 'epoch': 0.68} + 68%|██████▊ | 4766/7045 [15:29:23<7:19:21, 11.57s/it] 68%|██████▊ | 4767/7045 [15:29:34<7:12:05, 11.38s/it] {'loss': 1.1318, 'learning_rate': 1.2503318234307288e-06, 'epoch': 0.68} + 68%|██████▊ | 4767/7045 [15:29:34<7:12:05, 11.38s/it] 68%|██████▊ | 4768/7045 [15:29:45<7:07:47, 11.27s/it] {'loss': 1.1152, 'learning_rate': 1.2493364412239741e-06, 'epoch': 0.68} + 68%|██████▊ | 4768/7045 [15:29:45<7:07:47, 11.27s/it] 68%|██████▊ | 4769/7045 [15:29:58<7:25:25, 11.74s/it] {'loss': 1.126, 'learning_rate': 1.2483413233903988e-06, 'epoch': 0.68} + 68%|██████▊ | 4769/7045 [15:29:58<7:25:25, 11.74s/it] 68%|██████▊ | 4770/7045 [15:30:11<7:40:14, 12.14s/it] {'loss': 1.0879, 'learning_rate': 1.2473464701403556e-06, 'epoch': 0.68} + 68%|██████▊ | 4770/7045 [15:30:11<7:40:14, 12.14s/it] 68%|██████▊ | 4771/7045 [15:30:22<7:32:08, 11.93s/it] {'loss': 1.126, 'learning_rate': 1.2463518816841427e-06, 'epoch': 0.68} + 68%|██████▊ | 4771/7045 [15:30:22<7:32:08, 11.93s/it] 68%|██████▊ | 4772/7045 [15:30:33<7:24:28, 11.73s/it] {'loss': 1.1338, 'learning_rate': 1.2453575582320038e-06, 'epoch': 0.68} + 68%|██████▊ | 4772/7045 [15:30:33<7:24:28, 11.73s/it] 68%|██████▊ | 4773/7045 [15:30:45<7:28:40, 11.85s/it] {'loss': 1.1318, 'learning_rate': 1.2443634999941238e-06, 'epoch': 0.68} + 68%|██████▊ | 4773/7045 [15:30:45<7:28:40, 11.85s/it] 68%|██████▊ | 4774/7045 [15:30:57<7:23:58, 11.73s/it] {'loss': 1.1299, 'learning_rate': 1.243369707180634e-06, 'epoch': 0.68} + 68%|██████▊ | 4774/7045 [15:30:57<7:23:58, 11.73s/it] 68%|██████▊ | 4775/7045 [15:31:09<7:29:56, 11.89s/it] {'loss': 1.123, 'learning_rate': 1.242376180001609e-06, 'epoch': 0.68} + 68%|██████▊ | 4775/7045 [15:31:09<7:29:56, 11.89s/it] 68%|██████▊ | 4776/7045 [15:31:22<7:34:53, 12.03s/it] {'loss': 1.125, 'learning_rate': 1.2413829186670664e-06, 'epoch': 0.68} + 68%|██████▊ | 4776/7045 [15:31:22<7:34:53, 12.03s/it] 68%|██████▊ | 4777/7045 [15:31:34<7:39:11, 12.15s/it] {'loss': 1.1123, 'learning_rate': 1.2403899233869674e-06, 'epoch': 0.68} + 68%|██████▊ | 4777/7045 [15:31:34<7:39:11, 12.15s/it] 68%|██████▊ | 4778/7045 [15:31:45<7:25:21, 11.79s/it] {'loss': 1.1143, 'learning_rate': 1.2393971943712189e-06, 'epoch': 0.68} + 68%|██████▊ | 4778/7045 [15:31:45<7:25:21, 11.79s/it] 68%|██████▊ | 4779/7045 [15:31:56<7:16:31, 11.56s/it] {'loss': 1.1455, 'learning_rate': 1.2384047318296685e-06, 'epoch': 0.68} + 68%|██████▊ | 4779/7045 [15:31:56<7:16:31, 11.56s/it] 68%|██████▊ | 4780/7045 [15:32:10<7:39:38, 12.18s/it] {'loss': 1.0879, 'learning_rate': 1.2374125359721112e-06, 'epoch': 0.68} + 68%|██████▊ | 4780/7045 [15:32:10<7:39:38, 12.18s/it] 68%|██████▊ | 4781/7045 [15:32:21<7:27:42, 11.86s/it] {'loss': 1.1309, 'learning_rate': 1.236420607008283e-06, 'epoch': 0.68} + 68%|██���███▊ | 4781/7045 [15:32:21<7:27:42, 11.86s/it] 68%|██████▊ | 4782/7045 [15:32:32<7:21:31, 11.71s/it] {'loss': 1.1572, 'learning_rate': 1.2354289451478624e-06, 'epoch': 0.68} + 68%|██████▊ | 4782/7045 [15:32:32<7:21:31, 11.71s/it] 68%|██████▊ | 4783/7045 [15:32:45<7:35:29, 12.08s/it] {'loss': 1.062, 'learning_rate': 1.2344375506004757e-06, 'epoch': 0.68} + 68%|██████▊ | 4783/7045 [15:32:45<7:35:29, 12.08s/it] 68%|██████▊ | 4784/7045 [15:32:57<7:31:38, 11.99s/it] {'loss': 1.1455, 'learning_rate': 1.2334464235756894e-06, 'epoch': 0.68} + 68%|██████▊ | 4784/7045 [15:32:57<7:31:38, 11.99s/it] 68%|██████▊ | 4785/7045 [15:33:07<7:17:20, 11.61s/it] {'loss': 1.1191, 'learning_rate': 1.2324555642830124e-06, 'epoch': 0.68} + 68%|██████▊ | 4785/7045 [15:33:07<7:17:20, 11.61s/it] 68%|██████▊ | 4786/7045 [15:33:19<7:11:46, 11.47s/it] {'loss': 1.1016, 'learning_rate': 1.2314649729319005e-06, 'epoch': 0.68} + 68%|██████▊ | 4786/7045 [15:33:19<7:11:46, 11.47s/it] 68%|██████▊ | 4787/7045 [15:33:30<7:12:18, 11.49s/it] {'loss': 1.1309, 'learning_rate': 1.230474649731752e-06, 'epoch': 0.68} + 68%|██████▊ | 4787/7045 [15:33:30<7:12:18, 11.49s/it] 68%|██████▊ | 4788/7045 [15:33:42<7:13:06, 11.51s/it] {'loss': 1.1055, 'learning_rate': 1.2294845948919052e-06, 'epoch': 0.68} + 68%|██████▊ | 4788/7045 [15:33:42<7:13:06, 11.51s/it] 68%|██████▊ | 4789/7045 [15:33:54<7:26:11, 11.87s/it] {'loss': 1.0767, 'learning_rate': 1.2284948086216466e-06, 'epoch': 0.68} + 68%|██████▊ | 4789/7045 [15:33:54<7:26:11, 11.87s/it] 68%|██████▊ | 4790/7045 [15:34:06<7:24:41, 11.83s/it] {'loss': 1.1846, 'learning_rate': 1.2275052911302018e-06, 'epoch': 0.68} + 68%|██████▊ | 4790/7045 [15:34:06<7:24:41, 11.83s/it] 68%|██████▊ | 4791/7045 [15:34:20<7:42:32, 12.31s/it] {'loss': 1.1445, 'learning_rate': 1.2265160426267427e-06, 'epoch': 0.68} + 68%|██████▊ | 4791/7045 [15:34:20<7:42:32, 12.31s/it] 68%|██████▊ | 4792/7045 [15:34:31<7:29:15, 11.96s/it] {'loss': 1.1221, 'learning_rate': 1.225527063320382e-06, 'epoch': 0.68} + 68%|██████▊ | 4792/7045 [15:34:31<7:29:15, 11.96s/it] 68%|██████▊ | 4793/7045 [15:34:43<7:28:58, 11.96s/it] {'loss': 1.0942, 'learning_rate': 1.2245383534201758e-06, 'epoch': 0.68} + 68%|██████▊ | 4793/7045 [15:34:43<7:28:58, 11.96s/it] 68%|██████▊ | 4794/7045 [15:34:54<7:18:34, 11.69s/it] {'loss': 1.1094, 'learning_rate': 1.2235499131351252e-06, 'epoch': 0.68} + 68%|██████▊ | 4794/7045 [15:34:54<7:18:34, 11.69s/it] 68%|██████▊ | 4795/7045 [15:35:05<7:11:26, 11.51s/it] {'loss': 1.0869, 'learning_rate': 1.2225617426741729e-06, 'epoch': 0.68} + 68%|██████▊ | 4795/7045 [15:35:05<7:11:26, 11.51s/it] 68%|██████▊ | 4796/7045 [15:35:16<7:06:57, 11.39s/it] {'loss': 1.1309, 'learning_rate': 1.2215738422462025e-06, 'epoch': 0.68} + 68%|██████▊ | 4796/7045 [15:35:16<7:06:57, 11.39s/it] 68%|██████▊ | 4797/7045 [15:35:28<7:17:29, 11.68s/it] {'loss': 1.1172, 'learning_rate': 1.2205862120600457e-06, 'epoch': 0.68} + 68%|██████▊ | 4797/7045 [15:35:28<7:17:29, 11.68s/it] 68%|██████▊ | 4798/7045 [15:35:39<7:11:56, 11.53s/it] {'loss': 1.1143, 'learning_rate': 1.2195988523244712e-06, 'epoch': 0.68} + 68%|██████▊ | 4798/7045 [15:35:39<7:11:56, 11.53s/it] 68%|██████▊ | 4799/7045 [15:35:50<7:03:26, 11.31s/it] {'loss': 1.0977, 'learning_rate': 1.2186117632481947e-06, 'epoch': 0.68} + 68%|██████▊ | 4799/7045 [15:35:50<7:03:26, 11.31s/it] 68%|██████▊ | 4800/7045 [15:36:03<7:18:02, 11.71s/it] {'loss': 1.0962, 'learning_rate': 1.2176249450398745e-06, 'epoch': 0.68} + 68%|██████▊ | 4800/7045 [15:36:03<7:18:02, 11.71s/it] 68%|██████▊ | 4801/7045 [15:36:14<7:16:08, 11.66s/it] {'loss': 1.0718, 'learning_rate': 1.216638397908108e-06, 'epoch': 0.68} + 68%|██████▊ | 4801/7045 [15:36:14<7:16:08, 11.66s/it] 68%|██████▊ | 4802/7045 [15:36:26<7:09:00, 11.48s/it] {'loss': 1.125, 'learning_rate': 1.2156521220614404e-06, 'epoch': 0.68} + 68%|██████▊ | 4802/7045 [15:36:26<7:09:00, 11.48s/it] 68%|██████▊ | 4803/7045 [15:36:38<7:23:58, 11.88s/it] {'loss': 1.0835, 'learning_rate': 1.2146661177083554e-06, 'epoch': 0.68} + 68%|██████▊ | 4803/7045 [15:36:38<7:23:58, 11.88s/it] 68%|██████▊ | 4804/7045 [15:36:51<7:29:25, 12.03s/it] {'loss': 1.0791, 'learning_rate': 1.2136803850572804e-06, 'epoch': 0.68} + 68%|██████▊ | 4804/7045 [15:36:51<7:29:25, 12.03s/it] 68%|██████▊ | 4805/7045 [15:37:02<7:19:35, 11.77s/it] {'loss': 1.1016, 'learning_rate': 1.2126949243165869e-06, 'epoch': 0.68} + 68%|██████▊ | 4805/7045 [15:37:02<7:19:35, 11.77s/it] 68%|██████▊ | 4806/7045 [15:37:14<7:25:59, 11.95s/it] {'loss': 1.0854, 'learning_rate': 1.2117097356945878e-06, 'epoch': 0.68} + 68%|██████▊ | 4806/7045 [15:37:14<7:25:59, 11.95s/it] 68%|██████▊ | 4807/7045 [15:37:28<7:42:38, 12.40s/it] {'loss': 1.0522, 'learning_rate': 1.2107248193995368e-06, 'epoch': 0.68} + 68%|██████▊ | 4807/7045 [15:37:28<7:42:38, 12.40s/it] 68%|██████▊ | 4808/7045 [15:37:39<7:31:24, 12.11s/it] {'loss': 1.1221, 'learning_rate': 1.2097401756396344e-06, 'epoch': 0.68} + 68%|██████▊ | 4808/7045 [15:37:39<7:31:24, 12.11s/it] 68%|██████▊ | 4809/7045 [15:37:51<7:24:29, 11.93s/it] {'loss': 1.0928, 'learning_rate': 1.208755804623018e-06, 'epoch': 0.68} + 68%|██████▊ | 4809/7045 [15:37:51<7:24:29, 11.93s/it] 68%|██████▊ | 4810/7045 [15:38:03<7:33:26, 12.17s/it] {'loss': 1.064, 'learning_rate': 1.2077717065577718e-06, 'epoch': 0.68} + 68%|██████▊ | 4810/7045 [15:38:03<7:33:26, 12.17s/it] 68%|██████▊ | 4811/7045 [15:38:16<7:43:32, 12.45s/it] {'loss': 1.083, 'learning_rate': 1.2067878816519217e-06, 'epoch': 0.68} + 68%|██████▊ | 4811/7045 [15:38:16<7:43:32, 12.45s/it] 68%|██████▊ | 4812/7045 [15:38:27<7:25:59, 11.98s/it] {'loss': 1.0669, 'learning_rate': 1.2058043301134325e-06, 'epoch': 0.68} + 68%|██████▊ | 4812/7045 [15:38:27<7:25:59, 11.98s/it] 68%|██████▊ | 4813/7045 [15:38:38<7:14:08, 11.67s/it] {'loss': 1.0801, 'learning_rate': 1.2048210521502158e-06, 'epoch': 0.68} + 68%|██████▊ | 4813/7045 [15:38:38<7:14:08, 11.67s/it] 68%|██████▊ | 4814/7045 [15:38:51<7:29:18, 12.08s/it] {'loss': 1.1104, 'learning_rate': 1.2038380479701217e-06, 'epoch': 0.68} + 68%|██████▊ | 4814/7045 [15:38:51<7:29:18, 12.08s/it] 68%|██████▊ | 4815/7045 [15:39:04<7:34:07, 12.22s/it] {'loss': 1.0981, 'learning_rate': 1.2028553177809436e-06, 'epoch': 0.68} + 68%|██████▊ | 4815/7045 [15:39:04<7:34:07, 12.22s/it] 68%|██████▊ | 4816/7045 [15:39:15<7:24:34, 11.97s/it] {'loss': 1.1289, 'learning_rate': 1.2018728617904183e-06, 'epoch': 0.68} + 68%|██████▊ | 4816/7045 [15:39:15<7:24:34, 11.97s/it] 68%|██████▊ | 4817/7045 [15:39:28<7:36:21, 12.29s/it] {'loss': 1.1133, 'learning_rate': 1.2008906802062232e-06, 'epoch': 0.68} + 68%|██████▊ | 4817/7045 [15:39:28<7:36:21, 12.29s/it] 68%|██████▊ | 4818/7045 [15:39:41<7:42:21, 12.46s/it] {'loss': 1.1201, 'learning_rate': 1.1999087732359771e-06, 'epoch': 0.68} + 68%|██████▊ | 4818/7045 [15:39:41<7:42:21, 12.46s/it] 68%|██████▊ | 4819/7045 [15:39:54<7:46:29, 12.57s/it] {'loss': 1.1484, 'learning_rate': 1.1989271410872433e-06, 'epoch': 0.68} + 68%|██████▊ | 4819/7045 [15:39:54<7:46:29, 12.57s/it] 68%|██████▊ | 4820/7045 [15:40:06<7:42:17, 12.47s/it] {'loss': 1.0967, 'learning_rate': 1.1979457839675238e-06, 'epoch': 0.68} + 68%|██████▊ | 4820/7045 [15:40:06<7:42:17, 12.47s/it] 68%|██████▊ | 4821/7045 [15:40:17<7:25:28, 12.02s/it] {'loss': 1.0913, 'learning_rate': 1.1969647020842658e-06, 'epoch': 0.68} + 68%|██████▊ | 4821/7045 [15:40:17<7:25:28, 12.02s/it] 68%|██████▊ | 4822/7045 [15:40:28<7:12:48, 11.68s/it] {'loss': 1.0723, 'learning_rate': 1.1959838956448557e-06, 'epoch': 0.68} + 68%|██████▊ | 4822/7045 [15:40:28<7:12:48, 11.68s/it] 68%|██████▊ | 4823/7045 [15:40:39<7:07:25, 11.54s/it] {'loss': 1.1064, 'learning_rate': 1.1950033648566206e-06, 'epoch': 0.68} + 68%|██████▊ | 4823/7045 [15:40:39<7:07:25, 11.54s/it] 68%|██████▊ | 4824/7045 [15:40:50<7:01:45, 11.39s/it] {'loss': 1.127, 'learning_rate': 1.1940231099268353e-06, 'epoch': 0.68} + 68%|██████▊ | 4824/7045 [15:40:50<7:01:45, 11.39s/it] 68%|██████▊ | 4825/7045 [15:41:04<7:29:38, 12.15s/it] {'loss': 1.0439, 'learning_rate': 1.1930431310627102e-06, 'epoch': 0.68} + 68%|██████▊ | 4825/7045 [15:41:04<7:29:38, 12.15s/it] 69%|██████▊ | 4826/7045 [15:41:16<7:26:01, 12.06s/it] {'loss': 1.0649, 'learning_rate': 1.192063428471399e-06, 'epoch': 0.69} + 69%|██████▊ | 4826/7045 [15:41:16<7:26:01, 12.06s/it] 69%|██████▊ | 4827/7045 [15:41:27<7:10:33, 11.65s/it] {'loss': 1.0898, 'learning_rate': 1.1910840023599984e-06, 'epoch': 0.69} + 69%|██████▊ | 4827/7045 [15:41:27<7:10:33, 11.65s/it] 69%|██████▊ | 4828/7045 [15:41:38<7:07:32, 11.57s/it] {'loss': 1.1016, 'learning_rate': 1.1901048529355452e-06, 'epoch': 0.69} + 69%|██████▊ | 4828/7045 [15:41:38<7:07:32, 11.57s/it] 69%|██████▊ | 4829/7045 [15:41:50<7:06:12, 11.54s/it] {'loss': 1.1069, 'learning_rate': 1.1891259804050177e-06, 'epoch': 0.69} + 69%|██████▊ | 4829/7045 [15:41:50<7:06:12, 11.54s/it] 69%|██████▊ | 4830/7045 [15:42:01<7:07:23, 11.58s/it] {'loss': 1.1265, 'learning_rate': 1.1881473849753373e-06, 'epoch': 0.69} + 69%|██████▊ | 4830/7045 [15:42:01<7:07:23, 11.58s/it] 69%|██████▊ | 4831/7045 [15:42:12<7:01:56, 11.43s/it] {'loss': 1.1152, 'learning_rate': 1.1871690668533642e-06, 'epoch': 0.69} + 69%|██████▊ | 4831/7045 [15:42:12<7:01:56, 11.43s/it] 69%|██████▊ | 4832/7045 [15:42:24<7:08:12, 11.61s/it] {'loss': 1.1094, 'learning_rate': 1.1861910262459034e-06, 'epoch': 0.69} + 69%|██████▊ | 4832/7045 [15:42:24<7:08:12, 11.61s/it] 69%|██████▊ | 4833/7045 [15:42:37<7:14:02, 11.77s/it] {'loss': 1.0791, 'learning_rate': 1.185213263359698e-06, 'epoch': 0.69} + 69%|██████▊ | 4833/7045 [15:42:37<7:14:02, 11.77s/it] 69%|██████▊ | 4834/7045 [15:42:49<7:25:01, 12.08s/it] {'loss': 1.1401, 'learning_rate': 1.1842357784014335e-06, 'epoch': 0.69} + 69%|██████▊ | 4834/7045 [15:42:49<7:25:01, 12.08s/it] 69%|██████▊ | 4835/7045 [15:43:01<7:16:12, 11.84s/it] {'loss': 1.0811, 'learning_rate': 1.183258571577737e-06, 'epoch': 0.69} + 69%|██████▊ | 4835/7045 [15:43:01<7:16:12, 11.84s/it] 69%|██████▊ | 4836/7045 [15:43:12<7:09:17, 11.66s/it] {'loss': 1.0649, 'learning_rate': 1.1822816430951784e-06, 'epoch': 0.69} + 69%|██████▊ | 4836/7045 [15:43:12<7:09:17, 11.66s/it] 69%|██████▊ | 4837/7045 [15:43:25<7:20:27, 11.97s/it] {'loss': 1.1089, 'learning_rate': 1.1813049931602644e-06, 'epoch': 0.69} + 69%|██████▊ | 4837/7045 [15:43:25<7:20:27, 11.97s/it] 69%|██████▊ | 4838/7045 [15:43:39<7:42:02, 12.56s/it] {'loss': 1.0933, 'learning_rate': 1.1803286219794474e-06, 'epoch': 0.69} + 69%|██████▊ | 4838/7045 [15:43:39<7:42:02, 12.56s/it] 69%|██████▊ | 4839/7045 [15:43:50<7:32:07, 12.30s/it] {'loss': 1.1152, 'learning_rate': 1.1793525297591185e-06, 'epoch': 0.69} + 69%|██████▊ | 4839/7045 [15:43:50<7:32:07, 12.30s/it] 69%|██████▊ | 4840/7045 [15:44:02<7:22:43, 12.05s/it] {'loss': 1.1533, 'learning_rate': 1.178376716705609e-06, 'epoch': 0.69} + 69%|██████▊ | 4840/7045 [15:44:02<7:22:43, 12.05s/it] 69%|██████▊ | 4841/7045 [15:44:13<7:09:49, 11.70s/it] {'loss': 1.1318, 'learning_rate': 1.1774011830251945e-06, 'epoch': 0.69} + 69%|██████▊ | 4841/7045 [15:44:13<7:09:49, 11.70s/it] 69%|██████▊ | 4842/7045 [15:44:24<7:02:19, 11.50s/it] {'loss': 1.043, 'learning_rate': 1.176425928924087e-06, 'epoch': 0.69} + 69%|██████▊ | 4842/7045 [15:44:24<7:02:19, 11.50s/it] 69%|██████▊ | 4843/7045 [15:44:35<7:01:49, 11.49s/it] {'loss': 1.1089, 'learning_rate': 1.1754509546084445e-06, 'epoch': 0.69} + 69%|██████▊ | 4843/7045 [15:44:35<7:01:49, 11.49s/it] 69%|██████▉ | 4844/7045 [15:44:48<7:15:39, 11.88s/it] {'loss': 1.1152, 'learning_rate': 1.174476260284362e-06, 'epoch': 0.69} + 69%|██████▉ | 4844/7045 [15:44:48<7:15:39, 11.88s/it] 69%|██████▉ | 4845/7045 [15:44:59<7:04:38, 11.58s/it] {'loss': 1.1118, 'learning_rate': 1.1735018461578757e-06, 'epoch': 0.69} + 69%|██████▉ | 4845/7045 [15:44:59<7:04:38, 11.58s/it] 69%|██████▉ | 4846/7045 [15:45:10<7:01:00, 11.49s/it] {'loss': 1.1582, 'learning_rate': 1.172527712434965e-06, 'epoch': 0.69} + 69%|██████▉ | 4846/7045 [15:45:10<7:01:00, 11.49s/it] 69%|██████▉ | 4847/7045 [15:45:21<6:58:25, 11.42s/it] {'loss': 1.1206, 'learning_rate': 1.171553859321547e-06, 'epoch': 0.69} + 69%|██████▉ | 4847/7045 [15:45:21<6:58:25, 11.42s/it] 69%|██████▉ | 4848/7045 [15:45:33<6:59:04, 11.44s/it] {'loss': 1.1035, 'learning_rate': 1.1705802870234815e-06, 'epoch': 0.69} + 69%|██████▉ | 4848/7045 [15:45:33<6:59:04, 11.44s/it] 69%|██████▉ | 4849/7045 [15:45:44<6:53:02, 11.29s/it] {'loss': 1.1182, 'learning_rate': 1.1696069957465694e-06, 'epoch': 0.69} + 69%|██████▉ | 4849/7045 [15:45:44<6:53:02, 11.29s/it] 69%|██████▉ | 4850/7045 [15:45:55<6:53:23, 11.30s/it] {'loss': 1.1191, 'learning_rate': 1.1686339856965506e-06, 'epoch': 0.69} + 69%|██████▉ | 4850/7045 [15:45:55<6:53:23, 11.30s/it] 69%|██████▉ | 4851/7045 [15:46:06<6:53:47, 11.32s/it] {'loss': 1.1182, 'learning_rate': 1.1676612570791046e-06, 'epoch': 0.69} + 69%|██████▉ | 4851/7045 [15:46:06<6:53:47, 11.32s/it] 69%|██████▉ | 4852/7045 [15:46:18<6:51:53, 11.27s/it] {'loss': 1.1631, 'learning_rate': 1.1666888100998555e-06, 'epoch': 0.69} + 69%|██████▉ | 4852/7045 [15:46:18<6:51:53, 11.27s/it] 69%|██████▉ | 4853/7045 [15:46:29<6:48:50, 11.19s/it] {'loss': 1.0933, 'learning_rate': 1.1657166449643628e-06, 'epoch': 0.69} + 69%|██████▉ | 4853/7045 [15:46:29<6:48:50, 11.19s/it] 69%|██████▉ | 4854/7045 [15:46:40<6:49:05, 11.20s/it] {'loss': 1.1309, 'learning_rate': 1.164744761878131e-06, 'epoch': 0.69} + 69%|██████▉ | 4854/7045 [15:46:40<6:49:05, 11.20s/it] 69%|██████▉ | 4855/7045 [15:46:53<7:12:14, 11.84s/it] {'loss': 1.0669, 'learning_rate': 1.1637731610466018e-06, 'epoch': 0.69} + 69%|██████▉ | 4855/7045 [15:46:53<7:12:14, 11.84s/it] 69%|██████▉ | 4856/7045 [15:47:05<7:10:58, 11.81s/it] {'loss': 1.1543, 'learning_rate': 1.1628018426751577e-06, 'epoch': 0.69} + 69%|██████▉ | 4856/7045 [15:47:05<7:10:58, 11.81s/it] 69%|██████▉ | 4857/7045 [15:47:17<7:17:36, 12.00s/it] {'loss': 1.0513, 'learning_rate': 1.1618308069691243e-06, 'epoch': 0.69} + 69%|██████▉ | 4857/7045 [15:47:17<7:17:36, 12.00s/it] 69%|██████▉ | 4858/7045 [15:47:28<7:07:10, 11.72s/it] {'loss': 1.0928, 'learning_rate': 1.1608600541337625e-06, 'epoch': 0.69} + 69%|██████▉ | 4858/7045 [15:47:28<7:07:10, 11.72s/it] 69%|██████▉ | 4859/7045 [15:47:40<7:02:59, 11.61s/it] {'loss': 1.0537, 'learning_rate': 1.1598895843742778e-06, 'epoch': 0.69} + 69%|██████▉ | 4859/7045 [15:47:40<7:02:59, 11.61s/it] 69%|██████▉ | 4860/7045 [15:47:51<6:59:21, 11.52s/it] {'loss': 1.1104, 'learning_rate': 1.1589193978958148e-06, 'epoch': 0.69} + 69%|██████▉ | 4860/7045 [15:47:51<6:59:21, 11.52s/it] 69%|██████▉ | 4861/7045 [15:48:02<6:54:24, 11.39s/it] {'loss': 1.0942, 'learning_rate': 1.157949494903457e-06, 'epoch': 0.69} + 69%|██████▉ | 4861/7045 [15:48:02<6:54:24, 11.39s/it] 69%|██████▉ | 4862/7045 [15:48:13<6:52:15, 11.33s/it] {'loss': 1.0825, 'learning_rate': 1.1569798756022278e-06, 'epoch': 0.69} + 69%|██████▉ | 4862/7045 [15:48:13<6:52:15, 11.33s/it] 69%|██████▉ | 4863/7045 [15:48:26<7:06:32, 11.73s/it] {'loss': 1.0645, 'learning_rate': 1.1560105401970924e-06, 'epoch': 0.69} + 69%|██████▉ | 4863/7045 [15:48:26<7:06:32, 11.73s/it] 69%|██████▉ | 4864/7045 [15:48:38<7:05:56, 11.72s/it] {'loss': 1.1582, 'learning_rate': 1.1550414888929545e-06, 'epoch': 0.69} + 69%|██████▉ | 4864/7045 [15:48:38<7:05:56, 11.72s/it] 69%|██████▉ | 4865/7045 [15:48:51<7:18:34, 12.07s/it] {'loss': 1.1167, 'learning_rate': 1.1540727218946596e-06, 'epoch': 0.69} + 69%|██████▉ | 4865/7045 [15:48:51<7:18:34, 12.07s/it] 69%|██████▉ | 4866/7045 [15:49:02<7:09:07, 11.82s/it] {'loss': 1.1221, 'learning_rate': 1.1531042394069907e-06, 'epoch': 0.69} + 69%|██████▉ | 4866/7045 [15:49:02<7:09:07, 11.82s/it] 69%|██████▉ | 4867/7045 [15:49:14<7:16:08, 12.02s/it] {'loss': 1.1309, 'learning_rate': 1.1521360416346707e-06, 'epoch': 0.69} + 69%|██████▉ | 4867/7045 [15:49:14<7:16:08, 12.02s/it] 69%|██████▉ | 4868/7045 [15:49:25<7:06:12, 11.75s/it] {'loss': 1.1436, 'learning_rate': 1.1511681287823659e-06, 'epoch': 0.69} + 69%|██████▉ | 4868/7045 [15:49:25<7:06:12, 11.75s/it] 69%|██████▉ | 4869/7045 [15:49:36<6:57:51, 11.52s/it] {'loss': 1.1211, 'learning_rate': 1.1502005010546775e-06, 'epoch': 0.69} + 69%|██████▉ | 4869/7045 [15:49:36<6:57:51, 11.52s/it] 69%|██████▉ | 4870/7045 [15:49:48<6:54:47, 11.44s/it] {'loss': 1.1143, 'learning_rate': 1.1492331586561506e-06, 'epoch': 0.69} + 69%|██████▉ | 4870/7045 [15:49:48<6:54:47, 11.44s/it] 69%|██████▉ | 4871/7045 [15:49:59<6:51:52, 11.37s/it] {'loss': 1.1426, 'learning_rate': 1.1482661017912665e-06, 'epoch': 0.69} + 69%|██████▉ | 4871/7045 [15:49:59<6:51:52, 11.37s/it] 69%|██████▉ | 4872/7045 [15:50:11<6:58:47, 11.56s/it] {'loss': 1.127, 'learning_rate': 1.1472993306644494e-06, 'epoch': 0.69} + 69%|██████▉ | 4872/7045 [15:50:11<6:58:47, 11.56s/it] 69%|██████▉ | 4873/7045 [15:50:24<7:16:48, 12.07s/it] {'loss': 1.1011, 'learning_rate': 1.14633284548006e-06, 'epoch': 0.69} + 69%|██████▉ | 4873/7045 [15:50:24<7:16:48, 12.07s/it] 69%|██████▉ | 4874/7045 [15:50:37<7:30:58, 12.46s/it] {'loss': 1.04, 'learning_rate': 1.1453666464424015e-06, 'epoch': 0.69} + 69%|██████▉ | 4874/7045 [15:50:37<7:30:58, 12.46s/it] 69%|██████▉ | 4875/7045 [15:50:49<7:16:41, 12.07s/it] {'loss': 1.1113, 'learning_rate': 1.144400733755714e-06, 'epoch': 0.69} + 69%|██████▉ | 4875/7045 [15:50:49<7:16:41, 12.07s/it] 69%|██████▉ | 4876/7045 [15:51:00<7:04:32, 11.74s/it] {'loss': 1.1357, 'learning_rate': 1.1434351076241793e-06, 'epoch': 0.69} + 69%|██████▉ | 4876/7045 [15:51:00<7:04:32, 11.74s/it] 69%|██████▉ | 4877/7045 [15:51:13<7:25:34, 12.33s/it] {'loss': 1.1016, 'learning_rate': 1.1424697682519168e-06, 'epoch': 0.69} + 69%|██████▉ | 4877/7045 [15:51:13<7:25:34, 12.33s/it] 69%|██████▉ | 4878/7045 [15:51:24<7:12:19, 11.97s/it] {'loss': 1.1025, 'learning_rate': 1.1415047158429855e-06, 'epoch': 0.69} + 69%|██████▉ | 4878/7045 [15:51:24<7:12:19, 11.97s/it] 69%|██████▉ | 4879/7045 [15:51:35<7:01:29, 11.68s/it] {'loss': 1.106, 'learning_rate': 1.140539950601386e-06, 'epoch': 0.69} + 69%|██████▉ | 4879/7045 [15:51:35<7:01:29, 11.68s/it] 69%|██████▉ | 4880/7045 [15:51:48<7:12:58, 12.00s/it] {'loss': 1.1226, 'learning_rate': 1.1395754727310543e-06, 'epoch': 0.69} + 69%|██████▉ | 4880/7045 [15:51:48<7:12:58, 12.00s/it] 69%|██████▉ | 4881/7045 [15:52:01<7:17:02, 12.12s/it] {'loss': 1.1182, 'learning_rate': 1.13861128243587e-06, 'epoch': 0.69} + 69%|██████▉ | 4881/7045 [15:52:01<7:17:02, 12.12s/it] 69%|██████▉ | 4882/7045 [15:52:16<7:49:52, 13.03s/it] {'loss': 1.0918, 'learning_rate': 1.1376473799196488e-06, 'epoch': 0.69} + 69%|██████▉ | 4882/7045 [15:52:16<7:49:52, 13.03s/it] 69%|██████▉ | 4883/7045 [15:52:27<7:26:45, 12.40s/it] {'loss': 1.085, 'learning_rate': 1.1366837653861457e-06, 'epoch': 0.69} + 69%|██████▉ | 4883/7045 [15:52:27<7:26:45, 12.40s/it] 69%|██████▉ | 4884/7045 [15:52:38<7:09:35, 11.93s/it] {'loss': 1.1035, 'learning_rate': 1.1357204390390562e-06, 'epoch': 0.69} + 69%|██████▉ | 4884/7045 [15:52:38<7:09:35, 11.93s/it] 69%|██████▉ | 4885/7045 [15:52:49<7:07:18, 11.87s/it] {'loss': 1.0952, 'learning_rate': 1.1347574010820155e-06, 'epoch': 0.69} + 69%|██████▉ | 4885/7045 [15:52:49<7:07:18, 11.87s/it] 69%|██████▉ | 4886/7045 [15:53:02<7:21:52, 12.28s/it] {'loss': 1.0664, 'learning_rate': 1.1337946517185948e-06, 'epoch': 0.69} + 69%|██████▉ | 4886/7045 [15:53:02<7:21:52, 12.28s/it] 69%|██████▉ | 4887/7045 [15:53:14<7:11:04, 11.99s/it] {'loss': 1.1211, 'learning_rate': 1.132832191152308e-06, 'epoch': 0.69} + 69%|██████▉ | 4887/7045 [15:53:14<7:11:04, 11.99s/it] 69%|██████▉ | 4888/7045 [15:53:25<7:01:26, 11.72s/it] {'loss': 1.1465, 'learning_rate': 1.131870019586605e-06, 'epoch': 0.69} + 69%|██████▉ | 4888/7045 [15:53:25<7:01:26, 11.72s/it] 69%|██████▉ | 4889/7045 [15:53:36<6:54:26, 11.53s/it] {'loss': 1.1484, 'learning_rate': 1.130908137224875e-06, 'epoch': 0.69} + 69%|██████▉ | 4889/7045 [15:53:36<6:54:26, 11.53s/it] 69%|██████▉ | 4890/7045 [15:53:47<6:47:07, 11.34s/it] {'loss': 1.1182, 'learning_rate': 1.1299465442704488e-06, 'epoch': 0.69} + 69%|██████▉ | 4890/7045 [15:53:47<6:47:07, 11.34s/it] 69%|██████▉ | 4891/7045 [15:53:58<6:44:39, 11.27s/it] {'loss': 1.0977, 'learning_rate': 1.1289852409265917e-06, 'epoch': 0.69} + 69%|██████▉ | 4891/7045 [15:53:58<6:44:39, 11.27s/it] 69%|██████▉ | 4892/7045 [15:54:09<6:44:54, 11.28s/it] {'loss': 1.1045, 'learning_rate': 1.1280242273965122e-06, 'epoch': 0.69} + 69%|██████▉ | 4892/7045 [15:54:09<6:44:54, 11.28s/it] 69%|██████▉ | 4893/7045 [15:54:21<6:45:25, 11.30s/it] {'loss': 1.1455, 'learning_rate': 1.1270635038833546e-06, 'epoch': 0.69} + 69%|██████▉ | 4893/7045 [15:54:21<6:45:25, 11.30s/it] 69%|��█████▉ | 4894/7045 [15:54:32<6:45:25, 11.31s/it] {'loss': 1.1055, 'learning_rate': 1.1261030705902016e-06, 'epoch': 0.69} + 69%|██████▉ | 4894/7045 [15:54:32<6:45:25, 11.31s/it] 69%|██████▉ | 4895/7045 [15:54:43<6:41:15, 11.20s/it] {'loss': 1.1021, 'learning_rate': 1.1251429277200773e-06, 'epoch': 0.69} + 69%|██████▉ | 4895/7045 [15:54:43<6:41:15, 11.20s/it] 69%|██████▉ | 4896/7045 [15:54:54<6:40:36, 11.19s/it] {'loss': 1.124, 'learning_rate': 1.1241830754759413e-06, 'epoch': 0.69} + 69%|██████▉ | 4896/7045 [15:54:54<6:40:36, 11.19s/it] 70%|██████▉ | 4897/7045 [15:55:05<6:40:38, 11.19s/it] {'loss': 1.124, 'learning_rate': 1.1232235140606942e-06, 'epoch': 0.7} + 70%|██████▉ | 4897/7045 [15:55:05<6:40:38, 11.19s/it] 70%|██████▉ | 4898/7045 [15:55:17<6:49:38, 11.45s/it] {'loss': 1.0908, 'learning_rate': 1.122264243677175e-06, 'epoch': 0.7} + 70%|██████▉ | 4898/7045 [15:55:17<6:49:38, 11.45s/it] 70%|██████▉ | 4899/7045 [15:55:28<6:45:43, 11.34s/it] {'loss': 1.1035, 'learning_rate': 1.1213052645281591e-06, 'epoch': 0.7} + 70%|██████▉ | 4899/7045 [15:55:28<6:45:43, 11.34s/it] 70%|██████▉ | 4900/7045 [15:55:40<6:46:16, 11.36s/it] {'loss': 1.1006, 'learning_rate': 1.1203465768163613e-06, 'epoch': 0.7} + 70%|██████▉ | 4900/7045 [15:55:40<6:46:16, 11.36s/it] 70%|██████▉ | 4901/7045 [15:55:51<6:43:29, 11.29s/it] {'loss': 1.0986, 'learning_rate': 1.1193881807444362e-06, 'epoch': 0.7} + 70%|██████▉ | 4901/7045 [15:55:51<6:43:29, 11.29s/it] 70%|██████▉ | 4902/7045 [15:56:02<6:45:34, 11.36s/it] {'loss': 1.1152, 'learning_rate': 1.1184300765149744e-06, 'epoch': 0.7} + 70%|██████▉ | 4902/7045 [15:56:02<6:45:34, 11.36s/it] 70%|██████▉ | 4903/7045 [15:56:14<6:42:14, 11.27s/it] {'loss': 1.1475, 'learning_rate': 1.1174722643305075e-06, 'epoch': 0.7} + 70%|██████▉ | 4903/7045 [15:56:14<6:42:14, 11.27s/it] 70%|██████▉ | 4904/7045 [15:56:25<6:42:50, 11.29s/it] {'loss': 1.1572, 'learning_rate': 1.1165147443935029e-06, 'epoch': 0.7} + 70%|██████▉ | 4904/7045 [15:56:25<6:42:50, 11.29s/it] 70%|██████▉ | 4905/7045 [15:56:36<6:42:46, 11.29s/it] {'loss': 1.1172, 'learning_rate': 1.1155575169063668e-06, 'epoch': 0.7} + 70%|██████▉ | 4905/7045 [15:56:36<6:42:46, 11.29s/it] 70%|██████▉ | 4906/7045 [15:56:49<7:00:04, 11.78s/it] {'loss': 1.0679, 'learning_rate': 1.1146005820714454e-06, 'epoch': 0.7} + 70%|██████▉ | 4906/7045 [15:56:49<7:00:04, 11.78s/it] 70%|██████▉ | 4907/7045 [15:57:00<6:50:12, 11.51s/it] {'loss': 1.1475, 'learning_rate': 1.1136439400910206e-06, 'epoch': 0.7} + 70%|██████▉ | 4907/7045 [15:57:00<6:50:12, 11.51s/it] 70%|██████▉ | 4908/7045 [15:57:12<6:50:31, 11.53s/it] {'loss': 1.083, 'learning_rate': 1.1126875911673124e-06, 'epoch': 0.7} + 70%|██████▉ | 4908/7045 [15:57:12<6:50:31, 11.53s/it] 70%|██████▉ | 4909/7045 [15:57:23<6:48:23, 11.47s/it] {'loss': 1.1104, 'learning_rate': 1.1117315355024826e-06, 'epoch': 0.7} + 70%|██████▉ | 4909/7045 [15:57:23<6:48:23, 11.47s/it] 70%|██████▉ | 4910/7045 [15:57:34<6:44:47, 11.38s/it] {'loss': 1.1279, 'learning_rate': 1.110775773298627e-06, 'epoch': 0.7} + 70%|██████▉ | 4910/7045 [15:57:34<6:44:47, 11.38s/it] 70%|██████▉ | 4911/7045 [15:57:45<6:38:23, 11.20s/it] {'loss': 1.064, 'learning_rate': 1.1098203047577797e-06, 'epoch': 0.7} + 70%|██████▉ | 4911/7045 [15:57:45<6:38:23, 11.20s/it] 70%|██████▉ | 4912/7045 [15:57:56<6:33:56, 11.08s/it] {'loss': 1.1025, 'learning_rate': 1.1088651300819153e-06, 'epoch': 0.7} + 70%|██████▉ | 4912/7045 [15:57:56<6:33:56, 11.08s/it] 70%|██████▉ | 4913/7045 [15:58:07<6:33:24, 11.07s/it] {'loss': 1.0771, 'learning_rate': 1.1079102494729432e-06, 'epoch': 0.7} + 70%|██████▉ | 4913/7045 [15:58:07<6:33:24, 11.07s/it] 70%|██████▉ | 4914/7045 [15:58:19<6:51:05, 11.57s/it] {'loss': 1.0337, 'learning_rate': 1.1069556631327135e-06, 'epoch': 0.7} + 70%|██████▉ | 4914/7045 [15:58:19<6:51:05, 11.57s/it] 70%|██████▉ | 4915/7045 [15:58:31<6:50:56, 11.58s/it] {'loss': 1.1201, 'learning_rate': 1.106001371263012e-06, 'epoch': 0.7} + 70%|██████▉ | 4915/7045 [15:58:31<6:50:56, 11.58s/it] 70%|██████▉ | 4916/7045 [15:58:42<6:44:29, 11.40s/it] {'loss': 1.0635, 'learning_rate': 1.1050473740655624e-06, 'epoch': 0.7} + 70%|██████▉ | 4916/7045 [15:58:42<6:44:29, 11.40s/it] 70%|██████▉ | 4917/7045 [15:58:53<6:41:08, 11.31s/it] {'loss': 1.0918, 'learning_rate': 1.1040936717420281e-06, 'epoch': 0.7} + 70%|██████▉ | 4917/7045 [15:58:53<6:41:08, 11.31s/it] 70%|██████▉ | 4918/7045 [15:59:05<6:52:47, 11.64s/it] {'loss': 1.1196, 'learning_rate': 1.1031402644940076e-06, 'epoch': 0.7} + 70%|██████▉ | 4918/7045 [15:59:05<6:52:47, 11.64s/it] 70%|██████▉ | 4919/7045 [15:59:17<6:47:59, 11.51s/it] {'loss': 1.1182, 'learning_rate': 1.1021871525230373e-06, 'epoch': 0.7} + 70%|██████▉ | 4919/7045 [15:59:17<6:47:59, 11.51s/it] 70%|██████▉ | 4920/7045 [15:59:28<6:44:46, 11.43s/it] {'loss': 1.1279, 'learning_rate': 1.1012343360305935e-06, 'epoch': 0.7} + 70%|██████▉ | 4920/7045 [15:59:28<6:44:46, 11.43s/it] 70%|██████▉ | 4921/7045 [15:59:39<6:42:10, 11.36s/it] {'loss': 1.1113, 'learning_rate': 1.100281815218089e-06, 'epoch': 0.7} + 70%|██████▉ | 4921/7045 [15:59:39<6:42:10, 11.36s/it] 70%|██████▉ | 4922/7045 [15:59:52<6:54:39, 11.72s/it] {'loss': 1.0884, 'learning_rate': 1.0993295902868723e-06, 'epoch': 0.7} + 70%|██████▉ | 4922/7045 [15:59:52<6:54:39, 11.72s/it] 70%|██████▉ | 4923/7045 [16:00:04<6:55:37, 11.75s/it] {'loss': 1.1289, 'learning_rate': 1.098377661438232e-06, 'epoch': 0.7} + 70%|██████▉ | 4923/7045 [16:00:04<6:55:37, 11.75s/it] 70%|██████▉ | 4924/7045 [16:00:16<7:04:59, 12.02s/it] {'loss': 1.0908, 'learning_rate': 1.0974260288733916e-06, 'epoch': 0.7} + 70%|██████▉ | 4924/7045 [16:00:16<7:04:59, 12.02s/it] 70%|██████▉ | 4925/7045 [16:00:27<6:56:00, 11.77s/it] {'loss': 1.1025, 'learning_rate': 1.0964746927935143e-06, 'epoch': 0.7} + 70%|██████▉ | 4925/7045 [16:00:27<6:56:00, 11.77s/it] 70%|██████▉ | 4926/7045 [16:00:39<6:50:50, 11.63s/it] {'loss': 1.1152, 'learning_rate': 1.0955236533996993e-06, 'epoch': 0.7} + 70%|██████▉ | 4926/7045 [16:00:39<6:50:50, 11.63s/it] 70%|██████▉ | 4927/7045 [16:00:49<6:41:44, 11.38s/it] {'loss': 1.0693, 'learning_rate': 1.094572910892982e-06, 'epoch': 0.7} + 70%|██████▉ | 4927/7045 [16:00:49<6:41:44, 11.38s/it] 70%|██████▉ | 4928/7045 [16:01:02<6:54:21, 11.74s/it] {'loss': 1.1187, 'learning_rate': 1.0936224654743385e-06, 'epoch': 0.7} + 70%|██████▉ | 4928/7045 [16:01:02<6:54:21, 11.74s/it] 70%|██████▉ | 4929/7045 [16:01:13<6:46:29, 11.53s/it] {'loss': 1.1523, 'learning_rate': 1.0926723173446789e-06, 'epoch': 0.7} + 70%|██████▉ | 4929/7045 [16:01:13<6:46:29, 11.53s/it] 70%|██████▉ | 4930/7045 [16:01:25<6:52:21, 11.70s/it] {'loss': 1.1001, 'learning_rate': 1.0917224667048507e-06, 'epoch': 0.7} + 70%|██████▉ | 4930/7045 [16:01:25<6:52:21, 11.70s/it] 70%|██████▉ | 4931/7045 [16:01:39<7:14:39, 12.34s/it] {'loss': 1.1562, 'learning_rate': 1.0907729137556407e-06, 'epoch': 0.7} + 70%|██████▉ | 4931/7045 [16:01:39<7:14:39, 12.34s/it] 70%|███████ | 4932/7045 [16:01:50<7:04:06, 12.04s/it] {'loss': 1.1543, 'learning_rate': 1.0898236586977703e-06, 'epoch': 0.7} + 70%|███████ | 4932/7045 [16:01:50<7:04:06, 12.04s/it] 70%|███████ | 4933/7045 [16:02:02<7:03:54, 12.04s/it] {'loss': 1.0762, 'learning_rate': 1.0888747017318997e-06, 'epoch': 0.7} + 70%|███████ | 4933/7045 [16:02:02<7:03:54, 12.04s/it] 70%|███████ | 4934/7045 [16:02:15<7:04:32, 12.07s/it] {'loss': 1.1357, 'learning_rate': 1.087926043058626e-06, 'epoch': 0.7} + 70%|███████ | 4934/7045 [16:02:15<7:04:32, 12.07s/it] 70%|███████ | 4935/7045 [16:02:25<6:51:17, 11.70s/it] {'loss': 1.1221, 'learning_rate': 1.0869776828784814e-06, 'epoch': 0.7} + 70%|███████ | 4935/7045 [16:02:25<6:51:17, 11.70s/it] 70%|███████ | 4936/7045 [16:02:38<6:57:31, 11.88s/it] {'loss': 1.0781, 'learning_rate': 1.0860296213919376e-06, 'epoch': 0.7} + 70%|███████ | 4936/7045 [16:02:38<6:57:31, 11.88s/it] 70%|███████ | 4937/7045 [16:02:51<7:13:05, 12.33s/it] {'loss': 1.0347, 'learning_rate': 1.0850818587994014e-06, 'epoch': 0.7} + 70%|███████ | 4937/7045 [16:02:51<7:13:05, 12.33s/it] 70%|███████ | 4938/7045 [16:03:02<7:03:11, 12.05s/it] {'loss': 1.1064, 'learning_rate': 1.0841343953012156e-06, 'epoch': 0.7} + 70%|███████ | 4938/7045 [16:03:02<7:03:11, 12.05s/it] 70%|███████ | 4939/7045 [16:03:13<6:49:30, 11.67s/it] {'loss': 1.0767, 'learning_rate': 1.083187231097663e-06, 'epoch': 0.7} + 70%|███████ | 4939/7045 [16:03:13<6:49:30, 11.67s/it] 70%|███████ | 4940/7045 [16:03:25<6:51:35, 11.73s/it] {'loss': 1.0625, 'learning_rate': 1.08224036638896e-06, 'epoch': 0.7} + 70%|███████ | 4940/7045 [16:03:25<6:51:35, 11.73s/it] 70%|███████ | 4941/7045 [16:03:38<7:03:24, 12.07s/it] {'loss': 1.1074, 'learning_rate': 1.0812938013752601e-06, 'epoch': 0.7} + 70%|███████ | 4941/7045 [16:03:38<7:03:24, 12.07s/it] 70%|███████ | 4942/7045 [16:03:49<6:52:17, 11.76s/it] {'loss': 1.0806, 'learning_rate': 1.0803475362566562e-06, 'epoch': 0.7} + 70%|███████ | 4942/7045 [16:03:49<6:52:17, 11.76s/it] 70%|███████ | 4943/7045 [16:04:00<6:42:49, 11.50s/it] {'loss': 1.0947, 'learning_rate': 1.079401571233174e-06, 'epoch': 0.7} + 70%|███████ | 4943/7045 [16:04:00<6:42:49, 11.50s/it] 70%|███████ | 4944/7045 [16:04:11<6:37:56, 11.36s/it] {'loss': 1.1328, 'learning_rate': 1.0784559065047781e-06, 'epoch': 0.7} + 70%|███████ | 4944/7045 [16:04:11<6:37:56, 11.36s/it] 70%|███████ | 4945/7045 [16:04:22<6:33:32, 11.24s/it] {'loss': 1.0781, 'learning_rate': 1.0775105422713705e-06, 'epoch': 0.7} + 70%|███████ | 4945/7045 [16:04:22<6:33:32, 11.24s/it] 70%|███████ | 4946/7045 [16:04:33<6:33:46, 11.26s/it] {'loss': 1.084, 'learning_rate': 1.076565478732786e-06, 'epoch': 0.7} + 70%|███████ | 4946/7045 [16:04:33<6:33:46, 11.26s/it] 70%|███████ | 4947/7045 [16:04:45<6:41:00, 11.47s/it] {'loss': 1.125, 'learning_rate': 1.0756207160888003e-06, 'epoch': 0.7} + 70%|███████ | 4947/7045 [16:04:45<6:41:00, 11.47s/it] 70%|███████ | 4948/7045 [16:04:56<6:35:02, 11.30s/it] {'loss': 1.1143, 'learning_rate': 1.0746762545391218e-06, 'epoch': 0.7} + 70%|███████ | 4948/7045 [16:04:56<6:35:02, 11.30s/it] 70%|███████ | 4949/7045 [16:05:07<6:34:22, 11.29s/it] {'loss': 1.1064, 'learning_rate': 1.0737320942833968e-06, 'epoch': 0.7} + 70%|███████ | 4949/7045 [16:05:07<6:34:22, 11.29s/it] 70%|███████ | 4950/7045 [16:05:19<6:34:46, 11.31s/it] {'loss': 1.1152, 'learning_rate': 1.0727882355212087e-06, 'epoch': 0.7} + 70%|███████ | 4950/7045 [16:05:19<6:34:46, 11.31s/it] 70%|███████ | 4951/7045 [16:05:30<6:39:08, 11.44s/it] {'loss': 1.1191, 'learning_rate': 1.0718446784520764e-06, 'epoch': 0.7} + 70%|███████ | 4951/7045 [16:05:30<6:39:08, 11.44s/it] 70%|███████ | 4952/7045 [16:05:43<6:53:22, 11.85s/it] {'loss': 1.1362, 'learning_rate': 1.070901423275453e-06, 'epoch': 0.7} + 70%|███████ | 4952/7045 [16:05:43<6:53:22, 11.85s/it] 70%|███████ | 4953/7045 [16:05:55<6:47:20, 11.68s/it] {'loss': 1.0601, 'learning_rate': 1.0699584701907326e-06, 'epoch': 0.7} + 70%|███████ | 4953/7045 [16:05:55<6:47:20, 11.68s/it] 70%|███████ | 4954/7045 [16:06:06<6:42:04, 11.54s/it] {'loss': 1.1143, 'learning_rate': 1.06901581939724e-06, 'epoch': 0.7} + 70%|███████ | 4954/7045 [16:06:06<6:42:04, 11.54s/it] 70%|███████ | 4955/7045 [16:06:19<6:59:11, 12.03s/it] {'loss': 1.1338, 'learning_rate': 1.0680734710942409e-06, 'epoch': 0.7} + 70%|███████ | 4955/7045 [16:06:19<6:59:11, 12.03s/it] 70%|███████ | 4956/7045 [16:06:30<6:51:36, 11.82s/it] {'loss': 1.1143, 'learning_rate': 1.067131425480934e-06, 'epoch': 0.7} + 70%|███████ | 4956/7045 [16:06:30<6:51:36, 11.82s/it] 70%|███████ | 4957/7045 [16:06:42<6:50:45, 11.80s/it] {'loss': 1.0918, 'learning_rate': 1.0661896827564535e-06, 'epoch': 0.7} + 70%|███████ | 4957/7045 [16:06:42<6:50:45, 11.80s/it] 70%|███████ | 4958/7045 [16:06:55<7:04:17, 12.20s/it] {'loss': 1.0693, 'learning_rate': 1.0652482431198725e-06, 'epoch': 0.7} + 70%|███████ | 4958/7045 [16:06:55<7:04:17, 12.20s/it] 70%|███████ | 4959/7045 [16:07:08<7:08:51, 12.34s/it] {'loss': 1.0762, 'learning_rate': 1.0643071067701989e-06, 'epoch': 0.7} + 70%|███████ | 4959/7045 [16:07:08<7:08:51, 12.34s/it] 70%|███████ | 4960/7045 [16:07:19<6:58:41, 12.05s/it] {'loss': 1.0688, 'learning_rate': 1.0633662739063742e-06, 'epoch': 0.7} + 70%|███████ | 4960/7045 [16:07:19<6:58:41, 12.05s/it] 70%|███████ | 4961/7045 [16:07:32<7:04:20, 12.22s/it] {'loss': 1.1035, 'learning_rate': 1.0624257447272796e-06, 'epoch': 0.7} + 70%|███████ | 4961/7045 [16:07:32<7:04:20, 12.22s/it] 70%|███████ | 4962/7045 [16:07:43<6:52:13, 11.87s/it] {'loss': 1.0996, 'learning_rate': 1.061485519431729e-06, 'epoch': 0.7} + 70%|███████ | 4962/7045 [16:07:43<6:52:13, 11.87s/it] 70%|███████ | 4963/7045 [16:07:56<7:02:09, 12.17s/it] {'loss': 1.0947, 'learning_rate': 1.0605455982184725e-06, 'epoch': 0.7} + 70%|███████ | 4963/7045 [16:07:56<7:02:09, 12.17s/it] 70%|███████ | 4964/7045 [16:08:07<6:49:48, 11.82s/it] {'loss': 1.0928, 'learning_rate': 1.0596059812861986e-06, 'epoch': 0.7} + 70%|███████ | 4964/7045 [16:08:07<6:49:48, 11.82s/it] 70%|███████ | 4965/7045 [16:08:18<6:43:48, 11.65s/it] {'loss': 1.1221, 'learning_rate': 1.0586666688335276e-06, 'epoch': 0.7} + 70%|███████ | 4965/7045 [16:08:18<6:43:48, 11.65s/it] 70%|███████ | 4966/7045 [16:08:29<6:39:11, 11.52s/it] {'loss': 1.0771, 'learning_rate': 1.0577276610590186e-06, 'epoch': 0.7} + 70%|███████ | 4966/7045 [16:08:29<6:39:11, 11.52s/it] 71%|███████ | 4967/7045 [16:08:41<6:39:17, 11.53s/it] {'loss': 1.1123, 'learning_rate': 1.0567889581611645e-06, 'epoch': 0.71} + 71%|███████ | 4967/7045 [16:08:41<6:39:17, 11.53s/it] 71%|███████ | 4968/7045 [16:08:52<6:37:17, 11.48s/it] {'loss': 1.1152, 'learning_rate': 1.0558505603383934e-06, 'epoch': 0.71} + 71%|███████ | 4968/7045 [16:08:52<6:37:17, 11.48s/it] 71%|███████ | 4969/7045 [16:09:03<6:34:48, 11.41s/it] {'loss': 1.1128, 'learning_rate': 1.0549124677890703e-06, 'epoch': 0.71} + 71%|███████ | 4969/7045 [16:09:03<6:34:48, 11.41s/it] 71%|███████ | 4970/7045 [16:09:15<6:32:36, 11.35s/it] {'loss': 1.1167, 'learning_rate': 1.0539746807114967e-06, 'epoch': 0.71} + 71%|███████ | 4970/7045 [16:09:15<6:32:36, 11.35s/it] 71%|███████ | 4971/7045 [16:09:26<6:31:55, 11.34s/it] {'loss': 1.085, 'learning_rate': 1.0530371993039053e-06, 'epoch': 0.71} + 71%|███████ | 4971/7045 [16:09:26<6:31:55, 11.34s/it] 71%|███████ | 4972/7045 [16:09:37<6:29:52, 11.28s/it] {'loss': 1.1084, 'learning_rate': 1.0521000237644697e-06, 'epoch': 0.71} + 71%|███████ | 4972/7045 [16:09:37<6:29:52, 11.28s/it] 71%|███████ | 4973/7045 [16:09:49<6:32:21, 11.36s/it] {'loss': 1.1279, 'learning_rate': 1.0511631542912942e-06, 'epoch': 0.71} + 71%|███████ | 4973/7045 [16:09:49<6:32:21, 11.36s/it] 71%|███████ | 4974/7045 [16:10:00<6:31:00, 11.33s/it] {'loss': 1.0972, 'learning_rate': 1.0502265910824196e-06, 'epoch': 0.71} + 71%|███████ | 4974/7045 [16:10:00<6:31:00, 11.33s/it] 71%|███████ | 4975/7045 [16:10:11<6:28:18, 11.26s/it] {'loss': 1.1333, 'learning_rate': 1.0492903343358244e-06, 'epoch': 0.71} + 71%|███████ | 4975/7045 [16:10:11<6:28:18, 11.26s/it] 71%|███████ | 4976/7045 [16:10:22<6:29:22, 11.29s/it] {'loss': 1.1514, 'learning_rate': 1.0483543842494187e-06, 'epoch': 0.71} + 71%|███████ | 4976/7045 [16:10:22<6:29:22, 11.29s/it] 71%|███████ | 4977/7045 [16:10:34<6:32:45, 11.40s/it] {'loss': 1.1074, 'learning_rate': 1.0474187410210516e-06, 'epoch': 0.71} + 71%|███████ | 4977/7045 [16:10:34<6:32:45, 11.40s/it] 71%|███████ | 4978/7045 [16:10:45<6:30:04, 11.32s/it] {'loss': 1.1162, 'learning_rate': 1.0464834048485036e-06, 'epoch': 0.71} + 71%|███████ | 4978/7045 [16:10:45<6:30:04, 11.32s/it] 71%|███████ | 4979/7045 [16:10:56<6:30:50, 11.35s/it] {'loss': 1.0771, 'learning_rate': 1.0455483759294918e-06, 'epoch': 0.71} + 71%|███████ | 4979/7045 [16:10:56<6:30:50, 11.35s/it] 71%|███████ | 4980/7045 [16:11:07<6:26:52, 11.24s/it] {'loss': 1.0527, 'learning_rate': 1.0446136544616703e-06, 'epoch': 0.71} + 71%|███████ | 4980/7045 [16:11:07<6:26:52, 11.24s/it] 71%|███████ | 4981/7045 [16:11:21<6:53:52, 12.03s/it] {'loss': 1.1528, 'learning_rate': 1.0436792406426241e-06, 'epoch': 0.71} + 71%|███████ | 4981/7045 [16:11:21<6:53:52, 12.03s/it] 71%|███████ | 4982/7045 [16:11:33<6:53:10, 12.02s/it] {'loss': 1.126, 'learning_rate': 1.0427451346698769e-06, 'epoch': 0.71} + 71%|███████ | 4982/7045 [16:11:33<6:53:10, 12.02s/it] 71%|███████ | 4983/7045 [16:11:45<6:52:41, 12.01s/it] {'loss': 1.0908, 'learning_rate': 1.041811336740887e-06, 'epoch': 0.71} + 71%|███████ | 4983/7045 [16:11:45<6:52:41, 12.01s/it] 71%|███████ | 4984/7045 [16:11:57<6:44:42, 11.78s/it] {'loss': 1.0977, 'learning_rate': 1.0408778470530449e-06, 'epoch': 0.71} + 71%|███████ | 4984/7045 [16:11:57<6:44:42, 11.78s/it] 71%|███████ | 4985/7045 [16:12:08<6:39:59, 11.65s/it] {'loss': 1.1309, 'learning_rate': 1.039944665803677e-06, 'epoch': 0.71} + 71%|███████ | 4985/7045 [16:12:08<6:39:59, 11.65s/it] 71%|███████ | 4986/7045 [16:12:19<6:34:19, 11.49s/it] {'loss': 1.1172, 'learning_rate': 1.0390117931900473e-06, 'epoch': 0.71} + 71%|███████ | 4986/7045 [16:12:19<6:34:19, 11.49s/it] 71%|███████ | 4987/7045 [16:12:30<6:32:33, 11.44s/it] {'loss': 1.1089, 'learning_rate': 1.03807922940935e-06, 'epoch': 0.71} + 71%|█████���█ | 4987/7045 [16:12:30<6:32:33, 11.44s/it] 71%|███████ | 4988/7045 [16:12:43<6:46:25, 11.86s/it] {'loss': 1.1147, 'learning_rate': 1.0371469746587182e-06, 'epoch': 0.71} + 71%|███████ | 4988/7045 [16:12:43<6:46:25, 11.86s/it] 71%|███████ | 4989/7045 [16:12:56<7:00:57, 12.28s/it] {'loss': 1.084, 'learning_rate': 1.036215029135217e-06, 'epoch': 0.71} + 71%|███████ | 4989/7045 [16:12:56<7:00:57, 12.28s/it] 71%|███████ | 4990/7045 [16:13:07<6:46:24, 11.87s/it] {'loss': 1.1016, 'learning_rate': 1.035283393035846e-06, 'epoch': 0.71} + 71%|███████ | 4990/7045 [16:13:07<6:46:24, 11.87s/it] 71%|███████ | 4991/7045 [16:13:21<7:03:59, 12.39s/it] {'loss': 1.0513, 'learning_rate': 1.0343520665575425e-06, 'epoch': 0.71} + 71%|███████ | 4991/7045 [16:13:21<7:03:59, 12.39s/it] 71%|███████ | 4992/7045 [16:13:32<6:49:24, 11.96s/it] {'loss': 1.1182, 'learning_rate': 1.0334210498971745e-06, 'epoch': 0.71} + 71%|███████ | 4992/7045 [16:13:32<6:49:24, 11.96s/it] 71%|███████ | 4993/7045 [16:13:44<6:49:39, 11.98s/it] {'loss': 1.1206, 'learning_rate': 1.032490343251545e-06, 'epoch': 0.71} + 71%|███████ | 4993/7045 [16:13:44<6:49:39, 11.98s/it] 71%|███████ | 4994/7045 [16:13:55<6:39:17, 11.68s/it] {'loss': 1.127, 'learning_rate': 1.0315599468173959e-06, 'epoch': 0.71} + 71%|███████ | 4994/7045 [16:13:55<6:39:17, 11.68s/it] 71%|███████ | 4995/7045 [16:14:08<6:54:52, 12.14s/it] {'loss': 1.1074, 'learning_rate': 1.030629860791399e-06, 'epoch': 0.71} + 71%|███████ | 4995/7045 [16:14:08<6:54:52, 12.14s/it] 71%|███████ | 4996/7045 [16:14:19<6:45:19, 11.87s/it] {'loss': 1.1504, 'learning_rate': 1.0297000853701604e-06, 'epoch': 0.71} + 71%|███████ | 4996/7045 [16:14:19<6:45:19, 11.87s/it] 71%|███████ | 4997/7045 [16:14:31<6:39:57, 11.72s/it] {'loss': 1.0947, 'learning_rate': 1.028770620750224e-06, 'epoch': 0.71} + 71%|███████ | 4997/7045 [16:14:31<6:39:57, 11.72s/it] 71%|███████ | 4998/7045 [16:14:42<6:36:59, 11.64s/it] {'loss': 1.1279, 'learning_rate': 1.027841467128064e-06, 'epoch': 0.71} + 71%|███████ | 4998/7045 [16:14:42<6:36:59, 11.64s/it] 71%|███████ | 4999/7045 [16:14:53<6:30:09, 11.44s/it] {'loss': 1.1318, 'learning_rate': 1.0269126247000923e-06, 'epoch': 0.71} + 71%|███████ | 4999/7045 [16:14:53<6:30:09, 11.44s/it] 71%|███████ | 5000/7045 [16:15:04<6:28:16, 11.39s/it] {'loss': 1.1309, 'learning_rate': 1.0259840936626531e-06, 'epoch': 0.71} + 71%|███████ | 5000/7045 [16:15:04<6:28:16, 11.39s/it] 71%|███████ | 5001/7045 [16:15:17<6:36:06, 11.63s/it] {'loss': 1.1006, 'learning_rate': 1.025055874212024e-06, 'epoch': 0.71} + 71%|███████ | 5001/7045 [16:15:17<6:36:06, 11.63s/it] 71%|███████ | 5002/7045 [16:15:28<6:31:24, 11.49s/it] {'loss': 1.1475, 'learning_rate': 1.0241279665444195e-06, 'epoch': 0.71} + 71%|███████ | 5002/7045 [16:15:28<6:31:24, 11.49s/it] 71%|███████ | 5003/7045 [16:15:39<6:26:09, 11.35s/it] {'loss': 1.1035, 'learning_rate': 1.023200370855986e-06, 'epoch': 0.71} + 71%|███████ | 5003/7045 [16:15:39<6:26:09, 11.35s/it] 71%|███████ | 5004/7045 [16:15:50<6:23:58, 11.29s/it] {'loss': 1.0835, 'learning_rate': 1.0222730873428036e-06, 'epoch': 0.71} + 71%|███████ | 5004/7045 [16:15:50<6:23:58, 11.29s/it] 71%|███████ | 5005/7045 [16:16:01<6:24:07, 11.30s/it] {'loss': 1.1006, 'learning_rate': 1.0213461162008883e-06, 'epoch': 0.71} + 71%|███████ | 5005/7045 [16:16:01<6:24:07, 11.30s/it] 71%|███████ | 5006/7045 [16:16:13<6:32:11, 11.54s/it] {'loss': 1.0483, 'learning_rate': 1.0204194576261896e-06, 'epoch': 0.71} + 71%|███████ | 5006/7045 [16:16:13<6:32:11, 11.54s/it] 71%|███████ | 5007/7045 [16:16:25<6:28:29, 11.44s/it] {'loss': 1.1182, 'learning_rate': 1.0194931118145895e-06, 'epoch': 0.71} + 71%|███████ | 5007/7045 [16:16:25<6:28:29, 11.44s/it] 71%|███████ | 5008/7045 [16:16:37<6:33:45, 11.60s/it] {'loss': 1.0742, 'learning_rate': 1.018567078961906e-06, 'epoch': 0.71} + 71%|███████ | 5008/7045 [16:16:37<6:33:45, 11.60s/it] 71%|███████ | 5009/7045 [16:16:47<6:26:50, 11.40s/it] {'loss': 1.0874, 'learning_rate': 1.0176413592638878e-06, 'epoch': 0.71} + 71%|███████ | 5009/7045 [16:16:47<6:26:50, 11.40s/it] 71%|███████ | 5010/7045 [16:16:58<6:21:42, 11.25s/it] {'loss': 1.1064, 'learning_rate': 1.0167159529162218e-06, 'epoch': 0.71} + 71%|███████ | 5010/7045 [16:16:58<6:21:42, 11.25s/it] 71%|███████ | 5011/7045 [16:17:10<6:22:48, 11.29s/it] {'loss': 1.1035, 'learning_rate': 1.015790860114525e-06, 'epoch': 0.71} + 71%|███████ | 5011/7045 [16:17:10<6:22:48, 11.29s/it] 71%|███████ | 5012/7045 [16:17:21<6:20:12, 11.22s/it] {'loss': 1.0874, 'learning_rate': 1.0148660810543487e-06, 'epoch': 0.71} + 71%|███████ | 5012/7045 [16:17:21<6:20:12, 11.22s/it] 71%|███████ | 5013/7045 [16:17:32<6:18:06, 11.16s/it] {'loss': 1.1201, 'learning_rate': 1.01394161593118e-06, 'epoch': 0.71} + 71%|███████ | 5013/7045 [16:17:32<6:18:06, 11.16s/it] 71%|███████ | 5014/7045 [16:17:44<6:25:58, 11.40s/it] {'loss': 1.1338, 'learning_rate': 1.0130174649404375e-06, 'epoch': 0.71} + 71%|███████ | 5014/7045 [16:17:44<6:25:58, 11.40s/it] 71%|███████ | 5015/7045 [16:17:56<6:34:59, 11.67s/it] {'loss': 1.0972, 'learning_rate': 1.0120936282774732e-06, 'epoch': 0.71} + 71%|███████ | 5015/7045 [16:17:56<6:34:59, 11.67s/it] 71%|███████ | 5016/7045 [16:18:07<6:30:56, 11.56s/it] {'loss': 1.1104, 'learning_rate': 1.0111701061375752e-06, 'epoch': 0.71} + 71%|███████ | 5016/7045 [16:18:07<6:30:56, 11.56s/it] 71%|███████ | 5017/7045 [16:18:19<6:31:34, 11.59s/it] {'loss': 1.2012, 'learning_rate': 1.010246898715962e-06, 'epoch': 0.71} + 71%|███████ | 5017/7045 [16:18:19<6:31:34, 11.59s/it] 71%|███████ | 5018/7045 [16:18:31<6:36:24, 11.73s/it] {'loss': 1.1338, 'learning_rate': 1.0093240062077872e-06, 'epoch': 0.71} + 71%|███████ | 5018/7045 [16:18:31<6:36:24, 11.73s/it] 71%|███████ | 5019/7045 [16:18:42<6:30:54, 11.58s/it] {'loss': 1.0981, 'learning_rate': 1.0084014288081396e-06, 'epoch': 0.71} + 71%|███████ | 5019/7045 [16:18:42<6:30:54, 11.58s/it] 71%|███████▏ | 5020/7045 [16:18:54<6:28:07, 11.50s/it] {'loss': 1.1025, 'learning_rate': 1.0074791667120368e-06, 'epoch': 0.71} + 71%|███████▏ | 5020/7045 [16:18:54<6:28:07, 11.50s/it] 71%|███████▏ | 5021/7045 [16:19:05<6:28:06, 11.51s/it] {'loss': 1.1318, 'learning_rate': 1.0065572201144344e-06, 'epoch': 0.71} + 71%|███████▏ | 5021/7045 [16:19:05<6:28:06, 11.51s/it] 71%|███████▏ | 5022/7045 [16:19:16<6:23:54, 11.39s/it] {'loss': 1.0962, 'learning_rate': 1.0056355892102185e-06, 'epoch': 0.71} + 71%|███████▏ | 5022/7045 [16:19:16<6:23:54, 11.39s/it] 71%|███████▏ | 5023/7045 [16:19:29<6:38:32, 11.83s/it] {'loss': 1.1216, 'learning_rate': 1.0047142741942088e-06, 'epoch': 0.71} + 71%|███████▏ | 5023/7045 [16:19:29<6:38:32, 11.83s/it] 71%|███████▏ | 5024/7045 [16:19:41<6:38:32, 11.83s/it] {'loss': 1.0889, 'learning_rate': 1.0037932752611596e-06, 'epoch': 0.71} + 71%|███████▏ | 5024/7045 [16:19:41<6:38:32, 11.83s/it] 71%|███████▏ | 5025/7045 [16:19:53<6:35:01, 11.73s/it] {'loss': 1.1382, 'learning_rate': 1.002872592605757e-06, 'epoch': 0.71} + 71%|███████▏ | 5025/7045 [16:19:53<6:35:01, 11.73s/it] 71%|███████▏ | 5026/7045 [16:20:04<6:36:17, 11.78s/it] {'loss': 1.1504, 'learning_rate': 1.0019522264226201e-06, 'epoch': 0.71} + 71%|███████▏ | 5026/7045 [16:20:04<6:36:17, 11.78s/it] 71%|███████▏ | 5027/7045 [16:20:15<6:29:14, 11.57s/it] {'loss': 1.0942, 'learning_rate': 1.0010321769063033e-06, 'epoch': 0.71} + 71%|███████▏ | 5027/7045 [16:20:15<6:29:14, 11.57s/it] 71%|███████▏ | 5028/7045 [16:20:27<6:26:53, 11.51s/it] {'loss': 1.1191, 'learning_rate': 1.0001124442512907e-06, 'epoch': 0.71} + 71%|███████▏ | 5028/7045 [16:20:27<6:26:53, 11.51s/it] 71%|███████▏ | 5029/7045 [16:20:38<6:21:24, 11.35s/it] {'loss': 1.1406, 'learning_rate': 9.99193028652003e-07, 'epoch': 0.71} + 71%|███████▏ | 5029/7045 [16:20:38<6:21:24, 11.35s/it] 71%|███████▏ | 5030/7045 [16:20:51<6:38:36, 11.87s/it] {'loss': 1.0957, 'learning_rate': 9.982739303027903e-07, 'epoch': 0.71} + 71%|███████▏ | 5030/7045 [16:20:51<6:38:36, 11.87s/it] 71%|███████▏ | 5031/7045 [16:21:02<6:33:51, 11.73s/it] {'loss': 1.1084, 'learning_rate': 9.973551493979384e-07, 'epoch': 0.71} + 71%|███████▏ | 5031/7045 [16:21:02<6:33:51, 11.73s/it] 71%|███████▏ | 5032/7045 [16:21:13<6:26:51, 11.53s/it] {'loss': 1.1011, 'learning_rate': 9.964366861316655e-07, 'epoch': 0.71} + 71%|███████▏ | 5032/7045 [16:21:13<6:26:51, 11.53s/it] 71%|███████▏ | 5033/7045 [16:21:25<6:23:41, 11.44s/it] {'loss': 1.0601, 'learning_rate': 9.95518540698122e-07, 'epoch': 0.71} + 71%|███████▏ | 5033/7045 [16:21:25<6:23:41, 11.44s/it] 71%|███████▏ | 5034/7045 [16:21:36<6:25:33, 11.50s/it] {'loss': 1.1494, 'learning_rate': 9.946007132913898e-07, 'epoch': 0.71} + 71%|███████▏ | 5034/7045 [16:21:36<6:25:33, 11.50s/it] 71%|███████▏ | 5035/7045 [16:21:48<6:30:54, 11.67s/it] {'loss': 1.0889, 'learning_rate': 9.93683204105487e-07, 'epoch': 0.71} + 71%|███████▏ | 5035/7045 [16:21:48<6:30:54, 11.67s/it] 71%|███████▏ | 5036/7045 [16:21:59<6:25:30, 11.51s/it] {'loss': 1.1289, 'learning_rate': 9.92766013334362e-07, 'epoch': 0.71} + 71%|███████▏ | 5036/7045 [16:21:59<6:25:30, 11.51s/it] 71%|███████▏ | 5037/7045 [16:22:11<6:26:48, 11.56s/it] {'loss': 1.123, 'learning_rate': 9.91849141171895e-07, 'epoch': 0.71} + 71%|███████▏ | 5037/7045 [16:22:11<6:26:48, 11.56s/it] 72%|███████▏ | 5038/7045 [16:22:22<6:24:31, 11.50s/it] {'loss': 1.0767, 'learning_rate': 9.909325878119019e-07, 'epoch': 0.72} + 72%|███████▏ | 5038/7045 [16:22:22<6:24:31, 11.50s/it] 72%|███████▏ | 5039/7045 [16:22:34<6:20:25, 11.38s/it] {'loss': 1.1045, 'learning_rate': 9.900163534481285e-07, 'epoch': 0.72} + 72%|███████▏ | 5039/7045 [16:22:34<6:20:25, 11.38s/it] 72%|███████▏ | 5040/7045 [16:22:45<6:17:29, 11.30s/it] {'loss': 1.1045, 'learning_rate': 9.891004382742554e-07, 'epoch': 0.72} + 72%|███████▏ | 5040/7045 [16:22:45<6:17:29, 11.30s/it] 72%|███████▏ | 5041/7045 [16:22:56<6:18:05, 11.32s/it] {'loss': 1.1533, 'learning_rate': 9.881848424838938e-07, 'epoch': 0.72} + 72%|███████▏ | 5041/7045 [16:22:56<6:18:05, 11.32s/it] 72%|███████▏ | 5042/7045 [16:23:08<6:19:06, 11.36s/it] {'loss': 1.0889, 'learning_rate': 9.872695662705864e-07, 'epoch': 0.72} + 72%|███████▏ | 5042/7045 [16:23:08<6:19:06, 11.36s/it] 72%|███████▏ | 5043/7045 [16:23:19<6:22:10, 11.45s/it] {'loss': 1.1084, 'learning_rate': 9.863546098278137e-07, 'epoch': 0.72} + 72%|███████▏ | 5043/7045 [16:23:19<6:22:10, 11.45s/it] 72%|███████▏ | 5044/7045 [16:23:31<6:21:01, 11.43s/it] {'loss': 1.1567, 'learning_rate': 9.85439973348983e-07, 'epoch': 0.72} + 72%|███████▏ | 5044/7045 [16:23:31<6:21:01, 11.43s/it] 72%|███████▏ | 5045/7045 [16:23:43<6:34:35, 11.84s/it] {'loss': 1.1006, 'learning_rate': 9.845256570274352e-07, 'epoch': 0.72} + 72%|███████▏ | 5045/7045 [16:23:43<6:34:35, 11.84s/it] 72%|███████▏ | 5046/7045 [16:23:56<6:44:59, 12.16s/it] {'loss': 1.0898, 'learning_rate': 9.836116610564461e-07, 'epoch': 0.72} + 72%|███████▏ | 5046/7045 [16:23:56<6:44:59, 12.16s/it] 72%|███████▏ | 5047/7045 [16:24:08<6:43:42, 12.12s/it] {'loss': 1.1123, 'learning_rate': 9.826979856292209e-07, 'epoch': 0.72} + 72%|███████▏ | 5047/7045 [16:24:08<6:43:42, 12.12s/it] 72%|███████▏ | 5048/7045 [16:24:20<6:38:46, 11.98s/it] {'loss': 1.1338, 'learning_rate': 9.817846309388973e-07, 'epoch': 0.72} + 72%|███████▏ | 5048/7045 [16:24:20<6:38:46, 11.98s/it] 72%|███████▏ | 5049/7045 [16:24:33<6:45:27, 12.19s/it] {'loss': 1.1152, 'learning_rate': 9.808715971785475e-07, 'epoch': 0.72} + 72%|███████▏ | 5049/7045 [16:24:33<6:45:27, 12.19s/it] 72%|███████▏ | 5050/7045 [16:24:44<6:32:55, 11.82s/it] {'loss': 1.1221, 'learning_rate': 9.799588845411734e-07, 'epoch': 0.72} + 72%|███████▏ | 5050/7045 [16:24:44<6:32:55, 11.82s/it] 72%|███████▏ | 5051/7045 [16:24:55<6:32:45, 11.82s/it] {'loss': 1.0884, 'learning_rate': 9.790464932197109e-07, 'epoch': 0.72} + 72%|███████▏ | 5051/7045 [16:24:55<6:32:45, 11.82s/it] 72%|███████▏ | 5052/7045 [16:25:06<6:22:40, 11.52s/it] {'loss': 1.1045, 'learning_rate': 9.781344234070265e-07, 'epoch': 0.72} + 72%|███████▏ | 5052/7045 [16:25:06<6:22:40, 11.52s/it] 72%|███████▏ | 5053/7045 [16:25:17<6:18:37, 11.40s/it] {'loss': 1.1367, 'learning_rate': 9.772226752959183e-07, 'epoch': 0.72} + 72%|███████▏ | 5053/7045 [16:25:17<6:18:37, 11.40s/it] 72%|███████▏ | 5054/7045 [16:25:29<6:16:44, 11.35s/it] {'loss': 1.1016, 'learning_rate': 9.763112490791183e-07, 'epoch': 0.72} + 72%|███████▏ | 5054/7045 [16:25:29<6:16:44, 11.35s/it] 72%|███████▏ | 5055/7045 [16:25:41<6:22:28, 11.53s/it] {'loss': 1.1035, 'learning_rate': 9.754001449492907e-07, 'epoch': 0.72} + 72%|███████▏ | 5055/7045 [16:25:41<6:22:28, 11.53s/it] 72%|███████▏ | 5056/7045 [16:25:53<6:29:09, 11.74s/it] {'loss': 1.0708, 'learning_rate': 9.74489363099028e-07, 'epoch': 0.72} + 72%|███████▏ | 5056/7045 [16:25:53<6:29:09, 11.74s/it] 72%|███████▏ | 5057/7045 [16:26:04<6:23:51, 11.59s/it] {'loss': 1.0996, 'learning_rate': 9.735789037208595e-07, 'epoch': 0.72} + 72%|███████▏ | 5057/7045 [16:26:04<6:23:51, 11.59s/it] 72%|███████▏ | 5058/7045 [16:26:16<6:30:02, 11.78s/it] {'loss': 1.0918, 'learning_rate': 9.726687670072427e-07, 'epoch': 0.72} + 72%|███████▏ | 5058/7045 [16:26:16<6:30:02, 11.78s/it] 72%|███████▏ | 5059/7045 [16:26:28<6:25:15, 11.64s/it] {'loss': 1.127, 'learning_rate': 9.717589531505673e-07, 'epoch': 0.72} + 72%|███████▏ | 5059/7045 [16:26:28<6:25:15, 11.64s/it] 72%|███████▏ | 5060/7045 [16:26:39<6:20:05, 11.49s/it] {'loss': 1.1162, 'learning_rate': 9.70849462343157e-07, 'epoch': 0.72} + 72%|███████▏ | 5060/7045 [16:26:39<6:20:05, 11.49s/it] 72%|███████▏ | 5061/7045 [16:26:50<6:15:43, 11.36s/it] {'loss': 1.0801, 'learning_rate': 9.699402947772644e-07, 'epoch': 0.72} + 72%|███████▏ | 5061/7045 [16:26:50<6:15:43, 11.36s/it] 72%|███████▏ | 5062/7045 [16:27:01<6:16:52, 11.40s/it] {'loss': 1.1318, 'learning_rate': 9.690314506450763e-07, 'epoch': 0.72} + 72%|███████▏ | 5062/7045 [16:27:01<6:16:52, 11.40s/it] 72%|███████▏ | 5063/7045 [16:27:14<6:34:35, 11.95s/it] {'loss': 1.0898, 'learning_rate': 9.681229301387095e-07, 'epoch': 0.72} + 72%|███████▏ | 5063/7045 [16:27:14<6:34:35, 11.95s/it] 72%|███████▏ | 5064/7045 [16:27:25<6:25:19, 11.67s/it] {'loss': 1.1079, 'learning_rate': 9.672147334502118e-07, 'epoch': 0.72} + 72%|███████▏ | 5064/7045 [16:27:25<6:25:19, 11.67s/it] 72%|███████▏ | 5065/7045 [16:27:42<7:15:36, 13.20s/it] {'loss': 1.0806, 'learning_rate': 9.663068607715653e-07, 'epoch': 0.72} + 72%|███████▏ | 5065/7045 [16:27:42<7:15:36, 13.20s/it] 72%|███████▏ | 5066/7045 [16:27:54<6:59:37, 12.72s/it] {'loss': 1.0859, 'learning_rate': 9.6539931229468e-07, 'epoch': 0.72} + 72%|███████▏ | 5066/7045 [16:27:54<6:59:37, 12.72s/it] 72%|███████▏ | 5067/7045 [16:28:06<6:57:08, 12.65s/it] {'loss': 1.0781, 'learning_rate': 9.644920882114003e-07, 'epoch': 0.72} + 72%|███████▏ | 5067/7045 [16:28:06<6:57:08, 12.65s/it] 72%|███████▏ | 5068/7045 [16:28:17<6:38:25, 12.09s/it] {'loss': 1.0645, 'learning_rate': 9.635851887135016e-07, 'epoch': 0.72} + 72%|███████▏ | 5068/7045 [16:28:17<6:38:25, 12.09s/it] 72%|███████▏ | 5069/7045 [16:28:29<6:32:45, 11.93s/it] {'loss': 1.0869, 'learning_rate': 9.626786139926897e-07, 'epoch': 0.72} + 72%|███████▏ | 5069/7045 [16:28:29<6:32:45, 11.93s/it] 72%|███████▏ | 5070/7045 [16:28:40<6:23:48, 11.66s/it] {'loss': 1.1416, 'learning_rate': 9.617723642406007e-07, 'epoch': 0.72} + 72%|███████▏ | 5070/7045 [16:28:40<6:23:48, 11.66s/it] 72%|███████▏ | 5071/7045 [16:28:51<6:15:16, 11.41s/it] {'loss': 1.0996, 'learning_rate': 9.608664396488051e-07, 'epoch': 0.72} + 72%|███████▏ | 5071/7045 [16:28:51<6:15:16, 11.41s/it] 72%|███████▏ | 5072/7045 [16:29:02<6:13:29, 11.36s/it] {'loss': 1.0996, 'learning_rate': 9.599608404088018e-07, 'epoch': 0.72} + 72%|███████▏ | 5072/7045 [16:29:02<6:13:29, 11.36s/it] 72%|███████▏ | 5073/7045 [16:29:13<6:11:41, 11.31s/it] {'loss': 1.0747, 'learning_rate': 9.590555667120235e-07, 'epoch': 0.72} + 72%|███████▏ | 5073/7045 [16:29:13<6:11:41, 11.31s/it] 72%|███████▏ | 5074/7045 [16:29:25<6:14:01, 11.39s/it] {'loss': 1.1396, 'learning_rate': 9.581506187498315e-07, 'epoch': 0.72} + 72%|███████▏ | 5074/7045 [16:29:25<6:14:01, 11.39s/it] 72%|███████▏ | 5075/7045 [16:29:36<6:13:57, 11.39s/it] {'loss': 1.0898, 'learning_rate': 9.572459967135193e-07, 'epoch': 0.72} + 72%|███████▏ | 5075/7045 [16:29:36<6:13:57, 11.39s/it] 72%|███████▏ | 5076/7045 [16:29:47<6:10:23, 11.29s/it] {'loss': 1.1094, 'learning_rate': 9.563417007943127e-07, 'epoch': 0.72} + 72%|███████▏ | 5076/7045 [16:29:47<6:10:23, 11.29s/it] 72%|███████▏ | 5077/7045 [16:29:58<6:08:07, 11.22s/it] {'loss': 1.0728, 'learning_rate': 9.554377311833667e-07, 'epoch': 0.72} + 72%|███████▏ | 5077/7045 [16:29:58<6:08:07, 11.22s/it] 72%|███████▏ | 5078/7045 [16:30:09<6:04:37, 11.12s/it] {'loss': 1.1221, 'learning_rate': 9.54534088071767e-07, 'epoch': 0.72} + 72%|███████▏ | 5078/7045 [16:30:09<6:04:37, 11.12s/it] 72%|███████▏ | 5079/7045 [16:30:21<6:08:59, 11.26s/it] {'loss': 1.1201, 'learning_rate': 9.536307716505341e-07, 'epoch': 0.72} + 72%|███████▏ | 5079/7045 [16:30:21<6:08:59, 11.26s/it] 72%|███████▏ | 5080/7045 [16:30:34<6:27:04, 11.82s/it] {'loss': 1.123, 'learning_rate': 9.527277821106154e-07, 'epoch': 0.72} + 72%|███████▏ | 5080/7045 [16:30:34<6:27:04, 11.82s/it] 72%|███████▏ | 5081/7045 [16:30:45<6:17:48, 11.54s/it] {'loss': 1.1299, 'learning_rate': 9.518251196428896e-07, 'epoch': 0.72} + 72%|███████▏ | 5081/7045 [16:30:45<6:17:48, 11.54s/it] 72%|███████▏ | 5082/7045 [16:30:56<6:12:46, 11.39s/it] {'loss': 1.1504, 'learning_rate': 9.509227844381691e-07, 'epoch': 0.72} + 72%|███████▏ | 5082/7045 [16:30:56<6:12:46, 11.39s/it] 72%|███████▏ | 5083/7045 [16:31:07<6:10:17, 11.32s/it] {'loss': 1.1377, 'learning_rate': 9.500207766871933e-07, 'epoch': 0.72} + 72%|███████▏ | 5083/7045 [16:31:07<6:10:17, 11.32s/it] 72%|███████▏ | 5084/7045 [16:31:18<6:09:18, 11.30s/it] {'loss': 1.1416, 'learning_rate': 9.49119096580636e-07, 'epoch': 0.72} + 72%|███████▏ | 5084/7045 [16:31:18<6:09:18, 11.30s/it] 72%|███████▏ | 5085/7045 [16:31:29<6:05:54, 11.20s/it] {'loss': 1.1152, 'learning_rate': 9.482177443090992e-07, 'epoch': 0.72} + 72%|███████▏ | 5085/7045 [16:31:29<6:05:54, 11.20s/it] 72%|███████▏ | 5086/7045 [16:31:41<6:13:44, 11.45s/it] {'loss': 1.0684, 'learning_rate': 9.473167200631156e-07, 'epoch': 0.72} + 72%|███████▏ | 5086/7045 [16:31:41<6:13:44, 11.45s/it] 72%|███████▏ | 5087/7045 [16:31:53<6:22:09, 11.71s/it] {'loss': 1.084, 'learning_rate': 9.464160240331513e-07, 'epoch': 0.72} + 72%|███████▏ | 5087/7045 [16:31:53<6:22:09, 11.71s/it] 72%|███████▏ | 5088/7045 [16:32:07<6:45:08, 12.42s/it] {'loss': 1.1045, 'learning_rate': 9.455156564096002e-07, 'epoch': 0.72} + 72%|███████▏ | 5088/7045 [16:32:07<6:45:08, 12.42s/it] 72%|███████▏ | 5089/7045 [16:32:19<6:36:06, 12.15s/it] {'loss': 1.1016, 'learning_rate': 9.446156173827867e-07, 'epoch': 0.72} + 72%|███████▏ | 5089/7045 [16:32:19<6:36:06, 12.15s/it] 72%|███████▏ | 5090/7045 [16:32:30<6:24:01, 11.79s/it] {'loss': 1.1289, 'learning_rate': 9.437159071429685e-07, 'epoch': 0.72} + 72%|███████▏ | 5090/7045 [16:32:30<6:24:01, 11.79s/it] 72%|███████▏ | 5091/7045 [16:32:41<6:16:39, 11.57s/it] {'loss': 1.0762, 'learning_rate': 9.428165258803307e-07, 'epoch': 0.72} + 72%|███████▏ | 5091/7045 [16:32:41<6:16:39, 11.57s/it] 72%|███████▏ | 5092/7045 [16:32:52<6:16:31, 11.57s/it] {'loss': 1.123, 'learning_rate': 9.419174737849904e-07, 'epoch': 0.72} + 72%|███████▏ | 5092/7045 [16:32:52<6:16:31, 11.57s/it] 72%|███████▏ | 5093/7045 [16:33:04<6:12:40, 11.46s/it] {'loss': 1.0903, 'learning_rate': 9.410187510469962e-07, 'epoch': 0.72} + 72%|███████▏ | 5093/7045 [16:33:04<6:12:40, 11.46s/it] 72%|███████▏ | 5094/7045 [16:33:15<6:08:18, 11.33s/it] {'loss': 1.1211, 'learning_rate': 9.401203578563239e-07, 'epoch': 0.72} + 72%|███████▏ | 5094/7045 [16:33:15<6:08:18, 11.33s/it] 72%|███████▏ | 5095/7045 [16:33:27<6:16:44, 11.59s/it] {'loss': 1.0967, 'learning_rate': 9.392222944028836e-07, 'epoch': 0.72} + 72%|███████▏ | 5095/7045 [16:33:27<6:16:44, 11.59s/it] 72%|███████▏ | 5096/7045 [16:33:39<6:20:47, 11.72s/it] {'loss': 1.166, 'learning_rate': 9.383245608765124e-07, 'epoch': 0.72} + 72%|███████▏ | 5096/7045 [16:33:39<6:20:47, 11.72s/it] 72%|███████▏ | 5097/7045 [16:33:52<6:33:54, 12.13s/it] {'loss': 1.0825, 'learning_rate': 9.374271574669782e-07, 'epoch': 0.72} + 72%|███████▏ | 5097/7045 [16:33:52<6:33:54, 12.13s/it] 72%|███████▏ | 5098/7045 [16:34:03<6:22:59, 11.80s/it] {'loss': 1.0815, 'learning_rate': 9.365300843639816e-07, 'epoch': 0.72} + 72%|███████▏ | 5098/7045 [16:34:03<6:22:59, 11.80s/it] 72%|███████▏ | 5099/7045 [16:34:15<6:19:43, 11.71s/it] {'loss': 1.0854, 'learning_rate': 9.356333417571506e-07, 'epoch': 0.72} + 72%|███████▏ | 5099/7045 [16:34:15<6:19:43, 11.71s/it] 72%|███████▏ | 5100/7045 [16:34:26<6:13:20, 11.52s/it] {'loss': 1.126, 'learning_rate': 9.347369298360434e-07, 'epoch': 0.72} + 72%|███████▏ | 5100/7045 [16:34:26<6:13:20, 11.52s/it] 72%|███████▏ | 5101/7045 [16:34:37<6:12:47, 11.51s/it] {'loss': 1.1064, 'learning_rate': 9.33840848790151e-07, 'epoch': 0.72} + 72%|███████▏ | 5101/7045 [16:34:37<6:12:47, 11.51s/it] 72%|███████▏ | 5102/7045 [16:34:48<6:08:50, 11.39s/it] {'loss': 1.1221, 'learning_rate': 9.329450988088912e-07, 'epoch': 0.72} + 72%|███████▏ | 5102/7045 [16:34:48<6:08:50, 11.39s/it] 72%|███████▏ | 5103/7045 [16:34:59<6:07:12, 11.35s/it] {'loss': 1.1094, 'learning_rate': 9.32049680081614e-07, 'epoch': 0.72} + 72%|███████▏ | 5103/7045 [16:34:59<6:07:12, 11.35s/it] 72%|███████▏ | 5104/7045 [16:35:10<6:00:56, 11.16s/it] {'loss': 1.1035, 'learning_rate': 9.311545927975993e-07, 'epoch': 0.72} + 72%|███████▏ | 5104/7045 [16:35:10<6:00:56, 11.16s/it] 72%|███████▏ | 5105/7045 [16:35:21<5:57:51, 11.07s/it] {'loss': 1.0615, 'learning_rate': 9.30259837146055e-07, 'epoch': 0.72} + 72%|███████▏ | 5105/7045 [16:35:21<5:57:51, 11.07s/it] 72%|███████▏ | 5106/7045 [16:35:34<6:16:34, 11.65s/it] {'loss': 1.0576, 'learning_rate': 9.293654133161214e-07, 'epoch': 0.72} + 72%|███████▏ | 5106/7045 [16:35:34<6:16:34, 11.65s/it] 72%|███████▏ | 5107/7045 [16:35:45<6:14:19, 11.59s/it] {'loss': 1.0957, 'learning_rate': 9.284713214968674e-07, 'epoch': 0.72} + 72%|███████▏ | 5107/7045 [16:35:45<6:14:19, 11.59s/it] 73%|███████▎ | 5108/7045 [16:35:57<6:16:41, 11.67s/it] {'loss': 1.1299, 'learning_rate': 9.275775618772903e-07, 'epoch': 0.73} + 73%|███████▎ | 5108/7045 [16:35:57<6:16:41, 11.67s/it] 73%|███████▎ | 5109/7045 [16:36:09<6:16:59, 11.68s/it] {'loss': 1.0977, 'learning_rate': 9.266841346463204e-07, 'epoch': 0.73} + 73%|███████▎ | 5109/7045 [16:36:09<6:16:59, 11.68s/it] 73%|███████▎ | 5110/7045 [16:36:20<6:10:30, 11.49s/it] {'loss': 1.0928, 'learning_rate': 9.257910399928155e-07, 'epoch': 0.73} + 73%|███████▎ | 5110/7045 [16:36:20<6:10:30, 11.49s/it] 73%|███████▎ | 5111/7045 [16:36:31<6:05:15, 11.33s/it] {'loss': 1.124, 'learning_rate': 9.248982781055624e-07, 'epoch': 0.73} + 73%|███████▎ | 5111/7045 [16:36:31<6:05:15, 11.33s/it] 73%|███████▎ | 5112/7045 [16:36:43<6:07:17, 11.40s/it] {'loss': 1.1006, 'learning_rate': 9.240058491732806e-07, 'epoch': 0.73} + 73%|███████▎ | 5112/7045 [16:36:43<6:07:17, 11.40s/it] 73%|███████▎ | 5113/7045 [16:36:54<6:05:06, 11.34s/it] {'loss': 1.1108, 'learning_rate': 9.231137533846158e-07, 'epoch': 0.73} + 73%|███████▎ | 5113/7045 [16:36:54<6:05:06, 11.34s/it] 73%|███████▎ | 5114/7045 [16:37:05<6:04:28, 11.33s/it] {'loss': 1.0801, 'learning_rate': 9.222219909281466e-07, 'epoch': 0.73} + 73%|███████▎ | 5114/7045 [16:37:05<6:04:28, 11.33s/it] 73%|███████▎ | 5115/7045 [16:37:16<6:03:50, 11.31s/it] {'loss': 1.1289, 'learning_rate': 9.213305619923771e-07, 'epoch': 0.73} + 73%|███████▎ | 5115/7045 [16:37:16<6:03:50, 11.31s/it] 73%|███████▎ | 5116/7045 [16:37:29<6:13:03, 11.60s/it] {'loss': 1.1084, 'learning_rate': 9.204394667657448e-07, 'epoch': 0.73} + 73%|███████▎ | 5116/7045 [16:37:29<6:13:03, 11.60s/it] 73%|███████▎ | 5117/7045 [16:37:41<6:15:54, 11.70s/it] {'loss': 1.1191, 'learning_rate': 9.195487054366153e-07, 'epoch': 0.73} + 73%|███████▎ | 5117/7045 [16:37:41<6:15:54, 11.70s/it] 73%|███████▎ | 5118/7045 [16:37:52<6:08:56, 11.49s/it] {'loss': 1.123, 'learning_rate': 9.186582781932832e-07, 'epoch': 0.73} + 73%|███████▎ | 5118/7045 [16:37:52<6:08:56, 11.49s/it] 73%|███████▎ | 5119/7045 [16:38:03<6:12:50, 11.62s/it] {'loss': 1.127, 'learning_rate': 9.177681852239711e-07, 'epoch': 0.73} + 73%|███████▎ | 5119/7045 [16:38:03<6:12:50, 11.62s/it] 73%|███████▎ | 5120/7045 [16:38:15<6:08:21, 11.48s/it] {'loss': 1.124, 'learning_rate': 9.168784267168346e-07, 'epoch': 0.73} + 73%|███████▎ | 5120/7045 [16:38:15<6:08:21, 11.48s/it] 73%|███████▎ | 5121/7045 [16:38:27<6:13:39, 11.65s/it] {'loss': 1.0771, 'learning_rate': 9.159890028599552e-07, 'epoch': 0.73} + 73%|███████▎ | 5121/7045 [16:38:27<6:13:39, 11.65s/it] 73%|███████▎ | 5122/7045 [16:38:38<6:07:59, 11.48s/it] {'loss': 1.1113, 'learning_rate': 9.150999138413446e-07, 'epoch': 0.73} + 73%|███████▎ | 5122/7045 [16:38:38<6:07:59, 11.48s/it] 73%|███████▎ | 5123/7045 [16:38:49<6:05:30, 11.41s/it] {'loss': 1.1211, 'learning_rate': 9.142111598489455e-07, 'epoch': 0.73} + 73%|███████▎ | 5123/7045 [16:38:49<6:05:30, 11.41s/it] 73%|███████▎ | 5124/7045 [16:39:01<6:09:27, 11.54s/it] {'loss': 1.0825, 'learning_rate': 9.133227410706269e-07, 'epoch': 0.73} + 73%|███████▎ | 5124/7045 [16:39:01<6:09:27, 11.54s/it] 73%|███████▎ | 5125/7045 [16:39:12<6:04:14, 11.38s/it] {'loss': 1.1201, 'learning_rate': 9.124346576941897e-07, 'epoch': 0.73} + 73%|███████▎ | 5125/7045 [16:39:12<6:04:14, 11.38s/it] 73%|███████▎ | 5126/7045 [16:39:23<6:03:28, 11.36s/it] {'loss': 1.1338, 'learning_rate': 9.11546909907362e-07, 'epoch': 0.73} + 73%|███████▎ | 5126/7045 [16:39:23<6:03:28, 11.36s/it] 73%|███████▎ | 5127/7045 [16:39:36<6:12:39, 11.66s/it] {'loss': 1.0825, 'learning_rate': 9.106594978977998e-07, 'epoch': 0.73} + 73%|███████▎ | 5127/7045 [16:39:36<6:12:39, 11.66s/it] 73%|███████▎ | 5128/7045 [16:39:47<6:07:25, 11.50s/it] {'loss': 1.1064, 'learning_rate': 9.097724218530931e-07, 'epoch': 0.73} + 73%|███████▎ | 5128/7045 [16:39:47<6:07:25, 11.50s/it] 73%|███████▎ | 5129/7045 [16:39:58<6:02:26, 11.35s/it] {'loss': 1.1162, 'learning_rate': 9.088856819607562e-07, 'epoch': 0.73} + 73%|███████▎ | 5129/7045 [16:39:58<6:02:26, 11.35s/it] 73%|███████▎ | 5130/7045 [16:40:09<6:01:48, 11.34s/it] {'loss': 1.1191, 'learning_rate': 9.079992784082328e-07, 'epoch': 0.73} + 73%|███████▎ | 5130/7045 [16:40:09<6:01:48, 11.34s/it] 73%|███████▎ | 5131/7045 [16:40:20<5:57:44, 11.21s/it] {'loss': 1.1465, 'learning_rate': 9.071132113828982e-07, 'epoch': 0.73} + 73%|███████▎ | 5131/7045 [16:40:20<5:57:44, 11.21s/it] 73%|███████▎ | 5132/7045 [16:40:32<6:06:05, 11.48s/it] {'loss': 1.1035, 'learning_rate': 9.06227481072054e-07, 'epoch': 0.73} + 73%|███████▎ | 5132/7045 [16:40:32<6:06:05, 11.48s/it] 73%|███████▎ | 5133/7045 [16:40:43<6:03:06, 11.39s/it] {'loss': 1.0864, 'learning_rate': 9.053420876629307e-07, 'epoch': 0.73} + 73%|███████▎ | 5133/7045 [16:40:43<6:03:06, 11.39s/it] 73%|███████▎ | 5134/7045 [16:40:56<6:19:55, 11.93s/it] {'loss': 1.1079, 'learning_rate': 9.044570313426898e-07, 'epoch': 0.73} + 73%|███████▎ | 5134/7045 [16:40:56<6:19:55, 11.93s/it] 73%|███████▎ | 5135/7045 [16:41:09<6:21:36, 11.99s/it] {'loss': 1.1611, 'learning_rate': 9.035723122984189e-07, 'epoch': 0.73} + 73%|███████▎ | 5135/7045 [16:41:09<6:21:36, 11.99s/it] 73%|███████▎ | 5136/7045 [16:41:21<6:21:54, 12.00s/it] {'loss': 1.1104, 'learning_rate': 9.026879307171368e-07, 'epoch': 0.73} + 73%|███████▎ | 5136/7045 [16:41:21<6:21:54, 12.00s/it] 73%|███████▎ | 5137/7045 [16:41:33<6:21:21, 11.99s/it] {'loss': 1.1396, 'learning_rate': 9.01803886785789e-07, 'epoch': 0.73} + 73%|███████▎ | 5137/7045 [16:41:33<6:21:21, 11.99s/it] 73%|███████▎ | 5138/7045 [16:41:43<6:10:25, 11.65s/it] {'loss': 1.1094, 'learning_rate': 9.009201806912494e-07, 'epoch': 0.73} + 73%|███████▎ | 5138/7045 [16:41:43<6:10:25, 11.65s/it] 73%|███████▎ | 5139/7045 [16:41:55<6:05:14, 11.50s/it] {'loss': 1.1167, 'learning_rate': 9.000368126203221e-07, 'epoch': 0.73} + 73%|███████▎ | 5139/7045 [16:41:55<6:05:14, 11.50s/it]Token indices sequence length is longer than the specified maximum sequence length for this model (2171 > 2048). Running this sequence through the model will result in indexing errors + 73%|███████▎ | 5140/7045 [16:42:07<6:12:05, 11.72s/it] {'loss': 1.0859, 'learning_rate': 8.991537827597402e-07, 'epoch': 0.73} + 73%|███████▎ | 5140/7045 [16:42:07<6:12:05, 11.72s/it] 73%|███████▎ | 5141/7045 [16:42:19<6:15:30, 11.83s/it] {'loss': 1.0942, 'learning_rate': 8.982710912961623e-07, 'epoch': 0.73} + 73%|███████▎ | 5141/7045 [16:42:19<6:15:30, 11.83s/it] 73%|███████▎ | 5142/7045 [16:42:30<6:08:55, 11.63s/it] {'loss': 1.0801, 'learning_rate': 8.973887384161789e-07, 'epoch': 0.73} + 73%|███████▎ | 5142/7045 [16:42:30<6:08:55, 11.63s/it] 73%|███████▎ | 5143/7045 [16:42:42<6:09:43, 11.66s/it] {'loss': 1.1123, 'learning_rate': 8.965067243063063e-07, 'epoch': 0.73} + 73%|███████▎ | 5143/7045 [16:42:42<6:09:43, 11.66s/it] 73%|███████▎ | 5144/7045 [16:42:53<6:04:50, 11.52s/it] {'loss': 1.0903, 'learning_rate': 8.9562504915299e-07, 'epoch': 0.73} + 73%|███████▎ | 5144/7045 [16:42:53<6:04:50, 11.52s/it] 73%|███████▎ | 5145/7045 [16:43:04<6:01:50, 11.43s/it] {'loss': 1.1465, 'learning_rate': 8.947437131426054e-07, 'epoch': 0.73} + 73%|███████▎ | 5145/7045 [16:43:04<6:01:50, 11.43s/it] 73%|███████▎ | 5146/7045 [16:43:15<6:00:02, 11.38s/it] {'loss': 1.1021, 'learning_rate': 8.938627164614531e-07, 'epoch': 0.73} + 73%|███████▎ | 5146/7045 [16:43:15<6:00:02, 11.38s/it] 73%|███████▎ | 5147/7045 [16:43:28<6:15:07, 11.86s/it] {'loss': 1.0947, 'learning_rate': 8.929820592957656e-07, 'epoch': 0.73} + 73%|███████▎ | 5147/7045 [16:43:28<6:15:07, 11.86s/it] 73%|███████▎ | 5148/7045 [16:43:40<6:08:12, 11.65s/it] {'loss': 1.0605, 'learning_rate': 8.921017418317008e-07, 'epoch': 0.73} + 73%|███████▎ | 5148/7045 [16:43:40<6:08:12, 11.65s/it] 73%|███████▎ | 5149/7045 [16:43:51<6:08:12, 11.65s/it] {'loss': 1.0947, 'learning_rate': 8.91221764255345e-07, 'epoch': 0.73} + 73%|███████▎ | 5149/7045 [16:43:51<6:08:12, 11.65s/it] 73%|███████▎ | 5150/7045 [16:44:02<6:04:16, 11.53s/it] {'loss': 1.0996, 'learning_rate': 8.903421267527152e-07, 'epoch': 0.73} + 73%|███████▎ | 5150/7045 [16:44:02<6:04:16, 11.53s/it] 73%|███████▎ | 5151/7045 [16:44:14<5:59:42, 11.40s/it] {'loss': 1.104, 'learning_rate': 8.89462829509753e-07, 'epoch': 0.73} + 73%|███████▎ | 5151/7045 [16:44:14<5:59:42, 11.40s/it] 73%|███████▎ | 5152/7045 [16:44:26<6:07:37, 11.65s/it] {'loss': 1.1079, 'learning_rate': 8.885838727123306e-07, 'epoch': 0.73} + 73%|███████▎ | 5152/7045 [16:44:26<6:07:37, 11.65s/it] 73%|███████▎ | 5153/7045 [16:44:37<6:01:54, 11.48s/it] {'loss': 1.0898, 'learning_rate': 8.877052565462483e-07, 'epoch': 0.73} + 73%|███████▎ | 5153/7045 [16:44:37<6:01:54, 11.48s/it] 73%|███████▎ | 5154/7045 [16:44:48<6:01:29, 11.47s/it] {'loss': 1.1611, 'learning_rate': 8.868269811972327e-07, 'epoch': 0.73} + 73%|███████▎ | 5154/7045 [16:44:48<6:01:29, 11.47s/it] 73%|███████▎ | 5155/7045 [16:44:59<5:55:32, 11.29s/it] {'loss': 1.1377, 'learning_rate': 8.859490468509385e-07, 'epoch': 0.73} + 73%|███████▎ | 5155/7045 [16:44:59<5:55:32, 11.29s/it] 73%|███████▎ | 5156/7045 [16:45:10<5:53:14, 11.22s/it] {'loss': 1.1641, 'learning_rate': 8.850714536929506e-07, 'epoch': 0.73} + 73%|███████▎ | 5156/7045 [16:45:10<5:53:14, 11.22s/it] 73%|███████▎ | 5157/7045 [16:45:28<6:51:15, 13.07s/it] {'loss': 1.0947, 'learning_rate': 8.841942019087784e-07, 'epoch': 0.73} + 73%|███████▎ | 5157/7045 [16:45:28<6:51:15, 13.07s/it] 73%|███████▎ | 5158/7045 [16:45:39<6:36:18, 12.60s/it] {'loss': 1.1172, 'learning_rate': 8.833172916838629e-07, 'epoch': 0.73} + 73%|███████▎ | 5158/7045 [16:45:39<6:36:18, 12.60s/it] 73%|███████▎ | 5159/7045 [16:45:51<6:27:48, 12.34s/it] {'loss': 1.0996, 'learning_rate': 8.824407232035698e-07, 'epoch': 0.73} + 73%|███████▎ | 5159/7045 [16:45:51<6:27:48, 12.34s/it] 73%|███████▎ | 5160/7045 [16:46:04<6:33:47, 12.53s/it] {'loss': 1.1045, 'learning_rate': 8.815644966531931e-07, 'epoch': 0.73} + 73%|███████▎ | 5160/7045 [16:46:04<6:33:47, 12.53s/it] 73%|███████▎ | 5161/7045 [16:46:17<6:40:32, 12.76s/it] {'loss': 1.0913, 'learning_rate': 8.806886122179567e-07, 'epoch': 0.73} + 73%|███████▎ | 5161/7045 [16:46:17<6:40:32, 12.76s/it] 73%|███████▎ | 5162/7045 [16:46:29<6:31:07, 12.46s/it] {'loss': 1.1069, 'learning_rate': 8.798130700830087e-07, 'epoch': 0.73} + 73%|███████▎ | 5162/7045 [16:46:29<6:31:07, 12.46s/it] 73%|███████▎ | 5163/7045 [16:46:40<6:19:22, 12.10s/it] {'loss': 1.0928, 'learning_rate': 8.789378704334287e-07, 'epoch': 0.73} + 73%|███████▎ | 5163/7045 [16:46:40<6:19:22, 12.10s/it] 73%|███████▎ | 5164/7045 [16:46:51<6:08:23, 11.75s/it] {'loss': 1.1016, 'learning_rate': 8.7806301345422e-07, 'epoch': 0.73} + 73%|███████▎ | 5164/7045 [16:46:51<6:08:23, 11.75s/it] 73%|███████▎ | 5165/7045 [16:47:04<6:19:56, 12.13s/it] {'loss': 1.1299, 'learning_rate': 8.771884993303176e-07, 'epoch': 0.73} + 73%|███████▎ | 5165/7045 [16:47:04<6:19:56, 12.13s/it] 73%|███████▎ | 5166/7045 [16:47:16<6:17:24, 12.05s/it] {'loss': 1.1035, 'learning_rate': 8.763143282465797e-07, 'epoch': 0.73} + 73%|███████▎ | 5166/7045 [16:47:16<6:17:24, 12.05s/it] 73%|███████▎ | 5167/7045 [16:47:28<6:13:24, 11.93s/it] {'loss': 1.1123, 'learning_rate': 8.754405003877958e-07, 'epoch': 0.73} + 73%|███████▎ | 5167/7045 [16:47:28<6:13:24, 11.93s/it] 73%|███████▎ | 5168/7045 [16:47:39<6:07:05, 11.73s/it] {'loss': 1.127, 'learning_rate': 8.745670159386796e-07, 'epoch': 0.73} + 73%|███████▎ | 5168/7045 [16:47:39<6:07:05, 11.73s/it] 73%|███████▎ | 5169/7045 [16:47:50<6:01:03, 11.55s/it] {'loss': 1.123, 'learning_rate': 8.736938750838752e-07, 'epoch': 0.73} + 73%|███████▎ | 5169/7045 [16:47:50<6:01:03, 11.55s/it] 73%|███████▎ | 5170/7045 [16:48:01<5:56:09, 11.40s/it] {'loss': 1.125, 'learning_rate': 8.728210780079524e-07, 'epoch': 0.73} + 73%|███████▎ | 5170/7045 [16:48:01<5:56:09, 11.40s/it] 73%|███████▎ | 5171/7045 [16:48:12<5:52:05, 11.27s/it] {'loss': 1.0986, 'learning_rate': 8.719486248954074e-07, 'epoch': 0.73} + 73%|███████▎ | 5171/7045 [16:48:12<5:52:05, 11.27s/it] 73%|███████▎ | 5172/7045 [16:48:25<6:07:48, 11.78s/it] {'loss': 1.0786, 'learning_rate': 8.710765159306661e-07, 'epoch': 0.73} + 73%|███████▎ | 5172/7045 [16:48:25<6:07:48, 11.78s/it] 73%|███████▎ | 5173/7045 [16:48:36<6:03:46, 11.66s/it] {'loss': 1.0854, 'learning_rate': 8.702047512980794e-07, 'epoch': 0.73} + 73%|███████▎ | 5173/7045 [16:48:36<6:03:46, 11.66s/it] 73%|███████▎ | 5174/7045 [16:48:47<5:57:29, 11.46s/it] {'loss': 1.1143, 'learning_rate': 8.693333311819279e-07, 'epoch': 0.73} + 73%|███████▎ | 5174/7045 [16:48:47<5:57:29, 11.46s/it] 73%|███████▎ | 5175/7045 [16:48:58<5:53:00, 11.33s/it] {'loss': 1.1025, 'learning_rate': 8.684622557664168e-07, 'epoch': 0.73} + 73%|███████▎ | 5175/7045 [16:48:58<5:53:00, 11.33s/it] 73%|███████▎ | 5176/7045 [16:49:10<5:52:40, 11.32s/it] {'loss': 1.1211, 'learning_rate': 8.67591525235679e-07, 'epoch': 0.73} + 73%|███████▎ | 5176/7045 [16:49:10<5:52:40, 11.32s/it] 73%|███████▎ | 5177/7045 [16:49:21<5:50:01, 11.24s/it] {'loss': 1.0903, 'learning_rate': 8.66721139773776e-07, 'epoch': 0.73} + 73%|███████▎ | 5177/7045 [16:49:21<5:50:01, 11.24s/it] 73%|███████▎ | 5178/7045 [16:49:32<5:49:05, 11.22s/it] {'loss': 1.0889, 'learning_rate': 8.658510995646957e-07, 'epoch': 0.73} + 73%|███████▎ | 5178/7045 [16:49:32<5:49:05, 11.22s/it] 74%|███████▎ | 5179/7045 [16:49:43<5:46:30, 11.14s/it] {'loss': 1.1289, 'learning_rate': 8.649814047923513e-07, 'epoch': 0.74} + 74%|███████▎ | 5179/7045 [16:49:43<5:46:30, 11.14s/it] 74%|███████▎ | 5180/7045 [16:49:54<5:49:26, 11.24s/it] {'loss': 1.0923, 'learning_rate': 8.64112055640586e-07, 'epoch': 0.74} + 74%|███████▎ | 5180/7045 [16:49:54<5:49:26, 11.24s/it] 74%|███████▎ | 5181/7045 [16:50:07<6:00:24, 11.60s/it] {'loss': 1.1113, 'learning_rate': 8.632430522931679e-07, 'epoch': 0.74} + 74%|███████▎ | 5181/7045 [16:50:07<6:00:24, 11.60s/it] 74%|███████▎ | 5182/7045 [16:50:19<6:04:38, 11.74s/it] {'loss': 1.1123, 'learning_rate': 8.623743949337909e-07, 'epoch': 0.74} + 74%|███████▎ | 5182/7045 [16:50:19<6:04:38, 11.74s/it] 74%|███████▎ | 5183/7045 [16:50:32<6:19:11, 12.22s/it] {'loss': 1.0898, 'learning_rate': 8.615060837460795e-07, 'epoch': 0.74} + 74%|███████▎ | 5183/7045 [16:50:32<6:19:11, 12.22s/it] 74%|███████▎ | 5184/7045 [16:50:44<6:19:53, 12.25s/it] {'loss': 1.1104, 'learning_rate': 8.60638118913581e-07, 'epoch': 0.74} + 74%|███████▎ | 5184/7045 [16:50:45<6:19:53, 12.25s/it] 74%|███████▎ | 5185/7045 [16:50:56<6:09:37, 11.92s/it] {'loss': 1.0908, 'learning_rate': 8.597705006197729e-07, 'epoch': 0.74} + 74%|███████▎ | 5185/7045 [16:50:56<6:09:37, 11.92s/it] 74%|███████▎ | 5186/7045 [16:51:07<6:01:24, 11.66s/it] {'loss': 1.1064, 'learning_rate': 8.589032290480571e-07, 'epoch': 0.74} + 74%|███████▎ | 5186/7045 [16:51:07<6:01:24, 11.66s/it] 74%|███████▎ | 5187/7045 [16:51:18<6:00:16, 11.63s/it] {'loss': 1.1299, 'learning_rate': 8.580363043817618e-07, 'epoch': 0.74} + 74%|███████▎ | 5187/7045 [16:51:18<6:00:16, 11.63s/it] 74%|███████▎ | 5188/7045 [16:51:31<6:08:34, 11.91s/it] {'loss': 1.0908, 'learning_rate': 8.571697268041446e-07, 'epoch': 0.74} + 74%|███████▎ | 5188/7045 [16:51:31<6:08:34, 11.91s/it] 74%|███████▎ | 5189/7045 [16:51:42<5:57:41, 11.56s/it] {'loss': 1.0918, 'learning_rate': 8.56303496498388e-07, 'epoch': 0.74} + 74%|███████▎ | 5189/7045 [16:51:42<5:57:41, 11.56s/it] 74%|███████▎ | 5190/7045 [16:51:52<5:50:35, 11.34s/it] {'loss': 1.0981, 'learning_rate': 8.554376136476003e-07, 'epoch': 0.74} + 74%|███████▎ | 5190/7045 [16:51:52<5:50:35, 11.34s/it] 74%|███████▎ | 5191/7045 [16:52:03<5:45:26, 11.18s/it] {'loss': 1.1152, 'learning_rate': 8.545720784348188e-07, 'epoch': 0.74} + 74%|███████▎ | 5191/7045 [16:52:03<5:45:26, 11.18s/it] 74%|███████▎ | 5192/7045 [16:52:15<5:49:06, 11.30s/it] {'loss': 1.1172, 'learning_rate': 8.537068910430049e-07, 'epoch': 0.74} + 74%|███████▎ | 5192/7045 [16:52:15<5:49:06, 11.30s/it] 74%|███████▎ | 5193/7045 [16:52:26<5:50:09, 11.34s/it] {'loss': 1.0986, 'learning_rate': 8.528420516550465e-07, 'epoch': 0.74} + 74%|███████▎ | 5193/7045 [16:52:26<5:50:09, 11.34s/it] 74%|███████▎ | 5194/7045 [16:52:38<5:49:48, 11.34s/it] {'loss': 1.1104, 'learning_rate': 8.519775604537606e-07, 'epoch': 0.74} + 74%|███████▎ | 5194/7045 [16:52:38<5:49:48, 11.34s/it] 74%|███████▎ | 5195/7045 [16:52:49<5:50:58, 11.38s/it] {'loss': 1.1201, 'learning_rate': 8.511134176218871e-07, 'epoch': 0.74} + 74%|███████▎ | 5195/7045 [16:52:49<5:50:58, 11.38s/it] 74%|███████▍ | 5196/7045 [16:53:00<5:46:51, 11.26s/it] {'loss': 1.1494, 'learning_rate': 8.502496233420959e-07, 'epoch': 0.74} + 74%|███████▍ | 5196/7045 [16:53:00<5:46:51, 11.26s/it] 74%|███████▍ | 5197/7045 [16:53:11<5:44:27, 11.18s/it] {'loss': 1.1113, 'learning_rate': 8.4938617779698e-07, 'epoch': 0.74} + 74%|███████▍ | 5197/7045 [16:53:11<5:44:27, 11.18s/it] 74%|███████▍ | 5198/7045 [16:53:22<5:43:39, 11.16s/it] {'loss': 1.1172, 'learning_rate': 8.485230811690595e-07, 'epoch': 0.74} + 74%|███████▍ | 5198/7045 [16:53:22<5:43:39, 11.16s/it] 74%|███████▍ | 5199/7045 [16:53:33<5:42:58, 11.15s/it] {'loss': 1.1025, 'learning_rate': 8.476603336407827e-07, 'epoch': 0.74} + 74%|███████▍ | 5199/7045 [16:53:33<5:42:58, 11.15s/it] 74%|███████▍ | 5200/7045 [16:53:44<5:43:31, 11.17s/it] {'loss': 1.1396, 'learning_rate': 8.467979353945211e-07, 'epoch': 0.74} + 74%|███████▍ | 5200/7045 [16:53:44<5:43:31, 11.17s/it] 74%|███████▍ | 5201/7045 [16:53:57<5:59:51, 11.71s/it] {'loss': 1.1025, 'learning_rate': 8.459358866125747e-07, 'epoch': 0.74} + 74%|███████▍ | 5201/7045 [16:53:57<5:59:51, 11.71s/it] 74%|███████▍ | 5202/7045 [16:54:09<6:02:02, 11.79s/it] {'loss': 1.0938, 'learning_rate': 8.450741874771695e-07, 'epoch': 0.74} + 74%|███████▍ | 5202/7045 [16:54:09<6:02:02, 11.79s/it] 74%|███████▍ | 5203/7045 [16:54:24<6:24:50, 12.54s/it] {'loss': 1.0908, 'learning_rate': 8.442128381704562e-07, 'epoch': 0.74} + 74%|███████▍ | 5203/7045 [16:54:24<6:24:50, 12.54s/it] 74%|███████▍ | 5204/7045 [16:54:35<6:16:08, 12.26s/it] {'loss': 1.0498, 'learning_rate': 8.433518388745116e-07, 'epoch': 0.74} + 74%|███████▍ | 5204/7045 [16:54:35<6:16:08, 12.26s/it] 74%|███████▍ | 5205/7045 [16:54:46<6:03:27, 11.85s/it] {'loss': 1.1387, 'learning_rate': 8.424911897713409e-07, 'epoch': 0.74} + 74%|███████▍ | 5205/7045 [16:54:46<6:03:27, 11.85s/it] 74%|███████▍ | 5206/7045 [16:54:57<5:56:02, 11.62s/it] {'loss': 1.1011, 'learning_rate': 8.416308910428714e-07, 'epoch': 0.74} + 74%|███████▍ | 5206/7045 [16:54:57<5:56:02, 11.62s/it] 74%|███████▍ | 5207/7045 [16:55:08<5:48:49, 11.39s/it] {'loss': 1.1221, 'learning_rate': 8.407709428709604e-07, 'epoch': 0.74} + 74%|███████▍ | 5207/7045 [16:55:08<5:48:49, 11.39s/it] 74%|███████▍ | 5208/7045 [16:55:20<5:55:07, 11.60s/it] {'loss': 1.1074, 'learning_rate': 8.399113454373886e-07, 'epoch': 0.74} + 74%|███████▍ | 5208/7045 [16:55:20<5:55:07, 11.60s/it] 74%|███████▍ | 5209/7045 [16:55:32<5:52:24, 11.52s/it] {'loss': 1.0728, 'learning_rate': 8.39052098923862e-07, 'epoch': 0.74} + 74%|███████▍ | 5209/7045 [16:55:32<5:52:24, 11.52s/it] 74%|███████▍ | 5210/7045 [16:55:43<5:51:45, 11.50s/it] {'loss': 1.1504, 'learning_rate': 8.381932035120155e-07, 'epoch': 0.74} + 74%|███████▍ | 5210/7045 [16:55:43<5:51:45, 11.50s/it] 74%|███████▍ | 5211/7045 [16:55:56<6:07:08, 12.01s/it] {'loss': 1.0635, 'learning_rate': 8.373346593834067e-07, 'epoch': 0.74} + 74%|███████▍ | 5211/7045 [16:55:56<6:07:08, 12.01s/it] 74%|███████▍ | 5212/7045 [16:56:09<6:11:28, 12.16s/it] {'loss': 1.1191, 'learning_rate': 8.364764667195188e-07, 'epoch': 0.74} + 74%|███████▍ | 5212/7045 [16:56:09<6:11:28, 12.16s/it] 74%|███████▍ | 5213/7045 [16:56:20<6:05:17, 11.96s/it] {'loss': 1.1064, 'learning_rate': 8.356186257017648e-07, 'epoch': 0.74} + 74%|███████▍ | 5213/7045 [16:56:20<6:05:17, 11.96s/it] 74%|███████▍ | 5214/7045 [16:56:33<6:15:24, 12.30s/it] {'loss': 1.0884, 'learning_rate': 8.347611365114793e-07, 'epoch': 0.74} + 74%|███████▍ | 5214/7045 [16:56:33<6:15:24, 12.30s/it] 74%|███████▍ | 5215/7045 [16:56:46<6:15:53, 12.32s/it] {'loss': 1.1333, 'learning_rate': 8.339039993299233e-07, 'epoch': 0.74} + 74%|███████▍ | 5215/7045 [16:56:46<6:15:53, 12.32s/it] 74%|███████▍ | 5216/7045 [16:56:57<6:06:03, 12.01s/it] {'loss': 1.123, 'learning_rate': 8.330472143382848e-07, 'epoch': 0.74} + 74%|███████▍ | 5216/7045 [16:56:57<6:06:03, 12.01s/it] 74%|███████▍ | 5217/7045 [16:57:08<6:00:26, 11.83s/it] {'loss': 1.1201, 'learning_rate': 8.32190781717675e-07, 'epoch': 0.74} + 74%|███████▍ | 5217/7045 [16:57:08<6:00:26, 11.83s/it] 74%|███████▍ | 5218/7045 [16:57:20<6:02:02, 11.89s/it] {'loss': 1.0952, 'learning_rate': 8.313347016491338e-07, 'epoch': 0.74} + 74%|███████▍ | 5218/7045 [16:57:20<6:02:02, 11.89s/it] 74%|███████▍ | 5219/7045 [16:57:32<5:55:20, 11.68s/it] {'loss': 1.0693, 'learning_rate': 8.304789743136241e-07, 'epoch': 0.74} + 74%|███████▍ | 5219/7045 [16:57:32<5:55:20, 11.68s/it] 74%|███████▍ | 5220/7045 [16:57:44<5:59:29, 11.82s/it] {'loss': 1.0811, 'learning_rate': 8.29623599892034e-07, 'epoch': 0.74} + 74%|███████▍ | 5220/7045 [16:57:44<5:59:29, 11.82s/it] 74%|███████▍ | 5221/7045 [16:57:55<5:55:04, 11.68s/it] {'loss': 1.1133, 'learning_rate': 8.287685785651797e-07, 'epoch': 0.74} + 74%|███████▍ | 5221/7045 [16:57:55<5:55:04, 11.68s/it] 74%|███████▍ | 5222/7045 [16:58:07<5:53:22, 11.63s/it] {'loss': 1.0869, 'learning_rate': 8.279139105138001e-07, 'epoch': 0.74} + 74%|███████▍ | 5222/7045 [16:58:07<5:53:22, 11.63s/it] 74%|███████▍ | 5223/7045 [16:58:19<5:57:03, 11.76s/it] {'loss': 1.0894, 'learning_rate': 8.270595959185595e-07, 'epoch': 0.74} + 74%|███████▍ | 5223/7045 [16:58:19<5:57:03, 11.76s/it] 74%|███████▍ | 5224/7045 [16:58:30<5:49:04, 11.50s/it] {'loss': 1.0889, 'learning_rate': 8.2620563496005e-07, 'epoch': 0.74} + 74%|███████▍ | 5224/7045 [16:58:30<5:49:04, 11.50s/it] 74%|███████▍ | 5225/7045 [16:58:41<5:46:34, 11.43s/it] {'loss': 1.0879, 'learning_rate': 8.253520278187857e-07, 'epoch': 0.74} + 74%|███████▍ | 5225/7045 [16:58:41<5:46:34, 11.43s/it] 74%|███████▍ | 5226/7045 [16:58:52<5:45:33, 11.40s/it] {'loss': 1.0723, 'learning_rate': 8.244987746752084e-07, 'epoch': 0.74} + 74%|███████▍ | 5226/7045 [16:58:52<5:45:33, 11.40s/it] 74%|███████▍ | 5227/7045 [16:59:04<5:45:32, 11.40s/it] {'loss': 1.1045, 'learning_rate': 8.236458757096847e-07, 'epoch': 0.74} + 74%|███████▍ | 5227/7045 [16:59:04<5:45:32, 11.40s/it] 74%|███████▍ | 5228/7045 [16:59:17<6:02:28, 11.97s/it] {'loss': 1.1201, 'learning_rate': 8.22793331102504e-07, 'epoch': 0.74} + 74%|███████▍ | 5228/7045 [16:59:17<6:02:28, 11.97s/it] 74%|███████▍ | 5229/7045 [16:59:28<5:55:30, 11.75s/it] {'loss': 1.0747, 'learning_rate': 8.219411410338849e-07, 'epoch': 0.74} + 74%|███████▍ | 5229/7045 [16:59:28<5:55:30, 11.75s/it] 74%|███████▍ | 5230/7045 [16:59:40<5:56:55, 11.80s/it] {'loss': 1.1152, 'learning_rate': 8.210893056839673e-07, 'epoch': 0.74} + 74%|███████▍ | 5230/7045 [16:59:40<5:56:55, 11.80s/it] 74%|███████▍ | 5231/7045 [16:59:51<5:50:30, 11.59s/it] {'loss': 1.1094, 'learning_rate': 8.20237825232817e-07, 'epoch': 0.74} + 74%|███████▍ | 5231/7045 [16:59:51<5:50:30, 11.59s/it] 74%|███████▍ | 5232/7045 [17:00:02<5:46:26, 11.47s/it] {'loss': 1.1631, 'learning_rate': 8.193866998604266e-07, 'epoch': 0.74} + 74%|███████▍ | 5232/7045 [17:00:02<5:46:26, 11.47s/it] 74%|███████▍ | 5233/7045 [17:00:14<5:46:01, 11.46s/it] {'loss': 1.0977, 'learning_rate': 8.185359297467121e-07, 'epoch': 0.74} + 74%|███████▍ | 5233/7045 [17:00:14<5:46:01, 11.46s/it] 74%|███████▍ | 5234/7045 [17:00:25<5:42:48, 11.36s/it] {'loss': 1.0815, 'learning_rate': 8.176855150715135e-07, 'epoch': 0.74} + 74%|███████▍ | 5234/7045 [17:00:25<5:42:48, 11.36s/it] 74%|███████▍ | 5235/7045 [17:00:36<5:39:59, 11.27s/it] {'loss': 1.1016, 'learning_rate': 8.168354560145985e-07, 'epoch': 0.74} + 74%|███████▍ | 5235/7045 [17:00:36<5:39:59, 11.27s/it] 74%|███████▍ | 5236/7045 [17:00:50<6:02:51, 12.04s/it] {'loss': 1.0757, 'learning_rate': 8.159857527556567e-07, 'epoch': 0.74} + 74%|███████▍ | 5236/7045 [17:00:50<6:02:51, 12.04s/it] 74%|███████▍ | 5237/7045 [17:01:01<5:53:50, 11.74s/it] {'loss': 1.0869, 'learning_rate': 8.151364054743038e-07, 'epoch': 0.74} + 74%|███████▍ | 5237/7045 [17:01:01<5:53:50, 11.74s/it] 74%|███████▍ | 5238/7045 [17:01:17<6:33:01, 13.05s/it] {'loss': 1.0601, 'learning_rate': 8.142874143500817e-07, 'epoch': 0.74} + 74%|███████▍ | 5238/7045 [17:01:17<6:33:01, 13.05s/it] 74%|███████▍ | 5239/7045 [17:01:28<6:17:12, 12.53s/it] {'loss': 1.0996, 'learning_rate': 8.134387795624546e-07, 'epoch': 0.74} + 74%|███████▍ | 5239/7045 [17:01:28<6:17:12, 12.53s/it] 74%|███████▍ | 5240/7045 [17:01:41<6:22:45, 12.72s/it] {'loss': 1.0654, 'learning_rate': 8.125905012908111e-07, 'epoch': 0.74} + 74%|███████▍ | 5240/7045 [17:01:41<6:22:45, 12.72s/it] 74%|███████▍ | 5241/7045 [17:01:53<6:09:35, 12.29s/it] {'loss': 1.127, 'learning_rate': 8.117425797144676e-07, 'epoch': 0.74} + 74%|███████▍ | 5241/7045 [17:01:53<6:09:35, 12.29s/it] 74%|███████▍ | 5242/7045 [17:02:04<5:57:47, 11.91s/it] {'loss': 1.1338, 'learning_rate': 8.108950150126613e-07, 'epoch': 0.74} + 74%|███████▍ | 5242/7045 [17:02:04<5:57:47, 11.91s/it] 74%|███████▍ | 5243/7045 [17:02:16<6:03:16, 12.10s/it] {'loss': 1.0947, 'learning_rate': 8.100478073645577e-07, 'epoch': 0.74} + 74%|███████▍ | 5243/7045 [17:02:16<6:03:16, 12.10s/it] 74%|███████▍ | 5244/7045 [17:02:27<5:55:24, 11.84s/it] {'loss': 1.1182, 'learning_rate': 8.09200956949244e-07, 'epoch': 0.74} + 74%|███████▍ | 5244/7045 [17:02:27<5:55:24, 11.84s/it] 74%|███████▍ | 5245/7045 [17:02:39<5:54:44, 11.82s/it] {'loss': 1.106, 'learning_rate': 8.083544639457316e-07, 'epoch': 0.74} + 74%|███████▍ | 5245/7045 [17:02:39<5:54:44, 11.82s/it] 74%|███████▍ | 5246/7045 [17:02:51<5:55:04, 11.84s/it] {'loss': 1.1289, 'learning_rate': 8.075083285329596e-07, 'epoch': 0.74} + 74%|███████▍ | 5246/7045 [17:02:51<5:55:04, 11.84s/it] 74%|███████▍ | 5247/7045 [17:03:02<5:47:27, 11.59s/it] {'loss': 1.1226, 'learning_rate': 8.066625508897879e-07, 'epoch': 0.74} + 74%|███████▍ | 5247/7045 [17:03:02<5:47:27, 11.59s/it] 74%|███████▍ | 5248/7045 [17:03:13<5:44:10, 11.49s/it] {'loss': 1.1553, 'learning_rate': 8.058171311950036e-07, 'epoch': 0.74} + 74%|███████▍ | 5248/7045 [17:03:13<5:44:10, 11.49s/it] 75%|███████▍ | 5249/7045 [17:03:25<5:40:29, 11.37s/it] {'loss': 1.1016, 'learning_rate': 8.049720696273156e-07, 'epoch': 0.75} + 75%|███████▍ | 5249/7045 [17:03:25<5:40:29, 11.37s/it] 75%|███████▍ | 5250/7045 [17:03:37<5:50:12, 11.71s/it] {'loss': 1.0674, 'learning_rate': 8.041273663653598e-07, 'epoch': 0.75} + 75%|███████▍ | 5250/7045 [17:03:37<5:50:12, 11.71s/it] 75%|███████▍ | 5251/7045 [17:03:50<5:58:51, 12.00s/it] {'loss': 1.1099, 'learning_rate': 8.03283021587693e-07, 'epoch': 0.75} + 75%|███████▍ | 5251/7045 [17:03:50<5:58:51, 12.00s/it] 75%|███████▍ | 5252/7045 [17:04:01<5:53:55, 11.84s/it] {'loss': 1.0918, 'learning_rate': 8.024390354728004e-07, 'epoch': 0.75} + 75%|███████▍ | 5252/7045 [17:04:01<5:53:55, 11.84s/it] 75%|███████▍ | 5253/7045 [17:04:14<5:59:11, 12.03s/it] {'loss': 1.1025, 'learning_rate': 8.01595408199087e-07, 'epoch': 0.75} + 75%|███████▍ | 5253/7045 [17:04:14<5:59:11, 12.03s/it] 75%|███████▍ | 5254/7045 [17:04:25<5:54:04, 11.86s/it] {'loss': 1.0996, 'learning_rate': 8.007521399448858e-07, 'epoch': 0.75} + 75%|███████▍ | 5254/7045 [17:04:25<5:54:04, 11.86s/it] 75%|███████▍ | 5255/7045 [17:04:37<5:50:56, 11.76s/it] {'loss': 1.0977, 'learning_rate': 7.999092308884518e-07, 'epoch': 0.75} + 75%|███████▍ | 5255/7045 [17:04:37<5:50:56, 11.76s/it] 75%|███████▍ | 5256/7045 [17:04:48<5:43:37, 11.52s/it] {'loss': 1.1313, 'learning_rate': 7.990666812079628e-07, 'epoch': 0.75} + 75%|███████▍ | 5256/7045 [17:04:48<5:43:37, 11.52s/it] 75%|███████▍ | 5257/7045 [17:05:00<5:54:55, 11.91s/it] {'loss': 1.1162, 'learning_rate': 7.982244910815249e-07, 'epoch': 0.75} + 75%|███████▍ | 5257/7045 [17:05:00<5:54:55, 11.91s/it] 75%|███████▍ | 5258/7045 [17:05:11<5:45:01, 11.58s/it] {'loss': 1.1011, 'learning_rate': 7.973826606871629e-07, 'epoch': 0.75} + 75%|███████▍ | 5258/7045 [17:05:11<5:45:01, 11.58s/it] 75%|███████▍ | 5259/7045 [17:05:22<5:39:09, 11.39s/it] {'loss': 1.0684, 'learning_rate': 7.965411902028309e-07, 'epoch': 0.75} + 75%|███████▍ | 5259/7045 [17:05:22<5:39:09, 11.39s/it] 75%|███████▍ | 5260/7045 [17:05:35<5:52:22, 11.84s/it] {'loss': 1.1074, 'learning_rate': 7.957000798064027e-07, 'epoch': 0.75} + 75%|███████▍ | 5260/7045 [17:05:35<5:52:22, 11.84s/it] 75%|███████▍ | 5261/7045 [17:05:47<5:49:51, 11.77s/it] {'loss': 1.1475, 'learning_rate': 7.948593296756768e-07, 'epoch': 0.75} + 75%|███████▍ | 5261/7045 [17:05:47<5:49:51, 11.77s/it] 75%|███████▍ | 5262/7045 [17:05:58<5:47:00, 11.68s/it] {'loss': 1.166, 'learning_rate': 7.940189399883775e-07, 'epoch': 0.75} + 75%|███████▍ | 5262/7045 [17:05:58<5:47:00, 11.68s/it] 75%|███████▍ | 5263/7045 [17:06:10<5:45:11, 11.62s/it] {'loss': 1.1211, 'learning_rate': 7.931789109221522e-07, 'epoch': 0.75} + 75%|███████▍ | 5263/7045 [17:06:10<5:45:11, 11.62s/it] 75%|███████▍ | 5264/7045 [17:06:21<5:43:10, 11.56s/it] {'loss': 1.1045, 'learning_rate': 7.923392426545701e-07, 'epoch': 0.75} + 75%|███████▍ | 5264/7045 [17:06:21<5:43:10, 11.56s/it] 75%|███████▍ | 5265/7045 [17:06:32<5:39:56, 11.46s/it] {'loss': 1.0996, 'learning_rate': 7.91499935363127e-07, 'epoch': 0.75} + 75%|███████▍ | 5265/7045 [17:06:32<5:39:56, 11.46s/it] 75%|███████▍ | 5266/7045 [17:06:44<5:42:29, 11.55s/it] {'loss': 1.0957, 'learning_rate': 7.906609892252404e-07, 'epoch': 0.75} + 75%|███████▍ | 5266/7045 [17:06:44<5:42:29, 11.55s/it] 75%|███████▍ | 5267/7045 [17:06:56<5:46:05, 11.68s/it] {'loss': 1.1123, 'learning_rate': 7.898224044182509e-07, 'epoch': 0.75} + 75%|███████▍ | 5267/7045 [17:06:56<5:46:05, 11.68s/it] 75%|███████▍ | 5268/7045 [17:07:08<5:51:17, 11.86s/it] {'loss': 1.0674, 'learning_rate': 7.88984181119426e-07, 'epoch': 0.75} + 75%|███████▍ | 5268/7045 [17:07:08<5:51:17, 11.86s/it] 75%|███████▍ | 5269/7045 [17:07:20<5:46:11, 11.70s/it] {'loss': 1.1406, 'learning_rate': 7.881463195059525e-07, 'epoch': 0.75} + 75%|███████▍ | 5269/7045 [17:07:20<5:46:11, 11.70s/it] 75%|███████▍ | 5270/7045 [17:07:33<5:59:03, 12.14s/it] {'loss': 1.0884, 'learning_rate': 7.87308819754945e-07, 'epoch': 0.75} + 75%|███████▍ | 5270/7045 [17:07:33<5:59:03, 12.14s/it] 75%|███████▍ | 5271/7045 [17:07:44<5:48:01, 11.77s/it] {'loss': 1.123, 'learning_rate': 7.864716820434382e-07, 'epoch': 0.75} + 75%|███████▍ | 5271/7045 [17:07:44<5:48:01, 11.77s/it] 75%|███████▍ | 5272/7045 [17:07:55<5:43:38, 11.63s/it] {'loss': 1.1289, 'learning_rate': 7.856349065483912e-07, 'epoch': 0.75} + 75%|███████▍ | 5272/7045 [17:07:55<5:43:38, 11.63s/it] 75%|███████▍ | 5273/7045 [17:08:07<5:42:43, 11.60s/it] {'loss': 1.1035, 'learning_rate': 7.847984934466873e-07, 'epoch': 0.75} + 75%|███████▍ | 5273/7045 [17:08:07<5:42:43, 11.60s/it] 75%|███████▍ | 5274/7045 [17:08:18<5:41:14, 11.56s/it] {'loss': 1.1074, 'learning_rate': 7.83962442915134e-07, 'epoch': 0.75} + 75%|███████▍ | 5274/7045 [17:08:18<5:41:14, 11.56s/it] 75%|███████▍ | 5275/7045 [17:08:29<5:37:28, 11.44s/it] {'loss': 1.1138, 'learning_rate': 7.831267551304592e-07, 'epoch': 0.75} + 75%|███████▍ | 5275/7045 [17:08:29<5:37:28, 11.44s/it] 75%|███████▍ | 5276/7045 [17:08:41<5:42:01, 11.60s/it] {'loss': 1.1094, 'learning_rate': 7.822914302693171e-07, 'epoch': 0.75} + 75%|███████▍ | 5276/7045 [17:08:41<5:42:01, 11.60s/it] 75%|███████▍ | 5277/7045 [17:08:53<5:47:34, 11.80s/it] {'loss': 1.0527, 'learning_rate': 7.814564685082837e-07, 'epoch': 0.75} + 75%|███████▍ | 5277/7045 [17:08:53<5:47:34, 11.80s/it] 75%|███████▍ | 5278/7045 [17:09:04<5:40:50, 11.57s/it] {'loss': 1.0972, 'learning_rate': 7.806218700238577e-07, 'epoch': 0.75} + 75%|███████▍ | 5278/7045 [17:09:04<5:40:50, 11.57s/it] 75%|███████▍ | 5279/7045 [17:09:17<5:45:12, 11.73s/it] {'loss': 1.1309, 'learning_rate': 7.79787634992463e-07, 'epoch': 0.75} + 75%|███████▍ | 5279/7045 [17:09:17<5:45:12, 11.73s/it] 75%|███████▍ | 5280/7045 [17:09:28<5:44:12, 11.70s/it] {'loss': 1.1104, 'learning_rate': 7.78953763590444e-07, 'epoch': 0.75} + 75%|███████▍ | 5280/7045 [17:09:28<5:44:12, 11.70s/it] 75%|███████▍ | 5281/7045 [17:09:40<5:41:34, 11.62s/it] {'loss': 1.0928, 'learning_rate': 7.781202559940715e-07, 'epoch': 0.75} + 75%|███████▍ | 5281/7045 [17:09:40<5:41:34, 11.62s/it] 75%|███████▍ | 5282/7045 [17:09:51<5:38:48, 11.53s/it] {'loss': 1.1138, 'learning_rate': 7.772871123795364e-07, 'epoch': 0.75} + 75%|███████▍ | 5282/7045 [17:09:51<5:38:48, 11.53s/it] 75%|███████▍ | 5283/7045 [17:10:04<5:51:02, 11.95s/it] {'loss': 1.0679, 'learning_rate': 7.764543329229537e-07, 'epoch': 0.75} + 75%|███████▍ | 5283/7045 [17:10:04<5:51:02, 11.95s/it] 75%|███████▌ | 5284/7045 [17:10:16<5:52:12, 12.00s/it] {'loss': 1.0938, 'learning_rate': 7.756219178003626e-07, 'epoch': 0.75} + 75%|███████▌ | 5284/7045 [17:10:16<5:52:12, 12.00s/it] 75%|███████▌ | 5285/7045 [17:10:27<5:42:15, 11.67s/it] {'loss': 1.126, 'learning_rate': 7.747898671877232e-07, 'epoch': 0.75} + 75%|███████▌ | 5285/7045 [17:10:27<5:42:15, 11.67s/it] 75%|███████▌ | 5286/7045 [17:10:38<5:33:30, 11.38s/it] {'loss': 1.0859, 'learning_rate': 7.739581812609201e-07, 'epoch': 0.75} + 75%|███████▌ | 5286/7045 [17:10:38<5:33:30, 11.38s/it] 75%|███████▌ | 5287/7045 [17:10:50<5:41:24, 11.65s/it] {'loss': 1.1328, 'learning_rate': 7.731268601957612e-07, 'epoch': 0.75} + 75%|███████▌ | 5287/7045 [17:10:50<5:41:24, 11.65s/it] 75%|███████▌ | 5288/7045 [17:11:01<5:36:07, 11.48s/it] {'loss': 1.1074, 'learning_rate': 7.722959041679756e-07, 'epoch': 0.75} + 75%|███████▌ | 5288/7045 [17:11:01<5:36:07, 11.48s/it] 75%|███████▌ | 5289/7045 [17:11:12<5:34:54, 11.44s/it] {'loss': 1.1309, 'learning_rate': 7.714653133532152e-07, 'epoch': 0.75} + 75%|███████▌ | 5289/7045 [17:11:12<5:34:54, 11.44s/it] 75%|███████▌ | 5290/7045 [17:11:23<5:31:43, 11.34s/it] {'loss': 1.0986, 'learning_rate': 7.706350879270571e-07, 'epoch': 0.75} + 75%|███████▌ | 5290/7045 [17:11:23<5:31:43, 11.34s/it] 75%|███████▌ | 5291/7045 [17:11:35<5:30:49, 11.32s/it] {'loss': 1.1113, 'learning_rate': 7.698052280649984e-07, 'epoch': 0.75} + 75%|███████▌ | 5291/7045 [17:11:35<5:30:49, 11.32s/it] 75%|███████▌ | 5292/7045 [17:11:46<5:28:20, 11.24s/it] {'loss': 1.0962, 'learning_rate': 7.689757339424611e-07, 'epoch': 0.75} + 75%|███████▌ | 5292/7045 [17:11:46<5:28:20, 11.24s/it] 75%|███████▌ | 5293/7045 [17:11:57<5:26:26, 11.18s/it] {'loss': 1.1445, 'learning_rate': 7.681466057347886e-07, 'epoch': 0.75} + 75%|███████▌ | 5293/7045 [17:11:57<5:26:26, 11.18s/it] 75%|███████▌ | 5294/7045 [17:12:08<5:25:38, 11.16s/it] {'loss': 1.1494, 'learning_rate': 7.673178436172462e-07, 'epoch': 0.75} + 75%|███████▌ | 5294/7045 [17:12:08<5:25:38, 11.16s/it] 75%|███████▌ | 5295/7045 [17:12:21<5:46:08, 11.87s/it] {'loss': 1.083, 'learning_rate': 7.664894477650247e-07, 'epoch': 0.75} + 75%|███████▌ | 5295/7045 [17:12:21<5:46:08, 11.87s/it] 75%|███████▌ | 5296/7045 [17:12:33<5:39:34, 11.65s/it] {'loss': 1.1108, 'learning_rate': 7.656614183532349e-07, 'epoch': 0.75} + 75%|███████▌ | 5296/7045 [17:12:33<5:39:34, 11.65s/it] 75%|███████▌ | 5297/7045 [17:12:44<5:36:35, 11.55s/it] {'loss': 1.1289, 'learning_rate': 7.648337555569099e-07, 'epoch': 0.75} + 75%|███████▌ | 5297/7045 [17:12:44<5:36:35, 11.55s/it] 75%|███████▌ | 5298/7045 [17:12:56<5:37:45, 11.60s/it] {'loss': 1.1572, 'learning_rate': 7.640064595510068e-07, 'epoch': 0.75} + 75%|███████▌ | 5298/7045 [17:12:56<5:37:45, 11.60s/it] 75%|███████▌ | 5299/7045 [17:13:07<5:33:37, 11.47s/it] {'loss': 1.0576, 'learning_rate': 7.631795305104062e-07, 'epoch': 0.75} + 75%|███████▌ | 5299/7045 [17:13:07<5:33:37, 11.47s/it] 75%|███████▌ | 5300/7045 [17:13:18<5:31:09, 11.39s/it] {'loss': 1.1201, 'learning_rate': 7.623529686099074e-07, 'epoch': 0.75} + 75%|███████▌ | 5300/7045 [17:13:18<5:31:09, 11.39s/it] 75%|███████▌ | 5301/7045 [17:13:29<5:27:09, 11.26s/it] {'loss': 1.0771, 'learning_rate': 7.615267740242363e-07, 'epoch': 0.75} + 75%|███████▌ | 5301/7045 [17:13:29<5:27:09, 11.26s/it] 75%|███████▌ | 5302/7045 [17:13:40<5:26:16, 11.23s/it] {'loss': 1.1299, 'learning_rate': 7.607009469280372e-07, 'epoch': 0.75} + 75%|███████▌ | 5302/7045 [17:13:40<5:26:16, 11.23s/it] 75%|███████▌ | 5303/7045 [17:13:52<5:36:46, 11.60s/it] {'loss': 1.0845, 'learning_rate': 7.598754874958802e-07, 'epoch': 0.75} + 75%|███████▌ | 5303/7045 [17:13:52<5:36:46, 11.60s/it] 75%|███████▌ | 5304/7045 [17:14:03<5:30:30, 11.39s/it] {'loss': 1.0952, 'learning_rate': 7.590503959022558e-07, 'epoch': 0.75} + 75%|███████▌ | 5304/7045 [17:14:03<5:30:30, 11.39s/it] 75%|███████▌ | 5305/7045 [17:14:17<5:47:30, 11.98s/it] {'loss': 1.0532, 'learning_rate': 7.582256723215761e-07, 'epoch': 0.75} + 75%|███████▌ | 5305/7045 [17:14:17<5:47:30, 11.98s/it] 75%|███████▌ | 5306/7045 [17:14:29<5:51:21, 12.12s/it] {'loss': 1.1055, 'learning_rate': 7.574013169281777e-07, 'epoch': 0.75} + 75%|███████▌ | 5306/7045 [17:14:29<5:51:21, 12.12s/it] 75%|███████▌ | 5307/7045 [17:14:42<5:58:02, 12.36s/it] {'loss': 1.1113, 'learning_rate': 7.565773298963175e-07, 'epoch': 0.75} + 75%|███████▌ | 5307/7045 [17:14:42<5:58:02, 12.36s/it] 75%|███████▌ | 5308/7045 [17:14:54<5:50:46, 12.12s/it] {'loss': 1.1201, 'learning_rate': 7.557537114001742e-07, 'epoch': 0.75} + 75%|███████▌ | 5308/7045 [17:14:54<5:50:46, 12.12s/it] 75%|███████▌ | 5309/7045 [17:15:05<5:41:10, 11.79s/it] {'loss': 1.0991, 'learning_rate': 7.549304616138512e-07, 'epoch': 0.75} + 75%|███████▌ | 5309/7045 [17:15:05<5:41:10, 11.79s/it] 75%|███████▌ | 5310/7045 [17:15:15<5:31:38, 11.47s/it] {'loss': 1.1055, 'learning_rate': 7.541075807113706e-07, 'epoch': 0.75} + 75%|███████▌ | 5310/7045 [17:15:15<5:31:38, 11.47s/it] 75%|███████▌ | 5311/7045 [17:15:27<5:28:33, 11.37s/it] {'loss': 1.0947, 'learning_rate': 7.53285068866679e-07, 'epoch': 0.75} + 75%|███████▌ | 5311/7045 [17:15:27<5:28:33, 11.37s/it] 75%|███████▌ | 5312/7045 [17:15:38<5:26:45, 11.31s/it] {'loss': 1.0786, 'learning_rate': 7.52462926253645e-07, 'epoch': 0.75} + 75%|███████▌ | 5312/7045 [17:15:38<5:26:45, 11.31s/it] 75%|███████▌ | 5313/7045 [17:15:49<5:27:23, 11.34s/it] {'loss': 1.0874, 'learning_rate': 7.516411530460566e-07, 'epoch': 0.75} + 75%|███████▌ | 5313/7045 [17:15:49<5:27:23, 11.34s/it] 75%|███████▌ | 5314/7045 [17:16:01<5:32:39, 11.53s/it] {'loss': 1.0967, 'learning_rate': 7.50819749417627e-07, 'epoch': 0.75} + 75%|███████▌ | 5314/7045 [17:16:01<5:32:39, 11.53s/it] 75%|███████▌ | 5315/7045 [17:16:14<5:40:10, 11.80s/it] {'loss': 1.0977, 'learning_rate': 7.499987155419891e-07, 'epoch': 0.75} + 75%|███████▌ | 5315/7045 [17:16:14<5:40:10, 11.80s/it] 75%|███████▌ | 5316/7045 [17:16:25<5:36:27, 11.68s/it] {'loss': 1.0596, 'learning_rate': 7.491780515926972e-07, 'epoch': 0.75} + 75%|███████▌ | 5316/7045 [17:16:25<5:36:27, 11.68s/it] 75%|███████▌ | 5317/7045 [17:16:36<5:33:45, 11.59s/it] {'loss': 1.0527, 'learning_rate': 7.4835775774323e-07, 'epoch': 0.75} + 75%|███████▌ | 5317/7045 [17:16:36<5:33:45, 11.59s/it] 75%|███████▌ | 5318/7045 [17:16:47<5:27:06, 11.36s/it] {'loss': 1.1201, 'learning_rate': 7.475378341669858e-07, 'epoch': 0.75} + 75%|███████▌ | 5318/7045 [17:16:47<5:27:06, 11.36s/it] 76%|███████▌ | 5319/7045 [17:16:59<5:27:10, 11.37s/it] {'loss': 1.125, 'learning_rate': 7.467182810372844e-07, 'epoch': 0.76} + 76%|███████▌ | 5319/7045 [17:16:59<5:27:10, 11.37s/it] 76%|███████▌ | 5320/7045 [17:17:11<5:32:57, 11.58s/it] {'loss': 1.0962, 'learning_rate': 7.458990985273698e-07, 'epoch': 0.76} + 76%|███████▌ | 5320/7045 [17:17:11<5:32:57, 11.58s/it] 76%|███████▌ | 5321/7045 [17:17:22<5:27:58, 11.41s/it] {'loss': 1.0889, 'learning_rate': 7.450802868104043e-07, 'epoch': 0.76} + 76%|███████▌ | 5321/7045 [17:17:22<5:27:58, 11.41s/it] 76%|███████▌ | 5322/7045 [17:17:33<5:25:59, 11.35s/it] {'loss': 1.1191, 'learning_rate': 7.442618460594742e-07, 'epoch': 0.76} + 76%|███████▌ | 5322/7045 [17:17:33<5:25:59, 11.35s/it] 76%|███████▌ | 5323/7045 [17:17:44<5:22:01, 11.22s/it] {'loss': 1.0859, 'learning_rate': 7.434437764475874e-07, 'epoch': 0.76} + 76%|███████▌ | 5323/7045 [17:17:44<5:22:01, 11.22s/it] 76%|███████▌ | 5324/7045 [17:17:56<5:33:09, 11.61s/it] {'loss': 1.0688, 'learning_rate': 7.426260781476716e-07, 'epoch': 0.76} + 76%|███████▌ | 5324/7045 [17:17:56<5:33:09, 11.61s/it] 76%|███████▌ | 5325/7045 [17:18:09<5:42:31, 11.95s/it] {'loss': 1.1328, 'learning_rate': 7.418087513325781e-07, 'epoch': 0.76} + 76%|███████▌ | 5325/7045 [17:18:09<5:42:31, 11.95s/it] 76%|███████▌ | 5326/7045 [17:18:20<5:36:41, 11.75s/it] {'loss': 1.0908, 'learning_rate': 7.409917961750779e-07, 'epoch': 0.76} + 76%|███████▌ | 5326/7045 [17:18:20<5:36:41, 11.75s/it] 76%|███████▌ | 5327/7045 [17:18:31<5:31:18, 11.57s/it] {'loss': 1.1182, 'learning_rate': 7.401752128478631e-07, 'epoch': 0.76} + 76%|███████▌ | 5327/7045 [17:18:31<5:31:18, 11.57s/it] 76%|███████▌ | 5328/7045 [17:18:43<5:31:03, 11.57s/it] {'loss': 1.1118, 'learning_rate': 7.393590015235507e-07, 'epoch': 0.76} + 76%|███████▌ | 5328/7045 [17:18:43<5:31:03, 11.57s/it] 76%|███████▌ | 5329/7045 [17:18:54<5:26:19, 11.41s/it] {'loss': 1.1201, 'learning_rate': 7.385431623746749e-07, 'epoch': 0.76} + 76%|███████▌ | 5329/7045 [17:18:54<5:26:19, 11.41s/it] 76%|███████▌ | 5330/7045 [17:19:05<5:23:31, 11.32s/it] {'loss': 1.1387, 'learning_rate': 7.377276955736928e-07, 'epoch': 0.76} + 76%|███████▌ | 5330/7045 [17:19:05<5:23:31, 11.32s/it] 76%|███████▌ | 5331/7045 [17:19:16<5:23:10, 11.31s/it] {'loss': 1.1284, 'learning_rate': 7.369126012929842e-07, 'epoch': 0.76} + 76%|███████▌ | 5331/7045 [17:19:16<5:23:10, 11.31s/it] 76%|███████▌ | 5332/7045 [17:19:27<5:20:23, 11.22s/it] {'loss': 1.1016, 'learning_rate': 7.360978797048473e-07, 'epoch': 0.76} + 76%|███████▌ | 5332/7045 [17:19:27<5:20:23, 11.22s/it] 76%|███████▌ | 5333/7045 [17:19:39<5:24:09, 11.36s/it] {'loss': 1.1392, 'learning_rate': 7.352835309815046e-07, 'epoch': 0.76} + 76%|███████▌ | 5333/7045 [17:19:39<5:24:09, 11.36s/it] 76%|███████▌ | 5334/7045 [17:19:51<5:23:50, 11.36s/it] {'loss': 1.1387, 'learning_rate': 7.34469555295097e-07, 'epoch': 0.76} + 76%|███████▌ | 5334/7045 [17:19:51<5:23:50, 11.36s/it] 76%|███████▌ | 5335/7045 [17:20:02<5:21:31, 11.28s/it] {'loss': 1.1436, 'learning_rate': 7.336559528176884e-07, 'epoch': 0.76} + 76%|███████▌ | 5335/7045 [17:20:02<5:21:31, 11.28s/it] 76%|███████▌ | 5336/7045 [17:20:15<5:41:23, 11.99s/it] {'loss': 1.0557, 'learning_rate': 7.328427237212643e-07, 'epoch': 0.76} + 76%|███████▌ | 5336/7045 [17:20:15<5:41:23, 11.99s/it] 76%|███████▌ | 5337/7045 [17:20:26<5:34:11, 11.74s/it] {'loss': 1.1328, 'learning_rate': 7.320298681777291e-07, 'epoch': 0.76} + 76%|███████▌ | 5337/7045 [17:20:26<5:34:11, 11.74s/it] 76%|███████▌ | 5338/7045 [17:20:38<5:35:15, 11.78s/it] {'loss': 1.0977, 'learning_rate': 7.312173863589087e-07, 'epoch': 0.76} + 76%|███████▌ | 5338/7045 [17:20:38<5:35:15, 11.78s/it] 76%|███████▌ | 5339/7045 [17:20:49<5:29:40, 11.59s/it] {'loss': 1.1211, 'learning_rate': 7.30405278436552e-07, 'epoch': 0.76} + 76%|███████▌ | 5339/7045 [17:20:49<5:29:40, 11.59s/it] 76%|███████▌ | 5340/7045 [17:21:01<5:26:18, 11.48s/it] {'loss': 1.1074, 'learning_rate': 7.295935445823272e-07, 'epoch': 0.76} + 76%|███████▌ | 5340/7045 [17:21:01<5:26:18, 11.48s/it] 76%|███████▌ | 5341/7045 [17:21:12<5:21:45, 11.33s/it] {'loss': 1.1279, 'learning_rate': 7.287821849678226e-07, 'epoch': 0.76} + 76%|███████▌ | 5341/7045 [17:21:12<5:21:45, 11.33s/it] 76%|███████▌ | 5342/7045 [17:21:23<5:19:45, 11.27s/it] {'loss': 1.1318, 'learning_rate': 7.279711997645502e-07, 'epoch': 0.76} + 76%|███████▌ | 5342/7045 [17:21:23<5:19:45, 11.27s/it] 76%|███████▌ | 5343/7045 [17:21:35<5:26:39, 11.52s/it] {'loss': 1.0918, 'learning_rate': 7.271605891439396e-07, 'epoch': 0.76} + 76%|███████▌ | 5343/7045 [17:21:35<5:26:39, 11.52s/it] 76%|███████▌ | 5344/7045 [17:21:47<5:33:05, 11.75s/it] {'loss': 1.0752, 'learning_rate': 7.263503532773445e-07, 'epoch': 0.76} + 76%|███████▌ | 5344/7045 [17:21:47<5:33:05, 11.75s/it] 76%|███████▌ | 5345/7045 [17:22:00<5:45:52, 12.21s/it] {'loss': 1.0552, 'learning_rate': 7.255404923360365e-07, 'epoch': 0.76} + 76%|███████▌ | 5345/7045 [17:22:00<5:45:52, 12.21s/it] 76%|███████▌ | 5346/7045 [17:22:11<5:35:49, 11.86s/it] {'loss': 1.0889, 'learning_rate': 7.24731006491208e-07, 'epoch': 0.76} + 76%|███████▌ | 5346/7045 [17:22:11<5:35:49, 11.86s/it] 76%|███████▌ | 5347/7045 [17:22:23<5:30:36, 11.68s/it] {'loss': 1.1406, 'learning_rate': 7.239218959139763e-07, 'epoch': 0.76} + 76%|███████▌ | 5347/7045 [17:22:23<5:30:36, 11.68s/it] 76%|███████▌ | 5348/7045 [17:22:35<5:36:29, 11.90s/it] {'loss': 1.1172, 'learning_rate': 7.231131607753744e-07, 'epoch': 0.76} + 76%|███████▌ | 5348/7045 [17:22:35<5:36:29, 11.90s/it] 76%|███████▌ | 5349/7045 [17:22:48<5:46:40, 12.26s/it] {'loss': 1.1055, 'learning_rate': 7.223048012463574e-07, 'epoch': 0.76} + 76%|███████▌ | 5349/7045 [17:22:48<5:46:40, 12.26s/it] 76%|███████▌ | 5350/7045 [17:23:01<5:53:03, 12.50s/it] {'loss': 1.0913, 'learning_rate': 7.214968174978032e-07, 'epoch': 0.76} + 76%|███████▌ | 5350/7045 [17:23:01<5:53:03, 12.50s/it] 76%|███████▌ | 5351/7045 [17:23:12<5:40:38, 12.07s/it] {'loss': 1.1377, 'learning_rate': 7.206892097005072e-07, 'epoch': 0.76} + 76%|███████▌ | 5351/7045 [17:23:12<5:40:38, 12.07s/it] 76%|███████▌ | 5352/7045 [17:23:23<5:32:33, 11.79s/it] {'loss': 1.0884, 'learning_rate': 7.198819780251865e-07, 'epoch': 0.76} + 76%|███████▌ | 5352/7045 [17:23:24<5:32:33, 11.79s/it] 76%|███████▌ | 5353/7045 [17:23:35<5:27:04, 11.60s/it] {'loss': 1.0537, 'learning_rate': 7.190751226424797e-07, 'epoch': 0.76} + 76%|███████▌ | 5353/7045 [17:23:35<5:27:04, 11.60s/it] 76%|███████▌ | 5354/7045 [17:23:46<5:23:31, 11.48s/it] {'loss': 1.0659, 'learning_rate': 7.182686437229438e-07, 'epoch': 0.76} + 76%|███████▌ | 5354/7045 [17:23:46<5:23:31, 11.48s/it] 76%|███████▌ | 5355/7045 [17:23:57<5:20:40, 11.38s/it] {'loss': 1.1035, 'learning_rate': 7.174625414370592e-07, 'epoch': 0.76} + 76%|███████▌ | 5355/7045 [17:23:57<5:20:40, 11.38s/it] 76%|███████▌ | 5356/7045 [17:24:09<5:22:01, 11.44s/it] {'loss': 1.1367, 'learning_rate': 7.166568159552239e-07, 'epoch': 0.76} + 76%|███████▌ | 5356/7045 [17:24:09<5:22:01, 11.44s/it] 76%|███████▌ | 5357/7045 [17:24:20<5:20:47, 11.40s/it] {'loss': 1.1074, 'learning_rate': 7.158514674477562e-07, 'epoch': 0.76} + 76%|███████▌ | 5357/7045 [17:24:20<5:20:47, 11.40s/it] 76%|███████▌ | 5358/7045 [17:24:32<5:26:45, 11.62s/it] {'loss': 1.0576, 'learning_rate': 7.150464960848979e-07, 'epoch': 0.76} + 76%|███████▌ | 5358/7045 [17:24:32<5:26:45, 11.62s/it] 76%|███████▌ | 5359/7045 [17:24:43<5:24:16, 11.54s/it] {'loss': 1.1123, 'learning_rate': 7.142419020368069e-07, 'epoch': 0.76} + 76%|███████▌ | 5359/7045 [17:24:43<5:24:16, 11.54s/it] 76%|███████▌ | 5360/7045 [17:24:54<5:19:23, 11.37s/it] {'loss': 1.0967, 'learning_rate': 7.134376854735642e-07, 'epoch': 0.76} + 76%|███████▌ | 5360/7045 [17:24:54<5:19:23, 11.37s/it] 76%|███████▌ | 5361/7045 [17:25:05<5:16:08, 11.26s/it] {'loss': 1.1514, 'learning_rate': 7.126338465651713e-07, 'epoch': 0.76} + 76%|███████▌ | 5361/7045 [17:25:05<5:16:08, 11.26s/it] 76%|███████▌ | 5362/7045 [17:25:17<5:16:41, 11.29s/it] {'loss': 1.085, 'learning_rate': 7.118303854815478e-07, 'epoch': 0.76} + 76%|███████▌ | 5362/7045 [17:25:17<5:16:41, 11.29s/it] 76%|███████▌ | 5363/7045 [17:25:28<5:14:36, 11.22s/it] {'loss': 1.1465, 'learning_rate': 7.110273023925335e-07, 'epoch': 0.76} + 76%|███████▌ | 5363/7045 [17:25:28<5:14:36, 11.22s/it] 76%|███████▌ | 5364/7045 [17:25:39<5:14:04, 11.21s/it] {'loss': 1.0967, 'learning_rate': 7.102245974678909e-07, 'epoch': 0.76} + 76%|███████▌ | 5364/7045 [17:25:39<5:14:04, 11.21s/it] 76%|███████▌ | 5365/7045 [17:25:50<5:12:44, 11.17s/it] {'loss': 1.1094, 'learning_rate': 7.094222708772991e-07, 'epoch': 0.76} + 76%|███████▌ | 5365/7045 [17:25:50<5:12:44, 11.17s/it] 76%|███████▌ | 5366/7045 [17:26:01<5:12:52, 11.18s/it] {'loss': 1.0977, 'learning_rate': 7.086203227903607e-07, 'epoch': 0.76} + 76%|███████▌ | 5366/7045 [17:26:01<5:12:52, 11.18s/it] 76%|███████▌ | 5367/7045 [17:26:14<5:29:29, 11.78s/it] {'loss': 1.0488, 'learning_rate': 7.078187533765957e-07, 'epoch': 0.76} + 76%|███████▌ | 5367/7045 [17:26:14<5:29:29, 11.78s/it] 76%|███████▌ | 5368/7045 [17:26:25<5:21:47, 11.51s/it] {'loss': 1.1055, 'learning_rate': 7.070175628054443e-07, 'epoch': 0.76} + 76%|███████▌ | 5368/7045 [17:26:25<5:21:47, 11.51s/it] 76%|███████▌ | 5369/7045 [17:26:37<5:19:54, 11.45s/it] {'loss': 1.0781, 'learning_rate': 7.062167512462684e-07, 'epoch': 0.76} + 76%|███████▌ | 5369/7045 [17:26:37<5:19:54, 11.45s/it] 76%|███████▌ | 5370/7045 [17:26:48<5:16:14, 11.33s/it] {'loss': 1.1406, 'learning_rate': 7.054163188683483e-07, 'epoch': 0.76} + 76%|███████▌ | 5370/7045 [17:26:48<5:16:14, 11.33s/it] 76%|███████▌ | 5371/7045 [17:26:59<5:12:15, 11.19s/it] {'loss': 1.061, 'learning_rate': 7.046162658408826e-07, 'epoch': 0.76} + 76%|███████▌ | 5371/7045 [17:26:59<5:12:15, 11.19s/it] 76%|███████▋ | 5372/7045 [17:27:10<5:11:14, 11.16s/it] {'loss': 1.0464, 'learning_rate': 7.038165923329948e-07, 'epoch': 0.76} + 76%|███████▋ | 5372/7045 [17:27:10<5:11:14, 11.16s/it] 76%|███████▋ | 5373/7045 [17:27:22<5:20:06, 11.49s/it] {'loss': 1.0869, 'learning_rate': 7.030172985137234e-07, 'epoch': 0.76} + 76%|███████▋ | 5373/7045 [17:27:22<5:20:06, 11.49s/it] 76%|███████▋ | 5374/7045 [17:27:36<5:37:59, 12.14s/it] {'loss': 1.0781, 'learning_rate': 7.022183845520275e-07, 'epoch': 0.76} + 76%|███████▋ | 5374/7045 [17:27:36<5:37:59, 12.14s/it] 76%|███████▋ | 5375/7045 [17:27:47<5:30:46, 11.88s/it] {'loss': 1.1196, 'learning_rate': 7.01419850616788e-07, 'epoch': 0.76} + 76%|███████▋ | 5375/7045 [17:27:47<5:30:46, 11.88s/it] 76%|███████▋ | 5376/7045 [17:28:00<5:37:40, 12.14s/it] {'loss': 1.0957, 'learning_rate': 7.006216968768023e-07, 'epoch': 0.76} + 76%|███████▋ | 5376/7045 [17:28:00<5:37:40, 12.14s/it] 76%|███████▋ | 5377/7045 [17:28:12<5:39:33, 12.21s/it] {'loss': 1.1187, 'learning_rate': 6.998239235007911e-07, 'epoch': 0.76} + 76%|███████▋ | 5377/7045 [17:28:12<5:39:33, 12.21s/it] 76%|███████▋ | 5378/7045 [17:28:23<5:29:42, 11.87s/it] {'loss': 1.0986, 'learning_rate': 6.990265306573921e-07, 'epoch': 0.76} + 76%|███████▋ | 5378/7045 [17:28:23<5:29:42, 11.87s/it] 76%|███████▋ | 5379/7045 [17:28:34<5:23:22, 11.65s/it] {'loss': 1.1436, 'learning_rate': 6.982295185151619e-07, 'epoch': 0.76} + 76%|███████▋ | 5379/7045 [17:28:34<5:23:22, 11.65s/it] 76%|███████▋ | 5380/7045 [17:28:45<5:19:05, 11.50s/it] {'loss': 1.0679, 'learning_rate': 6.9743288724258e-07, 'epoch': 0.76} + 76%|███████▋ | 5380/7045 [17:28:45<5:19:05, 11.50s/it] 76%|███████▋ | 5381/7045 [17:28:56<5:15:26, 11.37s/it] {'loss': 1.0835, 'learning_rate': 6.966366370080426e-07, 'epoch': 0.76} + 76%|███████▋ | 5381/7045 [17:28:56<5:15:26, 11.37s/it] 76%|███████▋ | 5382/7045 [17:29:08<5:14:42, 11.35s/it] {'loss': 1.124, 'learning_rate': 6.958407679798651e-07, 'epoch': 0.76} + 76%|███████▋ | 5382/7045 [17:29:08<5:14:42, 11.35s/it] 76%|███████▋ | 5383/7045 [17:29:20<5:24:49, 11.73s/it] {'loss': 1.0645, 'learning_rate': 6.950452803262841e-07, 'epoch': 0.76} + 76%|███████▋ | 5383/7045 [17:29:20<5:24:49, 11.73s/it] 76%|███████▋ | 5384/7045 [17:29:31<5:20:17, 11.57s/it] {'loss': 1.1289, 'learning_rate': 6.942501742154559e-07, 'epoch': 0.76} + 76%|███████▋ | 5384/7045 [17:29:32<5:20:17, 11.57s/it] 76%|███████▋ | 5385/7045 [17:29:43<5:15:36, 11.41s/it] {'loss': 1.1377, 'learning_rate': 6.934554498154533e-07, 'epoch': 0.76} + 76%|███████▋ | 5385/7045 [17:29:43<5:15:36, 11.41s/it] 76%|███████▋ | 5386/7045 [17:29:54<5:16:08, 11.43s/it] {'loss': 1.1279, 'learning_rate': 6.926611072942715e-07, 'epoch': 0.76} + 76%|███████▋ | 5386/7045 [17:29:54<5:16:08, 11.43s/it] 76%|███████▋ | 5387/7045 [17:30:05<5:15:20, 11.41s/it] {'loss': 1.1182, 'learning_rate': 6.918671468198227e-07, 'epoch': 0.76} + 76%|███████▋ | 5387/7045 [17:30:05<5:15:20, 11.41s/it] 76%|███████▋ | 5388/7045 [17:30:16<5:11:06, 11.27s/it] {'loss': 1.1289, 'learning_rate': 6.910735685599401e-07, 'epoch': 0.76} + 76%|███████▋ | 5388/7045 [17:30:16<5:11:06, 11.27s/it] 76%|███████▋ | 5389/7045 [17:30:28<5:10:59, 11.27s/it] {'loss': 1.165, 'learning_rate': 6.902803726823751e-07, 'epoch': 0.76} + 76%|███████▋ | 5389/7045 [17:30:28<5:10:59, 11.27s/it] 77%|███████▋ | 5390/7045 [17:30:39<5:08:02, 11.17s/it] {'loss': 1.1494, 'learning_rate': 6.894875593547975e-07, 'epoch': 0.77} + 77%|███████▋ | 5390/7045 [17:30:39<5:08:02, 11.17s/it] 77%|███████▋ | 5391/7045 [17:30:50<5:06:43, 11.13s/it] {'loss': 1.1475, 'learning_rate': 6.886951287447985e-07, 'epoch': 0.77} + 77%|███████▋ | 5391/7045 [17:30:50<5:06:43, 11.13s/it] 77%|███████▋ | 5392/7045 [17:31:01<5:05:56, 11.10s/it] {'loss': 1.1406, 'learning_rate': 6.879030810198867e-07, 'epoch': 0.77} + 77%|███████▋ | 5392/7045 [17:31:01<5:05:56, 11.10s/it] 77%|███████▋ | 5393/7045 [17:31:14<5:23:12, 11.74s/it] {'loss': 1.1084, 'learning_rate': 6.871114163474893e-07, 'epoch': 0.77} + 77%|███████▋ | 5393/7045 [17:31:14<5:23:12, 11.74s/it] 77%|███████▋ | 5394/7045 [17:31:26<5:30:21, 12.01s/it] {'loss': 1.084, 'learning_rate': 6.863201348949547e-07, 'epoch': 0.77} + 77%|███████▋ | 5394/7045 [17:31:26<5:30:21, 12.01s/it] 77%|███████▋ | 5395/7045 [17:31:38<5:28:01, 11.93s/it] {'loss': 1.1167, 'learning_rate': 6.855292368295477e-07, 'epoch': 0.77} + 77%|███████▋ | 5395/7045 [17:31:38<5:28:01, 11.93s/it] 77%|███████▋ | 5396/7045 [17:31:50<5:28:40, 11.96s/it] {'loss': 1.1416, 'learning_rate': 6.847387223184543e-07, 'epoch': 0.77} + 77%|███████▋ | 5396/7045 [17:31:50<5:28:40, 11.96s/it] 77%|███████▋ | 5397/7045 [17:32:01<5:21:13, 11.70s/it] {'loss': 1.1396, 'learning_rate': 6.839485915287785e-07, 'epoch': 0.77} + 77%|███████▋ | 5397/7045 [17:32:01<5:21:13, 11.70s/it] 77%|███████▋ | 5398/7045 [17:32:13<5:22:42, 11.76s/it] {'loss': 1.1504, 'learning_rate': 6.831588446275422e-07, 'epoch': 0.77} + 77%|███████▋ | 5398/7045 [17:32:13<5:22:42, 11.76s/it] 77%|███████▋ | 5399/7045 [17:32:24<5:16:29, 11.54s/it] {'loss': 1.0996, 'learning_rate': 6.823694817816887e-07, 'epoch': 0.77} + 77%|███████▋ | 5399/7045 [17:32:24<5:16:29, 11.54s/it] 77%|███████▋ | 5400/7045 [17:32:35<5:14:02, 11.45s/it] {'loss': 1.1191, 'learning_rate': 6.815805031580772e-07, 'epoch': 0.77} + 77%|███████▋ | 5400/7045 [17:32:35<5:14:02, 11.45s/it] 77%|███████▋ | 5401/7045 [17:32:46<5:09:41, 11.30s/it] {'loss': 1.0771, 'learning_rate': 6.807919089234869e-07, 'epoch': 0.77} + 77%|███████▋ | 5401/7045 [17:32:46<5:09:41, 11.30s/it] 77%|███████▋ | 5402/7045 [17:33:00<5:24:23, 11.85s/it] {'loss': 1.0444, 'learning_rate': 6.800036992446169e-07, 'epoch': 0.77} + 77%|███████▋ | 5402/7045 [17:33:00<5:24:23, 11.85s/it] 77%|███████▋ | 5403/7045 [17:33:11<5:17:53, 11.62s/it] {'loss': 1.125, 'learning_rate': 6.792158742880833e-07, 'epoch': 0.77} + 77%|███████▋ | 5403/7045 [17:33:11<5:17:53, 11.62s/it] 77%|███████▋ | 5404/7045 [17:33:22<5:14:25, 11.50s/it] {'loss': 1.0981, 'learning_rate': 6.784284342204206e-07, 'epoch': 0.77} + 77%|███████▋ | 5404/7045 [17:33:22<5:14:25, 11.50s/it] 77%|███████▋ | 5405/7045 [17:33:36<5:34:08, 12.22s/it] {'loss': 1.1265, 'learning_rate': 6.776413792080849e-07, 'epoch': 0.77} + 77%|███████▋ | 5405/7045 [17:33:36<5:34:08, 12.22s/it] 77%|███████▋ | 5406/7045 [17:33:48<5:34:59, 12.26s/it] {'loss': 1.0894, 'learning_rate': 6.768547094174468e-07, 'epoch': 0.77} + 77%|███████▋ | 5406/7045 [17:33:48<5:34:59, 12.26s/it] 77%|███████▋ | 5407/7045 [17:34:00<5:29:40, 12.08s/it] {'loss': 1.0859, 'learning_rate': 6.760684250147986e-07, 'epoch': 0.77} + 77%|███████▋ | 5407/7045 [17:34:00<5:29:40, 12.08s/it] 77%|███████▋ | 5408/7045 [17:34:11<5:25:20, 11.92s/it] {'loss': 1.1426, 'learning_rate': 6.75282526166351e-07, 'epoch': 0.77} + 77%|███████▋ | 5408/7045 [17:34:11<5:25:20, 11.92s/it] 77%|███████▋ | 5409/7045 [17:34:23<5:21:21, 11.79s/it] {'loss': 1.0967, 'learning_rate': 6.7449701303823e-07, 'epoch': 0.77} + 77%|███████▋ | 5409/7045 [17:34:23<5:21:21, 11.79s/it] 77%|███████▋ | 5410/7045 [17:34:35<5:24:26, 11.91s/it] {'loss': 1.1152, 'learning_rate': 6.737118857964847e-07, 'epoch': 0.77} + 77%|███████▋ | 5410/7045 [17:34:35<5:24:26, 11.91s/it] 77%|███████▋ | 5411/7045 [17:34:48<5:32:04, 12.19s/it] {'loss': 1.0981, 'learning_rate': 6.729271446070793e-07, 'epoch': 0.77} + 77%|███████▋ | 5411/7045 [17:34:48<5:32:04, 12.19s/it] 77%|███████▋ | 5412/7045 [17:35:01<5:36:06, 12.35s/it] {'loss': 1.041, 'learning_rate': 6.721427896358962e-07, 'epoch': 0.77} + 77%|███████▋ | 5412/7045 [17:35:01<5:36:06, 12.35s/it] 77%|███████▋ | 5413/7045 [17:35:12<5:25:00, 11.95s/it] {'loss': 1.1201, 'learning_rate': 6.713588210487393e-07, 'epoch': 0.77} + 77%|███████▋ | 5413/7045 [17:35:12<5:25:00, 11.95s/it] 77%|███████▋ | 5414/7045 [17:35:24<5:25:51, 11.99s/it] {'loss': 1.0962, 'learning_rate': 6.705752390113276e-07, 'epoch': 0.77} + 77%|███████▋ | 5414/7045 [17:35:24<5:25:51, 11.99s/it] 77%|███████▋ | 5415/7045 [17:35:37<5:32:52, 12.25s/it] {'loss': 1.0898, 'learning_rate': 6.697920436892994e-07, 'epoch': 0.77} + 77%|███████▋ | 5415/7045 [17:35:37<5:32:52, 12.25s/it] 77%|███████▋ | 5416/7045 [17:35:48<5:23:22, 11.91s/it] {'loss': 1.1309, 'learning_rate': 6.690092352482127e-07, 'epoch': 0.77} + 77%|███████▋ | 5416/7045 [17:35:48<5:23:22, 11.91s/it] 77%|███████▋ | 5417/7045 [17:36:01<5:33:16, 12.28s/it] {'loss': 1.1094, 'learning_rate': 6.682268138535411e-07, 'epoch': 0.77} + 77%|███████▋ | 5417/7045 [17:36:01<5:33:16, 12.28s/it] 77%|███████▋ | 5418/7045 [17:36:12<5:22:36, 11.90s/it] {'loss': 1.0732, 'learning_rate': 6.674447796706793e-07, 'epoch': 0.77} + 77%|███████▋ | 5418/7045 [17:36:12<5:22:36, 11.90s/it] 77%|███████▋ | 5419/7045 [17:36:24<5:25:36, 12.01s/it] {'loss': 1.105, 'learning_rate': 6.666631328649367e-07, 'epoch': 0.77} + 77%|███████▋ | 5419/7045 [17:36:24<5:25:36, 12.01s/it] 77%|███████▋ | 5420/7045 [17:36:36<5:24:35, 11.98s/it] {'loss': 1.0898, 'learning_rate': 6.658818736015443e-07, 'epoch': 0.77} + 77%|███████▋ | 5420/7045 [17:36:36<5:24:35, 11.98s/it] 77%|███████▋ | 5421/7045 [17:36:47<5:16:40, 11.70s/it] {'loss': 1.1162, 'learning_rate': 6.651010020456497e-07, 'epoch': 0.77} + 77%|███████▋ | 5421/7045 [17:36:47<5:16:40, 11.70s/it] 77%|███████▋ | 5422/7045 [17:37:00<5:25:26, 12.03s/it] {'loss': 1.0781, 'learning_rate': 6.643205183623183e-07, 'epoch': 0.77} + 77%|███████▋ | 5422/7045 [17:37:00<5:25:26, 12.03s/it] 77%|███████▋ | 5423/7045 [17:37:11<5:17:41, 11.75s/it] {'loss': 1.1323, 'learning_rate': 6.635404227165323e-07, 'epoch': 0.77} + 77%|███████▋ | 5423/7045 [17:37:11<5:17:41, 11.75s/it] 77%|███████▋ | 5424/7045 [17:37:22<5:13:12, 11.59s/it] {'loss': 1.1118, 'learning_rate': 6.627607152731952e-07, 'epoch': 0.77} + 77%|███████▋ | 5424/7045 [17:37:22<5:13:12, 11.59s/it] 77%|███████▋ | 5425/7045 [17:37:33<5:10:40, 11.51s/it] {'loss': 1.1396, 'learning_rate': 6.619813961971256e-07, 'epoch': 0.77} + 77%|███████▋ | 5425/7045 [17:37:33<5:10:40, 11.51s/it] 77%|███████▋ | 5426/7045 [17:37:45<5:08:37, 11.44s/it] {'loss': 1.0879, 'learning_rate': 6.612024656530599e-07, 'epoch': 0.77} + 77%|███████▋ | 5426/7045 [17:37:45<5:08:37, 11.44s/it] 77%|███████▋ | 5427/7045 [17:37:58<5:21:05, 11.91s/it] {'loss': 1.0952, 'learning_rate': 6.604239238056553e-07, 'epoch': 0.77} + 77%|███████▋ | 5427/7045 [17:37:58<5:21:05, 11.91s/it] 77%|███████▋ | 5428/7045 [17:38:09<5:18:32, 11.82s/it] {'loss': 1.1143, 'learning_rate': 6.596457708194834e-07, 'epoch': 0.77} + 77%|███████▋ | 5428/7045 [17:38:09<5:18:32, 11.82s/it] 77%|███████▋ | 5429/7045 [17:38:20<5:11:57, 11.58s/it] {'loss': 1.1514, 'learning_rate': 6.588680068590361e-07, 'epoch': 0.77} + 77%|███████▋ | 5429/7045 [17:38:20<5:11:57, 11.58s/it] 77%|███████▋ | 5430/7045 [17:38:33<5:18:54, 11.85s/it] {'loss': 1.0884, 'learning_rate': 6.580906320887217e-07, 'epoch': 0.77} + 77%|███████▋ | 5430/7045 [17:38:33<5:18:54, 11.85s/it] 77%|███████▋ | 5431/7045 [17:38:44<5:16:17, 11.76s/it] {'loss': 1.1045, 'learning_rate': 6.57313646672866e-07, 'epoch': 0.77} + 77%|███████▋ | 5431/7045 [17:38:44<5:16:17, 11.76s/it] 77%|███████▋ | 5432/7045 [17:38:56<5:18:09, 11.84s/it] {'loss': 1.0986, 'learning_rate': 6.565370507757137e-07, 'epoch': 0.77} + 77%|███████▋ | 5432/7045 [17:38:56<5:18:09, 11.84s/it] 77%|███████▋ | 5433/7045 [17:39:07<5:08:58, 11.50s/it] {'loss': 1.0698, 'learning_rate': 6.557608445614269e-07, 'epoch': 0.77} + 77%|███████▋ | 5433/7045 [17:39:07<5:08:58, 11.50s/it] 77%|███████▋ | 5434/7045 [17:39:18<5:06:20, 11.41s/it] {'loss': 1.0835, 'learning_rate': 6.549850281940842e-07, 'epoch': 0.77} + 77%|███████▋ | 5434/7045 [17:39:18<5:06:20, 11.41s/it] 77%|███████▋ | 5435/7045 [17:39:29<5:03:27, 11.31s/it] {'loss': 1.1387, 'learning_rate': 6.542096018376834e-07, 'epoch': 0.77} + 77%|███████▋ | 5435/7045 [17:39:29<5:03:27, 11.31s/it] 77%|███████▋ | 5436/7045 [17:39:43<5:19:02, 11.90s/it] {'loss': 1.1016, 'learning_rate': 6.534345656561389e-07, 'epoch': 0.77} + 77%|███████▋ | 5436/7045 [17:39:43<5:19:02, 11.90s/it] 77%|███████▋ | 5437/7045 [17:39:54<5:13:46, 11.71s/it] {'loss': 1.1064, 'learning_rate': 6.526599198132813e-07, 'epoch': 0.77} + 77%|███████▋ | 5437/7045 [17:39:54<5:13:46, 11.71s/it] 77%|███████▋ | 5438/7045 [17:40:05<5:09:18, 11.55s/it] {'loss': 1.0962, 'learning_rate': 6.518856644728621e-07, 'epoch': 0.77} + 77%|███████▋ | 5438/7045 [17:40:05<5:09:18, 11.55s/it] 77%|███████▋ | 5439/7045 [17:40:18<5:18:12, 11.89s/it] {'loss': 1.1055, 'learning_rate': 6.511117997985469e-07, 'epoch': 0.77} + 77%|███████▋ | 5439/7045 [17:40:18<5:18:12, 11.89s/it] 77%|███████▋ | 5440/7045 [17:40:31<5:24:55, 12.15s/it] {'loss': 1.082, 'learning_rate': 6.503383259539215e-07, 'epoch': 0.77} + 77%|███████▋ | 5440/7045 [17:40:31<5:24:55, 12.15s/it] 77%|███████▋ | 5441/7045 [17:40:42<5:17:51, 11.89s/it] {'loss': 1.167, 'learning_rate': 6.495652431024866e-07, 'epoch': 0.77} + 77%|███████▋ | 5441/7045 [17:40:42<5:17:51, 11.89s/it] 77%|███████▋ | 5442/7045 [17:40:53<5:12:29, 11.70s/it] {'loss': 1.0747, 'learning_rate': 6.48792551407661e-07, 'epoch': 0.77} + 77%|███████▋ | 5442/7045 [17:40:53<5:12:29, 11.70s/it] 77%|███████▋ | 5443/7045 [17:41:04<5:07:40, 11.52s/it] {'loss': 1.1055, 'learning_rate': 6.480202510327827e-07, 'epoch': 0.77} + 77%|███████▋ | 5443/7045 [17:41:04<5:07:40, 11.52s/it] 77%|███████▋ | 5444/7045 [17:41:15<5:04:49, 11.42s/it] {'loss': 1.124, 'learning_rate': 6.472483421411036e-07, 'epoch': 0.77} + 77%|███████▋ | 5444/7045 [17:41:15<5:04:49, 11.42s/it] 77%|███████▋ | 5445/7045 [17:41:27<5:02:19, 11.34s/it] {'loss': 1.1045, 'learning_rate': 6.464768248957953e-07, 'epoch': 0.77} + 77%|███████▋ | 5445/7045 [17:41:27<5:02:19, 11.34s/it] 77%|███████▋ | 5446/7045 [17:41:38<5:00:56, 11.29s/it] {'loss': 1.0996, 'learning_rate': 6.457056994599475e-07, 'epoch': 0.77} + 77%|███████▋ | 5446/7045 [17:41:38<5:00:56, 11.29s/it] 77%|███████▋ | 5447/7045 [17:41:49<4:59:58, 11.26s/it] {'loss': 1.1211, 'learning_rate': 6.449349659965643e-07, 'epoch': 0.77} + 77%|███████▋ | 5447/7045 [17:41:49<4:59:58, 11.26s/it] 77%|███████▋ | 5448/7045 [17:42:00<4:58:51, 11.23s/it] {'loss': 1.1069, 'learning_rate': 6.441646246685676e-07, 'epoch': 0.77} + 77%|███████▋ | 5448/7045 [17:42:00<4:58:51, 11.23s/it] 77%|███████▋ | 5449/7045 [17:42:11<4:55:19, 11.10s/it] {'loss': 1.0986, 'learning_rate': 6.433946756387982e-07, 'epoch': 0.77} + 77%|███████▋ | 5449/7045 [17:42:11<4:55:19, 11.10s/it] 77%|███████▋ | 5450/7045 [17:42:23<5:01:24, 11.34s/it] {'loss': 1.0991, 'learning_rate': 6.426251190700117e-07, 'epoch': 0.77} + 77%|███████▋ | 5450/7045 [17:42:23<5:01:24, 11.34s/it] 77%|███████▋ | 5451/7045 [17:42:35<5:10:25, 11.68s/it] {'loss': 1.0859, 'learning_rate': 6.418559551248835e-07, 'epoch': 0.77} + 77%|███████▋ | 5451/7045 [17:42:35<5:10:25, 11.68s/it] 77%|███████▋ | 5452/7045 [17:42:47<5:12:06, 11.76s/it] {'loss': 1.1035, 'learning_rate': 6.410871839660029e-07, 'epoch': 0.77} + 77%|███████▋ | 5452/7045 [17:42:47<5:12:06, 11.76s/it] 77%|███████▋ | 5453/7045 [17:42:59<5:08:55, 11.64s/it] {'loss': 1.0562, 'learning_rate': 6.403188057558777e-07, 'epoch': 0.77} + 77%|███████▋ | 5453/7045 [17:42:59<5:08:55, 11.64s/it] 77%|███████▋ | 5454/7045 [17:43:11<5:12:29, 11.78s/it] {'loss': 1.1333, 'learning_rate': 6.395508206569331e-07, 'epoch': 0.77} + 77%|███████▋ | 5454/7045 [17:43:11<5:12:29, 11.78s/it] 77%|███████▋ | 5455/7045 [17:43:22<5:09:32, 11.68s/it] {'loss': 1.0938, 'learning_rate': 6.387832288315099e-07, 'epoch': 0.77} + 77%|███████▋ | 5455/7045 [17:43:22<5:09:32, 11.68s/it] 77%|███████▋ | 5456/7045 [17:43:35<5:15:33, 11.92s/it] {'loss': 1.1055, 'learning_rate': 6.380160304418672e-07, 'epoch': 0.77} + 77%|███████▋ | 5456/7045 [17:43:35<5:15:33, 11.92s/it] 77%|███████▋ | 5457/7045 [17:43:46<5:11:55, 11.79s/it] {'loss': 1.0757, 'learning_rate': 6.372492256501805e-07, 'epoch': 0.77} + 77%|███████▋ | 5457/7045 [17:43:46<5:11:55, 11.79s/it] 77%|███████▋ | 5458/7045 [17:43:58<5:12:10, 11.80s/it] {'loss': 1.0967, 'learning_rate': 6.364828146185418e-07, 'epoch': 0.77} + 77%|███████▋ | 5458/7045 [17:43:58<5:12:10, 11.80s/it] 77%|███████▋ | 5459/7045 [17:44:09<5:07:44, 11.64s/it] {'loss': 1.1152, 'learning_rate': 6.357167975089588e-07, 'epoch': 0.77} + 77%|███████▋ | 5459/7045 [17:44:09<5:07:44, 11.64s/it] 78%|███████▊ | 5460/7045 [17:44:22<5:16:38, 11.99s/it] {'loss': 1.1172, 'learning_rate': 6.349511744833584e-07, 'epoch': 0.78} + 78%|███████▊ | 5460/7045 [17:44:22<5:16:38, 11.99s/it] 78%|███████▊ | 5461/7045 [17:44:33<5:12:14, 11.83s/it] {'loss': 1.1367, 'learning_rate': 6.341859457035812e-07, 'epoch': 0.78} + 78%|███████▊ | 5461/7045 [17:44:33<5:12:14, 11.83s/it] 78%|███████▊ | 5462/7045 [17:44:45<5:07:53, 11.67s/it] {'loss': 1.0918, 'learning_rate': 6.334211113313882e-07, 'epoch': 0.78} + 78%|███████▊ | 5462/7045 [17:44:45<5:07:53, 11.67s/it] 78%|███████▊ | 5463/7045 [17:44:56<5:03:51, 11.52s/it] {'loss': 1.125, 'learning_rate': 6.326566715284537e-07, 'epoch': 0.78} + 78%|███████▊ | 5463/7045 [17:44:56<5:03:51, 11.52s/it] 78%|███████▊ | 5464/7045 [17:45:09<5:13:20, 11.89s/it] {'loss': 1.0986, 'learning_rate': 6.318926264563696e-07, 'epoch': 0.78} + 78%|███████▊ | 5464/7045 [17:45:09<5:13:20, 11.89s/it] 78%|███████▊ | 5465/7045 [17:45:20<5:12:15, 11.86s/it] {'loss': 1.0405, 'learning_rate': 6.311289762766454e-07, 'epoch': 0.78} + 78%|███████▊ | 5465/7045 [17:45:20<5:12:15, 11.86s/it] 78%|███████▊ | 5466/7045 [17:45:31<5:05:14, 11.60s/it] {'loss': 1.1113, 'learning_rate': 6.303657211507053e-07, 'epoch': 0.78} + 78%|███████▊ | 5466/7045 [17:45:31<5:05:14, 11.60s/it] 78%|███████▊ | 5467/7045 [17:45:44<5:13:32, 11.92s/it] {'loss': 1.0698, 'learning_rate': 6.296028612398922e-07, 'epoch': 0.78} + 78%|███████▊ | 5467/7045 [17:45:44<5:13:32, 11.92s/it] 78%|███████▊ | 5468/7045 [17:45:55<5:05:12, 11.61s/it] {'loss': 1.0986, 'learning_rate': 6.28840396705463e-07, 'epoch': 0.78} + 78%|███████▊ | 5468/7045 [17:45:55<5:05:12, 11.61s/it] 78%|███████▊ | 5469/7045 [17:46:06<5:03:05, 11.54s/it] {'loss': 1.0869, 'learning_rate': 6.280783277085936e-07, 'epoch': 0.78} + 78%|███████▊ | 5469/7045 [17:46:06<5:03:05, 11.54s/it] 78%|███████▊ | 5470/7045 [17:46:19<5:13:33, 11.94s/it] {'loss': 1.0996, 'learning_rate': 6.273166544103737e-07, 'epoch': 0.78} + 78%|███████▊ | 5470/7045 [17:46:19<5:13:33, 11.94s/it] 78%|███████▊ | 5471/7045 [17:46:31<5:08:21, 11.75s/it] {'loss': 1.1069, 'learning_rate': 6.265553769718117e-07, 'epoch': 0.78} + 78%|███████▊ | 5471/7045 [17:46:31<5:08:21, 11.75s/it] 78%|███████▊ | 5472/7045 [17:46:41<5:01:46, 11.51s/it] {'loss': 1.0781, 'learning_rate': 6.2579449555383e-07, 'epoch': 0.78} + 78%|███████▊ | 5472/7045 [17:46:41<5:01:46, 11.51s/it] 78%|███████▊ | 5473/7045 [17:46:54<5:09:47, 11.82s/it] {'loss': 1.0811, 'learning_rate': 6.250340103172702e-07, 'epoch': 0.78} + 78%|███████▊ | 5473/7045 [17:46:54<5:09:47, 11.82s/it] 78%|███████▊ | 5474/7045 [17:47:06<5:09:48, 11.83s/it] {'loss': 1.105, 'learning_rate': 6.242739214228874e-07, 'epoch': 0.78} + 78%|███████▊ | 5474/7045 [17:47:06<5:09:48, 11.83s/it] 78%|███████▊ | 5475/7045 [17:47:18<5:12:34, 11.95s/it] {'loss': 1.1152, 'learning_rate': 6.235142290313537e-07, 'epoch': 0.78} + 78%|███████▊ | 5475/7045 [17:47:18<5:12:34, 11.95s/it] 78%|███████▊ | 5476/7045 [17:47:29<5:06:02, 11.70s/it] {'loss': 1.1021, 'learning_rate': 6.227549333032587e-07, 'epoch': 0.78} + 78%|███████▊ | 5476/7045 [17:47:29<5:06:02, 11.70s/it] 78%|███████▊ | 5477/7045 [17:47:40<4:59:39, 11.47s/it] {'loss': 1.1416, 'learning_rate': 6.219960343991058e-07, 'epoch': 0.78} + 78%|███████▊ | 5477/7045 [17:47:40<4:59:39, 11.47s/it] 78%|███████▊ | 5478/7045 [17:47:52<5:02:51, 11.60s/it] {'loss': 1.1001, 'learning_rate': 6.212375324793174e-07, 'epoch': 0.78} + 78%|███████▊ | 5478/7045 [17:47:52<5:02:51, 11.60s/it] 78%|███████▊ | 5479/7045 [17:48:03<4:57:59, 11.42s/it] {'loss': 1.1113, 'learning_rate': 6.204794277042298e-07, 'epoch': 0.78} + 78%|███████▊ | 5479/7045 [17:48:03<4:57:59, 11.42s/it] 78%|███████▊ | 5480/7045 [17:48:14<4:55:23, 11.32s/it] {'loss': 1.124, 'learning_rate': 6.197217202340949e-07, 'epoch': 0.78} + 78%|███████▊ | 5480/7045 [17:48:14<4:55:23, 11.32s/it] 78%|███████▊ | 5481/7045 [17:48:27<5:06:58, 11.78s/it] {'loss': 1.0786, 'learning_rate': 6.189644102290829e-07, 'epoch': 0.78} + 78%|███████▊ | 5481/7045 [17:48:27<5:06:58, 11.78s/it] 78%|███████▊ | 5482/7045 [17:48:38<5:03:21, 11.65s/it] {'loss': 1.127, 'learning_rate': 6.182074978492791e-07, 'epoch': 0.78} + 78%|███████▊ | 5482/7045 [17:48:38<5:03:21, 11.65s/it] 78%|███████▊ | 5483/7045 [17:48:49<4:58:56, 11.48s/it] {'loss': 1.1064, 'learning_rate': 6.174509832546832e-07, 'epoch': 0.78} + 78%|███████▊ | 5483/7045 [17:48:49<4:58:56, 11.48s/it] 78%|███████▊ | 5484/7045 [17:49:02<5:06:45, 11.79s/it] {'loss': 1.1113, 'learning_rate': 6.166948666052134e-07, 'epoch': 0.78} + 78%|███████▊ | 5484/7045 [17:49:02<5:06:45, 11.79s/it] 78%|███████▊ | 5485/7045 [17:49:13<5:03:42, 11.68s/it] {'loss': 1.0996, 'learning_rate': 6.159391480607019e-07, 'epoch': 0.78} + 78%|███████▊ | 5485/7045 [17:49:13<5:03:42, 11.68s/it] 78%|███████▊ | 5486/7045 [17:49:25<4:59:35, 11.53s/it] {'loss': 1.1494, 'learning_rate': 6.151838277808961e-07, 'epoch': 0.78} + 78%|███████▊ | 5486/7045 [17:49:25<4:59:35, 11.53s/it] 78%|███████▊ | 5487/7045 [17:49:36<4:56:21, 11.41s/it] {'loss': 1.126, 'learning_rate': 6.144289059254624e-07, 'epoch': 0.78} + 78%|███████▊ | 5487/7045 [17:49:36<4:56:21, 11.41s/it] 78%|███████▊ | 5488/7045 [17:49:47<4:54:01, 11.33s/it] {'loss': 1.1348, 'learning_rate': 6.13674382653979e-07, 'epoch': 0.78} + 78%|███████▊ | 5488/7045 [17:49:47<4:54:01, 11.33s/it] 78%|███████▊ | 5489/7045 [17:49:58<4:51:31, 11.24s/it] {'loss': 1.1553, 'learning_rate': 6.129202581259436e-07, 'epoch': 0.78} + 78%|███████▊ | 5489/7045 [17:49:58<4:51:31, 11.24s/it] 78%|███████▊ | 5490/7045 [17:50:09<4:50:36, 11.21s/it] {'loss': 1.1074, 'learning_rate': 6.121665325007669e-07, 'epoch': 0.78} + 78%|███████▊ | 5490/7045 [17:50:09<4:50:36, 11.21s/it] 78%|███████▊ | 5491/7045 [17:50:20<4:49:42, 11.19s/it] {'loss': 1.106, 'learning_rate': 6.114132059377756e-07, 'epoch': 0.78} + 78%|███████▊ | 5491/7045 [17:50:20<4:49:42, 11.19s/it] 78%|███████▊ | 5492/7045 [17:50:31<4:50:53, 11.24s/it] {'loss': 1.1162, 'learning_rate': 6.10660278596214e-07, 'epoch': 0.78} + 78%|███████▊ | 5492/7045 [17:50:31<4:50:53, 11.24s/it] 78%|███████▊ | 5493/7045 [17:50:43<4:51:19, 11.26s/it] {'loss': 1.1328, 'learning_rate': 6.099077506352391e-07, 'epoch': 0.78} + 78%|███████▊ | 5493/7045 [17:50:43<4:51:19, 11.26s/it] 78%|███████▊ | 5494/7045 [17:50:55<4:58:42, 11.56s/it] {'loss': 1.0854, 'learning_rate': 6.091556222139261e-07, 'epoch': 0.78} + 78%|███████▊ | 5494/7045 [17:50:55<4:58:42, 11.56s/it] 78%|███████▊ | 5495/7045 [17:51:06<4:56:10, 11.46s/it] {'loss': 1.1084, 'learning_rate': 6.08403893491265e-07, 'epoch': 0.78} + 78%|███████▊ | 5495/7045 [17:51:06<4:56:10, 11.46s/it] 78%|███████▊ | 5496/7045 [17:51:18<4:55:12, 11.43s/it] {'loss': 1.0859, 'learning_rate': 6.076525646261602e-07, 'epoch': 0.78} + 78%|███████▊ | 5496/7045 [17:51:18<4:55:12, 11.43s/it] 78%|███████▊ | 5497/7045 [17:51:29<4:54:57, 11.43s/it] {'loss': 1.1152, 'learning_rate': 6.069016357774321e-07, 'epoch': 0.78} + 78%|███████▊ | 5497/7045 [17:51:29<4:54:57, 11.43s/it] 78%|███████▊ | 5498/7045 [17:51:43<5:15:44, 12.25s/it] {'loss': 1.1045, 'learning_rate': 6.061511071038182e-07, 'epoch': 0.78} + 78%|███████▊ | 5498/7045 [17:51:43<5:15:44, 12.25s/it] 78%|███████▊ | 5499/7045 [17:51:57<5:26:18, 12.66s/it] {'loss': 1.0591, 'learning_rate': 6.054009787639678e-07, 'epoch': 0.78} + 78%|███████▊ | 5499/7045 [17:51:57<5:26:18, 12.66s/it] 78%|███████▊ | 5500/7045 [17:52:09<5:23:46, 12.57s/it] {'loss': 1.0186, 'learning_rate': 6.046512509164501e-07, 'epoch': 0.78} + 78%|███████▊ | 5500/7045 [17:52:09<5:23:46, 12.57s/it] 78%|███████▊ | 5501/7045 [17:52:20<5:10:04, 12.05s/it] {'loss': 1.0664, 'learning_rate': 6.039019237197461e-07, 'epoch': 0.78} + 78%|███████▊ | 5501/7045 [17:52:20<5:10:04, 12.05s/it] 78%|███████▊ | 5502/7045 [17:52:32<5:06:20, 11.91s/it] {'loss': 1.1133, 'learning_rate': 6.031529973322528e-07, 'epoch': 0.78} + 78%|███████▊ | 5502/7045 [17:52:32<5:06:20, 11.91s/it] 78%|███████▊ | 5503/7045 [17:52:44<5:09:39, 12.05s/it] {'loss': 1.0913, 'learning_rate': 6.024044719122843e-07, 'epoch': 0.78} + 78%|███████▊ | 5503/7045 [17:52:44<5:09:39, 12.05s/it] 78%|███████▊ | 5504/7045 [17:52:58<5:21:09, 12.50s/it] {'loss': 1.1064, 'learning_rate': 6.016563476180681e-07, 'epoch': 0.78} + 78%|███████▊ | 5504/7045 [17:52:58<5:21:09, 12.50s/it] 78%|███████▊ | 5505/7045 [17:53:09<5:11:05, 12.12s/it] {'loss': 1.1162, 'learning_rate': 6.009086246077458e-07, 'epoch': 0.78} + 78%|███████▊ | 5505/7045 [17:53:09<5:11:05, 12.12s/it] 78%|███████▊ | 5506/7045 [17:53:20<5:01:05, 11.74s/it] {'loss': 1.0537, 'learning_rate': 6.001613030393791e-07, 'epoch': 0.78} + 78%|███████▊ | 5506/7045 [17:53:20<5:01:05, 11.74s/it] 78%|███████▊ | 5507/7045 [17:53:31<4:59:12, 11.67s/it] {'loss': 1.0854, 'learning_rate': 5.994143830709395e-07, 'epoch': 0.78} + 78%|███████▊ | 5507/7045 [17:53:31<4:59:12, 11.67s/it] 78%|███████▊ | 5508/7045 [17:53:42<4:54:19, 11.49s/it] {'loss': 1.0542, 'learning_rate': 5.986678648603156e-07, 'epoch': 0.78} + 78%|███████▊ | 5508/7045 [17:53:42<4:54:19, 11.49s/it] 78%|███████▊ | 5509/7045 [17:53:55<5:05:35, 11.94s/it] {'loss': 1.0747, 'learning_rate': 5.979217485653118e-07, 'epoch': 0.78} + 78%|███████▊ | 5509/7045 [17:53:55<5:05:35, 11.94s/it] 78%|███████▊ | 5510/7045 [17:54:06<4:58:47, 11.68s/it] {'loss': 1.1064, 'learning_rate': 5.971760343436464e-07, 'epoch': 0.78} + 78%|███████▊ | 5510/7045 [17:54:06<4:58:47, 11.68s/it] 78%|███████▊ | 5511/7045 [17:54:17<4:54:24, 11.52s/it] {'loss': 1.0801, 'learning_rate': 5.964307223529539e-07, 'epoch': 0.78} + 78%|███████▊ | 5511/7045 [17:54:17<4:54:24, 11.52s/it] 78%|███████▊ | 5512/7045 [17:54:29<4:53:34, 11.49s/it] {'loss': 1.0991, 'learning_rate': 5.956858127507828e-07, 'epoch': 0.78} + 78%|███████▊ | 5512/7045 [17:54:29<4:53:34, 11.49s/it] 78%|███████▊ | 5513/7045 [17:54:40<4:50:32, 11.38s/it] {'loss': 1.1006, 'learning_rate': 5.949413056945957e-07, 'epoch': 0.78} + 78%|███████▊ | 5513/7045 [17:54:40<4:50:32, 11.38s/it] 78%|███████▊ | 5514/7045 [17:54:51<4:47:37, 11.27s/it] {'loss': 1.1113, 'learning_rate': 5.941972013417729e-07, 'epoch': 0.78} + 78%|███████▊ | 5514/7045 [17:54:51<4:47:37, 11.27s/it] 78%|███████▊ | 5515/7045 [17:55:05<5:05:32, 11.98s/it] {'loss': 1.0723, 'learning_rate': 5.934534998496072e-07, 'epoch': 0.78} + 78%|███████▊ | 5515/7045 [17:55:05<5:05:32, 11.98s/it] 78%|███████▊ | 5516/7045 [17:55:17<5:09:22, 12.14s/it] {'loss': 1.1338, 'learning_rate': 5.927102013753058e-07, 'epoch': 0.78} + 78%|███████▊ | 5516/7045 [17:55:17<5:09:22, 12.14s/it] 78%|███████▊ | 5517/7045 [17:55:29<5:03:09, 11.90s/it] {'loss': 1.1182, 'learning_rate': 5.919673060759931e-07, 'epoch': 0.78} + 78%|███████▊ | 5517/7045 [17:55:29<5:03:09, 11.90s/it] 78%|███████▊ | 5518/7045 [17:55:41<5:07:01, 12.06s/it] {'loss': 1.0713, 'learning_rate': 5.912248141087076e-07, 'epoch': 0.78} + 78%|███████▊ | 5518/7045 [17:55:41<5:07:01, 12.06s/it] 78%|███████▊ | 5519/7045 [17:55:54<5:11:50, 12.26s/it] {'loss': 1.0898, 'learning_rate': 5.904827256304003e-07, 'epoch': 0.78} + 78%|███████▊ | 5519/7045 [17:55:54<5:11:50, 12.26s/it] 78%|███████▊ | 5520/7045 [17:56:05<5:02:15, 11.89s/it] {'loss': 1.1099, 'learning_rate': 5.897410407979401e-07, 'epoch': 0.78} + 78%|███████▊ | 5520/7045 [17:56:05<5:02:15, 11.89s/it] 78%|███████▊ | 5521/7045 [17:56:18<5:09:17, 12.18s/it] {'loss': 1.061, 'learning_rate': 5.889997597681074e-07, 'epoch': 0.78} + 78%|███████▊ | 5521/7045 [17:56:18<5:09:17, 12.18s/it] 78%|███████▊ | 5522/7045 [17:56:29<5:01:41, 11.89s/it] {'loss': 1.1006, 'learning_rate': 5.882588826976007e-07, 'epoch': 0.78} + 78%|███████▊ | 5522/7045 [17:56:29<5:01:41, 11.89s/it] 78%|███████▊ | 5523/7045 [17:56:40<4:56:27, 11.69s/it] {'loss': 1.1699, 'learning_rate': 5.875184097430303e-07, 'epoch': 0.78} + 78%|███████▊ | 5523/7045 [17:56:40<4:56:27, 11.69s/it] 78%|███████▊ | 5524/7045 [17:56:51<4:54:24, 11.61s/it] {'loss': 1.0986, 'learning_rate': 5.867783410609215e-07, 'epoch': 0.78} + 78%|███████▊ | 5524/7045 [17:56:51<4:54:24, 11.61s/it] 78%|███████▊ | 5525/7045 [17:57:03<4:51:35, 11.51s/it] {'loss': 1.1436, 'learning_rate': 5.860386768077161e-07, 'epoch': 0.78} + 78%|███████▊ | 5525/7045 [17:57:03<4:51:35, 11.51s/it] 78%|███████▊ | 5526/7045 [17:57:14<4:51:01, 11.50s/it] {'loss': 1.1138, 'learning_rate': 5.852994171397683e-07, 'epoch': 0.78} + 78%|███████▊ | 5526/7045 [17:57:14<4:51:01, 11.50s/it] 78%|███████▊ | 5527/7045 [17:57:25<4:45:49, 11.30s/it] {'loss': 1.0503, 'learning_rate': 5.845605622133466e-07, 'epoch': 0.78} + 78%|███████▊ | 5527/7045 [17:57:25<4:45:49, 11.30s/it] 78%|███████▊ | 5528/7045 [17:57:36<4:44:22, 11.25s/it] {'loss': 1.1201, 'learning_rate': 5.838221121846363e-07, 'epoch': 0.78} + 78%|███████▊ | 5528/7045 [17:57:36<4:44:22, 11.25s/it] 78%|███████▊ | 5529/7045 [17:57:48<4:46:49, 11.35s/it] {'loss': 1.1201, 'learning_rate': 5.830840672097346e-07, 'epoch': 0.78} + 78%|███████▊ | 5529/7045 [17:57:48<4:46:49, 11.35s/it] 78%|███████▊ | 5530/7045 [17:57:59<4:45:47, 11.32s/it] {'loss': 1.0977, 'learning_rate': 5.823464274446542e-07, 'epoch': 0.78} + 78%|███████▊ | 5530/7045 [17:57:59<4:45:47, 11.32s/it] 79%|███████▊ | 5531/7045 [17:58:10<4:45:09, 11.30s/it] {'loss': 1.1328, 'learning_rate': 5.816091930453232e-07, 'epoch': 0.79} + 79%|███████▊ | 5531/7045 [17:58:10<4:45:09, 11.30s/it] 79%|███████▊ | 5532/7045 [17:58:22<4:45:40, 11.33s/it] {'loss': 1.123, 'learning_rate': 5.808723641675823e-07, 'epoch': 0.79} + 79%|███████▊ | 5532/7045 [17:58:22<4:45:40, 11.33s/it] 79%|███████▊ | 5533/7045 [17:58:33<4:44:08, 11.28s/it] {'loss': 1.1084, 'learning_rate': 5.801359409671858e-07, 'epoch': 0.79} + 79%|███████▊ | 5533/7045 [17:58:33<4:44:08, 11.28s/it] 79%|███████▊ | 5534/7045 [17:58:44<4:41:58, 11.20s/it] {'loss': 1.1357, 'learning_rate': 5.793999235998055e-07, 'epoch': 0.79} + 79%|███████▊ | 5534/7045 [17:58:44<4:41:58, 11.20s/it] 79%|███████▊ | 5535/7045 [17:58:56<4:52:08, 11.61s/it] {'loss': 1.1191, 'learning_rate': 5.786643122210234e-07, 'epoch': 0.79} + 79%|███████▊ | 5535/7045 [17:58:56<4:52:08, 11.61s/it] 79%|███████▊ | 5536/7045 [17:59:10<5:10:16, 12.34s/it] {'loss': 1.0879, 'learning_rate': 5.779291069863396e-07, 'epoch': 0.79} + 79%|███████▊ | 5536/7045 [17:59:10<5:10:16, 12.34s/it] 79%|███████▊ | 5537/7045 [17:59:21<4:59:59, 11.94s/it] {'loss': 1.1455, 'learning_rate': 5.771943080511655e-07, 'epoch': 0.79} + 79%|███████▊ | 5537/7045 [17:59:21<4:59:59, 11.94s/it] 79%|███████▊ | 5538/7045 [17:59:34<5:01:30, 12.00s/it] {'loss': 1.0293, 'learning_rate': 5.764599155708267e-07, 'epoch': 0.79} + 79%|███████▊ | 5538/7045 [17:59:34<5:01:30, 12.00s/it] 79%|███████▊ | 5539/7045 [17:59:45<4:59:14, 11.92s/it] {'loss': 1.0996, 'learning_rate': 5.757259297005657e-07, 'epoch': 0.79} + 79%|███████▊ | 5539/7045 [17:59:45<4:59:14, 11.92s/it] 79%|███████▊ | 5540/7045 [17:59:57<4:53:49, 11.71s/it] {'loss': 1.0913, 'learning_rate': 5.749923505955352e-07, 'epoch': 0.79} + 79%|███████▊ | 5540/7045 [17:59:57<4:53:49, 11.71s/it] 79%|███████▊ | 5541/7045 [18:00:09<4:59:43, 11.96s/it] {'loss': 1.0654, 'learning_rate': 5.742591784108043e-07, 'epoch': 0.79} + 79%|███████▊ | 5541/7045 [18:00:09<4:59:43, 11.96s/it] 79%|███████▊ | 5542/7045 [18:00:22<5:07:42, 12.28s/it] {'loss': 1.127, 'learning_rate': 5.735264133013568e-07, 'epoch': 0.79} + 79%|███████▊ | 5542/7045 [18:00:22<5:07:42, 12.28s/it] 79%|███████▊ | 5543/7045 [18:00:35<5:15:02, 12.59s/it] {'loss': 1.0542, 'learning_rate': 5.727940554220885e-07, 'epoch': 0.79} + 79%|███████▊ | 5543/7045 [18:00:35<5:15:02, 12.59s/it] 79%|███████▊ | 5544/7045 [18:00:47<5:04:30, 12.17s/it] {'loss': 1.1436, 'learning_rate': 5.720621049278085e-07, 'epoch': 0.79} + 79%|███████▊ | 5544/7045 [18:00:47<5:04:30, 12.17s/it] 79%|███████▊ | 5545/7045 [18:00:57<4:54:40, 11.79s/it] {'loss': 1.1201, 'learning_rate': 5.713305619732431e-07, 'epoch': 0.79} + 79%|███████▊ | 5545/7045 [18:00:57<4:54:40, 11.79s/it] 79%|███████▊ | 5546/7045 [18:01:10<4:58:11, 11.94s/it] {'loss': 1.1313, 'learning_rate': 5.705994267130291e-07, 'epoch': 0.79} + 79%|███████▊ | 5546/7045 [18:01:10<4:58:11, 11.94s/it] 79%|███████▊ | 5547/7045 [18:01:21<4:51:39, 11.68s/it] {'loss': 1.1035, 'learning_rate': 5.698686993017197e-07, 'epoch': 0.79} + 79%|███████▊ | 5547/7045 [18:01:21<4:51:39, 11.68s/it] 79%|███████▉ | 5548/7045 [18:01:34<5:04:15, 12.20s/it] {'loss': 1.1055, 'learning_rate': 5.691383798937802e-07, 'epoch': 0.79} + 79%|███████▉ | 5548/7045 [18:01:34<5:04:15, 12.20s/it] 79%|███████▉ | 5549/7045 [18:01:47<5:11:07, 12.48s/it] {'loss': 1.0879, 'learning_rate': 5.684084686435889e-07, 'epoch': 0.79} + 79%|███████▉ | 5549/7045 [18:01:47<5:11:07, 12.48s/it] 79%|███████▉ | 5550/7045 [18:01:58<4:59:22, 12.02s/it] {'loss': 1.0825, 'learning_rate': 5.676789657054412e-07, 'epoch': 0.79} + 79%|███████▉ | 5550/7045 [18:01:58<4:59:22, 12.02s/it] 79%|███████▉ | 5551/7045 [18:02:10<4:54:18, 11.82s/it] {'loss': 1.1113, 'learning_rate': 5.669498712335419e-07, 'epoch': 0.79} + 79%|███████▉ | 5551/7045 [18:02:10<4:54:18, 11.82s/it] 79%|███████▉ | 5552/7045 [18:02:22<4:57:11, 11.94s/it] {'loss': 1.0801, 'learning_rate': 5.662211853820137e-07, 'epoch': 0.79} + 79%|███████▉ | 5552/7045 [18:02:22<4:57:11, 11.94s/it] 79%|███████▉ | 5553/7045 [18:02:33<4:50:39, 11.69s/it] {'loss': 1.1279, 'learning_rate': 5.654929083048891e-07, 'epoch': 0.79} + 79%|███████▉ | 5553/7045 [18:02:33<4:50:39, 11.69s/it] 79%|███████▉ | 5554/7045 [18:02:44<4:47:06, 11.55s/it] {'loss': 1.1299, 'learning_rate': 5.647650401561172e-07, 'epoch': 0.79} + 79%|███████▉ | 5554/7045 [18:02:44<4:47:06, 11.55s/it] 79%|███████▉ | 5555/7045 [18:02:56<4:47:43, 11.59s/it] {'loss': 1.1152, 'learning_rate': 5.640375810895581e-07, 'epoch': 0.79} + 79%|███████▉ | 5555/7045 [18:02:56<4:47:43, 11.59s/it] 79%|███████▉ | 5556/7045 [18:03:07<4:43:32, 11.43s/it] {'loss': 1.1211, 'learning_rate': 5.633105312589881e-07, 'epoch': 0.79} + 79%|███████▉ | 5556/7045 [18:03:07<4:43:32, 11.43s/it] 79%|███████▉ | 5557/7045 [18:03:18<4:39:54, 11.29s/it] {'loss': 1.1094, 'learning_rate': 5.625838908180944e-07, 'epoch': 0.79} + 79%|███████▉ | 5557/7045 [18:03:18<4:39:54, 11.29s/it] 79%|███████▉ | 5558/7045 [18:03:29<4:40:33, 11.32s/it] {'loss': 1.1011, 'learning_rate': 5.618576599204797e-07, 'epoch': 0.79} + 79%|███████▉ | 5558/7045 [18:03:29<4:40:33, 11.32s/it] 79%|███████▉ | 5559/7045 [18:03:40<4:38:29, 11.24s/it] {'loss': 1.1338, 'learning_rate': 5.611318387196593e-07, 'epoch': 0.79} + 79%|███████▉ | 5559/7045 [18:03:40<4:38:29, 11.24s/it] 79%|███████▉ | 5560/7045 [18:03:51<4:35:28, 11.13s/it] {'loss': 1.1133, 'learning_rate': 5.604064273690607e-07, 'epoch': 0.79} + 79%|███████▉ | 5560/7045 [18:03:51<4:35:28, 11.13s/it] 79%|███████▉ | 5561/7045 [18:04:03<4:38:15, 11.25s/it] {'loss': 1.1074, 'learning_rate': 5.596814260220274e-07, 'epoch': 0.79} + 79%|███████▉ | 5561/7045 [18:04:03<4:38:15, 11.25s/it] 79%|███████▉ | 5562/7045 [18:04:14<4:35:34, 11.15s/it] {'loss': 1.0796, 'learning_rate': 5.589568348318133e-07, 'epoch': 0.79} + 79%|███████▉ | 5562/7045 [18:04:14<4:35:34, 11.15s/it] 79%|███████▉ | 5563/7045 [18:04:25<4:38:41, 11.28s/it] {'loss': 1.1357, 'learning_rate': 5.58232653951589e-07, 'epoch': 0.79} + 79%|███████▉ | 5563/7045 [18:04:25<4:38:41, 11.28s/it] 79%|███████▉ | 5564/7045 [18:04:37<4:41:00, 11.38s/it] {'loss': 1.1201, 'learning_rate': 5.57508883534435e-07, 'epoch': 0.79} + 79%|███████▉ | 5564/7045 [18:04:37<4:41:00, 11.38s/it] 79%|███████▉ | 5565/7045 [18:04:48<4:37:36, 11.25s/it] {'loss': 1.0889, 'learning_rate': 5.567855237333461e-07, 'epoch': 0.79} + 79%|███████▉ | 5565/7045 [18:04:48<4:37:36, 11.25s/it] 79%|███████▉ | 5566/7045 [18:04:59<4:36:46, 11.23s/it] {'loss': 1.126, 'learning_rate': 5.560625747012316e-07, 'epoch': 0.79} + 79%|███████▉ | 5566/7045 [18:04:59<4:36:46, 11.23s/it] 79%|███████▉ | 5567/7045 [18:05:10<4:35:20, 11.18s/it] {'loss': 1.0986, 'learning_rate': 5.553400365909134e-07, 'epoch': 0.79} + 79%|███████▉ | 5567/7045 [18:05:10<4:35:20, 11.18s/it] 79%|███████▉ | 5568/7045 [18:05:24<4:53:59, 11.94s/it] {'loss': 1.0723, 'learning_rate': 5.546179095551249e-07, 'epoch': 0.79} + 79%|███████▉ | 5568/7045 [18:05:24<4:53:59, 11.94s/it] 79%|███████▉ | 5569/7045 [18:05:35<4:51:28, 11.85s/it] {'loss': 1.105, 'learning_rate': 5.538961937465154e-07, 'epoch': 0.79} + 79%|███████▉ | 5569/7045 [18:05:35<4:51:28, 11.85s/it] 79%|███████▉ | 5570/7045 [18:05:47<4:50:17, 11.81s/it] {'loss': 1.125, 'learning_rate': 5.531748893176447e-07, 'epoch': 0.79} + 79%|███████▉ | 5570/7045 [18:05:47<4:50:17, 11.81s/it] 79%|███████▉ | 5571/7045 [18:05:59<4:51:10, 11.85s/it] {'loss': 1.1162, 'learning_rate': 5.524539964209863e-07, 'epoch': 0.79} + 79%|███████▉ | 5571/7045 [18:05:59<4:51:10, 11.85s/it] 79%|███████▉ | 5572/7045 [18:06:11<4:48:00, 11.73s/it] {'loss': 1.1328, 'learning_rate': 5.517335152089287e-07, 'epoch': 0.79} + 79%|███████▉ | 5572/7045 [18:06:11<4:48:00, 11.73s/it] 79%|███████▉ | 5573/7045 [18:06:24<5:02:53, 12.35s/it] {'loss': 1.0747, 'learning_rate': 5.510134458337699e-07, 'epoch': 0.79} + 79%|███████▉ | 5573/7045 [18:06:24<5:02:53, 12.35s/it] 79%|███████▉ | 5574/7045 [18:06:37<5:06:17, 12.49s/it] {'loss': 1.1074, 'learning_rate': 5.502937884477244e-07, 'epoch': 0.79} + 79%|███████▉ | 5574/7045 [18:06:37<5:06:17, 12.49s/it] 79%|███████▉ | 5575/7045 [18:06:48<4:56:45, 12.11s/it] {'loss': 1.0854, 'learning_rate': 5.495745432029173e-07, 'epoch': 0.79} + 79%|███████▉ | 5575/7045 [18:06:48<4:56:45, 12.11s/it] 79%|███████▉ | 5576/7045 [18:07:02<5:04:23, 12.43s/it] {'loss': 1.0752, 'learning_rate': 5.488557102513862e-07, 'epoch': 0.79} + 79%|███████▉ | 5576/7045 [18:07:02<5:04:23, 12.43s/it] 79%|███████▉ | 5577/7045 [18:07:13<4:54:43, 12.05s/it] {'loss': 1.0908, 'learning_rate': 5.481372897450843e-07, 'epoch': 0.79} + 79%|███████▉ | 5577/7045 [18:07:13<4:54:43, 12.05s/it] 79%|███████▉ | 5578/7045 [18:07:24<4:45:29, 11.68s/it] {'loss': 1.0977, 'learning_rate': 5.474192818358742e-07, 'epoch': 0.79} + 79%|███████▉ | 5578/7045 [18:07:24<4:45:29, 11.68s/it] 79%|███████▉ | 5579/7045 [18:07:35<4:44:13, 11.63s/it] {'loss': 1.1113, 'learning_rate': 5.467016866755337e-07, 'epoch': 0.79} + 79%|███████▉ | 5579/7045 [18:07:35<4:44:13, 11.63s/it] 79%|███████▉ | 5580/7045 [18:07:47<4:45:30, 11.69s/it] {'loss': 1.0742, 'learning_rate': 5.459845044157536e-07, 'epoch': 0.79} + 79%|███████▉ | 5580/7045 [18:07:47<4:45:30, 11.69s/it] 79%|███████▉ | 5581/7045 [18:07:58<4:42:21, 11.57s/it] {'loss': 1.0952, 'learning_rate': 5.452677352081353e-07, 'epoch': 0.79} + 79%|███████▉ | 5581/7045 [18:07:58<4:42:21, 11.57s/it] 79%|███████▉ | 5582/7045 [18:08:11<4:48:46, 11.84s/it] {'loss': 1.0986, 'learning_rate': 5.445513792041937e-07, 'epoch': 0.79} + 79%|███████▉ | 5582/7045 [18:08:11<4:48:46, 11.84s/it] 79%|███████▉ | 5583/7045 [18:08:23<4:49:46, 11.89s/it] {'loss': 1.0591, 'learning_rate': 5.438354365553578e-07, 'epoch': 0.79} + 79%|███████▉ | 5583/7045 [18:08:23<4:49:46, 11.89s/it] 79%|███████▉ | 5584/7045 [18:08:34<4:42:48, 11.61s/it] {'loss': 1.1201, 'learning_rate': 5.431199074129667e-07, 'epoch': 0.79} + 79%|███████▉ | 5584/7045 [18:08:34<4:42:48, 11.61s/it] 79%|███████▉ | 5585/7045 [18:08:46<4:49:03, 11.88s/it] {'loss': 1.1045, 'learning_rate': 5.424047919282748e-07, 'epoch': 0.79} + 79%|███████▉ | 5585/7045 [18:08:46<4:49:03, 11.88s/it] 79%|███████▉ | 5586/7045 [18:08:57<4:43:00, 11.64s/it] {'loss': 1.0918, 'learning_rate': 5.416900902524475e-07, 'epoch': 0.79} + 79%|███████▉ | 5586/7045 [18:08:57<4:43:00, 11.64s/it] 79%|███████▉ | 5587/7045 [18:09:10<4:50:22, 11.95s/it] {'loss': 1.1123, 'learning_rate': 5.409758025365619e-07, 'epoch': 0.79} + 79%|███████▉ | 5587/7045 [18:09:10<4:50:22, 11.95s/it] 79%|███████▉ | 5588/7045 [18:09:22<4:50:35, 11.97s/it] {'loss': 1.105, 'learning_rate': 5.402619289316102e-07, 'epoch': 0.79} + 79%|███████▉ | 5588/7045 [18:09:22<4:50:35, 11.97s/it] 79%|███████▉ | 5589/7045 [18:09:34<4:48:33, 11.89s/it] {'loss': 1.1016, 'learning_rate': 5.395484695884945e-07, 'epoch': 0.79} + 79%|███████▉ | 5589/7045 [18:09:34<4:48:33, 11.89s/it] 79%|███████▉ | 5590/7045 [18:09:45<4:44:10, 11.72s/it] {'loss': 1.1245, 'learning_rate': 5.388354246580294e-07, 'epoch': 0.79} + 79%|███████▉ | 5590/7045 [18:09:45<4:44:10, 11.72s/it] 79%|███████▉ | 5591/7045 [18:09:56<4:39:10, 11.52s/it] {'loss': 1.0986, 'learning_rate': 5.381227942909451e-07, 'epoch': 0.79} + 79%|███████▉ | 5591/7045 [18:09:56<4:39:10, 11.52s/it] 79%|███████▉ | 5592/7045 [18:10:09<4:53:32, 12.12s/it] {'loss': 1.125, 'learning_rate': 5.374105786378811e-07, 'epoch': 0.79} + 79%|███████▉ | 5592/7045 [18:10:09<4:53:32, 12.12s/it] 79%|███████▉ | 5593/7045 [18:10:21<4:50:07, 11.99s/it] {'loss': 1.1348, 'learning_rate': 5.36698777849389e-07, 'epoch': 0.79} + 79%|███████▉ | 5593/7045 [18:10:21<4:50:07, 11.99s/it] 79%|███████▉ | 5594/7045 [18:10:32<4:42:22, 11.68s/it] {'loss': 1.1377, 'learning_rate': 5.359873920759354e-07, 'epoch': 0.79} + 79%|███████▉ | 5594/7045 [18:10:32<4:42:22, 11.68s/it] 79%|███████▉ | 5595/7045 [18:10:44<4:42:15, 11.68s/it] {'loss': 1.1064, 'learning_rate': 5.35276421467896e-07, 'epoch': 0.79} + 79%|███████▉ | 5595/7045 [18:10:44<4:42:15, 11.68s/it] 79%|███████▉ | 5596/7045 [18:10:55<4:38:16, 11.52s/it] {'loss': 1.0767, 'learning_rate': 5.345658661755615e-07, 'epoch': 0.79} + 79%|███████▉ | 5596/7045 [18:10:55<4:38:16, 11.52s/it] 79%|███████▉ | 5597/7045 [18:11:06<4:34:27, 11.37s/it] {'loss': 1.1357, 'learning_rate': 5.338557263491329e-07, 'epoch': 0.79} + 79%|███████▉ | 5597/7045 [18:11:06<4:34:27, 11.37s/it] 79%|███████▉ | 5598/7045 [18:11:18<4:35:41, 11.43s/it] {'loss': 1.1104, 'learning_rate': 5.331460021387236e-07, 'epoch': 0.79} + 79%|███████▉ | 5598/7045 [18:11:18<4:35:41, 11.43s/it] 79%|███████▉ | 5599/7045 [18:11:29<4:32:56, 11.33s/it] {'loss': 1.0967, 'learning_rate': 5.324366936943609e-07, 'epoch': 0.79} + 79%|███████▉ | 5599/7045 [18:11:29<4:32:56, 11.33s/it] 79%|███████▉ | 5600/7045 [18:11:40<4:35:57, 11.46s/it] {'loss': 1.0796, 'learning_rate': 5.317278011659824e-07, 'epoch': 0.79} + 79%|███████▉ | 5600/7045 [18:11:40<4:35:57, 11.46s/it] 80%|███████▉ | 5601/7045 [18:11:52<4:36:35, 11.49s/it] {'loss': 1.1201, 'learning_rate': 5.31019324703437e-07, 'epoch': 0.8} + 80%|███████▉ | 5601/7045 [18:11:52<4:36:35, 11.49s/it] 80%|███████▉ | 5602/7045 [18:12:03<4:32:25, 11.33s/it] {'loss': 1.1084, 'learning_rate': 5.303112644564881e-07, 'epoch': 0.8} + 80%|███████▉ | 5602/7045 [18:12:03<4:32:25, 11.33s/it] 80%|███████▉ | 5603/7045 [18:12:16<4:41:37, 11.72s/it] {'loss': 1.0615, 'learning_rate': 5.296036205748106e-07, 'epoch': 0.8} + 80%|███████▉ | 5603/7045 [18:12:16<4:41:37, 11.72s/it] 80%|███████▉ | 5604/7045 [18:12:28<4:47:10, 11.96s/it] {'loss': 1.0435, 'learning_rate': 5.288963932079893e-07, 'epoch': 0.8} + 80%|███████▉ | 5604/7045 [18:12:28<4:47:10, 11.96s/it] 80%|███████▉ | 5605/7045 [18:12:39<4:42:07, 11.76s/it] {'loss': 1.1104, 'learning_rate': 5.281895825055239e-07, 'epoch': 0.8} + 80%|███████▉ | 5605/7045 [18:12:39<4:42:07, 11.76s/it] 80%|███████▉ | 5606/7045 [18:12:50<4:37:05, 11.55s/it] {'loss': 1.1309, 'learning_rate': 5.27483188616823e-07, 'epoch': 0.8} + 80%|███████▉ | 5606/7045 [18:12:50<4:37:05, 11.55s/it] 80%|███████▉ | 5607/7045 [18:13:02<4:35:44, 11.51s/it] {'loss': 1.0908, 'learning_rate': 5.267772116912101e-07, 'epoch': 0.8} + 80%|███████▉ | 5607/7045 [18:13:02<4:35:44, 11.51s/it] 80%|███████▉ | 5608/7045 [18:13:13<4:30:17, 11.29s/it] {'loss': 1.0811, 'learning_rate': 5.26071651877918e-07, 'epoch': 0.8} + 80%|███████▉ | 5608/7045 [18:13:13<4:30:17, 11.29s/it] 80%|███████▉ | 5609/7045 [18:13:23<4:27:02, 11.16s/it] {'loss': 1.1055, 'learning_rate': 5.253665093260924e-07, 'epoch': 0.8} + 80%|███████▉ | 5609/7045 [18:13:23<4:27:02, 11.16s/it] 80%|███████▉ | 5610/7045 [18:13:35<4:28:09, 11.21s/it] {'loss': 1.1045, 'learning_rate': 5.246617841847915e-07, 'epoch': 0.8} + 80%|███████▉ | 5610/7045 [18:13:35<4:28:09, 11.21s/it] 80%|███████▉ | 5611/7045 [18:13:46<4:28:32, 11.24s/it] {'loss': 1.1084, 'learning_rate': 5.239574766029842e-07, 'epoch': 0.8} + 80%|███████▉ | 5611/7045 [18:13:46<4:28:32, 11.24s/it] 80%|███████▉ | 5612/7045 [18:13:57<4:28:05, 11.23s/it] {'loss': 1.0781, 'learning_rate': 5.232535867295508e-07, 'epoch': 0.8} + 80%|███████▉ | 5612/7045 [18:13:57<4:28:05, 11.23s/it] 80%|███████▉ | 5613/7045 [18:14:09<4:30:04, 11.32s/it] {'loss': 1.1211, 'learning_rate': 5.225501147132852e-07, 'epoch': 0.8} + 80%|███████▉ | 5613/7045 [18:14:09<4:30:04, 11.32s/it] 80%|███████▉ | 5614/7045 [18:14:20<4:31:19, 11.38s/it] {'loss': 1.1387, 'learning_rate': 5.218470607028906e-07, 'epoch': 0.8} + 80%|███████▉ | 5614/7045 [18:14:20<4:31:19, 11.38s/it] 80%|███████▉ | 5615/7045 [18:14:33<4:38:14, 11.67s/it] {'loss': 1.085, 'learning_rate': 5.211444248469838e-07, 'epoch': 0.8} + 80%|███████▉ | 5615/7045 [18:14:33<4:38:14, 11.67s/it] 80%|███████▉ | 5616/7045 [18:14:44<4:33:29, 11.48s/it] {'loss': 1.1279, 'learning_rate': 5.204422072940926e-07, 'epoch': 0.8} + 80%|███████▉ | 5616/7045 [18:14:44<4:33:29, 11.48s/it] 80%|███████▉ | 5617/7045 [18:14:55<4:29:16, 11.31s/it] {'loss': 1.126, 'learning_rate': 5.197404081926554e-07, 'epoch': 0.8} + 80%|███████▉ | 5617/7045 [18:14:55<4:29:16, 11.31s/it] 80%|███████▉ | 5618/7045 [18:15:08<4:41:45, 11.85s/it] {'loss': 1.0825, 'learning_rate': 5.19039027691024e-07, 'epoch': 0.8} + 80%|███████▉ | 5618/7045 [18:15:08<4:41:45, 11.85s/it] 80%|███████▉ | 5619/7045 [18:15:19<4:36:03, 11.62s/it] {'loss': 1.105, 'learning_rate': 5.183380659374602e-07, 'epoch': 0.8} + 80%|███████▉ | 5619/7045 [18:15:19<4:36:03, 11.62s/it] 80%|███████▉ | 5620/7045 [18:15:31<4:42:21, 11.89s/it] {'loss': 1.04, 'learning_rate': 5.176375230801364e-07, 'epoch': 0.8} + 80%|███████▉ | 5620/7045 [18:15:31<4:42:21, 11.89s/it] 80%|███████▉ | 5621/7045 [18:15:44<4:44:48, 12.00s/it] {'loss': 1.0825, 'learning_rate': 5.169373992671398e-07, 'epoch': 0.8} + 80%|███████▉ | 5621/7045 [18:15:44<4:44:48, 12.00s/it] 80%|███████▉ | 5622/7045 [18:15:56<4:46:21, 12.07s/it] {'loss': 1.0903, 'learning_rate': 5.162376946464662e-07, 'epoch': 0.8} + 80%|███████▉ | 5622/7045 [18:15:56<4:46:21, 12.07s/it] 80%|███████▉ | 5623/7045 [18:16:07<4:37:21, 11.70s/it] {'loss': 1.1143, 'learning_rate': 5.155384093660229e-07, 'epoch': 0.8} + 80%|███████▉ | 5623/7045 [18:16:07<4:37:21, 11.70s/it] 80%|███████▉ | 5624/7045 [18:16:18<4:31:24, 11.46s/it] {'loss': 1.0928, 'learning_rate': 5.148395435736303e-07, 'epoch': 0.8} + 80%|███████▉ | 5624/7045 [18:16:18<4:31:24, 11.46s/it] 80%|███████▉ | 5625/7045 [18:16:29<4:29:41, 11.40s/it] {'loss': 1.127, 'learning_rate': 5.141410974170183e-07, 'epoch': 0.8} + 80%|███████▉ | 5625/7045 [18:16:29<4:29:41, 11.40s/it] 80%|███████▉ | 5626/7045 [18:16:40<4:27:08, 11.30s/it] {'loss': 1.123, 'learning_rate': 5.134430710438296e-07, 'epoch': 0.8} + 80%|███████▉ | 5626/7045 [18:16:40<4:27:08, 11.30s/it] 80%|███████▉ | 5627/7045 [18:16:52<4:30:29, 11.45s/it] {'loss': 1.1162, 'learning_rate': 5.127454646016164e-07, 'epoch': 0.8} + 80%|███████▉ | 5627/7045 [18:16:52<4:30:29, 11.45s/it] 80%|███████▉ | 5628/7045 [18:17:03<4:28:25, 11.37s/it] {'loss': 1.0791, 'learning_rate': 5.120482782378438e-07, 'epoch': 0.8} + 80%|███████▉ | 5628/7045 [18:17:03<4:28:25, 11.37s/it] 80%|███████▉ | 5629/7045 [18:17:14<4:27:28, 11.33s/it] {'loss': 1.0977, 'learning_rate': 5.113515120998882e-07, 'epoch': 0.8} + 80%|███████▉ | 5629/7045 [18:17:14<4:27:28, 11.33s/it] 80%|███████▉ | 5630/7045 [18:17:25<4:25:11, 11.24s/it] {'loss': 1.123, 'learning_rate': 5.106551663350357e-07, 'epoch': 0.8} + 80%|███████▉ | 5630/7045 [18:17:25<4:25:11, 11.24s/it] 80%|███████▉ | 5631/7045 [18:17:37<4:25:59, 11.29s/it] {'loss': 1.0684, 'learning_rate': 5.099592410904836e-07, 'epoch': 0.8} + 80%|███████▉ | 5631/7045 [18:17:37<4:25:59, 11.29s/it] 80%|███████▉ | 5632/7045 [18:17:48<4:24:11, 11.22s/it] {'loss': 1.1406, 'learning_rate': 5.092637365133424e-07, 'epoch': 0.8} + 80%|███████▉ | 5632/7045 [18:17:48<4:24:11, 11.22s/it] 80%|███████▉ | 5633/7045 [18:17:59<4:24:10, 11.23s/it] {'loss': 1.1045, 'learning_rate': 5.085686527506315e-07, 'epoch': 0.8} + 80%|███████▉ | 5633/7045 [18:17:59<4:24:10, 11.23s/it] 80%|███████▉ | 5634/7045 [18:18:10<4:21:58, 11.14s/it] {'loss': 1.0723, 'learning_rate': 5.078739899492816e-07, 'epoch': 0.8} + 80%|███████▉ | 5634/7045 [18:18:10<4:21:58, 11.14s/it] 80%|███��███▉ | 5635/7045 [18:18:22<4:28:28, 11.42s/it] {'loss': 1.1387, 'learning_rate': 5.071797482561358e-07, 'epoch': 0.8} + 80%|███████▉ | 5635/7045 [18:18:22<4:28:28, 11.42s/it] 80%|████████ | 5636/7045 [18:18:33<4:26:20, 11.34s/it] {'loss': 1.1455, 'learning_rate': 5.064859278179465e-07, 'epoch': 0.8} + 80%|████████ | 5636/7045 [18:18:33<4:26:20, 11.34s/it] 80%|████████ | 5637/7045 [18:18:44<4:24:27, 11.27s/it] {'loss': 1.1289, 'learning_rate': 5.057925287813789e-07, 'epoch': 0.8} + 80%|████████ | 5637/7045 [18:18:44<4:24:27, 11.27s/it] 80%|████████ | 5638/7045 [18:18:56<4:25:41, 11.33s/it] {'loss': 1.1045, 'learning_rate': 5.050995512930073e-07, 'epoch': 0.8} + 80%|████████ | 5638/7045 [18:18:56<4:25:41, 11.33s/it] 80%|████████ | 5639/7045 [18:19:08<4:30:49, 11.56s/it] {'loss': 1.085, 'learning_rate': 5.04406995499317e-07, 'epoch': 0.8} + 80%|████████ | 5639/7045 [18:19:08<4:30:49, 11.56s/it] 80%|████████ | 5640/7045 [18:19:20<4:36:56, 11.83s/it] {'loss': 1.0928, 'learning_rate': 5.037148615467067e-07, 'epoch': 0.8} + 80%|████████ | 5640/7045 [18:19:20<4:36:56, 11.83s/it] 80%|████████ | 5641/7045 [18:19:31<4:30:08, 11.54s/it] {'loss': 1.0967, 'learning_rate': 5.03023149581483e-07, 'epoch': 0.8} + 80%|████████ | 5641/7045 [18:19:31<4:30:08, 11.54s/it] 80%|████████ | 5642/7045 [18:19:43<4:33:34, 11.70s/it] {'loss': 1.1338, 'learning_rate': 5.023318597498635e-07, 'epoch': 0.8} + 80%|████████ | 5642/7045 [18:19:43<4:33:34, 11.70s/it] 80%|████████ | 5643/7045 [18:19:55<4:31:28, 11.62s/it] {'loss': 1.1172, 'learning_rate': 5.016409921979795e-07, 'epoch': 0.8} + 80%|████████ | 5643/7045 [18:19:55<4:31:28, 11.62s/it] 80%|████████ | 5644/7045 [18:20:05<4:26:41, 11.42s/it] {'loss': 1.1133, 'learning_rate': 5.009505470718695e-07, 'epoch': 0.8} + 80%|████████ | 5644/7045 [18:20:05<4:26:41, 11.42s/it] 80%|████████ | 5645/7045 [18:20:19<4:41:36, 12.07s/it] {'loss': 1.1069, 'learning_rate': 5.002605245174838e-07, 'epoch': 0.8} + 80%|████████ | 5645/7045 [18:20:19<4:41:36, 12.07s/it] 80%|████████ | 5646/7045 [18:20:30<4:35:42, 11.82s/it] {'loss': 1.1523, 'learning_rate': 4.995709246806848e-07, 'epoch': 0.8} + 80%|████████ | 5646/7045 [18:20:30<4:35:42, 11.82s/it] 80%|████████ | 5647/7045 [18:20:42<4:33:37, 11.74s/it] {'loss': 1.1543, 'learning_rate': 4.988817477072436e-07, 'epoch': 0.8} + 80%|████████ | 5647/7045 [18:20:42<4:33:37, 11.74s/it] 80%|████████ | 5648/7045 [18:20:53<4:26:47, 11.46s/it] {'loss': 1.0898, 'learning_rate': 4.981929937428439e-07, 'epoch': 0.8} + 80%|████████ | 5648/7045 [18:20:53<4:26:47, 11.46s/it] 80%|████████ | 5649/7045 [18:21:06<4:37:56, 11.95s/it] {'loss': 1.0791, 'learning_rate': 4.975046629330782e-07, 'epoch': 0.8} + 80%|████████ | 5649/7045 [18:21:06<4:37:56, 11.95s/it] 80%|████████ | 5650/7045 [18:21:17<4:30:32, 11.64s/it] {'loss': 1.1182, 'learning_rate': 4.968167554234493e-07, 'epoch': 0.8} + 80%|████████ | 5650/7045 [18:21:17<4:30:32, 11.64s/it] 80%|████████ | 5651/7045 [18:21:28<4:26:53, 11.49s/it] {'loss': 1.1143, 'learning_rate': 4.961292713593727e-07, 'epoch': 0.8} + 80%|████████ | 5651/7045 [18:21:28<4:26:53, 11.49s/it] 80%|████████ | 5652/7045 [18:21:40<4:32:58, 11.76s/it] {'loss': 1.1187, 'learning_rate': 4.954422108861731e-07, 'epoch': 0.8} + 80%|████████ | 5652/7045 [18:21:40<4:32:58, 11.76s/it] 80%|████████ | 5653/7045 [18:21:51<4:27:34, 11.53s/it] {'loss': 1.0947, 'learning_rate': 4.94755574149085e-07, 'epoch': 0.8} + 80%|████████ | 5653/7045 [18:21:51<4:27:34, 11.53s/it] 80%|████████ | 5654/7045 [18:22:03<4:26:06, 11.48s/it] {'loss': 1.1104, 'learning_rate': 4.940693612932551e-07, 'epoch': 0.8} + 80%|████████ | 5654/7045 [18:22:03<4:26:06, 11.48s/it] 80%|████████ | 5655/7045 [18:22:14<4:25:51, 11.48s/it] {'loss': 1.125, 'learning_rate': 4.933835724637384e-07, 'epoch': 0.8} + 80%|████████ | 5655/7045 [18:22:14<4:25:51, 11.48s/it] 80%|████████ | 5656/7045 [18:22:26<4:32:24, 11.77s/it] {'loss': 1.1084, 'learning_rate': 4.926982078055012e-07, 'epoch': 0.8} + 80%|████████ | 5656/7045 [18:22:26<4:32:24, 11.77s/it] 80%|████████ | 5657/7045 [18:22:37<4:26:22, 11.51s/it] {'loss': 1.0957, 'learning_rate': 4.920132674634212e-07, 'epoch': 0.8} + 80%|████████ | 5657/7045 [18:22:37<4:26:22, 11.51s/it] 80%|████████ | 5658/7045 [18:22:49<4:25:36, 11.49s/it] {'loss': 1.1201, 'learning_rate': 4.913287515822843e-07, 'epoch': 0.8} + 80%|████████ | 5658/7045 [18:22:49<4:25:36, 11.49s/it] 80%|████████ | 5659/7045 [18:23:00<4:22:08, 11.35s/it] {'loss': 1.1025, 'learning_rate': 4.906446603067888e-07, 'epoch': 0.8} + 80%|████████ | 5659/7045 [18:23:00<4:22:08, 11.35s/it] 80%|████████ | 5660/7045 [18:23:11<4:21:46, 11.34s/it] {'loss': 1.1465, 'learning_rate': 4.89960993781542e-07, 'epoch': 0.8} + 80%|████████ | 5660/7045 [18:23:11<4:21:46, 11.34s/it] 80%|████████ | 5661/7045 [18:23:22<4:19:42, 11.26s/it] {'loss': 1.1367, 'learning_rate': 4.892777521510608e-07, 'epoch': 0.8} + 80%|████████ | 5661/7045 [18:23:22<4:19:42, 11.26s/it] 80%|████████ | 5662/7045 [18:23:33<4:18:34, 11.22s/it] {'loss': 1.0703, 'learning_rate': 4.885949355597743e-07, 'epoch': 0.8} + 80%|████████ | 5662/7045 [18:23:33<4:18:34, 11.22s/it] 80%|████████ | 5663/7045 [18:23:45<4:19:34, 11.27s/it] {'loss': 1.0967, 'learning_rate': 4.879125441520194e-07, 'epoch': 0.8} + 80%|████████ | 5663/7045 [18:23:45<4:19:34, 11.27s/it] 80%|████████ | 5664/7045 [18:23:56<4:20:58, 11.34s/it] {'loss': 1.1152, 'learning_rate': 4.872305780720452e-07, 'epoch': 0.8} + 80%|████████ | 5664/7045 [18:23:56<4:20:58, 11.34s/it] 80%|████████ | 5665/7045 [18:24:07<4:18:12, 11.23s/it] {'loss': 1.1143, 'learning_rate': 4.865490374640103e-07, 'epoch': 0.8} + 80%|████████ | 5665/7045 [18:24:07<4:18:12, 11.23s/it] 80%|████████ | 5666/7045 [18:24:19<4:22:56, 11.44s/it] {'loss': 1.1582, 'learning_rate': 4.858679224719825e-07, 'epoch': 0.8} + 80%|████████ | 5666/7045 [18:24:19<4:22:56, 11.44s/it] 80%|████████ | 5667/7045 [18:24:31<4:27:17, 11.64s/it] {'loss': 1.0864, 'learning_rate': 4.851872332399396e-07, 'epoch': 0.8} + 80%|████████ | 5667/7045 [18:24:31<4:27:17, 11.64s/it] 80%|████████ | 5668/7045 [18:24:43<4:25:18, 11.56s/it] {'loss': 1.0928, 'learning_rate': 4.845069699117713e-07, 'epoch': 0.8} + 80%|████████ | 5668/7045 [18:24:43<4:25:18, 11.56s/it] 80%|████████ | 5669/7045 [18:24:53<4:19:56, 11.33s/it] {'loss': 1.1113, 'learning_rate': 4.838271326312749e-07, 'epoch': 0.8} + 80%|████████ | 5669/7045 [18:24:53<4:19:56, 11.33s/it] 80%|████████ | 5670/7045 [18:25:05<4:18:22, 11.27s/it] {'loss': 1.124, 'learning_rate': 4.831477215421595e-07, 'epoch': 0.8} + 80%|████████ | 5670/7045 [18:25:05<4:18:22, 11.27s/it] 80%|████████ | 5671/7045 [18:25:16<4:17:45, 11.26s/it] {'loss': 1.1543, 'learning_rate': 4.824687367880432e-07, 'epoch': 0.8} + 80%|████████ | 5671/7045 [18:25:16<4:17:45, 11.26s/it] 81%|████████ | 5672/7045 [18:25:27<4:16:42, 11.22s/it] {'loss': 1.0835, 'learning_rate': 4.817901785124532e-07, 'epoch': 0.81} + 81%|████████ | 5672/7045 [18:25:27<4:16:42, 11.22s/it] 81%|████████ | 5673/7045 [18:25:39<4:21:21, 11.43s/it] {'loss': 1.1201, 'learning_rate': 4.811120468588284e-07, 'epoch': 0.81} + 81%|████████ | 5673/7045 [18:25:39<4:21:21, 11.43s/it] 81%|████████ | 5674/7045 [18:25:50<4:19:17, 11.35s/it] {'loss': 1.0957, 'learning_rate': 4.804343419705165e-07, 'epoch': 0.81} + 81%|████████ | 5674/7045 [18:25:50<4:19:17, 11.35s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Truncated File Read + warnings.warn(str(msg)) + 81%|████████ | 5675/7045 [18:26:03<4:31:33, 11.89s/it] {'loss': 1.063, 'learning_rate': 4.797570639907736e-07, 'epoch': 0.81} + 81%|████████ | 5675/7045 [18:26:03<4:31:33, 11.89s/it] 81%|████████ | 5676/7045 [18:26:15<4:28:31, 11.77s/it] {'loss': 1.1255, 'learning_rate': 4.790802130627695e-07, 'epoch': 0.81} + 81%|████████ | 5676/7045 [18:26:15<4:28:31, 11.77s/it] 81%|████████ | 5677/7045 [18:26:26<4:28:26, 11.77s/it] {'loss': 1.1309, 'learning_rate': 4.784037893295798e-07, 'epoch': 0.81} + 81%|████████ | 5677/7045 [18:26:26<4:28:26, 11.77s/it] 81%|████████ | 5678/7045 [18:26:38<4:24:09, 11.59s/it] {'loss': 1.1318, 'learning_rate': 4.777277929341905e-07, 'epoch': 0.81} + 81%|████████ | 5678/7045 [18:26:38<4:24:09, 11.59s/it] 81%|████████ | 5679/7045 [18:26:50<4:29:23, 11.83s/it] {'loss': 1.1377, 'learning_rate': 4.770522240194999e-07, 'epoch': 0.81} + 81%|████████ | 5679/7045 [18:26:50<4:29:23, 11.83s/it] 81%|████████ | 5680/7045 [18:27:01<4:25:09, 11.66s/it] {'loss': 1.1514, 'learning_rate': 4.7637708272831237e-07, 'epoch': 0.81} + 81%|████████ | 5680/7045 [18:27:01<4:25:09, 11.66s/it] 81%|████████ | 5681/7045 [18:27:13<4:25:00, 11.66s/it] {'loss': 1.0938, 'learning_rate': 4.757023692033444e-07, 'epoch': 0.81} + 81%|████████ | 5681/7045 [18:27:13<4:25:00, 11.66s/it] 81%|████████ | 5682/7045 [18:27:24<4:19:59, 11.44s/it] {'loss': 1.1133, 'learning_rate': 4.750280835872212e-07, 'epoch': 0.81} + 81%|████████ | 5682/7045 [18:27:24<4:19:59, 11.44s/it] 81%|████████ | 5683/7045 [18:27:37<4:32:03, 11.99s/it] {'loss': 1.0996, 'learning_rate': 4.7435422602247675e-07, 'epoch': 0.81} + 81%|████████ | 5683/7045 [18:27:37<4:32:03, 11.99s/it] 81%|████████ | 5684/7045 [18:27:48<4:24:20, 11.65s/it] {'loss': 1.1182, 'learning_rate': 4.7368079665155666e-07, 'epoch': 0.81} + 81%|████████ | 5684/7045 [18:27:48<4:24:20, 11.65s/it] 81%|████████ | 5685/7045 [18:28:00<4:24:29, 11.67s/it] {'loss': 1.0693, 'learning_rate': 4.730077956168136e-07, 'epoch': 0.81} + 81%|████████ | 5685/7045 [18:28:00<4:24:29, 11.67s/it] 81%|████████ | 5686/7045 [18:28:11<4:21:18, 11.54s/it] {'loss': 1.0762, 'learning_rate': 4.723352230605105e-07, 'epoch': 0.81} + 81%|████████ | 5686/7045 [18:28:11<4:21:18, 11.54s/it] 81%|████████ | 5687/7045 [18:28:24<4:29:23, 11.90s/it] {'loss': 1.1006, 'learning_rate': 4.7166307912482087e-07, 'epoch': 0.81} + 81%|████████ | 5687/7045 [18:28:24<4:29:23, 11.90s/it] 81%|████████ | 5688/7045 [18:28:35<4:24:22, 11.69s/it] {'loss': 1.0923, 'learning_rate': 4.7099136395182684e-07, 'epoch': 0.81} + 81%|████████ | 5688/7045 [18:28:35<4:24:22, 11.69s/it] 81%|████████ | 5689/7045 [18:28:47<4:29:13, 11.91s/it] {'loss': 1.0488, 'learning_rate': 4.70320077683519e-07, 'epoch': 0.81} + 81%|████████ | 5689/7045 [18:28:47<4:29:13, 11.91s/it] 81%|████████ | 5690/7045 [18:29:01<4:37:57, 12.31s/it] {'loss': 1.0581, 'learning_rate': 4.696492204617992e-07, 'epoch': 0.81} + 81%|████████ | 5690/7045 [18:29:01<4:37:57, 12.31s/it] 81%|████████ | 5691/7045 [18:29:12<4:31:23, 12.03s/it] {'loss': 1.0898, 'learning_rate': 4.689787924284761e-07, 'epoch': 0.81} + 81%|████████ | 5691/7045 [18:29:12<4:31:23, 12.03s/it] 81%|████████ | 5692/7045 [18:29:23<4:24:05, 11.71s/it] {'loss': 1.127, 'learning_rate': 4.683087937252703e-07, 'epoch': 0.81} + 81%|████████ | 5692/7045 [18:29:23<4:24:05, 11.71s/it] 81%|████████ | 5693/7045 [18:29:35<4:27:01, 11.85s/it] {'loss': 1.1279, 'learning_rate': 4.6763922449380995e-07, 'epoch': 0.81} + 81%|████████ | 5693/7045 [18:29:35<4:27:01, 11.85s/it] 81%|████████ | 5694/7045 [18:29:47<4:30:46, 12.03s/it] {'loss': 1.0898, 'learning_rate': 4.6697008487563227e-07, 'epoch': 0.81} + 81%|████████ | 5694/7045 [18:29:47<4:30:46, 12.03s/it] 81%|████████ | 5695/7045 [18:29:59<4:26:39, 11.85s/it] {'loss': 1.1084, 'learning_rate': 4.6630137501218504e-07, 'epoch': 0.81} + 81%|████████ | 5695/7045 [18:29:59<4:26:39, 11.85s/it] 81%|████████ | 5696/7045 [18:30:10<4:21:51, 11.65s/it] {'loss': 1.0928, 'learning_rate': 4.65633095044824e-07, 'epoch': 0.81} + 81%|████████ | 5696/7045 [18:30:10<4:21:51, 11.65s/it] 81%|████████ | 5697/7045 [18:30:22<4:21:01, 11.62s/it] {'loss': 1.0967, 'learning_rate': 4.64965245114814e-07, 'epoch': 0.81} + 81%|████████ | 5697/7045 [18:30:22<4:21:01, 11.62s/it] 81%|████████ | 5698/7045 [18:30:33<4:16:52, 11.44s/it] {'loss': 1.083, 'learning_rate': 4.642978253633301e-07, 'epoch': 0.81} + 81%|████████ | 5698/7045 [18:30:33<4:16:52, 11.44s/it] 81%|████████ | 5699/7045 [18:30:44<4:16:03, 11.41s/it] {'loss': 1.1094, 'learning_rate': 4.6363083593145507e-07, 'epoch': 0.81} + 81%|████████ | 5699/7045 [18:30:44<4:16:03, 11.41s/it] 81%|████████ | 5700/7045 [18:30:55<4:13:12, 11.30s/it] {'loss': 1.1416, 'learning_rate': 4.6296427696018137e-07, 'epoch': 0.81} + 81%|████████ | 5700/7045 [18:30:55<4:13:12, 11.30s/it] 81%|████████ | 5701/7045 [18:31:06<4:12:34, 11.28s/it] {'loss': 1.0996, 'learning_rate': 4.6229814859041183e-07, 'epoch': 0.81} + 81%|████████ | 5701/7045 [18:31:06<4:12:34, 11.28s/it] 81%|████████ | 5702/7045 [18:31:19<4:23:56, 11.79s/it] {'loss': 1.0693, 'learning_rate': 4.6163245096295494e-07, 'epoch': 0.81} + 81%|████████ | 5702/7045 [18:31:19<4:23:56, 11.79s/it] 81%|████████ | 5703/7045 [18:31:31<4:21:38, 11.70s/it] {'loss': 1.0938, 'learning_rate': 4.6096718421853146e-07, 'epoch': 0.81} + 81%|████████ | 5703/7045 [18:31:31<4:21:38, 11.70s/it] 81%|████████ | 5704/7045 [18:31:43<4:23:47, 11.80s/it] {'loss': 1.0947, 'learning_rate': 4.603023484977695e-07, 'epoch': 0.81} + 81%|████████ | 5704/7045 [18:31:43<4:23:47, 11.80s/it] 81%|████████ | 5705/7045 [18:31:55<4:23:29, 11.80s/it] {'loss': 1.1436, 'learning_rate': 4.596379439412049e-07, 'epoch': 0.81} + 81%|████████ | 5705/7045 [18:31:55<4:23:29, 11.80s/it] 81%|████████ | 5706/7045 [18:32:06<4:20:15, 11.66s/it] {'loss': 1.1265, 'learning_rate': 4.5897397068928565e-07, 'epoch': 0.81} + 81%|████████ | 5706/7045 [18:32:06<4:20:15, 11.66s/it] 81%|████████ | 5707/7045 [18:32:17<4:17:46, 11.56s/it] {'loss': 1.1113, 'learning_rate': 4.5831042888236586e-07, 'epoch': 0.81} + 81%|████████ | 5707/7045 [18:32:17<4:17:46, 11.56s/it] 81%|████████ | 5708/7045 [18:32:28<4:15:23, 11.46s/it] {'loss': 1.0864, 'learning_rate': 4.576473186607081e-07, 'epoch': 0.81} + 81%|████████ | 5708/7045 [18:32:28<4:15:23, 11.46s/it] 81%|████████ | 5709/7045 [18:32:39<4:10:52, 11.27s/it] {'loss': 1.0732, 'learning_rate': 4.5698464016448644e-07, 'epoch': 0.81} + 81%|████████ | 5709/7045 [18:32:39<4:10:52, 11.27s/it] 81%|████████ | 5710/7045 [18:32:51<4:15:21, 11.48s/it] {'loss': 1.0483, 'learning_rate': 4.5632239353378073e-07, 'epoch': 0.81} + 81%|████████ | 5710/7045 [18:32:51<4:15:21, 11.48s/it] 81%|████████ | 5711/7045 [18:33:02<4:13:23, 11.40s/it] {'loss': 1.1318, 'learning_rate': 4.556605789085822e-07, 'epoch': 0.81} + 81%|████████ | 5711/7045 [18:33:02<4:13:23, 11.40s/it] 81%|████████ | 5712/7045 [18:33:14<4:16:59, 11.57s/it] {'loss': 1.0757, 'learning_rate': 4.549991964287878e-07, 'epoch': 0.81} + 81%|████████ | 5712/7045 [18:33:14<4:16:59, 11.57s/it] 81%|████████ | 5713/7045 [18:33:27<4:23:21, 11.86s/it] {'loss': 1.0791, 'learning_rate': 4.543382462342058e-07, 'epoch': 0.81} + 81%|████████ | 5713/7045 [18:33:27<4:23:21, 11.86s/it] 81%|████████ | 5714/7045 [18:33:39<4:23:52, 11.89s/it] {'loss': 1.1172, 'learning_rate': 4.536777284645527e-07, 'epoch': 0.81} + 81%|████████ | 5714/7045 [18:33:39<4:23:52, 11.89s/it] 81%|████████ | 5715/7045 [18:33:51<4:24:12, 11.92s/it] {'loss': 1.0947, 'learning_rate': 4.5301764325945185e-07, 'epoch': 0.81} + 81%|████████ | 5715/7045 [18:33:51<4:24:12, 11.92s/it] 81%|████████ | 5716/7045 [18:34:02<4:21:24, 11.80s/it] {'loss': 1.1211, 'learning_rate': 4.5235799075843576e-07, 'epoch': 0.81} + 81%|████████ | 5716/7045 [18:34:02<4:21:24, 11.80s/it] 81%|████████ | 5717/7045 [18:34:13<4:15:47, 11.56s/it] {'loss': 1.1143, 'learning_rate': 4.516987711009477e-07, 'epoch': 0.81} + 81%|████████ | 5717/7045 [18:34:13<4:15:47, 11.56s/it] 81%|████████ | 5718/7045 [18:34:25<4:18:45, 11.70s/it] {'loss': 1.0537, 'learning_rate': 4.5103998442633634e-07, 'epoch': 0.81} + 81%|████████ | 5718/7045 [18:34:25<4:18:45, 11.70s/it] 81%|████████ | 5719/7045 [18:34:37<4:15:21, 11.55s/it] {'loss': 1.0957, 'learning_rate': 4.5038163087385983e-07, 'epoch': 0.81} + 81%|████████ | 5719/7045 [18:34:37<4:15:21, 11.55s/it] 81%|████████ | 5720/7045 [18:34:49<4:17:00, 11.64s/it] {'loss': 1.1094, 'learning_rate': 4.497237105826868e-07, 'epoch': 0.81} + 81%|████████ | 5720/7045 [18:34:49<4:17:00, 11.64s/it] 81%|████████ | 5721/7045 [18:34:59<4:10:34, 11.36s/it] {'loss': 1.1309, 'learning_rate': 4.490662236918905e-07, 'epoch': 0.81} + 81%|████████ | 5721/7045 [18:34:59<4:10:34, 11.36s/it] 81%|████████ | 5722/7045 [18:35:11<4:11:25, 11.40s/it] {'loss': 1.1021, 'learning_rate': 4.4840917034045667e-07, 'epoch': 0.81} + 81%|████████ | 5722/7045 [18:35:11<4:11:25, 11.40s/it] 81%|████████ | 5723/7045 [18:35:23<4:19:29, 11.78s/it] {'loss': 1.1172, 'learning_rate': 4.477525506672764e-07, 'epoch': 0.81} + 81%|████████ | 5723/7045 [18:35:23<4:19:29, 11.78s/it] 81%|████████ | 5724/7045 [18:35:35<4:18:48, 11.76s/it] {'loss': 1.0928, 'learning_rate': 4.47096364811149e-07, 'epoch': 0.81} + 81%|████████ | 5724/7045 [18:35:35<4:18:48, 11.76s/it] 81%|████████▏ | 5725/7045 [18:35:46<4:14:13, 11.56s/it] {'loss': 1.0957, 'learning_rate': 4.4644061291078564e-07, 'epoch': 0.81} + 81%|████████▏ | 5725/7045 [18:35:46<4:14:13, 11.56s/it] 81%|████████▏ | 5726/7045 [18:35:58<4:13:29, 11.53s/it] {'loss': 1.0811, 'learning_rate': 4.457852951048022e-07, 'epoch': 0.81} + 81%|████████▏ | 5726/7045 [18:35:58<4:13:29, 11.53s/it] 81%|██████���█▏ | 5727/7045 [18:36:09<4:11:23, 11.44s/it] {'loss': 1.1191, 'learning_rate': 4.451304115317229e-07, 'epoch': 0.81} + 81%|████████▏ | 5727/7045 [18:36:09<4:11:23, 11.44s/it] 81%|████████▏ | 5728/7045 [18:36:22<4:19:36, 11.83s/it] {'loss': 1.1157, 'learning_rate': 4.444759623299827e-07, 'epoch': 0.81} + 81%|████████▏ | 5728/7045 [18:36:22<4:19:36, 11.83s/it] 81%|████████▏ | 5729/7045 [18:36:33<4:17:15, 11.73s/it] {'loss': 1.1436, 'learning_rate': 4.438219476379227e-07, 'epoch': 0.81} + 81%|████████▏ | 5729/7045 [18:36:33<4:17:15, 11.73s/it] 81%|████████▏ | 5730/7045 [18:36:44<4:11:46, 11.49s/it] {'loss': 1.1289, 'learning_rate': 4.43168367593792e-07, 'epoch': 0.81} + 81%|████████▏ | 5730/7045 [18:36:44<4:11:46, 11.49s/it] 81%|████████▏ | 5731/7045 [18:36:55<4:08:56, 11.37s/it] {'loss': 1.1211, 'learning_rate': 4.425152223357493e-07, 'epoch': 0.81} + 81%|████████▏ | 5731/7045 [18:36:55<4:08:56, 11.37s/it] 81%|████████▏ | 5732/7045 [18:37:08<4:19:20, 11.85s/it] {'loss': 1.1079, 'learning_rate': 4.4186251200185995e-07, 'epoch': 0.81} + 81%|████████▏ | 5732/7045 [18:37:08<4:19:20, 11.85s/it] 81%|████████▏ | 5733/7045 [18:37:19<4:12:13, 11.54s/it] {'loss': 1.1279, 'learning_rate': 4.412102367300988e-07, 'epoch': 0.81} + 81%|████████▏ | 5733/7045 [18:37:19<4:12:13, 11.54s/it] 81%|████████▏ | 5734/7045 [18:37:30<4:07:55, 11.35s/it] {'loss': 1.084, 'learning_rate': 4.405583966583474e-07, 'epoch': 0.81} + 81%|████████▏ | 5734/7045 [18:37:30<4:07:55, 11.35s/it] 81%|████████▏ | 5735/7045 [18:37:42<4:16:10, 11.73s/it] {'loss': 1.1294, 'learning_rate': 4.3990699192439534e-07, 'epoch': 0.81} + 81%|████████▏ | 5735/7045 [18:37:42<4:16:10, 11.73s/it] 81%|████████▏ | 5736/7045 [18:37:55<4:18:42, 11.86s/it] {'loss': 1.1289, 'learning_rate': 4.392560226659412e-07, 'epoch': 0.81} + 81%|████████▏ | 5736/7045 [18:37:55<4:18:42, 11.86s/it] 81%|████████▏ | 5737/7045 [18:38:05<4:11:37, 11.54s/it] {'loss': 1.1006, 'learning_rate': 4.386054890205918e-07, 'epoch': 0.81} + 81%|████████▏ | 5737/7045 [18:38:05<4:11:37, 11.54s/it] 81%|████████▏ | 5738/7045 [18:38:18<4:16:02, 11.75s/it] {'loss': 1.063, 'learning_rate': 4.379553911258594e-07, 'epoch': 0.81} + 81%|████████▏ | 5738/7045 [18:38:18<4:16:02, 11.75s/it] 81%|████████▏ | 5739/7045 [18:38:30<4:20:00, 11.95s/it] {'loss': 1.0825, 'learning_rate': 4.373057291191674e-07, 'epoch': 0.81} + 81%|████████▏ | 5739/7045 [18:38:30<4:20:00, 11.95s/it] 81%|████████▏ | 5740/7045 [18:38:42<4:22:09, 12.05s/it] {'loss': 1.0947, 'learning_rate': 4.3665650313784473e-07, 'epoch': 0.81} + 81%|████████▏ | 5740/7045 [18:38:42<4:22:09, 12.05s/it] 81%|████████▏ | 5741/7045 [18:38:55<4:24:53, 12.19s/it] {'loss': 1.1133, 'learning_rate': 4.3600771331912815e-07, 'epoch': 0.81} + 81%|████████▏ | 5741/7045 [18:38:55<4:24:53, 12.19s/it] 82%|████████▏ | 5742/7045 [18:39:06<4:19:22, 11.94s/it] {'loss': 1.0571, 'learning_rate': 4.353593598001646e-07, 'epoch': 0.82} + 82%|████████▏ | 5742/7045 [18:39:06<4:19:22, 11.94s/it] 82%|████████▏ | 5743/7045 [18:39:18<4:20:11, 11.99s/it] {'loss': 1.1367, 'learning_rate': 4.3471144271800547e-07, 'epoch': 0.82} + 82%|████████▏ | 5743/7045 [18:39:18<4:20:11, 11.99s/it] 82%|████████▏ | 5744/7045 [18:39:30<4:20:54, 12.03s/it] {'loss': 1.0469, 'learning_rate': 4.3406396220961295e-07, 'epoch': 0.82} + 82%|████████▏ | 5744/7045 [18:39:30<4:20:54, 12.03s/it] 82%|████████▏ | 5745/7045 [18:39:42<4:15:35, 11.80s/it] {'loss': 1.1201, 'learning_rate': 4.3341691841185475e-07, 'epoch': 0.82} + 82%|████████▏ | 5745/7045 [18:39:42<4:15:35, 11.80s/it] 82%|████████▏ | 5746/7045 [18:39:53<4:11:45, 11.63s/it] {'loss': 1.0801, 'learning_rate': 4.32770311461507e-07, 'epoch': 0.82} + 82%|████████▏ | 5746/7045 [18:39:53<4:11:45, 11.63s/it] 82%|████████▏ | 5747/7045 [18:40:04<4:09:32, 11.53s/it] {'loss': 1.1475, 'learning_rate': 4.3212414149525426e-07, 'epoch': 0.82} + 82%|████████▏ | 5747/7045 [18:40:04<4:09:32, 11.53s/it] 82%|████████▏ | 5748/7045 [18:40:16<4:07:54, 11.47s/it] {'loss': 1.103, 'learning_rate': 4.3147840864968734e-07, 'epoch': 0.82} + 82%|████████▏ | 5748/7045 [18:40:16<4:07:54, 11.47s/it] 82%|████████▏ | 5749/7045 [18:40:27<4:08:41, 11.51s/it] {'loss': 1.0859, 'learning_rate': 4.3083311306130524e-07, 'epoch': 0.82} + 82%|████████▏ | 5749/7045 [18:40:27<4:08:41, 11.51s/it] 82%|████████▏ | 5750/7045 [18:40:39<4:13:11, 11.73s/it] {'loss': 1.1221, 'learning_rate': 4.3018825486651596e-07, 'epoch': 0.82} + 82%|████████▏ | 5750/7045 [18:40:39<4:13:11, 11.73s/it] 82%|████████▏ | 5751/7045 [18:40:50<4:07:30, 11.48s/it] {'loss': 1.0981, 'learning_rate': 4.295438342016328e-07, 'epoch': 0.82} + 82%|████████▏ | 5751/7045 [18:40:50<4:07:30, 11.48s/it] 82%|████████▏ | 5752/7045 [18:41:02<4:07:40, 11.49s/it] {'loss': 1.1104, 'learning_rate': 4.2889985120287664e-07, 'epoch': 0.82} + 82%|████████▏ | 5752/7045 [18:41:02<4:07:40, 11.49s/it] 82%|████████▏ | 5753/7045 [18:41:13<4:06:19, 11.44s/it] {'loss': 1.1143, 'learning_rate': 4.2825630600637794e-07, 'epoch': 0.82} + 82%|████████▏ | 5753/7045 [18:41:13<4:06:19, 11.44s/it] 82%|████████▏ | 5754/7045 [18:41:24<4:04:53, 11.38s/it] {'loss': 1.1479, 'learning_rate': 4.276131987481727e-07, 'epoch': 0.82} + 82%|████████▏ | 5754/7045 [18:41:24<4:04:53, 11.38s/it] 82%|████████▏ | 5755/7045 [18:41:36<4:04:02, 11.35s/it] {'loss': 1.0747, 'learning_rate': 4.2697052956420545e-07, 'epoch': 0.82} + 82%|████████▏ | 5755/7045 [18:41:36<4:04:02, 11.35s/it] 82%|████████▏ | 5756/7045 [18:41:49<4:15:08, 11.88s/it] {'loss': 1.1045, 'learning_rate': 4.2632829859032775e-07, 'epoch': 0.82} + 82%|████████▏ | 5756/7045 [18:41:49<4:15:08, 11.88s/it] 82%|████████▏ | 5757/7045 [18:42:03<4:30:34, 12.60s/it] {'loss': 1.0918, 'learning_rate': 4.256865059622972e-07, 'epoch': 0.82} + 82%|████████▏ | 5757/7045 [18:42:03<4:30:34, 12.60s/it] 82%|████████▏ | 5758/7045 [18:42:15<4:27:04, 12.45s/it] {'loss': 1.0718, 'learning_rate': 4.2504515181578166e-07, 'epoch': 0.82} + 82%|████████▏ | 5758/7045 [18:42:15<4:27:04, 12.45s/it] 82%|████████▏ | 5759/7045 [18:42:26<4:16:47, 11.98s/it] {'loss': 1.1348, 'learning_rate': 4.244042362863532e-07, 'epoch': 0.82} + 82%|████████▏ | 5759/7045 [18:42:26<4:16:47, 11.98s/it] 82%|████████▏ | 5760/7045 [18:42:38<4:14:27, 11.88s/it] {'loss': 1.1338, 'learning_rate': 4.23763759509494e-07, 'epoch': 0.82} + 82%|████████▏ | 5760/7045 [18:42:38<4:14:27, 11.88s/it] 82%|████████▏ | 5761/7045 [18:42:50<4:13:52, 11.86s/it] {'loss': 1.1055, 'learning_rate': 4.231237216205908e-07, 'epoch': 0.82} + 82%|████████▏ | 5761/7045 [18:42:50<4:13:52, 11.86s/it] 82%|████████▏ | 5762/7045 [18:43:01<4:08:49, 11.64s/it] {'loss': 1.1289, 'learning_rate': 4.2248412275494006e-07, 'epoch': 0.82} + 82%|████████▏ | 5762/7045 [18:43:01<4:08:49, 11.64s/it] 82%|████████▏ | 5763/7045 [18:43:14<4:20:57, 12.21s/it] {'loss': 1.0859, 'learning_rate': 4.2184496304774283e-07, 'epoch': 0.82} + 82%|████████▏ | 5763/7045 [18:43:14<4:20:57, 12.21s/it] 82%|████████▏ | 5764/7045 [18:43:26<4:17:14, 12.05s/it] {'loss': 1.1191, 'learning_rate': 4.212062426341101e-07, 'epoch': 0.82} + 82%|████████▏ | 5764/7045 [18:43:26<4:17:14, 12.05s/it] 82%|████████▏ | 5765/7045 [18:43:40<4:30:23, 12.67s/it] {'loss': 1.0688, 'learning_rate': 4.2056796164905766e-07, 'epoch': 0.82} + 82%|████████▏ | 5765/7045 [18:43:40<4:30:23, 12.67s/it] 82%|████████▏ | 5766/7045 [18:43:51<4:19:39, 12.18s/it] {'loss': 1.084, 'learning_rate': 4.1993012022751023e-07, 'epoch': 0.82} + 82%|████████▏ | 5766/7045 [18:43:51<4:19:39, 12.18s/it] 82%|████████▏ | 5767/7045 [18:44:02<4:11:19, 11.80s/it] {'loss': 1.0801, 'learning_rate': 4.1929271850429833e-07, 'epoch': 0.82} + 82%|████████▏ | 5767/7045 [18:44:02<4:11:19, 11.80s/it] 82%|████████▏ | 5768/7045 [18:44:15<4:19:32, 12.19s/it] {'loss': 1.1343, 'learning_rate': 4.186557566141594e-07, 'epoch': 0.82} + 82%|████████▏ | 5768/7045 [18:44:15<4:19:32, 12.19s/it] 82%|████████▏ | 5769/7045 [18:44:27<4:15:30, 12.01s/it] {'loss': 1.1221, 'learning_rate': 4.180192346917394e-07, 'epoch': 0.82} + 82%|████████▏ | 5769/7045 [18:44:27<4:15:30, 12.01s/it] 82%|████████▏ | 5770/7045 [18:44:38<4:09:41, 11.75s/it] {'loss': 1.0771, 'learning_rate': 4.1738315287158964e-07, 'epoch': 0.82} + 82%|████████▏ | 5770/7045 [18:44:38<4:09:41, 11.75s/it] 82%|████████▏ | 5771/7045 [18:44:49<4:08:30, 11.70s/it] {'loss': 1.1279, 'learning_rate': 4.1674751128816997e-07, 'epoch': 0.82} + 82%|████████▏ | 5771/7045 [18:44:49<4:08:30, 11.70s/it] 82%|████████▏ | 5772/7045 [18:45:01<4:05:31, 11.57s/it] {'loss': 1.0825, 'learning_rate': 4.1611231007584603e-07, 'epoch': 0.82} + 82%|████████▏ | 5772/7045 [18:45:01<4:05:31, 11.57s/it] 82%|████████▏ | 5773/7045 [18:45:13<4:13:26, 11.95s/it] {'loss': 1.0889, 'learning_rate': 4.1547754936888953e-07, 'epoch': 0.82} + 82%|████████▏ | 5773/7045 [18:45:13<4:13:26, 11.95s/it] 82%|████████▏ | 5774/7045 [18:45:27<4:24:43, 12.50s/it] {'loss': 1.1138, 'learning_rate': 4.1484322930148146e-07, 'epoch': 0.82} + 82%|████████▏ | 5774/7045 [18:45:27<4:24:43, 12.50s/it] 82%|████████▏ | 5775/7045 [18:45:38<4:16:11, 12.10s/it] {'loss': 1.0894, 'learning_rate': 4.142093500077085e-07, 'epoch': 0.82} + 82%|████████▏ | 5775/7045 [18:45:38<4:16:11, 12.10s/it] 82%|████████▏ | 5776/7045 [18:45:50<4:13:40, 11.99s/it] {'loss': 1.1553, 'learning_rate': 4.135759116215629e-07, 'epoch': 0.82} + 82%|████████▏ | 5776/7045 [18:45:50<4:13:40, 11.99s/it] 82%|████████▏ | 5777/7045 [18:46:03<4:20:56, 12.35s/it] {'loss': 1.0703, 'learning_rate': 4.129429142769467e-07, 'epoch': 0.82} + 82%|████████▏ | 5777/7045 [18:46:03<4:20:56, 12.35s/it] 82%|████████▏ | 5778/7045 [18:46:14<4:12:56, 11.98s/it] {'loss': 1.063, 'learning_rate': 4.123103581076654e-07, 'epoch': 0.82} + 82%|████████▏ | 5778/7045 [18:46:14<4:12:56, 11.98s/it] 82%|████████▏ | 5779/7045 [18:46:26<4:08:41, 11.79s/it] {'loss': 1.1221, 'learning_rate': 4.1167824324743285e-07, 'epoch': 0.82} + 82%|████████▏ | 5779/7045 [18:46:26<4:08:41, 11.79s/it] 82%|████████▏ | 5780/7045 [18:46:37<4:04:50, 11.61s/it] {'loss': 1.1504, 'learning_rate': 4.1104656982987017e-07, 'epoch': 0.82} + 82%|████████▏ | 5780/7045 [18:46:37<4:04:50, 11.61s/it] 82%|████████▏ | 5781/7045 [18:46:48<4:01:28, 11.46s/it] {'loss': 1.126, 'learning_rate': 4.104153379885034e-07, 'epoch': 0.82} + 82%|████████▏ | 5781/7045 [18:46:48<4:01:28, 11.46s/it] 82%|████████▏ | 5782/7045 [18:46:59<3:57:10, 11.27s/it] {'loss': 1.0327, 'learning_rate': 4.0978454785676804e-07, 'epoch': 0.82} + 82%|████████▏ | 5782/7045 [18:46:59<3:57:10, 11.27s/it] 82%|████████▏ | 5783/7045 [18:47:10<3:54:27, 11.15s/it] {'loss': 1.0962, 'learning_rate': 4.0915419956800316e-07, 'epoch': 0.82} + 82%|████████▏ | 5783/7045 [18:47:10<3:54:27, 11.15s/it] 82%|████████▏ | 5784/7045 [18:47:21<3:55:51, 11.22s/it] {'loss': 1.0757, 'learning_rate': 4.085242932554556e-07, 'epoch': 0.82} + 82%|████████▏ | 5784/7045 [18:47:21<3:55:51, 11.22s/it] 82%|████████▏ | 5785/7045 [18:47:32<3:55:04, 11.19s/it] {'loss': 1.1226, 'learning_rate': 4.078948290522794e-07, 'epoch': 0.82} + 82%|████████▏ | 5785/7045 [18:47:32<3:55:04, 11.19s/it] 82%|████████▏ | 5786/7045 [18:47:46<4:10:00, 11.91s/it] {'loss': 1.0854, 'learning_rate': 4.072658070915353e-07, 'epoch': 0.82} + 82%|████████▏ | 5786/7045 [18:47:46<4:10:00, 11.91s/it] 82%|████████▏ | 5787/7045 [18:47:59<4:17:05, 12.26s/it] {'loss': 1.1167, 'learning_rate': 4.066372275061889e-07, 'epoch': 0.82} + 82%|████████▏ | 5787/7045 [18:47:59<4:17:05, 12.26s/it] 82%|████████▏ | 5788/7045 [18:48:11<4:15:03, 12.17s/it] {'loss': 1.1284, 'learning_rate': 4.060090904291142e-07, 'epoch': 0.82} + 82%|████████▏ | 5788/7045 [18:48:11<4:15:03, 12.17s/it] 82%|████████▏ | 5789/7045 [18:48:22<4:08:41, 11.88s/it] {'loss': 1.124, 'learning_rate': 4.053813959930905e-07, 'epoch': 0.82} + 82%|████████▏ | 5789/7045 [18:48:22<4:08:41, 11.88s/it] 82%|████████▏ | 5790/7045 [18:48:34<4:05:25, 11.73s/it] {'loss': 1.1348, 'learning_rate': 4.047541443308031e-07, 'epoch': 0.82} + 82%|████████▏ | 5790/7045 [18:48:34<4:05:25, 11.73s/it] 82%|████████▏ | 5791/7045 [18:48:47<4:15:25, 12.22s/it] {'loss': 1.1265, 'learning_rate': 4.0412733557484566e-07, 'epoch': 0.82} + 82%|████████▏ | 5791/7045 [18:48:47<4:15:25, 12.22s/it] 82%|████████▏ | 5792/7045 [18:49:00<4:20:14, 12.46s/it] {'loss': 1.083, 'learning_rate': 4.0350096985771583e-07, 'epoch': 0.82} + 82%|████████▏ | 5792/7045 [18:49:00<4:20:14, 12.46s/it] 82%|████████▏ | 5793/7045 [18:49:11<4:12:25, 12.10s/it] {'loss': 1.1851, 'learning_rate': 4.028750473118198e-07, 'epoch': 0.82} + 82%|████████▏ | 5793/7045 [18:49:11<4:12:25, 12.10s/it] 82%|████████▏ | 5794/7045 [18:49:22<4:06:59, 11.85s/it] {'loss': 1.0918, 'learning_rate': 4.022495680694685e-07, 'epoch': 0.82} + 82%|████████▏ | 5794/7045 [18:49:22<4:06:59, 11.85s/it] 82%|████████▏ | 5795/7045 [18:49:34<4:04:42, 11.75s/it] {'loss': 1.1191, 'learning_rate': 4.016245322628792e-07, 'epoch': 0.82} + 82%|████████▏ | 5795/7045 [18:49:34<4:04:42, 11.75s/it] 82%|████████▏ | 5796/7045 [18:49:45<4:01:56, 11.62s/it] {'loss': 1.1309, 'learning_rate': 4.0099994002417707e-07, 'epoch': 0.82} + 82%|████████▏ | 5796/7045 [18:49:45<4:01:56, 11.62s/it] 82%|████████▏ | 5797/7045 [18:49:57<4:00:16, 11.55s/it] {'loss': 1.0947, 'learning_rate': 4.0037579148539093e-07, 'epoch': 0.82} + 82%|████████▏ | 5797/7045 [18:49:57<4:00:16, 11.55s/it] 82%|████████▏ | 5798/7045 [18:50:08<4:00:31, 11.57s/it] {'loss': 1.0962, 'learning_rate': 3.9975208677845823e-07, 'epoch': 0.82} + 82%|████████▏ | 5798/7045 [18:50:08<4:00:31, 11.57s/it] 82%|████████▏ | 5799/7045 [18:50:19<3:57:26, 11.43s/it] {'loss': 1.0713, 'learning_rate': 3.991288260352222e-07, 'epoch': 0.82} + 82%|████████▏ | 5799/7045 [18:50:19<3:57:26, 11.43s/it] 82%|████████▏ | 5800/7045 [18:50:31<3:56:01, 11.37s/it] {'loss': 1.1035, 'learning_rate': 3.985060093874307e-07, 'epoch': 0.82} + 82%|████████▏ | 5800/7045 [18:50:31<3:56:01, 11.37s/it] 82%|████████▏ | 5801/7045 [18:50:42<3:57:39, 11.46s/it] {'loss': 1.1064, 'learning_rate': 3.9788363696673826e-07, 'epoch': 0.82} + 82%|████████▏ | 5801/7045 [18:50:42<3:57:39, 11.46s/it] 82%|████████▏ | 5802/7045 [18:50:53<3:54:56, 11.34s/it] {'loss': 1.0781, 'learning_rate': 3.9726170890470705e-07, 'epoch': 0.82} + 82%|████████▏ | 5802/7045 [18:50:53<3:54:56, 11.34s/it] 82%|████████▏ | 5803/7045 [18:51:05<3:57:04, 11.45s/it] {'loss': 1.0693, 'learning_rate': 3.9664022533280306e-07, 'epoch': 0.82} + 82%|████████▏ | 5803/7045 [18:51:05<3:57:04, 11.45s/it] 82%|████████▏ | 5804/7045 [18:51:17<3:57:32, 11.48s/it] {'loss': 1.1196, 'learning_rate': 3.9601918638240063e-07, 'epoch': 0.82} + 82%|████████▏ | 5804/7045 [18:51:17<3:57:32, 11.48s/it] 82%|████████▏ | 5805/7045 [18:51:28<3:55:19, 11.39s/it] {'loss': 1.1104, 'learning_rate': 3.9539859218477805e-07, 'epoch': 0.82} + 82%|████████▏ | 5805/7045 [18:51:28<3:55:19, 11.39s/it] 82%|████████▏ | 5806/7045 [18:51:39<3:52:54, 11.28s/it] {'loss': 1.1035, 'learning_rate': 3.9477844287112013e-07, 'epoch': 0.82} + 82%|████████▏ | 5806/7045 [18:51:39<3:52:54, 11.28s/it] 82%|████████▏ | 5807/7045 [18:51:52<4:02:53, 11.77s/it] {'loss': 1.1201, 'learning_rate': 3.94158738572519e-07, 'epoch': 0.82} + 82%|████████▏ | 5807/7045 [18:51:52<4:02:53, 11.77s/it] 82%|████████▏ | 5808/7045 [18:52:03<4:01:25, 11.71s/it] {'loss': 1.1318, 'learning_rate': 3.9353947941997125e-07, 'epoch': 0.82} + 82%|████████▏ | 5808/7045 [18:52:03<4:01:25, 11.71s/it] 82%|████████▏ | 5809/7045 [18:52:14<3:56:52, 11.50s/it] {'loss': 1.1323, 'learning_rate': 3.9292066554437847e-07, 'epoch': 0.82} + 82%|████████▏ | 5809/7045 [18:52:14<3:56:52, 11.50s/it] 82%|████████▏ | 5810/7045 [18:52:26<3:55:01, 11.42s/it] {'loss': 1.0645, 'learning_rate': 3.9230229707655167e-07, 'epoch': 0.82} + 82%|████████▏ | 5810/7045 [18:52:26<3:55:01, 11.42s/it] 82%|████████▏ | 5811/7045 [18:52:40<4:12:48, 12.29s/it] {'loss': 1.0864, 'learning_rate': 3.9168437414720454e-07, 'epoch': 0.82} + 82%|████████▏ | 5811/7045 [18:52:40<4:12:48, 12.29s/it] 82%|████████▏ | 5812/7045 [18:52:51<4:07:26, 12.04s/it] {'loss': 1.0986, 'learning_rate': 3.9106689688695714e-07, 'epoch': 0.82} + 82%|████████▏ | 5812/7045 [18:52:51<4:07:26, 12.04s/it] 83%|████████▎ | 5813/7045 [18:53:03<4:02:55, 11.83s/it] {'loss': 1.1719, 'learning_rate': 3.904498654263364e-07, 'epoch': 0.83} + 83%|████████▎ | 5813/7045 [18:53:03<4:02:55, 11.83s/it] 83%|████████▎ | 5814/7045 [18:53:14<3:59:09, 11.66s/it] {'loss': 1.0718, 'learning_rate': 3.8983327989577406e-07, 'epoch': 0.83} + 83%|████████▎ | 5814/7045 [18:53:14<3:59:09, 11.66s/it] 83%|████████▎ | 5815/7045 [18:53:25<3:55:20, 11.48s/it] {'loss': 1.1016, 'learning_rate': 3.892171404256073e-07, 'epoch': 0.83} + 83%|████████▎ | 5815/7045 [18:53:25<3:55:20, 11.48s/it] 83%|████████▎ | 5816/7045 [18:53:36<3:52:52, 11.37s/it] {'loss': 1.1191, 'learning_rate': 3.8860144714608055e-07, 'epoch': 0.83} + 83%|████████▎ | 5816/7045 [18:53:36<3:52:52, 11.37s/it] 83%|████████▎ | 5817/7045 [18:53:47<3:51:31, 11.31s/it] {'loss': 1.0879, 'learning_rate': 3.879862001873422e-07, 'epoch': 0.83} + 83%|████████▎ | 5817/7045 [18:53:47<3:51:31, 11.31s/it] 83%|████████▎ | 5818/7045 [18:53:58<3:50:15, 11.26s/it] {'loss': 1.0986, 'learning_rate': 3.873713996794476e-07, 'epoch': 0.83} + 83%|████████▎ | 5818/7045 [18:53:58<3:50:15, 11.26s/it] 83%|████████▎ | 5819/7045 [18:54:10<3:49:45, 11.24s/it] {'loss': 1.124, 'learning_rate': 3.86757045752357e-07, 'epoch': 0.83} + 83%|████████▎ | 5819/7045 [18:54:10<3:49:45, 11.24s/it] 83%|████████▎ | 5820/7045 [18:54:23<4:01:25, 11.82s/it] {'loss': 1.0801, 'learning_rate': 3.86143138535936e-07, 'epoch': 0.83} + 83%|████████▎ | 5820/7045 [18:54:23<4:01:25, 11.82s/it] 83%|████████▎ | 5821/7045 [18:54:34<3:59:16, 11.73s/it] {'loss': 1.1357, 'learning_rate': 3.855296781599563e-07, 'epoch': 0.83} + 83%|████████▎ | 5821/7045 [18:54:34<3:59:16, 11.73s/it] 83%|████████▎ | 5822/7045 [18:54:47<4:08:03, 12.17s/it] {'loss': 1.0522, 'learning_rate': 3.8491666475409586e-07, 'epoch': 0.83} + 83%|████████▎ | 5822/7045 [18:54:47<4:08:03, 12.17s/it] 83%|████████▎ | 5823/7045 [18:55:00<4:07:34, 12.16s/it] {'loss': 1.1445, 'learning_rate': 3.8430409844793597e-07, 'epoch': 0.83} + 83%|████████▎ | 5823/7045 [18:55:00<4:07:34, 12.16s/it] 83%|████████▎ | 5824/7045 [18:55:11<4:01:30, 11.87s/it] {'loss': 1.1055, 'learning_rate': 3.836919793709665e-07, 'epoch': 0.83} + 83%|████████▎ | 5824/7045 [18:55:11<4:01:30, 11.87s/it] 83%|████████▎ | 5825/7045 [18:55:24<4:08:35, 12.23s/it] {'loss': 1.1172, 'learning_rate': 3.830803076525799e-07, 'epoch': 0.83} + 83%|████████▎ | 5825/7045 [18:55:24<4:08:35, 12.23s/it] 83%|████████▎ | 5826/7045 [18:55:35<4:02:31, 11.94s/it] {'loss': 1.1221, 'learning_rate': 3.8246908342207484e-07, 'epoch': 0.83} + 83%|████████▎ | 5826/7045 [18:55:35<4:02:31, 11.94s/it] 83%|████████▎ | 5827/7045 [18:55:46<3:57:55, 11.72s/it] {'loss': 1.1328, 'learning_rate': 3.8185830680865675e-07, 'epoch': 0.83} + 83%|████████▎ | 5827/7045 [18:55:46<3:57:55, 11.72s/it] 83%|████████▎ | 5828/7045 [18:55:58<3:56:04, 11.64s/it] {'loss': 1.1338, 'learning_rate': 3.8124797794143463e-07, 'epoch': 0.83} + 83%|████████▎ | 5828/7045 [18:55:58<3:56:04, 11.64s/it] 83%|████████▎ | 5829/7045 [18:56:09<3:55:22, 11.61s/it] {'loss': 1.0903, 'learning_rate': 3.806380969494247e-07, 'epoch': 0.83} + 83%|████████▎ | 5829/7045 [18:56:09<3:55:22, 11.61s/it] 83%|████████▎ | 5830/7045 [18:56:23<4:04:31, 12.08s/it] {'loss': 1.0967, 'learning_rate': 3.8002866396154663e-07, 'epoch': 0.83} + 83%|████████▎ | 5830/7045 [18:56:23<4:04:31, 12.08s/it] 83%|████████▎ | 5831/7045 [18:56:33<3:56:59, 11.71s/it] {'loss': 1.0898, 'learning_rate': 3.79419679106626e-07, 'epoch': 0.83} + 83%|████████▎ | 5831/7045 [18:56:33<3:56:59, 11.71s/it] 83%|████████▎ | 5832/7045 [18:56:45<3:56:00, 11.67s/it] {'loss': 1.1416, 'learning_rate': 3.7881114251339504e-07, 'epoch': 0.83} + 83%|████████▎ | 5832/7045 [18:56:45<3:56:00, 11.67s/it] 83%|████████▎ | 5833/7045 [18:56:56<3:53:12, 11.54s/it] {'loss': 1.1646, 'learning_rate': 3.782030543104884e-07, 'epoch': 0.83} + 83%|████████▎ | 5833/7045 [18:56:56<3:53:12, 11.54s/it] 83%|████████▎ | 5834/7045 [18:57:08<3:54:04, 11.60s/it] {'loss': 1.1445, 'learning_rate': 3.7759541462644877e-07, 'epoch': 0.83} + 83%|████████▎ | 5834/7045 [18:57:08<3:54:04, 11.60s/it] 83%|████████▎ | 5835/7045 [18:57:21<4:02:13, 12.01s/it] {'loss': 1.1108, 'learning_rate': 3.769882235897232e-07, 'epoch': 0.83} + 83%|████████▎ | 5835/7045 [18:57:21<4:02:13, 12.01s/it] 83%|████████▎ | 5836/7045 [18:57:32<3:59:27, 11.88s/it] {'loss': 1.064, 'learning_rate': 3.763814813286629e-07, 'epoch': 0.83} + 83%|████████▎ | 5836/7045 [18:57:32<3:59:27, 11.88s/it] 83%|████████▎ | 5837/7045 [18:57:46<4:10:08, 12.42s/it] {'loss': 1.1143, 'learning_rate': 3.757751879715246e-07, 'epoch': 0.83} + 83%|████████▎ | 5837/7045 [18:57:46<4:10:08, 12.42s/it] 83%|████████▎ | 5838/7045 [18:57:58<4:08:01, 12.33s/it] {'loss': 1.0698, 'learning_rate': 3.751693436464712e-07, 'epoch': 0.83} + 83%|████████▎ | 5838/7045 [18:57:58<4:08:01, 12.33s/it] 83%|████████▎ | 5839/7045 [18:58:10<4:06:40, 12.27s/it] {'loss': 1.1011, 'learning_rate': 3.745639484815691e-07, 'epoch': 0.83} + 83%|████████▎ | 5839/7045 [18:58:10<4:06:40, 12.27s/it] 83%|████████▎ | 5840/7045 [18:58:22<3:59:44, 11.94s/it] {'loss': 1.1279, 'learning_rate': 3.7395900260479177e-07, 'epoch': 0.83} + 83%|████████▎ | 5840/7045 [18:58:22<3:59:44, 11.94s/it] 83%|████████▎ | 5841/7045 [18:58:34<3:59:39, 11.94s/it] {'loss': 1.1104, 'learning_rate': 3.7335450614401554e-07, 'epoch': 0.83} + 83%|████████▎ | 5841/7045 [18:58:34<3:59:39, 11.94s/it] 83%|████████▎ | 5842/7045 [18:58:45<3:59:35, 11.95s/it] {'loss': 1.0981, 'learning_rate': 3.727504592270223e-07, 'epoch': 0.83} + 83%|████████▎ | 5842/7045 [18:58:45<3:59:35, 11.95s/it] 83%|████████▎ | 5843/7045 [18:58:57<3:54:46, 11.72s/it] {'loss': 1.0859, 'learning_rate': 3.721468619815002e-07, 'epoch': 0.83} + 83%|████████▎ | 5843/7045 [18:58:57<3:54:46, 11.72s/it] 83%|████████▎ | 5844/7045 [18:59:08<3:53:33, 11.67s/it] {'loss': 1.1201, 'learning_rate': 3.715437145350409e-07, 'epoch': 0.83} + 83%|████████▎ | 5844/7045 [18:59:08<3:53:33, 11.67s/it] 83%|████████▎ | 5845/7045 [18:59:20<3:54:02, 11.70s/it] {'loss': 1.103, 'learning_rate': 3.709410170151423e-07, 'epoch': 0.83} + 83%|████████▎ | 5845/7045 [18:59:20<3:54:02, 11.70s/it] 83%|████████▎ | 5846/7045 [18:59:33<4:04:12, 12.22s/it] {'loss': 1.0879, 'learning_rate': 3.703387695492056e-07, 'epoch': 0.83} + 83%|████████▎ | 5846/7045 [18:59:33<4:04:12, 12.22s/it] 83%|████████▎ | 5847/7045 [18:59:44<3:56:56, 11.87s/it] {'loss': 1.1084, 'learning_rate': 3.69736972264538e-07, 'epoch': 0.83} + 83%|████████▎ | 5847/7045 [18:59:44<3:56:56, 11.87s/it] 83%|████████▎ | 5848/7045 [18:59:56<3:52:21, 11.65s/it] {'loss': 1.0947, 'learning_rate': 3.69135625288351e-07, 'epoch': 0.83} + 83%|████████▎ | 5848/7045 [18:59:56<3:52:21, 11.65s/it] 83%|████████▎ | 5849/7045 [19:00:07<3:49:56, 11.54s/it] {'loss': 1.0903, 'learning_rate': 3.6853472874776167e-07, 'epoch': 0.83} + 83%|████████▎ | 5849/7045 [19:00:07<3:49:56, 11.54s/it] 83%|████████▎ | 5850/7045 [19:00:18<3:48:19, 11.46s/it] {'loss': 1.0928, 'learning_rate': 3.6793428276979043e-07, 'epoch': 0.83} + 83%|████████▎ | 5850/7045 [19:00:18<3:48:19, 11.46s/it] 83%|████████▎ | 5851/7045 [19:00:29<3:45:37, 11.34s/it] {'loss': 1.1045, 'learning_rate': 3.6733428748136457e-07, 'epoch': 0.83} + 83%|████████▎ | 5851/7045 [19:00:29<3:45:37, 11.34s/it] 83%|████████▎ | 5852/7045 [19:00:41<3:50:01, 11.57s/it] {'loss': 1.0796, 'learning_rate': 3.6673474300931403e-07, 'epoch': 0.83} + 83%|████████▎ | 5852/7045 [19:00:41<3:50:01, 11.57s/it] 83%|████████▎ | 5853/7045 [19:00:54<3:54:48, 11.82s/it] {'loss': 1.0586, 'learning_rate': 3.6613564948037416e-07, 'epoch': 0.83} + 83%|████████▎ | 5853/7045 [19:00:54<3:54:48, 11.82s/it] 83%|████████▎ | 5854/7045 [19:01:06<3:55:08, 11.85s/it] {'loss': 1.1084, 'learning_rate': 3.6553700702118585e-07, 'epoch': 0.83} + 83%|████████▎ | 5854/7045 [19:01:06<3:55:08, 11.85s/it] 83%|████████▎ | 5855/7045 [19:01:18<3:55:50, 11.89s/it] {'loss': 1.0977, 'learning_rate': 3.649388157582931e-07, 'epoch': 0.83} + 83%|████████▎ | 5855/7045 [19:01:18<3:55:50, 11.89s/it] 83%|████████▎ | 5856/7045 [19:01:30<4:00:03, 12.11s/it] {'loss': 1.1016, 'learning_rate': 3.643410758181465e-07, 'epoch': 0.83} + 83%|████████▎ | 5856/7045 [19:01:30<4:00:03, 12.11s/it] 83%|████████▎ | 5857/7045 [19:01:43<4:03:35, 12.30s/it] {'loss': 1.1211, 'learning_rate': 3.637437873270994e-07, 'epoch': 0.83} + 83%|████████▎ | 5857/7045 [19:01:43<4:03:35, 12.30s/it] 83%|████████▎ | 5858/7045 [19:01:54<3:55:15, 11.89s/it] {'loss': 1.0908, 'learning_rate': 3.6314695041140964e-07, 'epoch': 0.83} + 83%|████████▎ | 5858/7045 [19:01:54<3:55:15, 11.89s/it] 83%|████████▎ | 5859/7045 [19:02:05<3:51:02, 11.69s/it] {'loss': 1.1055, 'learning_rate': 3.625505651972414e-07, 'epoch': 0.83} + 83%|████████▎ | 5859/7045 [19:02:05<3:51:02, 11.69s/it] 83%|████████▎ | 5860/7045 [19:02:16<3:47:56, 11.54s/it] {'loss': 1.0898, 'learning_rate': 3.6195463181066257e-07, 'epoch': 0.83} + 83%|████████▎ | 5860/7045 [19:02:16<3:47:56, 11.54s/it] 83%|████████▎ | 5861/7045 [19:02:28<3:48:15, 11.57s/it] {'loss': 1.1016, 'learning_rate': 3.613591503776445e-07, 'epoch': 0.83} + 83%|████████▎ | 5861/7045 [19:02:28<3:48:15, 11.57s/it] 83%|████████▎ | 5862/7045 [19:02:39<3:46:43, 11.50s/it] {'loss': 1.1104, 'learning_rate': 3.607641210240645e-07, 'epoch': 0.83} + 83%|████████▎ | 5862/7045 [19:02:39<3:46:43, 11.50s/it] 83%|████████▎ | 5863/7045 [19:02:51<3:44:54, 11.42s/it] {'loss': 1.1133, 'learning_rate': 3.601695438757036e-07, 'epoch': 0.83} + 83%|████████▎ | 5863/7045 [19:02:51<3:44:54, 11.42s/it] 83%|████████▎ | 5864/7045 [19:03:03<3:50:17, 11.70s/it] {'loss': 1.1455, 'learning_rate': 3.595754190582465e-07, 'epoch': 0.83} + 83%|████████▎ | 5864/7045 [19:03:03<3:50:17, 11.70s/it] 83%|████████▎ | 5865/7045 [19:03:15<3:50:13, 11.71s/it] {'loss': 1.0664, 'learning_rate': 3.589817466972842e-07, 'epoch': 0.83} + 83%|████████▎ | 5865/7045 [19:03:15<3:50:13, 11.71s/it] 83%|████████▎ | 5866/7045 [19:03:28<3:58:06, 12.12s/it] {'loss': 1.0635, 'learning_rate': 3.5838852691830963e-07, 'epoch': 0.83} + 83%|████████▎ | 5866/7045 [19:03:28<3:58:06, 12.12s/it] 83%|████████▎ | 5867/7045 [19:03:40<4:00:25, 12.25s/it] {'loss': 1.1035, 'learning_rate': 3.5779575984672273e-07, 'epoch': 0.83} + 83%|████████▎ | 5867/7045 [19:03:40<4:00:25, 12.25s/it] 83%|████████▎ | 5868/7045 [19:03:52<3:58:22, 12.15s/it] {'loss': 1.0977, 'learning_rate': 3.5720344560782576e-07, 'epoch': 0.83} + 83%|████████▎ | 5868/7045 [19:03:52<3:58:22, 12.15s/it] 83%|████████▎ | 5869/7045 [19:04:05<4:04:35, 12.48s/it] {'loss': 1.0977, 'learning_rate': 3.56611584326825e-07, 'epoch': 0.83} + 83%|████████▎ | 5869/7045 [19:04:05<4:04:35, 12.48s/it] 83%|████████▎ | 5870/7045 [19:04:16<3:54:56, 12.00s/it] {'loss': 1.1104, 'learning_rate': 3.560201761288326e-07, 'epoch': 0.83} + 83%|████████▎ | 5870/7045 [19:04:16<3:54:56, 12.00s/it] 83%|████████▎ | 5871/7045 [19:04:29<4:00:54, 12.31s/it] {'loss': 1.0664, 'learning_rate': 3.554292211388649e-07, 'epoch': 0.83} + 83%|████████▎ | 5871/7045 [19:04:29<4:00:54, 12.31s/it] 83%|████████▎ | 5872/7045 [19:04:42<4:00:00, 12.28s/it] {'loss': 1.082, 'learning_rate': 3.5483871948184034e-07, 'epoch': 0.83} + 83%|████████▎ | 5872/7045 [19:04:42<4:00:00, 12.28s/it] 83%|████████▎ | 5873/7045 [19:04:52<3:51:33, 11.85s/it] {'loss': 1.104, 'learning_rate': 3.542486712825838e-07, 'epoch': 0.83} + 83%|████████▎ | 5873/7045 [19:04:52<3:51:33, 11.85s/it] 83%|████████▎ | 5874/7045 [19:05:05<3:55:30, 12.07s/it] {'loss': 1.1494, 'learning_rate': 3.5365907666582347e-07, 'epoch': 0.83} + 83%|████████▎ | 5874/7045 [19:05:05<3:55:30, 12.07s/it] 83%|████████▎ | 5875/7045 [19:05:17<3:55:03, 12.05s/it] {'loss': 1.0674, 'learning_rate': 3.530699357561906e-07, 'epoch': 0.83} + 83%|████████▎ | 5875/7045 [19:05:17<3:55:03, 12.05s/it] 83%|████████▎ | 5876/7045 [19:05:30<4:01:50, 12.41s/it] {'loss': 1.1006, 'learning_rate': 3.524812486782228e-07, 'epoch': 0.83} + 83%|████████▎ | 5876/7045 [19:05:30<4:01:50, 12.41s/it] 83%|████████▎ | 5877/7045 [19:05:43<4:04:37, 12.57s/it] {'loss': 1.0811, 'learning_rate': 3.518930155563591e-07, 'epoch': 0.83} + 83%|████████▎ | 5877/7045 [19:05:43<4:04:37, 12.57s/it] 83%|████████▎ | 5878/7045 [19:05:56<4:04:08, 12.55s/it] {'loss': 1.0645, 'learning_rate': 3.5130523651494554e-07, 'epoch': 0.83} + 83%|████████▎ | 5878/7045 [19:05:56<4:04:08, 12.55s/it] 83%|████████▎ | 5879/7045 [19:06:07<3:54:34, 12.07s/it] {'loss': 1.1406, 'learning_rate': 3.5071791167822944e-07, 'epoch': 0.83} + 83%|████████▎ | 5879/7045 [19:06:07<3:54:34, 12.07s/it] 83%|████████▎ | 5880/7045 [19:06:18<3:51:20, 11.91s/it] {'loss': 1.1206, 'learning_rate': 3.501310411703632e-07, 'epoch': 0.83} + 83%|████████▎ | 5880/7045 [19:06:18<3:51:20, 11.91s/it] 83%|████████▎ | 5881/7045 [19:06:29<3:46:50, 11.69s/it] {'loss': 1.1191, 'learning_rate': 3.495446251154039e-07, 'epoch': 0.83} + 83%|████████▎ | 5881/7045 [19:06:29<3:46:50, 11.69s/it] 83%|████████▎ | 5882/7045 [19:06:43<3:57:17, 12.24s/it] {'loss': 1.1279, 'learning_rate': 3.48958663637311e-07, 'epoch': 0.83} + 83%|████████▎ | 5882/7045 [19:06:43<3:57:17, 12.24s/it] 84%|████████▎ | 5883/7045 [19:06:54<3:51:51, 11.97s/it] {'loss': 1.0957, 'learning_rate': 3.483731568599491e-07, 'epoch': 0.84} + 84%|████████▎ | 5883/7045 [19:06:54<3:51:51, 11.97s/it] 84%|████████▎ | 5884/7045 [19:07:05<3:47:10, 11.74s/it] {'loss': 1.1074, 'learning_rate': 3.477881049070875e-07, 'epoch': 0.84} + 84%|████████▎ | 5884/7045 [19:07:05<3:47:10, 11.74s/it] 84%|████████▎ | 5885/7045 [19:07:17<3:44:04, 11.59s/it] {'loss': 1.1416, 'learning_rate': 3.4720350790239666e-07, 'epoch': 0.84} + 84%|████████▎ | 5885/7045 [19:07:17<3:44:04, 11.59s/it] 84%|████████▎ | 5886/7045 [19:07:28<3:41:24, 11.46s/it] {'loss': 1.1016, 'learning_rate': 3.4661936596945276e-07, 'epoch': 0.84} + 84%|████████▎ | 5886/7045 [19:07:28<3:41:24, 11.46s/it] 84%|████████▎ | 5887/7045 [19:07:41<3:51:03, 11.97s/it] {'loss': 1.0352, 'learning_rate': 3.46035679231736e-07, 'epoch': 0.84} + 84%|████████▎ | 5887/7045 [19:07:41<3:51:03, 11.97s/it] 84%|████████▎ | 5888/7045 [19:07:52<3:46:38, 11.75s/it] {'loss': 1.0859, 'learning_rate': 3.4545244781262863e-07, 'epoch': 0.84} + 84%|████████▎ | 5888/7045 [19:07:52<3:46:38, 11.75s/it] 84%|████████▎ | 5889/7045 [19:08:04<3:43:49, 11.62s/it] {'loss': 1.1006, 'learning_rate': 3.448696718354194e-07, 'epoch': 0.84} + 84%|████████▎ | 5889/7045 [19:08:04<3:43:49, 11.62s/it] 84%|████████▎ | 5890/7045 [19:08:14<3:39:12, 11.39s/it] {'loss': 1.1152, 'learning_rate': 3.4428735142329804e-07, 'epoch': 0.84} + 84%|████████▎ | 5890/7045 [19:08:14<3:39:12, 11.39s/it] 84%|████████▎ | 5891/7045 [19:08:27<3:45:20, 11.72s/it] {'loss': 1.1064, 'learning_rate': 3.437054866993589e-07, 'epoch': 0.84} + 84%|████████▎ | 5891/7045 [19:08:27<3:45:20, 11.72s/it] 84%|████████▎ | 5892/7045 [19:08:39<3:48:38, 11.90s/it] {'loss': 1.1016, 'learning_rate': 3.4312407778660126e-07, 'epoch': 0.84} + 84%|████████▎ | 5892/7045 [19:08:39<3:48:38, 11.90s/it] 84%|████████▎ | 5893/7045 [19:08:52<3:54:23, 12.21s/it] {'loss': 1.1377, 'learning_rate': 3.425431248079264e-07, 'epoch': 0.84} + 84%|████████▎ | 5893/7045 [19:08:52<3:54:23, 12.21s/it] 84%|████████▎ | 5894/7045 [19:09:03<3:48:12, 11.90s/it] {'loss': 1.1094, 'learning_rate': 3.4196262788613906e-07, 'epoch': 0.84} + 84%|████████▎ | 5894/7045 [19:09:03<3:48:12, 11.90s/it] 84%|████████▎ | 5895/7045 [19:09:15<3:48:38, 11.93s/it] {'loss': 1.1289, 'learning_rate': 3.4138258714394993e-07, 'epoch': 0.84} + 84%|████████▎ | 5895/7045 [19:09:15<3:48:38, 11.93s/it] 84%|████████▎ | 5896/7045 [19:09:27<3:47:14, 11.87s/it] {'loss': 1.0806, 'learning_rate': 3.408030027039713e-07, 'epoch': 0.84} + 84%|████████▎ | 5896/7045 [19:09:27<3:47:14, 11.87s/it] 84%|████████▎ | 5897/7045 [19:09:39<3:46:28, 11.84s/it] {'loss': 1.0776, 'learning_rate': 3.402238746887182e-07, 'epoch': 0.84} + 84%|████████▎ | 5897/7045 [19:09:39<3:46:28, 11.84s/it] 84%|████████▎ | 5898/7045 [19:09:50<3:42:17, 11.63s/it] {'loss': 1.1123, 'learning_rate': 3.3964520322061213e-07, 'epoch': 0.84} + 84%|████████▎ | 5898/7045 [19:09:50<3:42:17, 11.63s/it] 84%|████████▎ | 5899/7045 [19:10:01<3:37:37, 11.39s/it] {'loss': 1.1035, 'learning_rate': 3.3906698842197467e-07, 'epoch': 0.84} + 84%|████████▎ | 5899/7045 [19:10:01<3:37:37, 11.39s/it] 84%|████████▎ | 5900/7045 [19:10:13<3:43:13, 11.70s/it] {'loss': 1.1123, 'learning_rate': 3.384892304150339e-07, 'epoch': 0.84} + 84%|████████▎ | 5900/7045 [19:10:13<3:43:13, 11.70s/it] 84%|████████▍ | 5901/7045 [19:10:24<3:39:30, 11.51s/it] {'loss': 1.1045, 'learning_rate': 3.3791192932191905e-07, 'epoch': 0.84} + 84%|████████▍ | 5901/7045 [19:10:24<3:39:30, 11.51s/it] 84%|████████▍ | 5902/7045 [19:10:36<3:41:58, 11.65s/it] {'loss': 1.1387, 'learning_rate': 3.3733508526466395e-07, 'epoch': 0.84} + 84%|████████▍ | 5902/7045 [19:10:36<3:41:58, 11.65s/it] 84%|████████▍ | 5903/7045 [19:10:49<3:45:17, 11.84s/it] {'loss': 1.0928, 'learning_rate': 3.3675869836520577e-07, 'epoch': 0.84} + 84%|████████▍ | 5903/7045 [19:10:49<3:45:17, 11.84s/it] 84%|████████▍ | 5904/7045 [19:11:00<3:41:02, 11.62s/it] {'loss': 1.1162, 'learning_rate': 3.361827687453847e-07, 'epoch': 0.84} + 84%|████████▍ | 5904/7045 [19:11:00<3:41:02, 11.62s/it] 84%|████████▍ | 5905/7045 [19:11:11<3:40:04, 11.58s/it] {'loss': 1.123, 'learning_rate': 3.3560729652694387e-07, 'epoch': 0.84} + 84%|████████▍ | 5905/7045 [19:11:11<3:40:04, 11.58s/it] 84%|████████▍ | 5906/7045 [19:11:23<3:41:03, 11.64s/it] {'loss': 1.0996, 'learning_rate': 3.3503228183153105e-07, 'epoch': 0.84} + 84%|████████▍ | 5906/7045 [19:11:23<3:41:03, 11.64s/it] 84%|████████▍ | 5907/7045 [19:11:34<3:39:06, 11.55s/it] {'loss': 1.1001, 'learning_rate': 3.344577247806957e-07, 'epoch': 0.84} + 84%|████████▍ | 5907/7045 [19:11:34<3:39:06, 11.55s/it] 84%|████████▍ | 5908/7045 [19:11:46<3:37:47, 11.49s/it] {'loss': 1.1104, 'learning_rate': 3.338836254958916e-07, 'epoch': 0.84} + 84%|████████▍ | 5908/7045 [19:11:46<3:37:47, 11.49s/it] 84%|█████���██▍ | 5909/7045 [19:11:56<3:34:02, 11.30s/it] {'loss': 1.1084, 'learning_rate': 3.333099840984763e-07, 'epoch': 0.84} + 84%|████████▍ | 5909/7045 [19:11:56<3:34:02, 11.30s/it] 84%|████████▍ | 5910/7045 [19:12:08<3:36:43, 11.46s/it] {'loss': 1.085, 'learning_rate': 3.327368007097084e-07, 'epoch': 0.84} + 84%|████████▍ | 5910/7045 [19:12:08<3:36:43, 11.46s/it] 84%|████████▍ | 5911/7045 [19:12:21<3:42:04, 11.75s/it] {'loss': 1.0908, 'learning_rate': 3.321640754507524e-07, 'epoch': 0.84} + 84%|████████▍ | 5911/7045 [19:12:21<3:42:04, 11.75s/it] 84%|████████▍ | 5912/7045 [19:12:32<3:38:57, 11.60s/it] {'loss': 1.0967, 'learning_rate': 3.3159180844267394e-07, 'epoch': 0.84} + 84%|████████▍ | 5912/7045 [19:12:32<3:38:57, 11.60s/it] 84%|████████▍ | 5913/7045 [19:12:45<3:44:15, 11.89s/it] {'loss': 1.1162, 'learning_rate': 3.31019999806442e-07, 'epoch': 0.84} + 84%|████████▍ | 5913/7045 [19:12:45<3:44:15, 11.89s/it] 84%|████████▍ | 5914/7045 [19:12:56<3:40:35, 11.70s/it] {'loss': 1.1221, 'learning_rate': 3.304486496629297e-07, 'epoch': 0.84} + 84%|████████▍ | 5914/7045 [19:12:56<3:40:35, 11.70s/it] 84%|████████▍ | 5915/7045 [19:13:09<3:51:13, 12.28s/it] {'loss': 1.0957, 'learning_rate': 3.2987775813291266e-07, 'epoch': 0.84} + 84%|████████▍ | 5915/7045 [19:13:09<3:51:13, 12.28s/it] 84%|████████▍ | 5916/7045 [19:13:21<3:44:36, 11.94s/it] {'loss': 1.1143, 'learning_rate': 3.293073253370688e-07, 'epoch': 0.84} + 84%|████████▍ | 5916/7045 [19:13:21<3:44:36, 11.94s/it] 84%|████████▍ | 5917/7045 [19:13:32<3:40:58, 11.75s/it] {'loss': 1.083, 'learning_rate': 3.28737351395981e-07, 'epoch': 0.84} + 84%|████████▍ | 5917/7045 [19:13:32<3:40:58, 11.75s/it] 84%|████████▍ | 5918/7045 [19:13:43<3:36:44, 11.54s/it] {'loss': 1.0762, 'learning_rate': 3.2816783643013243e-07, 'epoch': 0.84} + 84%|████████▍ | 5918/7045 [19:13:43<3:36:44, 11.54s/it] 84%|████████▍ | 5919/7045 [19:13:54<3:34:39, 11.44s/it] {'loss': 1.1309, 'learning_rate': 3.275987805599121e-07, 'epoch': 0.84} + 84%|████████▍ | 5919/7045 [19:13:54<3:34:39, 11.44s/it] 84%|████████▍ | 5920/7045 [19:14:05<3:33:09, 11.37s/it] {'loss': 1.1191, 'learning_rate': 3.270301839056106e-07, 'epoch': 0.84} + 84%|████████▍ | 5920/7045 [19:14:05<3:33:09, 11.37s/it] 84%|████████▍ | 5921/7045 [19:14:18<3:42:38, 11.88s/it] {'loss': 1.1064, 'learning_rate': 3.2646204658742046e-07, 'epoch': 0.84} + 84%|████████▍ | 5921/7045 [19:14:18<3:42:38, 11.88s/it] 84%|████████▍ | 5922/7045 [19:14:29<3:36:40, 11.58s/it] {'loss': 1.1123, 'learning_rate': 3.258943687254393e-07, 'epoch': 0.84} + 84%|████████▍ | 5922/7045 [19:14:29<3:36:40, 11.58s/it] 84%|████████▍ | 5923/7045 [19:14:40<3:34:30, 11.47s/it] {'loss': 1.0742, 'learning_rate': 3.25327150439666e-07, 'epoch': 0.84} + 84%|████████▍ | 5923/7045 [19:14:40<3:34:30, 11.47s/it] 84%|████████▍ | 5924/7045 [19:14:52<3:33:23, 11.42s/it] {'loss': 1.1533, 'learning_rate': 3.2476039185000195e-07, 'epoch': 0.84} + 84%|████████▍ | 5924/7045 [19:14:52<3:33:23, 11.42s/it] 84%|████████▍ | 5925/7045 [19:15:03<3:32:05, 11.36s/it] {'loss': 1.1055, 'learning_rate': 3.2419409307625384e-07, 'epoch': 0.84} + 84%|████████▍ | 5925/7045 [19:15:03<3:32:05, 11.36s/it] 84%|████████▍ | 5926/7045 [19:15:14<3:30:53, 11.31s/it] {'loss': 1.1377, 'learning_rate': 3.2362825423812834e-07, 'epoch': 0.84} + 84%|████████▍ | 5926/7045 [19:15:14<3:30:53, 11.31s/it] 84%|████████▍ | 5927/7045 [19:15:26<3:32:46, 11.42s/it] {'loss': 1.0884, 'learning_rate': 3.230628754552356e-07, 'epoch': 0.84} + 84%|████████▍ | 5927/7045 [19:15:26<3:32:46, 11.42s/it] 84%|████████▍ | 5928/7045 [19:15:37<3:31:08, 11.34s/it] {'loss': 1.1006, 'learning_rate': 3.224979568470901e-07, 'epoch': 0.84} + 84%|████████▍ | 5928/7045 [19:15:37<3:31:08, 11.34s/it] 84%|████████▍ | 5929/7045 [19:15:48<3:29:36, 11.27s/it] {'loss': 1.1338, 'learning_rate': 3.2193349853310714e-07, 'epoch': 0.84} + 84%|████████▍ | 5929/7045 [19:15:48<3:29:36, 11.27s/it] 84%|████████▍ | 5930/7045 [19:16:00<3:35:19, 11.59s/it] {'loss': 1.0869, 'learning_rate': 3.213695006326062e-07, 'epoch': 0.84} + 84%|████████▍ | 5930/7045 [19:16:00<3:35:19, 11.59s/it] 84%|████████▍ | 5931/7045 [19:16:13<3:40:26, 11.87s/it] {'loss': 1.1284, 'learning_rate': 3.208059632648078e-07, 'epoch': 0.84} + 84%|████████▍ | 5931/7045 [19:16:13<3:40:26, 11.87s/it] 84%|████████▍ | 5932/7045 [19:16:26<3:43:43, 12.06s/it] {'loss': 1.0864, 'learning_rate': 3.2024288654883664e-07, 'epoch': 0.84} + 84%|████████▍ | 5932/7045 [19:16:26<3:43:43, 12.06s/it] 84%|████████▍ | 5933/7045 [19:16:37<3:39:21, 11.84s/it] {'loss': 1.103, 'learning_rate': 3.196802706037197e-07, 'epoch': 0.84} + 84%|████████▍ | 5933/7045 [19:16:37<3:39:21, 11.84s/it] 84%|████████▍ | 5934/7045 [19:16:48<3:35:49, 11.66s/it] {'loss': 1.1055, 'learning_rate': 3.191181155483861e-07, 'epoch': 0.84} + 84%|████████▍ | 5934/7045 [19:16:48<3:35:49, 11.66s/it] 84%|████████▍ | 5935/7045 [19:17:00<3:34:41, 11.60s/it] {'loss': 1.1523, 'learning_rate': 3.1855642150166707e-07, 'epoch': 0.84} + 84%|████████▍ | 5935/7045 [19:17:00<3:34:41, 11.60s/it] 84%|████████▍ | 5936/7045 [19:17:11<3:36:01, 11.69s/it] {'loss': 1.1338, 'learning_rate': 3.1799518858229796e-07, 'epoch': 0.84} + 84%|████████▍ | 5936/7045 [19:17:11<3:36:01, 11.69s/it] 84%|████████▍ | 5937/7045 [19:17:23<3:36:30, 11.72s/it] {'loss': 1.1602, 'learning_rate': 3.1743441690891525e-07, 'epoch': 0.84} + 84%|████████▍ | 5937/7045 [19:17:23<3:36:30, 11.72s/it] 84%|████████▍ | 5938/7045 [19:17:35<3:33:47, 11.59s/it] {'loss': 1.1523, 'learning_rate': 3.16874106600058e-07, 'epoch': 0.84} + 84%|████████▍ | 5938/7045 [19:17:35<3:33:47, 11.59s/it] 84%|████████▍ | 5939/7045 [19:17:48<3:42:33, 12.07s/it] {'loss': 1.0986, 'learning_rate': 3.163142577741693e-07, 'epoch': 0.84} + 84%|████████▍ | 5939/7045 [19:17:48<3:42:33, 12.07s/it] 84%|████████▍ | 5940/7045 [19:18:00<3:45:44, 12.26s/it] {'loss': 1.0957, 'learning_rate': 3.157548705495922e-07, 'epoch': 0.84} + 84%|████████▍ | 5940/7045 [19:18:00<3:45:44, 12.26s/it] 84%|████████▍ | 5941/7045 [19:18:12<3:40:35, 11.99s/it] {'loss': 1.0669, 'learning_rate': 3.151959450445749e-07, 'epoch': 0.84} + 84%|████████▍ | 5941/7045 [19:18:12<3:40:35, 11.99s/it] 84%|████████▍ | 5942/7045 [19:18:23<3:35:28, 11.72s/it] {'loss': 1.0894, 'learning_rate': 3.146374813772657e-07, 'epoch': 0.84} + 84%|████████▍ | 5942/7045 [19:18:23<3:35:28, 11.72s/it] 84%|████████▍ | 5943/7045 [19:18:36<3:44:30, 12.22s/it] {'loss': 1.0601, 'learning_rate': 3.140794796657154e-07, 'epoch': 0.84} + 84%|████████▍ | 5943/7045 [19:18:36<3:44:30, 12.22s/it] 84%|████████▍ | 5944/7045 [19:18:47<3:38:34, 11.91s/it] {'loss': 1.0947, 'learning_rate': 3.135219400278802e-07, 'epoch': 0.84} + 84%|████████▍ | 5944/7045 [19:18:47<3:38:34, 11.91s/it] 84%|████████▍ | 5945/7045 [19:18:59<3:37:38, 11.87s/it] {'loss': 1.0703, 'learning_rate': 3.129648625816148e-07, 'epoch': 0.84} + 84%|████████▍ | 5945/7045 [19:18:59<3:37:38, 11.87s/it] 84%|████████▍ | 5946/7045 [19:19:10<3:33:25, 11.65s/it] {'loss': 1.127, 'learning_rate': 3.124082474446774e-07, 'epoch': 0.84} + 84%|████████▍ | 5946/7045 [19:19:10<3:33:25, 11.65s/it] 84%|████████▍ | 5947/7045 [19:19:22<3:32:18, 11.60s/it] {'loss': 1.0913, 'learning_rate': 3.118520947347303e-07, 'epoch': 0.84} + 84%|████████▍ | 5947/7045 [19:19:22<3:32:18, 11.60s/it] 84%|████████▍ | 5948/7045 [19:19:33<3:29:23, 11.45s/it] {'loss': 1.0889, 'learning_rate': 3.1129640456933565e-07, 'epoch': 0.84} + 84%|████████▍ | 5948/7045 [19:19:33<3:29:23, 11.45s/it] 84%|████████▍ | 5949/7045 [19:19:44<3:26:41, 11.32s/it] {'loss': 1.1055, 'learning_rate': 3.1074117706595827e-07, 'epoch': 0.84} + 84%|████████▍ | 5949/7045 [19:19:44<3:26:41, 11.32s/it] 84%|████████▍ | 5950/7045 [19:19:55<3:26:35, 11.32s/it] {'loss': 1.0811, 'learning_rate': 3.101864123419668e-07, 'epoch': 0.84} + 84%|████████▍ | 5950/7045 [19:19:55<3:26:35, 11.32s/it] 84%|████████▍ | 5951/7045 [19:20:07<3:26:16, 11.31s/it] {'loss': 1.0693, 'learning_rate': 3.0963211051462967e-07, 'epoch': 0.84} + 84%|████████▍ | 5951/7045 [19:20:07<3:26:16, 11.31s/it] 84%|████████▍ | 5952/7045 [19:20:18<3:26:07, 11.31s/it] {'loss': 1.1006, 'learning_rate': 3.090782717011201e-07, 'epoch': 0.84} + 84%|████████▍ | 5952/7045 [19:20:18<3:26:07, 11.31s/it] 84%|████████▍ | 5953/7045 [19:20:29<3:25:46, 11.31s/it] {'loss': 1.1001, 'learning_rate': 3.085248960185111e-07, 'epoch': 0.84} + 84%|████████▍ | 5953/7045 [19:20:29<3:25:46, 11.31s/it] 85%|████████▍ | 5954/7045 [19:20:41<3:26:32, 11.36s/it] {'loss': 1.1045, 'learning_rate': 3.0797198358377857e-07, 'epoch': 0.85} + 85%|████████▍ | 5954/7045 [19:20:41<3:26:32, 11.36s/it] 85%|████████▍ | 5955/7045 [19:20:52<3:24:06, 11.24s/it] {'loss': 1.1211, 'learning_rate': 3.0741953451380117e-07, 'epoch': 0.85} + 85%|████████▍ | 5955/7045 [19:20:52<3:24:06, 11.24s/it] 85%|████████▍ | 5956/7045 [19:21:03<3:22:59, 11.18s/it] {'loss': 1.1357, 'learning_rate': 3.0686754892535945e-07, 'epoch': 0.85} + 85%|████████▍ | 5956/7045 [19:21:03<3:22:59, 11.18s/it] 85%|████████▍ | 5957/7045 [19:21:14<3:23:41, 11.23s/it] {'loss': 1.1348, 'learning_rate': 3.0631602693513453e-07, 'epoch': 0.85} + 85%|████████▍ | 5957/7045 [19:21:14<3:23:41, 11.23s/it] 85%|████████▍ | 5958/7045 [19:21:25<3:23:49, 11.25s/it] {'loss': 1.0703, 'learning_rate': 3.057649686597117e-07, 'epoch': 0.85} + 85%|████████▍ | 5958/7045 [19:21:25<3:23:49, 11.25s/it] 85%|████████▍ | 5959/7045 [19:21:38<3:33:16, 11.78s/it] {'loss': 1.0596, 'learning_rate': 3.0521437421557635e-07, 'epoch': 0.85} + 85%|████████▍ | 5959/7045 [19:21:38<3:33:16, 11.78s/it] 85%|████████▍ | 5960/7045 [19:21:51<3:38:24, 12.08s/it] {'loss': 1.1299, 'learning_rate': 3.046642437191169e-07, 'epoch': 0.85} + 85%|████████▍ | 5960/7045 [19:21:51<3:38:24, 12.08s/it] 85%|████████▍ | 5961/7045 [19:22:02<3:32:58, 11.79s/it] {'loss': 1.085, 'learning_rate': 3.041145772866236e-07, 'epoch': 0.85} + 85%|████████▍ | 5961/7045 [19:22:02<3:32:58, 11.79s/it] 85%|████████▍ | 5962/7045 [19:22:15<3:38:56, 12.13s/it] {'loss': 1.1079, 'learning_rate': 3.035653750342879e-07, 'epoch': 0.85} + 85%|████████▍ | 5962/7045 [19:22:15<3:38:56, 12.13s/it] 85%|████████▍ | 5963/7045 [19:22:27<3:34:46, 11.91s/it] {'loss': 1.0933, 'learning_rate': 3.030166370782045e-07, 'epoch': 0.85} + 85%|████████▍ | 5963/7045 [19:22:27<3:34:46, 11.91s/it] 85%|████████▍ | 5964/7045 [19:22:38<3:30:55, 11.71s/it] {'loss': 1.1357, 'learning_rate': 3.024683635343684e-07, 'epoch': 0.85} + 85%|████████▍ | 5964/7045 [19:22:38<3:30:55, 11.71s/it] 85%|████████▍ | 5965/7045 [19:22:49<3:27:36, 11.53s/it] {'loss': 1.0635, 'learning_rate': 3.0192055451867707e-07, 'epoch': 0.85} + 85%|████████▍ | 5965/7045 [19:22:49<3:27:36, 11.53s/it] 85%|████████▍ | 5966/7045 [19:23:01<3:32:31, 11.82s/it] {'loss': 1.1094, 'learning_rate': 3.013732101469305e-07, 'epoch': 0.85} + 85%|████████▍ | 5966/7045 [19:23:01<3:32:31, 11.82s/it] 85%|████████▍ | 5967/7045 [19:23:12<3:26:53, 11.52s/it] {'loss': 1.0938, 'learning_rate': 3.008263305348294e-07, 'epoch': 0.85} + 85%|████████▍ | 5967/7045 [19:23:12<3:26:53, 11.52s/it] 85%|████████▍ | 5968/7045 [19:23:23<3:24:08, 11.37s/it] {'loss': 1.1211, 'learning_rate': 3.0027991579797576e-07, 'epoch': 0.85} + 85%|████████▍ | 5968/7045 [19:23:23<3:24:08, 11.37s/it] 85%|████████▍ | 5969/7045 [19:23:34<3:22:02, 11.27s/it] {'loss': 1.1055, 'learning_rate': 2.9973396605187583e-07, 'epoch': 0.85} + 85%|████████▍ | 5969/7045 [19:23:34<3:22:02, 11.27s/it] 85%|████████▍ | 5970/7045 [19:23:47<3:27:48, 11.60s/it] {'loss': 1.0625, 'learning_rate': 2.9918848141193527e-07, 'epoch': 0.85} + 85%|████████▍ | 5970/7045 [19:23:47<3:27:48, 11.60s/it] 85%|████████▍ | 5971/7045 [19:23:57<3:23:23, 11.36s/it] {'loss': 1.125, 'learning_rate': 2.986434619934617e-07, 'epoch': 0.85} + 85%|████████▍ | 5971/7045 [19:23:57<3:23:23, 11.36s/it] 85%|████████▍ | 5972/7045 [19:24:09<3:21:39, 11.28s/it] {'loss': 1.0771, 'learning_rate': 2.980989079116653e-07, 'epoch': 0.85} + 85%|████████▍ | 5972/7045 [19:24:09<3:21:39, 11.28s/it] 85%|████████▍ | 5973/7045 [19:24:20<3:20:35, 11.23s/it] {'loss': 1.0947, 'learning_rate': 2.9755481928165685e-07, 'epoch': 0.85} + 85%|████████▍ | 5973/7045 [19:24:20<3:20:35, 11.23s/it] 85%|████████▍ | 5974/7045 [19:24:32<3:26:39, 11.58s/it] {'loss': 1.0845, 'learning_rate': 2.9701119621844994e-07, 'epoch': 0.85} + 85%|████████▍ | 5974/7045 [19:24:32<3:26:39, 11.58s/it] 85%|████████▍ | 5975/7045 [19:24:43<3:23:18, 11.40s/it] {'loss': 1.0977, 'learning_rate': 2.9646803883695843e-07, 'epoch': 0.85} + 85%|████████▍ | 5975/7045 [19:24:43<3:23:18, 11.40s/it] 85%|████████▍ | 5976/7045 [19:24:56<3:29:52, 11.78s/it] {'loss': 1.0767, 'learning_rate': 2.959253472519982e-07, 'epoch': 0.85} + 85%|████████▍ | 5976/7045 [19:24:56<3:29:52, 11.78s/it] 85%|████████▍ | 5977/7045 [19:25:07<3:25:51, 11.56s/it] {'loss': 1.0845, 'learning_rate': 2.9538312157828764e-07, 'epoch': 0.85} + 85%|████████▍ | 5977/7045 [19:25:07<3:25:51, 11.56s/it] 85%|████████▍ | 5978/7045 [19:25:18<3:25:02, 11.53s/it] {'loss': 1.082, 'learning_rate': 2.948413619304455e-07, 'epoch': 0.85} + 85%|████████▍ | 5978/7045 [19:25:18<3:25:02, 11.53s/it] 85%|████████▍ | 5979/7045 [19:25:30<3:28:58, 11.76s/it] {'loss': 1.1299, 'learning_rate': 2.943000684229916e-07, 'epoch': 0.85} + 85%|████████▍ | 5979/7045 [19:25:30<3:28:58, 11.76s/it] 85%|████████▍ | 5980/7045 [19:25:43<3:30:27, 11.86s/it] {'loss': 1.0581, 'learning_rate': 2.937592411703488e-07, 'epoch': 0.85} + 85%|████████▍ | 5980/7045 [19:25:43<3:30:27, 11.86s/it] 85%|████████▍ | 5981/7045 [19:25:54<3:26:49, 11.66s/it] {'loss': 1.0737, 'learning_rate': 2.9321888028684076e-07, 'epoch': 0.85} + 85%|████████▍ | 5981/7045 [19:25:54<3:26:49, 11.66s/it] 85%|████████▍ | 5982/7045 [19:26:07<3:33:10, 12.03s/it] {'loss': 1.1187, 'learning_rate': 2.926789858866916e-07, 'epoch': 0.85} + 85%|████████▍ | 5982/7045 [19:26:07<3:33:10, 12.03s/it] 85%|████████▍ | 5983/7045 [19:26:18<3:27:48, 11.74s/it] {'loss': 1.082, 'learning_rate': 2.921395580840286e-07, 'epoch': 0.85} + 85%|████████▍ | 5983/7045 [19:26:18<3:27:48, 11.74s/it] 85%|████████▍ | 5984/7045 [19:26:29<3:26:02, 11.65s/it] {'loss': 1.0884, 'learning_rate': 2.916005969928784e-07, 'epoch': 0.85} + 85%|████████▍ | 5984/7045 [19:26:29<3:26:02, 11.65s/it] 85%|████████▍ | 5985/7045 [19:26:40<3:24:09, 11.56s/it] {'loss': 1.0864, 'learning_rate': 2.9106210272717124e-07, 'epoch': 0.85} + 85%|████████▍ | 5985/7045 [19:26:41<3:24:09, 11.56s/it] 85%|████████▍ | 5986/7045 [19:26:52<3:25:15, 11.63s/it] {'loss': 1.1367, 'learning_rate': 2.905240754007363e-07, 'epoch': 0.85} + 85%|████████▍ | 5986/7045 [19:26:52<3:25:15, 11.63s/it] 85%|████████▍ | 5987/7045 [19:27:04<3:26:54, 11.73s/it] {'loss': 1.1074, 'learning_rate': 2.8998651512730543e-07, 'epoch': 0.85} + 85%|████████▍ | 5987/7045 [19:27:04<3:26:54, 11.73s/it] 85%|████████▍ | 5988/7045 [19:27:15<3:23:44, 11.57s/it] {'loss': 1.0947, 'learning_rate': 2.8944942202051186e-07, 'epoch': 0.85} + 85%|████████▍ | 5988/7045 [19:27:15<3:23:44, 11.57s/it] 85%|████████▌ | 5989/7045 [19:27:27<3:21:59, 11.48s/it] {'loss': 1.0869, 'learning_rate': 2.889127961938898e-07, 'epoch': 0.85} + 85%|████████▌ | 5989/7045 [19:27:27<3:21:59, 11.48s/it] 85%|████████▌ | 5990/7045 [19:27:38<3:22:06, 11.49s/it] {'loss': 1.1387, 'learning_rate': 2.8837663776087405e-07, 'epoch': 0.85} + 85%|████████▌ | 5990/7045 [19:27:38<3:22:06, 11.49s/it] 85%|████████▌ | 5991/7045 [19:27:50<3:21:33, 11.47s/it] {'loss': 1.1436, 'learning_rate': 2.8784094683480174e-07, 'epoch': 0.85} + 85%|████████▌ | 5991/7045 [19:27:50<3:21:33, 11.47s/it] 85%|████████▌ | 5992/7045 [19:28:02<3:24:20, 11.64s/it] {'loss': 1.0718, 'learning_rate': 2.873057235289098e-07, 'epoch': 0.85} + 85%|████████▌ | 5992/7045 [19:28:02<3:24:20, 11.64s/it] 85%|████████▌ | 5993/7045 [19:28:13<3:24:12, 11.65s/it] {'loss': 1.0898, 'learning_rate': 2.86770967956338e-07, 'epoch': 0.85} + 85%|████████▌ | 5993/7045 [19:28:13<3:24:12, 11.65s/it] 85%|████████▌ | 5994/7045 [19:28:25<3:21:55, 11.53s/it] {'loss': 1.145, 'learning_rate': 2.862366802301261e-07, 'epoch': 0.85} + 85%|████████▌ | 5994/7045 [19:28:25<3:21:55, 11.53s/it] 85%|████████▌ | 5995/7045 [19:28:36<3:21:03, 11.49s/it] {'loss': 1.1455, 'learning_rate': 2.85702860463215e-07, 'epoch': 0.85} + 85%|████████▌ | 5995/7045 [19:28:36<3:21:03, 11.49s/it] 85%|████████▌ | 5996/7045 [19:28:49<3:29:15, 11.97s/it] {'loss': 1.0601, 'learning_rate': 2.8516950876844753e-07, 'epoch': 0.85} + 85%|████████▌ | 5996/7045 [19:28:49<3:29:15, 11.97s/it] 85%|████████▌ | 5997/7045 [19:29:00<3:24:42, 11.72s/it] {'loss': 1.1055, 'learning_rate': 2.846366252585667e-07, 'epoch': 0.85} + 85%|████████▌ | 5997/7045 [19:29:00<3:24:42, 11.72s/it] 85%|████████▌ | 5998/7045 [19:29:12<3:24:53, 11.74s/it] {'loss': 1.1084, 'learning_rate': 2.8410421004621574e-07, 'epoch': 0.85} + 85%|████████▌ | 5998/7045 [19:29:12<3:24:53, 11.74s/it] 85%|████████▌ | 5999/7045 [19:29:23<3:20:35, 11.51s/it] {'loss': 1.1455, 'learning_rate': 2.835722632439414e-07, 'epoch': 0.85} + 85%|████████▌ | 5999/7045 [19:29:23<3:20:35, 11.51s/it] 85%|████████▌ | 6000/7045 [19:29:40<3:46:41, 13.02s/it] {'loss': 1.0576, 'learning_rate': 2.8304078496418947e-07, 'epoch': 0.85} + 85%|████████▌ | 6000/7045 [19:29:40<3:46:41, 13.02s/it]/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants. + warnings.warn( +/usr/local/lib/python3.9/dist-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None + warnings.warn( + 85%|████████▌ | 6001/7045 [19:30:19<6:03:46, 20.91s/it] {'loss': 1.1206, 'learning_rate': 2.825097753193065e-07, 'epoch': 0.85} + 85%|████████▌ | 6001/7045 [19:30:19<6:03:46, 20.91s/it] 85%|████████▌ | 6002/7045 [19:30:31<5:16:01, 18.18s/it] {'loss': 1.1021, 'learning_rate': 2.8197923442154163e-07, 'epoch': 0.85} + 85%|████████▌ | 6002/7045 [19:30:31<5:16:01, 18.18s/it] 85%|████████▌ | 6003/7045 [19:30:44<4:52:27, 16.84s/it] {'loss': 1.0264, 'learning_rate': 2.814491623830432e-07, 'epoch': 0.85} + 85%|████████▌ | 6003/7045 [19:30:44<4:52:27, 16.84s/it] 85%|████████▌ | 6004/7045 [19:30:55<4:21:47, 15.09s/it] {'loss': 1.1191, 'learning_rate': 2.8091955931586144e-07, 'epoch': 0.85} + 85%|████████▌ | 6004/7045 [19:30:55<4:21:47, 15.09s/it] 85%|████████▌ | 6005/7045 [19:31:07<4:01:28, 13.93s/it] {'loss': 1.124, 'learning_rate': 2.8039042533194813e-07, 'epoch': 0.85} + 85%|████████▌ | 6005/7045 [19:31:07<4:01:28, 13.93s/it] 85%|████████▌ | 6006/7045 [19:31:18<3:48:48, 13.21s/it] {'loss': 1.1011, 'learning_rate': 2.7986176054315336e-07, 'epoch': 0.85} + 85%|████████▌ | 6006/7045 [19:31:18<3:48:48, 13.21s/it] 85%|████████▌ | 6007/7045 [19:31:30<3:43:28, 12.92s/it] {'loss': 1.1074, 'learning_rate': 2.7933356506123113e-07, 'epoch': 0.85} + 85%|████████▌ | 6007/7045 [19:31:30<3:43:28, 12.92s/it] 85%|████████▌ | 6008/7045 [19:31:42<3:37:29, 12.58s/it] {'loss': 1.124, 'learning_rate': 2.7880583899783375e-07, 'epoch': 0.85} + 85%|████████▌ | 6008/7045 [19:31:42<3:37:29, 12.58s/it] 85%|████████▌ | 6009/7045 [19:31:54<3:33:08, 12.34s/it] {'loss': 1.0557, 'learning_rate': 2.782785824645154e-07, 'epoch': 0.85} + 85%|████████▌ | 6009/7045 [19:31:54<3:33:08, 12.34s/it] 85%|████████▌ | 6010/7045 [19:32:05<3:26:44, 11.98s/it] {'loss': 1.1035, 'learning_rate': 2.7775179557273155e-07, 'epoch': 0.85} + 85%|████████▌ | 6010/7045 [19:32:05<3:26:44, 11.98s/it] 85%|████████▌ | 6011/7045 [19:32:17<3:24:58, 11.89s/it] {'loss': 1.1035, 'learning_rate': 2.772254784338371e-07, 'epoch': 0.85} + 85%|████████▌ | 6011/7045 [19:32:17<3:24:58, 11.89s/it] 85%|████████▌ | 6012/7045 [19:32:28<3:21:56, 11.73s/it] {'loss': 1.0889, 'learning_rate': 2.7669963115908805e-07, 'epoch': 0.85} + 85%|████████▌ | 6012/7045 [19:32:28<3:21:56, 11.73s/it] 85%|████████▌ | 6013/7045 [19:32:40<3:20:12, 11.64s/it] {'loss': 1.0977, 'learning_rate': 2.7617425385964234e-07, 'epoch': 0.85} + 85%|████████▌ | 6013/7045 [19:32:40<3:20:12, 11.64s/it] 85%|████████▌ | 6014/7045 [19:32:51<3:21:02, 11.70s/it] {'loss': 1.0757, 'learning_rate': 2.756493466465565e-07, 'epoch': 0.85} + 85%|████████▌ | 6014/7045 [19:32:51<3:21:02, 11.70s/it] 85%|████████▌ | 6015/7045 [19:33:04<3:26:15, 12.02s/it] {'loss': 1.0791, 'learning_rate': 2.751249096307895e-07, 'epoch': 0.85} + 85%|████████▌ | 6015/7045 [19:33:04<3:26:15, 12.02s/it] 85%|█████���██▌ | 6016/7045 [19:33:17<3:28:33, 12.16s/it] {'loss': 1.1377, 'learning_rate': 2.7460094292319965e-07, 'epoch': 0.85} + 85%|████████▌ | 6016/7045 [19:33:17<3:28:33, 12.16s/it] 85%|████████▌ | 6017/7045 [19:33:28<3:24:07, 11.91s/it] {'loss': 1.0898, 'learning_rate': 2.740774466345464e-07, 'epoch': 0.85} + 85%|████████▌ | 6017/7045 [19:33:28<3:24:07, 11.91s/it] 85%|████████▌ | 6018/7045 [19:33:39<3:20:04, 11.69s/it] {'loss': 1.126, 'learning_rate': 2.7355442087549035e-07, 'epoch': 0.85} + 85%|████████▌ | 6018/7045 [19:33:39<3:20:04, 11.69s/it] 85%|████████▌ | 6019/7045 [19:33:50<3:16:11, 11.47s/it] {'loss': 1.0967, 'learning_rate': 2.730318657565917e-07, 'epoch': 0.85} + 85%|████████▌ | 6019/7045 [19:33:50<3:16:11, 11.47s/it] 85%|████████▌ | 6020/7045 [19:34:01<3:14:27, 11.38s/it] {'loss': 1.1133, 'learning_rate': 2.725097813883104e-07, 'epoch': 0.85} + 85%|████████▌ | 6020/7045 [19:34:01<3:14:27, 11.38s/it] 85%|████████▌ | 6021/7045 [19:34:13<3:15:13, 11.44s/it] {'loss': 1.0874, 'learning_rate': 2.719881678810096e-07, 'epoch': 0.85} + 85%|████████▌ | 6021/7045 [19:34:13<3:15:13, 11.44s/it] 85%|████████▌ | 6022/7045 [19:34:24<3:15:31, 11.47s/it] {'loss': 1.0576, 'learning_rate': 2.7146702534495056e-07, 'epoch': 0.85} + 85%|████████▌ | 6022/7045 [19:34:24<3:15:31, 11.47s/it] 85%|████████▌ | 6023/7045 [19:34:35<3:12:57, 11.33s/it] {'loss': 1.1377, 'learning_rate': 2.7094635389029513e-07, 'epoch': 0.85} + 85%|████████▌ | 6023/7045 [19:34:35<3:12:57, 11.33s/it] 86%|████████▌ | 6024/7045 [19:34:47<3:13:09, 11.35s/it] {'loss': 1.1426, 'learning_rate': 2.7042615362710687e-07, 'epoch': 0.86} + 86%|████████▌ | 6024/7045 [19:34:47<3:13:09, 11.35s/it] 86%|████████▌ | 6025/7045 [19:34:59<3:17:55, 11.64s/it] {'loss': 1.0723, 'learning_rate': 2.699064246653485e-07, 'epoch': 0.86} + 86%|████████▌ | 6025/7045 [19:34:59<3:17:55, 11.64s/it] 86%|████████▌ | 6026/7045 [19:35:10<3:13:58, 11.42s/it] {'loss': 1.0732, 'learning_rate': 2.693871671148843e-07, 'epoch': 0.86} + 86%|████████▌ | 6026/7045 [19:35:10<3:13:58, 11.42s/it] 86%|████████▌ | 6027/7045 [19:35:21<3:12:30, 11.35s/it] {'loss': 1.1152, 'learning_rate': 2.6886838108547754e-07, 'epoch': 0.86} + 86%|████████▌ | 6027/7045 [19:35:21<3:12:30, 11.35s/it] 86%|████████▌ | 6028/7045 [19:35:32<3:10:17, 11.23s/it] {'loss': 1.0796, 'learning_rate': 2.683500666867919e-07, 'epoch': 0.86} + 86%|████████▌ | 6028/7045 [19:35:32<3:10:17, 11.23s/it] 86%|████████▌ | 6029/7045 [19:35:43<3:09:04, 11.17s/it] {'loss': 1.127, 'learning_rate': 2.67832224028394e-07, 'epoch': 0.86} + 86%|████████▌ | 6029/7045 [19:35:43<3:09:04, 11.17s/it] 86%|████████▌ | 6030/7045 [19:35:55<3:10:03, 11.24s/it] {'loss': 1.0815, 'learning_rate': 2.6731485321974724e-07, 'epoch': 0.86} + 86%|████████▌ | 6030/7045 [19:35:55<3:10:03, 11.24s/it] 86%|████████▌ | 6031/7045 [19:36:06<3:08:37, 11.16s/it] {'loss': 1.0801, 'learning_rate': 2.667979543702168e-07, 'epoch': 0.86} + 86%|████████▌ | 6031/7045 [19:36:06<3:08:37, 11.16s/it] 86%|████████▌ | 6032/7045 [19:36:18<3:12:24, 11.40s/it] {'loss': 1.1221, 'learning_rate': 2.6628152758906843e-07, 'epoch': 0.86} + 86%|████████▌ | 6032/7045 [19:36:18<3:12:24, 11.40s/it] 86%|████████▌ | 6033/7045 [19:36:29<3:11:54, 11.38s/it] {'loss': 1.0708, 'learning_rate': 2.6576557298546773e-07, 'epoch': 0.86} + 86%|████████▌ | 6033/7045 [19:36:29<3:11:54, 11.38s/it] 86%|████████▌ | 6034/7045 [19:36:40<3:10:50, 11.33s/it] {'loss': 1.0957, 'learning_rate': 2.652500906684796e-07, 'epoch': 0.86} + 86%|████████▌ | 6034/7045 [19:36:40<3:10:50, 11.33s/it] 86%|████████▌ | 6035/7045 [19:36:53<3:18:34, 11.80s/it] {'loss': 1.0557, 'learning_rate': 2.647350807470711e-07, 'epoch': 0.86} + 86%|████████▌ | 6035/7045 [19:36:53<3:18:34, 11.80s/it] 86%|████████▌ | 6036/7045 [19:37:04<3:14:29, 11.57s/it] {'loss': 1.084, 'learning_rate': 2.6422054333010703e-07, 'epoch': 0.86} + 86%|████████▌ | 6036/7045 [19:37:04<3:14:29, 11.57s/it] 86%|████████▌ | 6037/7045 [19:37:16<3:18:20, 11.81s/it] {'loss': 1.0996, 'learning_rate': 2.637064785263549e-07, 'epoch': 0.86} + 86%|████████▌ | 6037/7045 [19:37:16<3:18:20, 11.81s/it] 86%|████████▌ | 6038/7045 [19:37:27<3:13:57, 11.56s/it] {'loss': 1.1172, 'learning_rate': 2.631928864444805e-07, 'epoch': 0.86} + 86%|████████▌ | 6038/7045 [19:37:27<3:13:57, 11.56s/it] 86%|████████▌ | 6039/7045 [19:37:39<3:12:07, 11.46s/it] {'loss': 1.166, 'learning_rate': 2.6267976719304947e-07, 'epoch': 0.86} + 86%|████████▌ | 6039/7045 [19:37:39<3:12:07, 11.46s/it] 86%|████████▌ | 6040/7045 [19:37:51<3:15:59, 11.70s/it] {'loss': 1.1084, 'learning_rate': 2.621671208805293e-07, 'epoch': 0.86} + 86%|████████▌ | 6040/7045 [19:37:51<3:15:59, 11.70s/it] 86%|████████▌ | 6041/7045 [19:38:03<3:17:50, 11.82s/it] {'loss': 1.1191, 'learning_rate': 2.616549476152852e-07, 'epoch': 0.86} + 86%|████████▌ | 6041/7045 [19:38:03<3:17:50, 11.82s/it] 86%|████████▌ | 6042/7045 [19:38:14<3:13:44, 11.59s/it] {'loss': 1.126, 'learning_rate': 2.6114324750558443e-07, 'epoch': 0.86} + 86%|████████▌ | 6042/7045 [19:38:14<3:13:44, 11.59s/it] 86%|████████▌ | 6043/7045 [19:38:27<3:19:10, 11.93s/it] {'loss': 1.0688, 'learning_rate': 2.6063202065959387e-07, 'epoch': 0.86} + 86%|████████▌ | 6043/7045 [19:38:27<3:19:10, 11.93s/it] 86%|████████▌ | 6044/7045 [19:38:38<3:14:41, 11.67s/it] {'loss': 1.126, 'learning_rate': 2.601212671853792e-07, 'epoch': 0.86} + 86%|████████▌ | 6044/7045 [19:38:38<3:14:41, 11.67s/it] 86%|████████▌ | 6045/7045 [19:38:50<3:18:19, 11.90s/it] {'loss': 1.1187, 'learning_rate': 2.596109871909061e-07, 'epoch': 0.86} + 86%|████████▌ | 6045/7045 [19:38:50<3:18:19, 11.90s/it] 86%|████████▌ | 6046/7045 [19:39:03<3:24:05, 12.26s/it] {'loss': 1.0317, 'learning_rate': 2.591011807840424e-07, 'epoch': 0.86} + 86%|████████▌ | 6046/7045 [19:39:03<3:24:05, 12.26s/it] 86%|████████▌ | 6047/7045 [19:39:14<3:17:24, 11.87s/it] {'loss': 1.1562, 'learning_rate': 2.585918480725524e-07, 'epoch': 0.86} + 86%|████████▌ | 6047/7045 [19:39:14<3:17:24, 11.87s/it] 86%|████████▌ | 6048/7045 [19:39:25<3:13:52, 11.67s/it] {'loss': 1.1318, 'learning_rate': 2.5808298916410346e-07, 'epoch': 0.86} + 86%|████████▌ | 6048/7045 [19:39:25<3:13:52, 11.67s/it] 86%|████████▌ | 6049/7045 [19:39:37<3:12:33, 11.60s/it] {'loss': 1.0898, 'learning_rate': 2.5757460416626105e-07, 'epoch': 0.86} + 86%|████████▌ | 6049/7045 [19:39:37<3:12:33, 11.60s/it]Error with [Errno 2] No such file or directory: '/mnt/bn/liangkeg/data/allava-4v/ALLaVA/allava_laion/images/99998.jpeg', [{'id': 'allava_laion_inst_99998', 'image': '/mnt/bn/liangkeg/data/allava-4v/ALLaVA/allava_laion/images/99998.jpeg', 'conversations': [{'from': 'human', 'value': "\nConsidering the ring's unique design and the way the gemstones are mounted, what could be the specific method used in setting these stones, and how might it contribute to the ring's aesthetic appeal?"}, {'from': 'gpt', 'value': "The setting used for the gemstones appears to be a tension setting, a technique where the gemstones are held in place by the pressure of the metal band itself, creating the illusion that the stones are floating. This method not only enhances the ring's modern and sleek appearance but also allows more light to pass through the gemstones, increasing their brilliance and sparkle. The absence of prongs or bezels contributes to a minimalist and contemporary aesthetic, emphasizing the stones' clarity and the smooth curvature of the metal."}], 'original_caption': 'Love Comet Rose Gold Ring', 'url': 'https://ak1.ostkcdn.com/images/products/is/images/direct/2071d97c795c1623c75134e3ff0018595b49764d/Love-Comet-Rose-Gold-Ring.jpg'}] + 86%|████████▌ | 6050/7045 [19:39:49<3:15:52, 11.81s/it] {'loss': 1.1001, 'learning_rate': 2.570666931864901e-07, 'epoch': 0.86} + 86%|████████▌ | 6050/7045 [19:39:49<3:15:52, 11.81s/it] 86%|████████▌ | 6051/7045 [19:40:01<3:14:04, 11.72s/it] {'loss': 1.125, 'learning_rate': 2.56559256332157e-07, 'epoch': 0.86} + 86%|████████▌ | 6051/7045 [19:40:01<3:14:04, 11.72s/it] 86%|████████▌ | 6052/7045 [19:40:12<3:11:53, 11.60s/it] {'loss': 1.1338, 'learning_rate': 2.5605229371052615e-07, 'epoch': 0.86} + 86%|████████▌ | 6052/7045 [19:40:12<3:11:53, 11.60s/it] 86%|████████▌ | 6053/7045 [19:40:24<3:11:58, 11.61s/it] {'loss': 1.1074, 'learning_rate': 2.5554580542876295e-07, 'epoch': 0.86} + 86%|████████▌ | 6053/7045 [19:40:24<3:11:58, 11.61s/it] 86%|████████▌ | 6054/7045 [19:40:35<3:11:12, 11.58s/it] {'loss': 1.1074, 'learning_rate': 2.550397915939326e-07, 'epoch': 0.86} + 86%|████████▌ | 6054/7045 [19:40:35<3:11:12, 11.58s/it] 86%|████████▌ | 6055/7045 [19:40:46<3:08:44, 11.44s/it] {'loss': 1.1191, 'learning_rate': 2.545342523129987e-07, 'epoch': 0.86} + 86%|████████▌ | 6055/7045 [19:40:46<3:08:44, 11.44s/it] 86%|████████▌ | 6056/7045 [19:40:59<3:12:49, 11.70s/it] {'loss': 1.0894, 'learning_rate': 2.5402918769282554e-07, 'epoch': 0.86} + 86%|████████▌ | 6056/7045 [19:40:59<3:12:49, 11.70s/it] 86%|████████▌ | 6057/7045 [19:41:10<3:09:35, 11.51s/it] {'loss': 1.0986, 'learning_rate': 2.5352459784017707e-07, 'epoch': 0.86} + 86%|████████▌ | 6057/7045 [19:41:10<3:09:35, 11.51s/it] 86%|████████▌ | 6058/7045 [19:41:23<3:17:56, 12.03s/it] {'loss': 1.085, 'learning_rate': 2.530204828617164e-07, 'epoch': 0.86} + 86%|████████▌ | 6058/7045 [19:41:23<3:17:56, 12.03s/it] 86%|████████▌ | 6059/7045 [19:41:36<3:21:11, 12.24s/it] {'loss': 1.0933, 'learning_rate': 2.525168428640071e-07, 'epoch': 0.86} + 86%|████████▌ | 6059/7045 [19:41:36<3:21:11, 12.24s/it] 86%|████████▌ | 6060/7045 [19:41:47<3:16:11, 11.95s/it] {'loss': 1.0806, 'learning_rate': 2.520136779535112e-07, 'epoch': 0.86} + 86%|████████▌ | 6060/7045 [19:41:47<3:16:11, 11.95s/it] 86%|████████▌ | 6061/7045 [19:41:58<3:11:06, 11.65s/it] {'loss': 1.0845, 'learning_rate': 2.5151098823659096e-07, 'epoch': 0.86} + 86%|████████▌ | 6061/7045 [19:41:58<3:11:06, 11.65s/it] 86%|████████▌ | 6062/7045 [19:42:09<3:07:40, 11.46s/it] {'loss': 1.1338, 'learning_rate': 2.510087738195086e-07, 'epoch': 0.86} + 86%|████████▌ | 6062/7045 [19:42:09<3:07:40, 11.46s/it] 86%|████████▌ | 6063/7045 [19:42:20<3:04:47, 11.29s/it] {'loss': 1.0918, 'learning_rate': 2.505070348084243e-07, 'epoch': 0.86} + 86%|████████▌ | 6063/7045 [19:42:20<3:04:47, 11.29s/it] 86%|████████▌ | 6064/7045 [19:42:31<3:04:02, 11.26s/it] {'loss': 1.083, 'learning_rate': 2.500057713094001e-07, 'epoch': 0.86} + 86%|████████▌ | 6064/7045 [19:42:31<3:04:02, 11.26s/it] 86%|████████▌ | 6065/7045 [19:42:42<3:03:50, 11.26s/it] {'loss': 1.0957, 'learning_rate': 2.495049834283955e-07, 'epoch': 0.86} + 86%|████████▌ | 6065/7045 [19:42:42<3:03:50, 11.26s/it] 86%|████████▌ | 6066/7045 [19:42:53<3:01:08, 11.10s/it] {'loss': 1.1045, 'learning_rate': 2.490046712712707e-07, 'epoch': 0.86} + 86%|████████▌ | 6066/7045 [19:42:53<3:01:08, 11.10s/it] 86%|████████▌ | 6067/7045 [19:43:05<3:07:20, 11.49s/it] {'loss': 1.1191, 'learning_rate': 2.4850483494378406e-07, 'epoch': 0.86} + 86%|████████▌ | 6067/7045 [19:43:05<3:07:20, 11.49s/it] 86%|████████▌ | 6068/7045 [19:43:17<3:06:26, 11.45s/it] {'loss': 1.1133, 'learning_rate': 2.48005474551595e-07, 'epoch': 0.86} + 86%|████████▌ | 6068/7045 [19:43:17<3:06:26, 11.45s/it] 86%|████████▌ | 6069/7045 [19:43:28<3:07:53, 11.55s/it] {'loss': 1.082, 'learning_rate': 2.4750659020026076e-07, 'epoch': 0.86} + 86%|████████▌ | 6069/7045 [19:43:28<3:07:53, 11.55s/it] 86%|████████▌ | 6070/7045 [19:43:40<3:07:02, 11.51s/it] {'loss': 1.1108, 'learning_rate': 2.470081819952397e-07, 'epoch': 0.86} + 86%|████████▌ | 6070/7045 [19:43:40<3:07:02, 11.51s/it] 86%|████████▌ | 6071/7045 [19:43:53<3:15:23, 12.04s/it] {'loss': 1.1465, 'learning_rate': 2.4651025004188753e-07, 'epoch': 0.86} + 86%|████████▌ | 6071/7045 [19:43:53<3:15:23, 12.04s/it] 86%|████████▌ | 6072/7045 [19:44:05<3:12:48, 11.89s/it] {'loss': 1.083, 'learning_rate': 2.4601279444546044e-07, 'epoch': 0.86} + 86%|████████▌ | 6072/7045 [19:44:05<3:12:48, 11.89s/it] 86%|████████▌ | 6073/7045 [19:44:16<3:10:11, 11.74s/it] {'loss': 1.1084, 'learning_rate': 2.4551581531111393e-07, 'epoch': 0.86} + 86%|████████▌ | 6073/7045 [19:44:16<3:10:11, 11.74s/it] 86%|████████▌ | 6074/7045 [19:44:27<3:07:58, 11.61s/it] {'loss': 1.0869, 'learning_rate': 2.450193127439024e-07, 'epoch': 0.86} + 86%|████████▌ | 6074/7045 [19:44:27<3:07:58, 11.61s/it] 86%|████████▌ | 6075/7045 [19:44:38<3:04:19, 11.40s/it] {'loss': 1.0913, 'learning_rate': 2.4452328684878014e-07, 'epoch': 0.86} + 86%|████████▌ | 6075/7045 [19:44:38<3:04:19, 11.40s/it] 86%|████████▌ | 6076/7045 [19:44:50<3:05:30, 11.49s/it] {'loss': 1.1328, 'learning_rate': 2.4402773773059995e-07, 'epoch': 0.86} + 86%|████████▌ | 6076/7045 [19:44:50<3:05:30, 11.49s/it] 86%|████████▋ | 6077/7045 [19:45:01<3:04:09, 11.41s/it] {'loss': 1.1123, 'learning_rate': 2.4353266549411364e-07, 'epoch': 0.86} + 86%|████████▋ | 6077/7045 [19:45:01<3:04:09, 11.41s/it] 86%|████████▋ | 6078/7045 [19:45:12<3:02:00, 11.29s/it] {'loss': 1.1787, 'learning_rate': 2.430380702439733e-07, 'epoch': 0.86} + 86%|████████▋ | 6078/7045 [19:45:12<3:02:00, 11.29s/it] 86%|████████▋ | 6079/7045 [19:45:24<3:01:44, 11.29s/it] {'loss': 1.1377, 'learning_rate': 2.4254395208473e-07, 'epoch': 0.86} + 86%|████████▋ | 6079/7045 [19:45:24<3:01:44, 11.29s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 2 bytes but only got 0. + warnings.warn(str(msg)) + 86%|████████▋ | 6080/7045 [19:45:36<3:09:20, 11.77s/it] {'loss': 1.0996, 'learning_rate': 2.4205031112083247e-07, 'epoch': 0.86} + 86%|████████▋ | 6080/7045 [19:45:36<3:09:20, 11.77s/it] 86%|████████▋ | 6081/7045 [19:45:48<3:07:21, 11.66s/it] {'loss': 1.1514, 'learning_rate': 2.4155714745663083e-07, 'epoch': 0.86} + 86%|████████▋ | 6081/7045 [19:45:48<3:07:21, 11.66s/it] 86%|████████▋ | 6082/7045 [19:46:00<3:11:41, 11.94s/it] {'loss': 1.0591, 'learning_rate': 2.410644611963725e-07, 'epoch': 0.86} + 86%|████████▋ | 6082/7045 [19:46:00<3:11:41, 11.94s/it] 86%|████████▋ | 6083/7045 [19:46:12<3:10:00, 11.85s/it] {'loss': 1.1289, 'learning_rate': 2.405722524442042e-07, 'epoch': 0.86} + 86%|████████▋ | 6083/7045 [19:46:12<3:10:00, 11.85s/it] 86%|████████▋ | 6084/7045 [19:46:24<3:11:11, 11.94s/it] {'loss': 1.083, 'learning_rate': 2.400805213041732e-07, 'epoch': 0.86} + 86%|████████▋ | 6084/7045 [19:46:24<3:11:11, 11.94s/it] 86%|████████▋ | 6085/7045 [19:46:35<3:04:51, 11.55s/it] {'loss': 1.084, 'learning_rate': 2.3958926788022365e-07, 'epoch': 0.86} + 86%|████████▋ | 6085/7045 [19:46:35<3:04:51, 11.55s/it] 86%|████████▋ | 6086/7045 [19:46:46<3:01:07, 11.33s/it] {'loss': 1.1113, 'learning_rate': 2.3909849227620077e-07, 'epoch': 0.86} + 86%|████████▋ | 6086/7045 [19:46:46<3:01:07, 11.33s/it] 86%|████████▋ | 6087/7045 [19:46:58<3:05:07, 11.59s/it] {'loss': 1.0991, 'learning_rate': 2.386081945958474e-07, 'epoch': 0.86} + 86%|████████▋ | 6087/7045 [19:46:58<3:05:07, 11.59s/it] 86%|████████▋ | 6088/7045 [19:47:10<3:05:08, 11.61s/it] {'loss': 1.1523, 'learning_rate': 2.381183749428051e-07, 'epoch': 0.86} + 86%|████████▋ | 6088/7045 [19:47:10<3:05:08, 11.61s/it] 86%|████████▋ | 6089/7045 [19:47:21<3:02:40, 11.47s/it] {'loss': 1.083, 'learning_rate': 2.3762903342061579e-07, 'epoch': 0.86} + 86%|████████▋ | 6089/7045 [19:47:21<3:02:40, 11.47s/it] 86%|████████▋ | 6090/7045 [19:47:33<3:07:30, 11.78s/it] {'loss': 1.1445, 'learning_rate': 2.3714017013271974e-07, 'epoch': 0.86} + 86%|████████▋ | 6090/7045 [19:47:33<3:07:30, 11.78s/it] 86%|████████▋ | 6091/7045 [19:47:44<3:04:12, 11.59s/it] {'loss': 1.1328, 'learning_rate': 2.366517851824554e-07, 'epoch': 0.86} + 86%|████████▋ | 6091/7045 [19:47:44<3:04:12, 11.59s/it] 86%|████████▋ | 6092/7045 [19:47:55<3:01:57, 11.46s/it] {'loss': 1.0771, 'learning_rate': 2.3616387867306162e-07, 'epoch': 0.86} + 86%|████████▋ | 6092/7045 [19:47:55<3:01:57, 11.46s/it] 86%|████████▋ | 6093/7045 [19:48:08<3:06:13, 11.74s/it] {'loss': 1.0601, 'learning_rate': 2.356764507076742e-07, 'epoch': 0.86} + 86%|████████▋ | 6093/7045 [19:48:08<3:06:13, 11.74s/it] 87%|████████▋ | 6094/7045 [19:48:19<3:02:23, 11.51s/it] {'loss': 1.1201, 'learning_rate': 2.351895013893288e-07, 'epoch': 0.87} + 87%|████████▋ | 6094/7045 [19:48:19<3:02:23, 11.51s/it] 87%|████████▋ | 6095/7045 [19:48:30<3:01:29, 11.46s/it] {'loss': 1.0928, 'learning_rate': 2.3470303082096025e-07, 'epoch': 0.87} + 87%|████████▋ | 6095/7045 [19:48:30<3:01:29, 11.46s/it] 87%|████████▋ | 6096/7045 [19:48:42<3:02:42, 11.55s/it] {'loss': 1.0781, 'learning_rate': 2.3421703910540133e-07, 'epoch': 0.87} + 87%|████████▋ | 6096/7045 [19:48:42<3:02:42, 11.55s/it] 87%|████████▋ | 6097/7045 [19:48:53<3:01:02, 11.46s/it] {'loss': 1.0791, 'learning_rate': 2.3373152634538488e-07, 'epoch': 0.87} + 87%|████████▋ | 6097/7045 [19:48:53<3:01:02, 11.46s/it] 87%|████████▋ | 6098/7045 [19:49:05<3:00:34, 11.44s/it] {'loss': 1.082, 'learning_rate': 2.3324649264354126e-07, 'epoch': 0.87} + 87%|████████▋ | 6098/7045 [19:49:05<3:00:34, 11.44s/it] 87%|████████▋ | 6099/7045 [19:49:16<3:02:19, 11.56s/it] {'loss': 1.0684, 'learning_rate': 2.3276193810239905e-07, 'epoch': 0.87} + 87%|████████▋ | 6099/7045 [19:49:16<3:02:19, 11.56s/it] 87%|████████▋ | 6100/7045 [19:49:28<3:00:27, 11.46s/it] {'loss': 1.1299, 'learning_rate': 2.3227786282438796e-07, 'epoch': 0.87} + 87%|████████▋ | 6100/7045 [19:49:28<3:00:27, 11.46s/it] 87%|████████▋ | 6101/7045 [19:49:40<3:03:09, 11.64s/it] {'loss': 1.1484, 'learning_rate': 2.3179426691183394e-07, 'epoch': 0.87} + 87%|████████▋ | 6101/7045 [19:49:40<3:03:09, 11.64s/it] 87%|████████▋ | 6102/7045 [19:49:52<3:06:20, 11.86s/it] {'loss': 1.1172, 'learning_rate': 2.3131115046696213e-07, 'epoch': 0.87} + 87%|████████▋ | 6102/7045 [19:49:52<3:06:20, 11.86s/it] 87%|████████▋ | 6103/7045 [19:50:04<3:06:34, 11.88s/it] {'loss': 1.0659, 'learning_rate': 2.3082851359189806e-07, 'epoch': 0.87} + 87%|████████▋ | 6103/7045 [19:50:04<3:06:34, 11.88s/it] 87%|████████▋ | 6104/7045 [19:50:15<3:03:50, 11.72s/it] {'loss': 1.0815, 'learning_rate': 2.3034635638866404e-07, 'epoch': 0.87} + 87%|████████▋ | 6104/7045 [19:50:15<3:03:50, 11.72s/it] 87%|████████▋ | 6105/7045 [19:50:26<2:59:10, 11.44s/it] {'loss': 1.0669, 'learning_rate': 2.2986467895918074e-07, 'epoch': 0.87} + 87%|████████▋ | 6105/7045 [19:50:26<2:59:10, 11.44s/it] 87%|████████▋ | 6106/7045 [19:50:38<3:02:34, 11.67s/it] {'loss': 1.1201, 'learning_rate': 2.2938348140526922e-07, 'epoch': 0.87} + 87%|████████▋ | 6106/7045 [19:50:38<3:02:34, 11.67s/it] 87%|████████▋ | 6107/7045 [19:50:51<3:08:44, 12.07s/it] {'loss': 1.1045, 'learning_rate': 2.2890276382864784e-07, 'epoch': 0.87} + 87%|████████▋ | 6107/7045 [19:50:51<3:08:44, 12.07s/it] 87%|████████▋ | 6108/7045 [19:51:03<3:08:15, 12.06s/it] {'loss': 1.0981, 'learning_rate': 2.2842252633093308e-07, 'epoch': 0.87} + 87%|████████▋ | 6108/7045 [19:51:03<3:08:15, 12.06s/it] 87%|████████▋ | 6109/7045 [19:51:15<3:06:04, 11.93s/it] {'loss': 1.1406, 'learning_rate': 2.2794276901364125e-07, 'epoch': 0.87} + 87%|████████▋ | 6109/7045 [19:51:15<3:06:04, 11.93s/it] 87%|████████▋ | 6110/7045 [19:51:26<3:02:23, 11.70s/it] {'loss': 1.126, 'learning_rate': 2.2746349197818624e-07, 'epoch': 0.87} + 87%|████████▋ | 6110/7045 [19:51:26<3:02:23, 11.70s/it] 87%|████████▋ | 6111/7045 [19:51:37<2:59:07, 11.51s/it] {'loss': 1.0547, 'learning_rate': 2.2698469532588086e-07, 'epoch': 0.87} + 87%|████████▋ | 6111/7045 [19:51:37<2:59:07, 11.51s/it] 87%|████████▋ | 6112/7045 [19:51:50<3:02:49, 11.76s/it] {'loss': 1.1074, 'learning_rate': 2.265063791579361e-07, 'epoch': 0.87} + 87%|████████▋ | 6112/7045 [19:51:50<3:02:49, 11.76s/it] 87%|████████▋ | 6113/7045 [19:52:01<3:01:39, 11.70s/it] {'loss': 1.1172, 'learning_rate': 2.2602854357546085e-07, 'epoch': 0.87} + 87%|████████▋ | 6113/7045 [19:52:01<3:01:39, 11.70s/it] 87%|████████▋ | 6114/7045 [19:52:12<2:59:55, 11.60s/it] {'loss': 1.0742, 'learning_rate': 2.255511886794634e-07, 'epoch': 0.87} + 87%|████████▋ | 6114/7045 [19:52:12<2:59:55, 11.60s/it] 87%|████████▋ | 6115/7045 [19:52:25<3:06:01, 12.00s/it] {'loss': 1.0439, 'learning_rate': 2.250743145708509e-07, 'epoch': 0.87} + 87%|████████▋ | 6115/7045 [19:52:25<3:06:01, 12.00s/it] 87%|████████▋ | 6116/7045 [19:52:37<3:02:33, 11.79s/it] {'loss': 1.0723, 'learning_rate': 2.2459792135042678e-07, 'epoch': 0.87} + 87%|████████▋ | 6116/7045 [19:52:37<3:02:33, 11.79s/it] 87%|████████▋ | 6117/7045 [19:52:49<3:04:27, 11.93s/it] {'loss': 1.0801, 'learning_rate': 2.2412200911889503e-07, 'epoch': 0.87} + 87%|████████▋ | 6117/7045 [19:52:49<3:04:27, 11.93s/it] 87%|████████▋ | 6118/7045 [19:53:00<2:59:45, 11.64s/it] {'loss': 1.0776, 'learning_rate': 2.2364657797685651e-07, 'epoch': 0.87} + 87%|████████▋ | 6118/7045 [19:53:00<2:59:45, 11.64s/it] 87%|████████▋ | 6119/7045 [19:53:11<2:57:06, 11.48s/it] {'loss': 1.0864, 'learning_rate': 2.2317162802481058e-07, 'epoch': 0.87} + 87%|████████▋ | 6119/7045 [19:53:11<2:57:06, 11.48s/it] 87%|████████▋ | 6120/7045 [19:53:22<2:55:15, 11.37s/it] {'loss': 1.0986, 'learning_rate': 2.226971593631558e-07, 'epoch': 0.87} + 87%|████████▋ | 6120/7045 [19:53:22<2:55:15, 11.37s/it] 87%|████████▋ | 6121/7045 [19:53:33<2:53:27, 11.26s/it] {'loss': 1.1035, 'learning_rate': 2.2222317209218774e-07, 'epoch': 0.87} + 87%|████████▋ | 6121/7045 [19:53:33<2:53:27, 11.26s/it] 87%|████████▋ | 6122/7045 [19:53:45<2:56:17, 11.46s/it] {'loss': 1.0791, 'learning_rate': 2.2174966631210132e-07, 'epoch': 0.87} + 87%|████████▋ | 6122/7045 [19:53:45<2:56:17, 11.46s/it] 87%|████████▋ | 6123/7045 [19:53:56<2:55:29, 11.42s/it] {'loss': 1.1211, 'learning_rate': 2.212766421229892e-07, 'epoch': 0.87} + 87%|████████▋ | 6123/7045 [19:53:56<2:55:29, 11.42s/it] 87%|████████▋ | 6124/7045 [19:54:09<2:59:26, 11.69s/it] {'loss': 1.1265, 'learning_rate': 2.2080409962484145e-07, 'epoch': 0.87} + 87%|████████▋ | 6124/7045 [19:54:09<2:59:26, 11.69s/it] 87%|████████▋ | 6125/7045 [19:54:21<3:01:20, 11.83s/it] {'loss': 1.0908, 'learning_rate': 2.2033203891754812e-07, 'epoch': 0.87} + 87%|████████▋ | 6125/7045 [19:54:21<3:01:20, 11.83s/it] 87%|████████▋ | 6126/7045 [19:54:33<3:02:36, 11.92s/it] {'loss': 1.1309, 'learning_rate': 2.198604601008958e-07, 'epoch': 0.87} + 87%|████████▋ | 6126/7045 [19:54:33<3:02:36, 11.92s/it] 87%|████████▋ | 6127/7045 [19:54:45<3:04:32, 12.06s/it] {'loss': 1.1406, 'learning_rate': 2.1938936327456973e-07, 'epoch': 0.87} + 87%|████████▋ | 6127/7045 [19:54:45<3:04:32, 12.06s/it] 87%|████████▋ | 6128/7045 [19:54:56<2:59:41, 11.76s/it] {'loss': 1.0986, 'learning_rate': 2.1891874853815388e-07, 'epoch': 0.87} + 87%|████████▋ | 6128/7045 [19:54:56<2:59:41, 11.76s/it] 87%|████████▋ | 6129/7045 [19:55:09<3:05:10, 12.13s/it] {'loss': 1.0605, 'learning_rate': 2.1844861599112948e-07, 'epoch': 0.87} + 87%|████████▋ | 6129/7045 [19:55:09<3:05:10, 12.13s/it] 87%|████████▋ | 6130/7045 [19:55:20<2:59:48, 11.79s/it] {'loss': 1.1113, 'learning_rate': 2.179789657328757e-07, 'epoch': 0.87} + 87%|████████▋ | 6130/7045 [19:55:20<2:59:48, 11.79s/it] 87%|████████▋ | 6131/7045 [19:55:32<2:58:57, 11.75s/it] {'loss': 1.0679, 'learning_rate': 2.1750979786267113e-07, 'epoch': 0.87} + 87%|████████▋ | 6131/7045 [19:55:32<2:58:57, 11.75s/it] 87%|████████▋ | 6132/7045 [19:55:43<2:56:34, 11.60s/it] {'loss': 1.1172, 'learning_rate': 2.1704111247969006e-07, 'epoch': 0.87} + 87%|████████▋ | 6132/7045 [19:55:43<2:56:34, 11.60s/it] 87%|████████▋ | 6133/7045 [19:55:55<2:57:11, 11.66s/it] {'loss': 1.1426, 'learning_rate': 2.165729096830077e-07, 'epoch': 0.87} + 87%|████████▋ | 6133/7045 [19:55:55<2:57:11, 11.66s/it] 87%|████████▋ | 6134/7045 [19:56:07<2:59:24, 11.82s/it] {'loss': 1.0869, 'learning_rate': 2.1610518957159516e-07, 'epoch': 0.87} + 87%|████████▋ | 6134/7045 [19:56:07<2:59:24, 11.82s/it] 87%|████████▋ | 6135/7045 [19:56:18<2:55:58, 11.60s/it] {'loss': 1.1035, 'learning_rate': 2.1563795224432139e-07, 'epoch': 0.87} + 87%|████████▋ | 6135/7045 [19:56:18<2:55:58, 11.60s/it] 87%|████████▋ | 6136/7045 [19:56:30<2:53:26, 11.45s/it] {'loss': 1.0718, 'learning_rate': 2.1517119779995487e-07, 'epoch': 0.87} + 87%|████████▋ | 6136/7045 [19:56:30<2:53:26, 11.45s/it] 87%|████████▋ | 6137/7045 [19:56:41<2:52:26, 11.39s/it] {'loss': 1.127, 'learning_rate': 2.1470492633716034e-07, 'epoch': 0.87} + 87%|████████▋ | 6137/7045 [19:56:41<2:52:26, 11.39s/it] 87%|████████▋ | 6138/7045 [19:56:54<2:59:58, 11.91s/it] {'loss': 1.0786, 'learning_rate': 2.142391379545017e-07, 'epoch': 0.87} + 87%|████████▋ | 6138/7045 [19:56:54<2:59:58, 11.91s/it] 87%|████████▋ | 6139/7045 [19:57:07<3:05:50, 12.31s/it] {'loss': 1.0454, 'learning_rate': 2.1377383275044045e-07, 'epoch': 0.87} + 87%|████████▋ | 6139/7045 [19:57:07<3:05:50, 12.31s/it] 87%|████████▋ | 6140/7045 [19:57:18<3:00:14, 11.95s/it] {'loss': 1.061, 'learning_rate': 2.1330901082333572e-07, 'epoch': 0.87} + 87%|████████▋ | 6140/7045 [19:57:18<3:00:14, 11.95s/it] 87%|████████▋ | 6141/7045 [19:57:30<2:59:19, 11.90s/it] {'loss': 1.1055, 'learning_rate': 2.1284467227144362e-07, 'epoch': 0.87} + 87%|████████▋ | 6141/7045 [19:57:30<2:59:19, 11.90s/it] 87%|████████▋ | 6142/7045 [19:57:41<2:55:33, 11.67s/it] {'loss': 1.1172, 'learning_rate': 2.123808171929201e-07, 'epoch': 0.87} + 87%|████████▋ | 6142/7045 [19:57:41<2:55:33, 11.67s/it] 87%|████████▋ | 6143/7045 [19:57:53<2:54:55, 11.64s/it] {'loss': 1.1289, 'learning_rate': 2.1191744568581696e-07, 'epoch': 0.87} + 87%|████████▋ | 6143/7045 [19:57:53<2:54:55, 11.64s/it] 87%|████████▋ | 6144/7045 [19:58:04<2:52:34, 11.49s/it] {'loss': 1.1201, 'learning_rate': 2.1145455784808532e-07, 'epoch': 0.87} + 87%|████████▋ | 6144/7045 [19:58:04<2:52:34, 11.49s/it] 87%|████████▋ | 6145/7045 [19:58:16<2:54:25, 11.63s/it] {'loss': 1.1025, 'learning_rate': 2.1099215377757303e-07, 'epoch': 0.87} + 87%|████████▋ | 6145/7045 [19:58:16<2:54:25, 11.63s/it] 87%|████████▋ | 6146/7045 [19:58:27<2:52:09, 11.49s/it] {'loss': 1.1758, 'learning_rate': 2.1053023357202524e-07, 'epoch': 0.87} + 87%|████████▋ | 6146/7045 [19:58:27<2:52:09, 11.49s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Truncated File Read + warnings.warn(str(msg)) + 87%|████████▋ | 6147/7045 [19:58:38<2:49:26, 11.32s/it] {'loss': 1.1362, 'learning_rate': 2.1006879732908686e-07, 'epoch': 0.87} + 87%|████████▋ | 6147/7045 [19:58:38<2:49:26, 11.32s/it] 87%|████████▋ | 6148/7045 [19:58:51<2:56:52, 11.83s/it] {'loss': 1.1465, 'learning_rate': 2.0960784514629823e-07, 'epoch': 0.87} + 87%|████████▋ | 6148/7045 [19:58:51<2:56:52, 11.83s/it] 87%|████████▋ | 6149/7045 [19:59:03<2:59:04, 11.99s/it] {'loss': 1.0708, 'learning_rate': 2.0914737712109917e-07, 'epoch': 0.87} + 87%|████████▋ | 6149/7045 [19:59:03<2:59:04, 11.99s/it] 87%|████████▋ | 6150/7045 [19:59:14<2:55:04, 11.74s/it] {'loss': 1.1357, 'learning_rate': 2.0868739335082516e-07, 'epoch': 0.87} + 87%|████████▋ | 6150/7045 [19:59:14<2:55:04, 11.74s/it] 87%|████████▋ | 6151/7045 [19:59:27<2:59:13, 12.03s/it] {'loss': 1.1211, 'learning_rate': 2.0822789393271175e-07, 'epoch': 0.87} + 87%|████████▋ | 6151/7045 [19:59:27<2:59:13, 12.03s/it] 87%|████████▋ | 6152/7045 [19:59:40<3:03:12, 12.31s/it] {'loss': 1.0908, 'learning_rate': 2.0776887896388987e-07, 'epoch': 0.87} + 87%|████████▋ | 6152/7045 [19:59:40<3:03:12, 12.31s/it] 87%|████████▋ | 6153/7045 [19:59:51<2:58:05, 11.98s/it] {'loss': 1.1309, 'learning_rate': 2.0731034854138964e-07, 'epoch': 0.87} + 87%|████████▋ | 6153/7045 [19:59:51<2:58:05, 11.98s/it] 87%|████████▋ | 6154/7045 [20:00:03<2:54:21, 11.74s/it] {'loss': 1.1104, 'learning_rate': 2.0685230276213774e-07, 'epoch': 0.87} + 87%|████████▋ | 6154/7045 [20:00:03<2:54:21, 11.74s/it] 87%|████████▋ | 6155/7045 [20:00:14<2:52:05, 11.60s/it] {'loss': 1.1172, 'learning_rate': 2.063947417229592e-07, 'epoch': 0.87} + 87%|████████▋ | 6155/7045 [20:00:14<2:52:05, 11.60s/it] 87%|████████▋ | 6156/7045 [20:00:25<2:48:10, 11.35s/it] {'loss': 1.1201, 'learning_rate': 2.059376655205758e-07, 'epoch': 0.87} + 87%|████████▋ | 6156/7045 [20:00:25<2:48:10, 11.35s/it] 87%|████████▋ | 6157/7045 [20:00:36<2:46:50, 11.27s/it] {'loss': 1.0742, 'learning_rate': 2.0548107425160697e-07, 'epoch': 0.87} + 87%|████████▋ | 6157/7045 [20:00:36<2:46:50, 11.27s/it] 87%|████████▋ | 6158/7045 [20:00:49<2:54:35, 11.81s/it] {'loss': 1.1353, 'learning_rate': 2.0502496801257072e-07, 'epoch': 0.87} + 87%|████████▋ | 6158/7045 [20:00:49<2:54:35, 11.81s/it] 87%|████████▋ | 6159/7045 [20:01:02<2:59:09, 12.13s/it] {'loss': 1.1025, 'learning_rate': 2.0456934689988078e-07, 'epoch': 0.87} + 87%|████████▋ | 6159/7045 [20:01:02<2:59:09, 12.13s/it] 87%|████████▋ | 6160/7045 [20:01:14<2:58:56, 12.13s/it] {'loss': 1.1309, 'learning_rate': 2.0411421100984985e-07, 'epoch': 0.87} + 87%|████████▋ | 6160/7045 [20:01:14<2:58:56, 12.13s/it] 87%|████████▋ | 6161/7045 [20:01:25<2:54:29, 11.84s/it] {'loss': 1.1348, 'learning_rate': 2.0365956043868708e-07, 'epoch': 0.87} + 87%|████████▋ | 6161/7045 [20:01:25<2:54:29, 11.84s/it] 87%|████████▋ | 6162/7045 [20:01:36<2:49:56, 11.55s/it] {'loss': 1.0532, 'learning_rate': 2.0320539528249918e-07, 'epoch': 0.87} + 87%|████████▋ | 6162/7045 [20:01:36<2:49:56, 11.55s/it] 87%|████████▋ | 6163/7045 [20:01:47<2:49:06, 11.50s/it] {'loss': 1.1211, 'learning_rate': 2.0275171563729074e-07, 'epoch': 0.87} + 87%|████████▋ | 6163/7045 [20:01:47<2:49:06, 11.50s/it] 87%|████████▋ | 6164/7045 [20:01:59<2:52:25, 11.74s/it] {'loss': 1.0415, 'learning_rate': 2.022985215989637e-07, 'epoch': 0.87} + 87%|████████▋ | 6164/7045 [20:01:59<2:52:25, 11.74s/it] 88%|████████▊ | 6165/7045 [20:02:11<2:49:30, 11.56s/it] {'loss': 1.126, 'learning_rate': 2.0184581326331666e-07, 'epoch': 0.88} + 88%|████████▊ | 6165/7045 [20:02:11<2:49:30, 11.56s/it] 88%|████████▊ | 6166/7045 [20:02:22<2:47:56, 11.46s/it] {'loss': 1.0996, 'learning_rate': 2.0139359072604642e-07, 'epoch': 0.88} + 88%|████████▊ | 6166/7045 [20:02:22<2:47:56, 11.46s/it] 88%|████████▊ | 6167/7045 [20:02:34<2:50:46, 11.67s/it] {'loss': 1.083, 'learning_rate': 2.0094185408274652e-07, 'epoch': 0.88} + 88%|████████▊ | 6167/7045 [20:02:34<2:50:46, 11.67s/it] 88%|████████▊ | 6168/7045 [20:02:47<2:55:08, 11.98s/it] {'loss': 1.1064, 'learning_rate': 2.0049060342890697e-07, 'epoch': 0.88} + 88%|████████▊ | 6168/7045 [20:02:48<2:55:08, 11.98s/it] 88%|████████▊ | 6169/7045 [20:02:59<2:57:03, 12.13s/it] {'loss': 1.1265, 'learning_rate': 2.0003983885991757e-07, 'epoch': 0.88} + 88%|████████▊ | 6169/7045 [20:02:59<2:57:03, 12.13s/it] 88%|████████▊ | 6170/7045 [20:03:10<2:51:52, 11.79s/it] {'loss': 1.0908, 'learning_rate': 1.9958956047106236e-07, 'epoch': 0.88} + 88%|████████▊ | 6170/7045 [20:03:10<2:51:52, 11.79s/it] 88%|████████▊ | 6171/7045 [20:03:22<2:50:02, 11.67s/it] {'loss': 1.1025, 'learning_rate': 1.9913976835752525e-07, 'epoch': 0.88} + 88%|████████▊ | 6171/7045 [20:03:22<2:50:02, 11.67s/it] 88%|████████▊ | 6172/7045 [20:03:33<2:47:20, 11.50s/it] {'loss': 1.1133, 'learning_rate': 1.986904626143854e-07, 'epoch': 0.88} + 88%|████████▊ | 6172/7045 [20:03:33<2:47:20, 11.50s/it] 88%|████████▊ | 6173/7045 [20:03:44<2:46:44, 11.47s/it] {'loss': 1.0742, 'learning_rate': 1.9824164333661993e-07, 'epoch': 0.88} + 88%|████████▊ | 6173/7045 [20:03:44<2:46:44, 11.47s/it] 88%|████████▊ | 6174/7045 [20:03:55<2:45:07, 11.38s/it] {'loss': 1.1123, 'learning_rate': 1.9779331061910323e-07, 'epoch': 0.88} + 88%|████████▊ | 6174/7045 [20:03:55<2:45:07, 11.38s/it] 88%|████████▊ | 6175/7045 [20:04:07<2:45:05, 11.39s/it] {'loss': 1.1104, 'learning_rate': 1.973454645566067e-07, 'epoch': 0.88} + 88%|████████▊ | 6175/7045 [20:04:07<2:45:05, 11.39s/it] 88%|████████▊ | 6176/7045 [20:04:19<2:47:19, 11.55s/it] {'loss': 1.0791, 'learning_rate': 1.9689810524379848e-07, 'epoch': 0.88} + 88%|████████▊ | 6176/7045 [20:04:19<2:47:19, 11.55s/it] 88%|████████▊ | 6177/7045 [20:04:30<2:45:17, 11.43s/it] {'loss': 1.0757, 'learning_rate': 1.9645123277524546e-07, 'epoch': 0.88} + 88%|████████▊ | 6177/7045 [20:04:30<2:45:17, 11.43s/it] 88%|████████▊ | 6178/7045 [20:04:41<2:44:24, 11.38s/it] {'loss': 1.1211, 'learning_rate': 1.960048472454093e-07, 'epoch': 0.88} + 88%|████████▊ | 6178/7045 [20:04:41<2:44:24, 11.38s/it] 88%|████████▊ | 6179/7045 [20:04:52<2:42:50, 11.28s/it] {'loss': 1.0996, 'learning_rate': 1.955589487486495e-07, 'epoch': 0.88} + 88%|████████▊ | 6179/7045 [20:04:52<2:42:50, 11.28s/it] 88%|████████▊ | 6180/7045 [20:05:05<2:49:55, 11.79s/it] {'loss': 1.0566, 'learning_rate': 1.95113537379224e-07, 'epoch': 0.88} + 88%|████████▊ | 6180/7045 [20:05:05<2:49:55, 11.79s/it] 88%|████████▊ | 6181/7045 [20:05:16<2:45:59, 11.53s/it] {'loss': 1.1143, 'learning_rate': 1.9466861323128582e-07, 'epoch': 0.88} + 88%|████████▊ | 6181/7045 [20:05:16<2:45:59, 11.53s/it] 88%|████████▊ | 6182/7045 [20:05:29<2:53:54, 12.09s/it] {'loss': 1.1108, 'learning_rate': 1.942241763988864e-07, 'epoch': 0.88} + 88%|████████▊ | 6182/7045 [20:05:29<2:53:54, 12.09s/it] 88%|████████▊ | 6183/7045 [20:05:41<2:49:54, 11.83s/it] {'loss': 1.1113, 'learning_rate': 1.9378022697597336e-07, 'epoch': 0.88} + 88%|████████▊ | 6183/7045 [20:05:42<2:49:54, 11.83s/it] 88%|████████▊ | 6184/7045 [20:05:54<2:56:56, 12.33s/it] {'loss': 1.0957, 'learning_rate': 1.9333676505639109e-07, 'epoch': 0.88} + 88%|████████▊ | 6184/7045 [20:05:54<2:56:56, 12.33s/it] 88%|████████▊ | 6185/7045 [20:06:05<2:52:48, 12.06s/it] {'loss': 1.1162, 'learning_rate': 1.928937907338821e-07, 'epoch': 0.88} + 88%|████████▊ | 6185/7045 [20:06:05<2:52:48, 12.06s/it] 88%|████████▊ | 6186/7045 [20:06:18<2:53:57, 12.15s/it] {'loss': 1.0718, 'learning_rate': 1.9245130410208478e-07, 'epoch': 0.88} + 88%|████████▊ | 6186/7045 [20:06:18<2:53:57, 12.15s/it] 88%|████████▊ | 6187/7045 [20:06:29<2:48:40, 11.80s/it] {'loss': 1.0674, 'learning_rate': 1.920093052545341e-07, 'epoch': 0.88} + 88%|████████▊ | 6187/7045 [20:06:29<2:48:40, 11.80s/it] 88%|████████▊ | 6188/7045 [20:06:40<2:46:54, 11.69s/it] {'loss': 1.1279, 'learning_rate': 1.9156779428466414e-07, 'epoch': 0.88} + 88%|████████▊ | 6188/7045 [20:06:40<2:46:54, 11.69s/it] 88%|████████▊ | 6189/7045 [20:06:53<2:52:46, 12.11s/it] {'loss': 1.0752, 'learning_rate': 1.9112677128580337e-07, 'epoch': 0.88} + 88%|████████▊ | 6189/7045 [20:06:53<2:52:46, 12.11s/it] 88%|████████▊ | 6190/7045 [20:07:05<2:52:31, 12.11s/it] {'loss': 1.0918, 'learning_rate': 1.9068623635117772e-07, 'epoch': 0.88} + 88%|████████▊ | 6190/7045 [20:07:05<2:52:31, 12.11s/it] 88%|████████▊ | 6191/7045 [20:07:17<2:48:09, 11.81s/it] {'loss': 1.1377, 'learning_rate': 1.9024618957391077e-07, 'epoch': 0.88} + 88%|████████▊ | 6191/7045 [20:07:17<2:48:09, 11.81s/it] 88%|████████▊ | 6192/7045 [20:07:28<2:45:30, 11.64s/it] {'loss': 1.1738, 'learning_rate': 1.8980663104702201e-07, 'epoch': 0.88} + 88%|████████▊ | 6192/7045 [20:07:28<2:45:30, 11.64s/it] 88%|████████▊ | 6193/7045 [20:07:39<2:43:30, 11.52s/it] {'loss': 1.0938, 'learning_rate': 1.893675608634285e-07, 'epoch': 0.88} + 88%|████████▊ | 6193/7045 [20:07:39<2:43:30, 11.52s/it] 88%|████████▊ | 6194/7045 [20:07:52<2:48:27, 11.88s/it] {'loss': 1.0513, 'learning_rate': 1.8892897911594376e-07, 'epoch': 0.88} + 88%|████████▊ | 6194/7045 [20:07:52<2:48:27, 11.88s/it] 88%|████████▊ | 6195/7045 [20:08:04<2:48:19, 11.88s/it] {'loss': 1.1216, 'learning_rate': 1.8849088589727698e-07, 'epoch': 0.88} + 88%|████████▊ | 6195/7045 [20:08:04<2:48:19, 11.88s/it] 88%|████████▊ | 6196/7045 [20:08:15<2:45:10, 11.67s/it] {'loss': 1.0908, 'learning_rate': 1.880532813000363e-07, 'epoch': 0.88} + 88%|████████▊ | 6196/7045 [20:08:15<2:45:10, 11.67s/it] 88%|████████▊ | 6197/7045 [20:08:26<2:42:20, 11.49s/it] {'loss': 1.1318, 'learning_rate': 1.8761616541672496e-07, 'epoch': 0.88} + 88%|████████▊ | 6197/7045 [20:08:26<2:42:20, 11.49s/it] 88%|████████▊ | 6198/7045 [20:08:38<2:43:20, 11.57s/it] {'loss': 1.1006, 'learning_rate': 1.8717953833974262e-07, 'epoch': 0.88} + 88%|████████▊ | 6198/7045 [20:08:38<2:43:20, 11.57s/it] 88%|████████▊ | 6199/7045 [20:08:50<2:45:44, 11.75s/it] {'loss': 1.0557, 'learning_rate': 1.8674340016138687e-07, 'epoch': 0.88} + 88%|████████▊ | 6199/7045 [20:08:50<2:45:44, 11.75s/it] 88%|████████▊ | 6200/7045 [20:09:01<2:43:05, 11.58s/it] {'loss': 1.1396, 'learning_rate': 1.8630775097385172e-07, 'epoch': 0.88} + 88%|████████▊ | 6200/7045 [20:09:01<2:43:05, 11.58s/it] 88%|████████▊ | 6201/7045 [20:09:12<2:39:13, 11.32s/it] {'loss': 1.0928, 'learning_rate': 1.858725908692266e-07, 'epoch': 0.88} + 88%|████████▊ | 6201/7045 [20:09:12<2:39:13, 11.32s/it] 88%|████████▊ | 6202/7045 [20:09:23<2:37:41, 11.22s/it] {'loss': 1.1338, 'learning_rate': 1.8543791993949927e-07, 'epoch': 0.88} + 88%|████████▊ | 6202/7045 [20:09:23<2:37:41, 11.22s/it] 88%|████████▊ | 6203/7045 [20:09:35<2:40:17, 11.42s/it] {'loss': 1.0811, 'learning_rate': 1.8500373827655267e-07, 'epoch': 0.88} + 88%|████████▊ | 6203/7045 [20:09:35<2:40:17, 11.42s/it] 88%|████████▊ | 6204/7045 [20:09:47<2:43:39, 11.68s/it] {'loss': 1.1089, 'learning_rate': 1.8457004597216695e-07, 'epoch': 0.88} + 88%|████████▊ | 6204/7045 [20:09:47<2:43:39, 11.68s/it] 88%|████████▊ | 6205/7045 [20:09:58<2:41:31, 11.54s/it] {'loss': 1.0947, 'learning_rate': 1.8413684311801905e-07, 'epoch': 0.88} + 88%|████████▊ | 6205/7045 [20:09:58<2:41:31, 11.54s/it] 88%|████████▊ | 6206/7045 [20:10:10<2:41:10, 11.53s/it] {'loss': 1.1172, 'learning_rate': 1.837041298056813e-07, 'epoch': 0.88} + 88%|████████▊ | 6206/7045 [20:10:10<2:41:10, 11.53s/it] 88%|████████▊ | 6207/7045 [20:10:21<2:42:08, 11.61s/it] {'loss': 1.0996, 'learning_rate': 1.8327190612662444e-07, 'epoch': 0.88} + 88%|████████▊ | 6207/7045 [20:10:21<2:42:08, 11.61s/it] 88%|████████▊ | 6208/7045 [20:10:33<2:40:58, 11.54s/it] {'loss': 1.1123, 'learning_rate': 1.8284017217221395e-07, 'epoch': 0.88} + 88%|████████▊ | 6208/7045 [20:10:33<2:40:58, 11.54s/it] 88%|████████▊ | 6209/7045 [20:10:44<2:38:33, 11.38s/it] {'loss': 1.0825, 'learning_rate': 1.8240892803371268e-07, 'epoch': 0.88} + 88%|████████▊ | 6209/7045 [20:10:44<2:38:33, 11.38s/it] 88%|████████▊ | 6210/7045 [20:10:57<2:46:50, 11.99s/it] {'loss': 1.1196, 'learning_rate': 1.8197817380227968e-07, 'epoch': 0.88} + 88%|████████▊ | 6210/7045 [20:10:57<2:46:50, 11.99s/it] 88%|████████▊ | 6211/7045 [20:11:08<2:43:35, 11.77s/it] {'loss': 1.1396, 'learning_rate': 1.815479095689704e-07, 'epoch': 0.88} + 88%|████████▊ | 6211/7045 [20:11:08<2:43:35, 11.77s/it] 88%|████████▊ | 6212/7045 [20:11:21<2:45:48, 11.94s/it] {'loss': 1.0923, 'learning_rate': 1.811181354247371e-07, 'epoch': 0.88} + 88%|████████▊ | 6212/7045 [20:11:21<2:45:48, 11.94s/it] 88%|████████▊ | 6213/7045 [20:11:32<2:42:18, 11.71s/it] {'loss': 1.124, 'learning_rate': 1.8068885146042824e-07, 'epoch': 0.88} + 88%|████████▊ | 6213/7045 [20:11:32<2:42:18, 11.71s/it] 88%|████████▊ | 6214/7045 [20:11:45<2:47:27, 12.09s/it] {'loss': 1.0757, 'learning_rate': 1.8026005776678817e-07, 'epoch': 0.88} + 88%|████████▊ | 6214/7045 [20:11:45<2:47:27, 12.09s/it] 88%|████████▊ | 6215/7045 [20:11:56<2:42:03, 11.71s/it] {'loss': 1.063, 'learning_rate': 1.7983175443445855e-07, 'epoch': 0.88} + 88%|████████▊ | 6215/7045 [20:11:56<2:42:03, 11.71s/it] 88%|████████▊ | 6216/7045 [20:12:07<2:41:27, 11.69s/it] {'loss': 1.1025, 'learning_rate': 1.794039415539764e-07, 'epoch': 0.88} + 88%|████████▊ | 6216/7045 [20:12:07<2:41:27, 11.69s/it] 88%|████████▊ | 6217/7045 [20:12:19<2:42:49, 11.80s/it] {'loss': 1.1602, 'learning_rate': 1.7897661921577547e-07, 'epoch': 0.88} + 88%|████████▊ | 6217/7045 [20:12:19<2:42:49, 11.80s/it] 88%|████████▊ | 6218/7045 [20:12:30<2:38:59, 11.53s/it] {'loss': 1.0791, 'learning_rate': 1.785497875101863e-07, 'epoch': 0.88} + 88%|████████▊ | 6218/7045 [20:12:30<2:38:59, 11.53s/it] 88%|████████▊ | 6219/7045 [20:12:42<2:37:48, 11.46s/it] {'loss': 1.1182, 'learning_rate': 1.7812344652743507e-07, 'epoch': 0.88} + 88%|████████▊ | 6219/7045 [20:12:42<2:37:48, 11.46s/it] 88%|████████▊ | 6220/7045 [20:12:53<2:36:23, 11.37s/it] {'loss': 1.1416, 'learning_rate': 1.7769759635764383e-07, 'epoch': 0.88} + 88%|████████▊ | 6220/7045 [20:12:53<2:36:23, 11.37s/it] 88%|████████▊ | 6221/7045 [20:13:04<2:34:52, 11.28s/it] {'loss': 1.1641, 'learning_rate': 1.7727223709083252e-07, 'epoch': 0.88} + 88%|████████▊ | 6221/7045 [20:13:04<2:34:52, 11.28s/it] 88%|████████▊ | 6222/7045 [20:13:16<2:36:49, 11.43s/it] {'loss': 1.0913, 'learning_rate': 1.7684736881691532e-07, 'epoch': 0.88} + 88%|████████▊ | 6222/7045 [20:13:16<2:36:49, 11.43s/it] 88%|████████▊ | 6223/7045 [20:13:27<2:34:30, 11.28s/it] {'loss': 1.1113, 'learning_rate': 1.764229916257043e-07, 'epoch': 0.88} + 88%|████████▊ | 6223/7045 [20:13:27<2:34:30, 11.28s/it] 88%|████████▊ | 6224/7045 [20:13:39<2:38:51, 11.61s/it] {'loss': 1.125, 'learning_rate': 1.7599910560690686e-07, 'epoch': 0.88} + 88%|████████▊ | 6224/7045 [20:13:39<2:38:51, 11.61s/it] 88%|████████▊ | 6225/7045 [20:13:50<2:36:15, 11.43s/it] {'loss': 1.1025, 'learning_rate': 1.755757108501263e-07, 'epoch': 0.88} + 88%|████████▊ | 6225/7045 [20:13:50<2:36:15, 11.43s/it] 88%|████████▊ | 6226/7045 [20:14:03<2:41:56, 11.86s/it] {'loss': 1.0747, 'learning_rate': 1.751528074448633e-07, 'epoch': 0.88} + 88%|████████▊ | 6226/7045 [20:14:03<2:41:56, 11.86s/it]Token indices sequence length is longer than the specified maximum sequence length for this model (2485 > 2048). Running this sequence through the model will result in indexing errors + 88%|████████▊ | 6227/7045 [20:14:14<2:40:00, 11.74s/it] {'loss': 1.1318, 'learning_rate': 1.7473039548051296e-07, 'epoch': 0.88} + 88%|████████▊ | 6227/7045 [20:14:14<2:40:00, 11.74s/it] 88%|████████▊ | 6228/7045 [20:14:27<2:45:28, 12.15s/it] {'loss': 1.0801, 'learning_rate': 1.7430847504636778e-07, 'epoch': 0.88} + 88%|████████▊ | 6228/7045 [20:14:27<2:45:28, 12.15s/it] 88%|████████▊ | 6229/7045 [20:14:38<2:39:41, 11.74s/it] {'loss': 1.084, 'learning_rate': 1.738870462316161e-07, 'epoch': 0.88} + 88%|████████▊ | 6229/7045 [20:14:38<2:39:41, 11.74s/it] 88%|████████▊ | 6230/7045 [20:14:49<2:37:11, 11.57s/it] {'loss': 1.0874, 'learning_rate': 1.73466109125342e-07, 'epoch': 0.88} + 88%|████████▊ | 6230/7045 [20:14:49<2:37:11, 11.57s/it] 88%|████████▊ | 6231/7045 [20:15:01<2:37:11, 11.59s/it] {'loss': 1.1304, 'learning_rate': 1.7304566381652565e-07, 'epoch': 0.88} + 88%|████████▊ | 6231/7045 [20:15:01<2:37:11, 11.59s/it] 88%|████████▊ | 6232/7045 [20:15:12<2:34:56, 11.43s/it] {'loss': 1.1445, 'learning_rate': 1.7262571039404402e-07, 'epoch': 0.88} + 88%|████████▊ | 6232/7045 [20:15:12<2:34:56, 11.43s/it] 88%|████████▊ | 6233/7045 [20:15:23<2:33:13, 11.32s/it] {'loss': 1.1011, 'learning_rate': 1.7220624894666888e-07, 'epoch': 0.88} + 88%|████████▊ | 6233/7045 [20:15:23<2:33:13, 11.32s/it] 88%|████████▊ | 6234/7045 [20:15:34<2:31:55, 11.24s/it] {'loss': 1.1826, 'learning_rate': 1.717872795630693e-07, 'epoch': 0.88} + 88%|████████▊ | 6234/7045 [20:15:34<2:31:55, 11.24s/it] 89%|████████▊ | 6235/7045 [20:15:45<2:31:00, 11.19s/it] {'loss': 1.062, 'learning_rate': 1.713688023318094e-07, 'epoch': 0.89} + 89%|████████▊ | 6235/7045 [20:15:45<2:31:00, 11.19s/it] 89%|████████▊ | 6236/7045 [20:15:57<2:31:14, 11.22s/it] {'loss': 1.0894, 'learning_rate': 1.7095081734134849e-07, 'epoch': 0.89} + 89%|████████▊ | 6236/7045 [20:15:57<2:31:14, 11.22s/it] 89%|████████▊ | 6237/7045 [20:16:08<2:30:38, 11.19s/it] {'loss': 1.1084, 'learning_rate': 1.7053332468004474e-07, 'epoch': 0.89} + 89%|████████▊ | 6237/7045 [20:16:08<2:30:38, 11.19s/it] 89%|████████▊ | 6238/7045 [20:16:19<2:30:01, 11.15s/it] {'loss': 1.0996, 'learning_rate': 1.7011632443614952e-07, 'epoch': 0.89} + 89%|████████▊ | 6238/7045 [20:16:19<2:30:01, 11.15s/it] 89%|████████▊ | 6239/7045 [20:16:30<2:30:48, 11.23s/it] {'loss': 1.0688, 'learning_rate': 1.696998166978106e-07, 'epoch': 0.89} + 89%|████████▊ | 6239/7045 [20:16:30<2:30:48, 11.23s/it] 89%|████████▊ | 6240/7045 [20:16:42<2:35:12, 11.57s/it] {'loss': 1.0767, 'learning_rate': 1.6928380155307288e-07, 'epoch': 0.89} + 89%|████████▊ | 6240/7045 [20:16:42<2:35:12, 11.57s/it] 89%|████████▊ | 6241/7045 [20:16:55<2:37:23, 11.75s/it] {'loss': 1.1572, 'learning_rate': 1.6886827908987573e-07, 'epoch': 0.89} + 89%|████████▊ | 6241/7045 [20:16:55<2:37:23, 11.75s/it] 89%|████████▊ | 6242/7045 [20:17:06<2:34:44, 11.56s/it] {'loss': 1.0742, 'learning_rate': 1.6845324939605472e-07, 'epoch': 0.89} + 89%|████████▊ | 6242/7045 [20:17:06<2:34:44, 11.56s/it] 89%|████████▊ | 6243/7045 [20:17:17<2:32:18, 11.39s/it] {'loss': 1.1191, 'learning_rate': 1.680387125593419e-07, 'epoch': 0.89} + 89%|████████▊ | 6243/7045 [20:17:17<2:32:18, 11.39s/it] 89%|████████▊ | 6244/7045 [20:17:34<2:55:26, 13.14s/it] {'loss': 1.0898, 'learning_rate': 1.676246686673641e-07, 'epoch': 0.89} + 89%|████████▊ | 6244/7045 [20:17:34<2:55:26, 13.14s/it] 89%|████████▊ | 6245/7045 [20:17:48<2:57:49, 13.34s/it] {'loss': 1.0786, 'learning_rate': 1.672111178076455e-07, 'epoch': 0.89} + 89%|████████▊ | 6245/7045 [20:17:48<2:57:49, 13.34s/it] 89%|████████▊ | 6246/7045 [20:17:59<2:48:14, 12.63s/it] {'loss': 1.0972, 'learning_rate': 1.6679806006760447e-07, 'epoch': 0.89} + 89%|████████▊ | 6246/7045 [20:17:59<2:48:14, 12.63s/it] 89%|████████▊ | 6247/7045 [20:18:10<2:42:39, 12.23s/it] {'loss': 1.0728, 'learning_rate': 1.6638549553455562e-07, 'epoch': 0.89} + 89%|████████▊ | 6247/7045 [20:18:10<2:42:39, 12.23s/it] 89%|████████▊ | 6248/7045 [20:18:22<2:39:34, 12.01s/it] {'loss': 1.1074, 'learning_rate': 1.659734242957095e-07, 'epoch': 0.89} + 89%|████████▊ | 6248/7045 [20:18:22<2:39:34, 12.01s/it] 89%|████████▊ | 6249/7045 [20:18:33<2:36:22, 11.79s/it] {'loss': 1.125, 'learning_rate': 1.655618464381728e-07, 'epoch': 0.89} + 89%|████████▊ | 6249/7045 [20:18:33<2:36:22, 11.79s/it] 89%|████████▊ | 6250/7045 [20:18:46<2:40:09, 12.09s/it] {'loss': 1.0376, 'learning_rate': 1.6515076204894702e-07, 'epoch': 0.89} + 89%|████████▊ | 6250/7045 [20:18:46<2:40:09, 12.09s/it] 89%|████████▊ | 6251/7045 [20:18:57<2:35:14, 11.73s/it] {'loss': 1.1118, 'learning_rate': 1.6474017121493018e-07, 'epoch': 0.89} + 89%|████████▊ | 6251/7045 [20:18:57<2:35:14, 11.73s/it] 89%|████████▊ | 6252/7045 [20:19:08<2:32:11, 11.52s/it] {'loss': 1.083, 'learning_rate': 1.6433007402291533e-07, 'epoch': 0.89} + 89%|████████▊ | 6252/7045 [20:19:08<2:32:11, 11.52s/it] 89%|████████▉ | 6253/7045 [20:19:20<2:35:41, 11.80s/it] {'loss': 1.1143, 'learning_rate': 1.6392047055959093e-07, 'epoch': 0.89} + 89%|████████▉ | 6253/7045 [20:19:20<2:35:41, 11.80s/it] 89%|████████▉ | 6254/7045 [20:19:34<2:42:19, 12.31s/it] {'loss': 1.0747, 'learning_rate': 1.6351136091154242e-07, 'epoch': 0.89} + 89%|████████▉ | 6254/7045 [20:19:34<2:42:19, 12.31s/it] 89%|████████▉ | 6255/7045 [20:19:46<2:41:59, 12.30s/it] {'loss': 1.0942, 'learning_rate': 1.6310274516524922e-07, 'epoch': 0.89} + 89%|████████▉ | 6255/7045 [20:19:46<2:41:59, 12.30s/it] 89%|████████▉ | 6256/7045 [20:19:58<2:41:49, 12.31s/it] {'loss': 1.0635, 'learning_rate': 1.6269462340708808e-07, 'epoch': 0.89} + 89%|████████▉ | 6256/7045 [20:19:58<2:41:49, 12.31s/it] 89%|████████▉ | 6257/7045 [20:20:09<2:37:51, 12.02s/it] {'loss': 1.1377, 'learning_rate': 1.6228699572332946e-07, 'epoch': 0.89} + 89%|████████▉ | 6257/7045 [20:20:09<2:37:51, 12.02s/it] 89%|████████▉ | 6258/7045 [20:20:21<2:34:46, 11.80s/it] {'loss': 1.1318, 'learning_rate': 1.6187986220014025e-07, 'epoch': 0.89} + 89%|████████▉ | 6258/7045 [20:20:21<2:34:46, 11.80s/it] 89%|████████▉ | 6259/7045 [20:20:32<2:31:14, 11.54s/it] {'loss': 1.0474, 'learning_rate': 1.6147322292358354e-07, 'epoch': 0.89} + 89%|████████▉ | 6259/7045 [20:20:32<2:31:14, 11.54s/it] 89%|████████▉ | 6260/7045 [20:20:45<2:36:22, 11.95s/it] {'loss': 1.1055, 'learning_rate': 1.6106707797961696e-07, 'epoch': 0.89} + 89%|████████▉ | 6260/7045 [20:20:45<2:36:22, 11.95s/it] 89%|████████▉ | 6261/7045 [20:20:57<2:36:21, 11.97s/it] {'loss': 1.1055, 'learning_rate': 1.606614274540938e-07, 'epoch': 0.89} + 89%|████████▉ | 6261/7045 [20:20:57<2:36:21, 11.97s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 4 bytes but only got 0. + warnings.warn(str(msg)) + 89%|████████▉ | 6262/7045 [20:21:08<2:33:40, 11.78s/it] {'loss': 1.0977, 'learning_rate': 1.602562714327635e-07, 'epoch': 0.89} + 89%|████████▉ | 6262/7045 [20:21:08<2:33:40, 11.78s/it] 89%|████████▉ | 6263/7045 [20:21:19<2:30:12, 11.52s/it] {'loss': 1.0698, 'learning_rate': 1.5985161000127064e-07, 'epoch': 0.89} + 89%|████████▉ | 6263/7045 [20:21:19<2:30:12, 11.52s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Truncated File Read + warnings.warn(str(msg)) + 89%|████████▉ | 6264/7045 [20:21:31<2:33:52, 11.82s/it] {'loss': 1.0747, 'learning_rate': 1.5944744324515426e-07, 'epoch': 0.89} + 89%|████████▉ | 6264/7045 [20:21:31<2:33:52, 11.82s/it] 89%|████████▉ | 6265/7045 [20:21:42<2:30:27, 11.57s/it] {'loss': 1.1426, 'learning_rate': 1.590437712498505e-07, 'epoch': 0.89} + 89%|████████▉ | 6265/7045 [20:21:42<2:30:27, 11.57s/it] 89%|████████▉ | 6266/7045 [20:21:54<2:29:39, 11.53s/it] {'loss': 1.1006, 'learning_rate': 1.5864059410068938e-07, 'epoch': 0.89} + 89%|████████▉ | 6266/7045 [20:21:54<2:29:39, 11.53s/it] 89%|████████▉ | 6267/7045 [20:22:05<2:28:01, 11.42s/it] {'loss': 1.1152, 'learning_rate': 1.5823791188289776e-07, 'epoch': 0.89} + 89%|████████▉ | 6267/7045 [20:22:05<2:28:01, 11.42s/it] 89%|████████▉ | 6268/7045 [20:22:19<2:36:44, 12.10s/it] {'loss': 1.103, 'learning_rate': 1.5783572468159703e-07, 'epoch': 0.89} + 89%|████████▉ | 6268/7045 [20:22:19<2:36:44, 12.10s/it] 89%|████████▉ | 6269/7045 [20:22:30<2:31:49, 11.74s/it] {'loss': 1.1504, 'learning_rate': 1.5743403258180357e-07, 'epoch': 0.89} + 89%|████████▉ | 6269/7045 [20:22:30<2:31:49, 11.74s/it] 89%|████████▉ | 6270/7045 [20:22:41<2:29:45, 11.59s/it] {'loss': 1.1279, 'learning_rate': 1.5703283566843003e-07, 'epoch': 0.89} + 89%|████████▉ | 6270/7045 [20:22:41<2:29:45, 11.59s/it] 89%|████████▉ | 6271/7045 [20:22:53<2:30:19, 11.65s/it] {'loss': 1.1162, 'learning_rate': 1.5663213402628413e-07, 'epoch': 0.89} + 89%|████████▉ | 6271/7045 [20:22:53<2:30:19, 11.65s/it] 89%|████████▉ | 6272/7045 [20:23:04<2:29:55, 11.64s/it] {'loss': 1.1045, 'learning_rate': 1.562319277400673e-07, 'epoch': 0.89} + 89%|████████▉ | 6272/7045 [20:23:04<2:29:55, 11.64s/it] 89%|████████▉ | 6273/7045 [20:23:16<2:28:30, 11.54s/it] {'loss': 1.1621, 'learning_rate': 1.558322168943799e-07, 'epoch': 0.89} + 89%|████████▉ | 6273/7045 [20:23:16<2:28:30, 11.54s/it] 89%|████████▉ | 6274/7045 [20:23:27<2:28:13, 11.54s/it] {'loss': 1.083, 'learning_rate': 1.5543300157371404e-07, 'epoch': 0.89} + 89%|████████▉ | 6274/7045 [20:23:27<2:28:13, 11.54s/it] 89%|████████▉ | 6275/7045 [20:23:38<2:26:32, 11.42s/it] {'loss': 1.1025, 'learning_rate': 1.550342818624581e-07, 'epoch': 0.89} + 89%|████████▉ | 6275/7045 [20:23:38<2:26:32, 11.42s/it] 89%|████████▉ | 6276/7045 [20:23:49<2:24:27, 11.27s/it] {'loss': 1.0947, 'learning_rate': 1.546360578448969e-07, 'epoch': 0.89} + 89%|████████▉ | 6276/7045 [20:23:49<2:24:27, 11.27s/it] 89%|████████▉ | 6277/7045 [20:24:00<2:23:08, 11.18s/it] {'loss': 1.0986, 'learning_rate': 1.5423832960520884e-07, 'epoch': 0.89} + 89%|████████▉ | 6277/7045 [20:24:00<2:23:08, 11.18s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 4 bytes but only got 0. + warnings.warn(str(msg)) + 89%|████████▉ | 6278/7045 [20:24:11<2:22:58, 11.18s/it] {'loss': 1.105, 'learning_rate': 1.53841097227469e-07, 'epoch': 0.89} + 89%|████████▉ | 6278/7045 [20:24:11<2:22:58, 11.18s/it] 89%|████████▉ | 6279/7045 [20:24:23<2:24:01, 11.28s/it] {'loss': 1.0967, 'learning_rate': 1.5344436079564596e-07, 'epoch': 0.89} + 89%|████████▉ | 6279/7045 [20:24:23<2:24:01, 11.28s/it] 89%|████████▉ | 6280/7045 [20:24:34<2:23:19, 11.24s/it] {'loss': 1.1396, 'learning_rate': 1.530481203936046e-07, 'epoch': 0.89} + 89%|████████▉ | 6280/7045 [20:24:34<2:23:19, 11.24s/it] 89%|███��████▉ | 6281/7045 [20:24:45<2:23:07, 11.24s/it] {'loss': 1.1475, 'learning_rate': 1.5265237610510548e-07, 'epoch': 0.89} + 89%|████████▉ | 6281/7045 [20:24:45<2:23:07, 11.24s/it] 89%|████████▉ | 6282/7045 [20:24:58<2:29:16, 11.74s/it] {'loss': 1.0908, 'learning_rate': 1.5225712801380272e-07, 'epoch': 0.89} + 89%|████████▉ | 6282/7045 [20:24:58<2:29:16, 11.74s/it] 89%|████████▉ | 6283/7045 [20:25:09<2:27:07, 11.58s/it] {'loss': 1.1201, 'learning_rate': 1.5186237620324646e-07, 'epoch': 0.89} + 89%|████████▉ | 6283/7045 [20:25:09<2:27:07, 11.58s/it] 89%|████████▉ | 6284/7045 [20:25:21<2:26:25, 11.55s/it] {'loss': 1.0967, 'learning_rate': 1.5146812075688162e-07, 'epoch': 0.89} + 89%|████████▉ | 6284/7045 [20:25:21<2:26:25, 11.55s/it] 89%|████████▉ | 6285/7045 [20:25:32<2:24:37, 11.42s/it] {'loss': 1.1191, 'learning_rate': 1.5107436175804957e-07, 'epoch': 0.89} + 89%|████████▉ | 6285/7045 [20:25:32<2:24:37, 11.42s/it] 89%|████████▉ | 6286/7045 [20:25:43<2:23:54, 11.38s/it] {'loss': 1.1064, 'learning_rate': 1.5068109928998405e-07, 'epoch': 0.89} + 89%|████████▉ | 6286/7045 [20:25:43<2:23:54, 11.38s/it] 89%|████████▉ | 6287/7045 [20:25:56<2:27:36, 11.68s/it] {'loss': 1.1006, 'learning_rate': 1.5028833343581684e-07, 'epoch': 0.89} + 89%|████████▉ | 6287/7045 [20:25:56<2:27:36, 11.68s/it] 89%|████████▉ | 6288/7045 [20:26:08<2:32:08, 12.06s/it] {'loss': 1.0977, 'learning_rate': 1.4989606427857185e-07, 'epoch': 0.89} + 89%|████████▉ | 6288/7045 [20:26:08<2:32:08, 12.06s/it] 89%|████████▉ | 6289/7045 [20:26:21<2:33:23, 12.17s/it] {'loss': 1.1235, 'learning_rate': 1.4950429190117049e-07, 'epoch': 0.89} + 89%|████████▉ | 6289/7045 [20:26:21<2:33:23, 12.17s/it] 89%|████████▉ | 6290/7045 [20:26:32<2:28:53, 11.83s/it] {'loss': 1.126, 'learning_rate': 1.4911301638642794e-07, 'epoch': 0.89} + 89%|████████▉ | 6290/7045 [20:26:32<2:28:53, 11.83s/it] 89%|████████▉ | 6291/7045 [20:26:44<2:30:36, 11.99s/it] {'loss': 1.1104, 'learning_rate': 1.4872223781705358e-07, 'epoch': 0.89} + 89%|████████▉ | 6291/7045 [20:26:44<2:30:36, 11.99s/it] 89%|████████▉ | 6292/7045 [20:26:56<2:29:32, 11.92s/it] {'loss': 1.124, 'learning_rate': 1.4833195627565383e-07, 'epoch': 0.89} + 89%|████████▉ | 6292/7045 [20:26:56<2:29:32, 11.92s/it] 89%|████████▉ | 6293/7045 [20:27:07<2:27:04, 11.73s/it] {'loss': 1.125, 'learning_rate': 1.4794217184472854e-07, 'epoch': 0.89} + 89%|████████▉ | 6293/7045 [20:27:07<2:27:04, 11.73s/it] 89%|████████▉ | 6294/7045 [20:27:18<2:23:22, 11.46s/it] {'loss': 1.0835, 'learning_rate': 1.4755288460667205e-07, 'epoch': 0.89} + 89%|████████▉ | 6294/7045 [20:27:18<2:23:22, 11.46s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Truncated File Read + warnings.warn(str(msg)) + 89%|████████▉ | 6295/7045 [20:27:29<2:21:50, 11.35s/it] {'loss': 1.1299, 'learning_rate': 1.471640946437755e-07, 'epoch': 0.89} + 89%|████████▉ | 6295/7045 [20:27:29<2:21:50, 11.35s/it] 89%|████████▉ | 6296/7045 [20:27:41<2:24:00, 11.54s/it] {'loss': 1.1084, 'learning_rate': 1.467758020382226e-07, 'epoch': 0.89} + 89%|████████▉ | 6296/7045 [20:27:41<2:24:00, 11.54s/it] 89%|████████▉ | 6297/7045 [20:27:53<2:23:31, 11.51s/it] {'loss': 1.1514, 'learning_rate': 1.4638800687209408e-07, 'epoch': 0.89} + 89%|████████▉ | 6297/7045 [20:27:53<2:23:31, 11.51s/it] 89%|████████▉ | 6298/7045 [20:28:04<2:23:34, 11.53s/it] {'loss': 1.1699, 'learning_rate': 1.4600070922736436e-07, 'epoch': 0.89} + 89%|██���█████▉ | 6298/7045 [20:28:04<2:23:34, 11.53s/it] 89%|████████▉ | 6299/7045 [20:28:18<2:30:14, 12.08s/it] {'loss': 1.0845, 'learning_rate': 1.4561390918590212e-07, 'epoch': 0.89} + 89%|████████▉ | 6299/7045 [20:28:18<2:30:14, 12.08s/it] 89%|████████▉ | 6300/7045 [20:28:30<2:31:28, 12.20s/it] {'loss': 1.0815, 'learning_rate': 1.452276068294728e-07, 'epoch': 0.89} + 89%|████████▉ | 6300/7045 [20:28:30<2:31:28, 12.20s/it] 89%|████████▉ | 6301/7045 [20:28:41<2:28:04, 11.94s/it] {'loss': 1.1128, 'learning_rate': 1.4484180223973464e-07, 'epoch': 0.89} + 89%|████████▉ | 6301/7045 [20:28:41<2:28:04, 11.94s/it] 89%|████████▉ | 6302/7045 [20:28:53<2:27:32, 11.91s/it] {'loss': 1.125, 'learning_rate': 1.4445649549824138e-07, 'epoch': 0.89} + 89%|████████▉ | 6302/7045 [20:28:53<2:27:32, 11.91s/it] 89%|████████▉ | 6303/7045 [20:29:05<2:25:17, 11.75s/it] {'loss': 1.085, 'learning_rate': 1.4407168668644222e-07, 'epoch': 0.89} + 89%|████████▉ | 6303/7045 [20:29:05<2:25:17, 11.75s/it] 89%|████████▉ | 6304/7045 [20:29:16<2:22:17, 11.52s/it] {'loss': 1.1475, 'learning_rate': 1.4368737588567993e-07, 'epoch': 0.89} + 89%|████████▉ | 6304/7045 [20:29:16<2:22:17, 11.52s/it] 89%|████████▉ | 6305/7045 [20:29:27<2:20:44, 11.41s/it] {'loss': 1.0659, 'learning_rate': 1.4330356317719257e-07, 'epoch': 0.89} + 89%|████████▉ | 6305/7045 [20:29:27<2:20:44, 11.41s/it] 90%|████████▉ | 6306/7045 [20:29:38<2:20:48, 11.43s/it] {'loss': 1.1372, 'learning_rate': 1.429202486421133e-07, 'epoch': 0.9} + 90%|████████▉ | 6306/7045 [20:29:38<2:20:48, 11.43s/it] 90%|████████▉ | 6307/7045 [20:29:49<2:19:24, 11.33s/it] {'loss': 1.1465, 'learning_rate': 1.4253743236146893e-07, 'epoch': 0.9} + 90%|████████▉ | 6307/7045 [20:29:49<2:19:24, 11.33s/it] 90%|████████▉ | 6308/7045 [20:30:02<2:22:27, 11.60s/it] {'loss': 1.0601, 'learning_rate': 1.421551144161823e-07, 'epoch': 0.9} + 90%|████████▉ | 6308/7045 [20:30:02<2:22:27, 11.60s/it] 90%|████████▉ | 6309/7045 [20:30:13<2:23:02, 11.66s/it] {'loss': 1.1572, 'learning_rate': 1.417732948870698e-07, 'epoch': 0.9} + 90%|████████▉ | 6309/7045 [20:30:13<2:23:02, 11.66s/it] 90%|████████▉ | 6310/7045 [20:30:25<2:22:14, 11.61s/it] {'loss': 1.1045, 'learning_rate': 1.4139197385484276e-07, 'epoch': 0.9} + 90%|████████▉ | 6310/7045 [20:30:25<2:22:14, 11.61s/it] 90%|████████▉ | 6311/7045 [20:30:37<2:22:58, 11.69s/it] {'loss': 1.1182, 'learning_rate': 1.410111514001078e-07, 'epoch': 0.9} + 90%|████████▉ | 6311/7045 [20:30:37<2:22:58, 11.69s/it] 90%|████████▉ | 6312/7045 [20:30:48<2:21:36, 11.59s/it] {'loss': 1.0664, 'learning_rate': 1.4063082760336522e-07, 'epoch': 0.9} + 90%|████████▉ | 6312/7045 [20:30:48<2:21:36, 11.59s/it] 90%|████████▉ | 6313/7045 [20:31:01<2:26:00, 11.97s/it] {'loss': 1.0596, 'learning_rate': 1.402510025450099e-07, 'epoch': 0.9} + 90%|████████▉ | 6313/7045 [20:31:01<2:26:00, 11.97s/it] 90%|████████▉ | 6314/7045 [20:31:12<2:23:38, 11.79s/it] {'loss': 1.0723, 'learning_rate': 1.3987167630533233e-07, 'epoch': 0.9} + 90%|████████▉ | 6314/7045 [20:31:12<2:23:38, 11.79s/it] 90%|████████▉ | 6315/7045 [20:31:26<2:28:45, 12.23s/it] {'loss': 1.0806, 'learning_rate': 1.394928489645164e-07, 'epoch': 0.9} + 90%|████████▉ | 6315/7045 [20:31:26<2:28:45, 12.23s/it] 90%|████████▉ | 6316/7045 [20:31:37<2:26:06, 12.03s/it] {'loss': 1.1113, 'learning_rate': 1.391145206026412e-07, 'epoch': 0.9} + 90%|████████▉ | 6316/7045 [20:31:37<2:26:06, 12.03s/it] 90%|████████▉ | 6317/7045 [20:31:48<2:22:57, 11.78s/it] {'loss': 1.0747, 'learning_rate': 1.3873669129968015e-07, 'epoch': 0.9} + 90%|████████▉ | 6317/7045 [20:31:48<2:22:57, 11.78s/it] 90%|████████▉ | 6318/7045 [20:31:59<2:19:36, 11.52s/it] {'loss': 1.1016, 'learning_rate': 1.383593611355011e-07, 'epoch': 0.9} + 90%|████████▉ | 6318/7045 [20:31:59<2:19:36, 11.52s/it] 90%|████████▉ | 6319/7045 [20:32:11<2:21:47, 11.72s/it] {'loss': 1.083, 'learning_rate': 1.3798253018986713e-07, 'epoch': 0.9} + 90%|████████▉ | 6319/7045 [20:32:11<2:21:47, 11.72s/it] 90%|████████▉ | 6320/7045 [20:32:24<2:23:27, 11.87s/it] {'loss': 1.1289, 'learning_rate': 1.3760619854243429e-07, 'epoch': 0.9} + 90%|████████▉ | 6320/7045 [20:32:24<2:23:27, 11.87s/it] 90%|████████▉ | 6321/7045 [20:32:35<2:20:22, 11.63s/it] {'loss': 1.0898, 'learning_rate': 1.3723036627275366e-07, 'epoch': 0.9} + 90%|████████▉ | 6321/7045 [20:32:35<2:20:22, 11.63s/it] 90%|████████▉ | 6322/7045 [20:32:47<2:23:37, 11.92s/it] {'loss': 1.0688, 'learning_rate': 1.3685503346027252e-07, 'epoch': 0.9} + 90%|████████▉ | 6322/7045 [20:32:47<2:23:37, 11.92s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 4 bytes but only got 0. + warnings.warn(str(msg)) + 90%|████████▉ | 6323/7045 [20:33:00<2:25:24, 12.08s/it] {'loss': 1.123, 'learning_rate': 1.3648020018433017e-07, 'epoch': 0.9} + 90%|████████▉ | 6323/7045 [20:33:00<2:25:24, 12.08s/it] 90%|████████▉ | 6324/7045 [20:33:13<2:29:07, 12.41s/it] {'loss': 1.082, 'learning_rate': 1.36105866524161e-07, 'epoch': 0.9} + 90%|████████▉ | 6324/7045 [20:33:13<2:29:07, 12.41s/it] 90%|████████▉ | 6325/7045 [20:33:24<2:23:22, 11.95s/it] {'loss': 1.0938, 'learning_rate': 1.3573203255889484e-07, 'epoch': 0.9} + 90%|████████▉ | 6325/7045 [20:33:24<2:23:22, 11.95s/it] 90%|████████▉ | 6326/7045 [20:33:35<2:21:25, 11.80s/it] {'loss': 1.125, 'learning_rate': 1.353586983675545e-07, 'epoch': 0.9} + 90%|████████▉ | 6326/7045 [20:33:35<2:21:25, 11.80s/it] 90%|████████▉ | 6327/7045 [20:33:48<2:24:04, 12.04s/it] {'loss': 1.1025, 'learning_rate': 1.3498586402905744e-07, 'epoch': 0.9} + 90%|████████▉ | 6327/7045 [20:33:48<2:24:04, 12.04s/it] 90%|████████▉ | 6328/7045 [20:33:59<2:20:06, 11.72s/it] {'loss': 1.0845, 'learning_rate': 1.3461352962221646e-07, 'epoch': 0.9} + 90%|████████▉ | 6328/7045 [20:33:59<2:20:06, 11.72s/it] 90%|████████▉ | 6329/7045 [20:34:10<2:17:44, 11.54s/it] {'loss': 1.1157, 'learning_rate': 1.3424169522573722e-07, 'epoch': 0.9} + 90%|████████▉ | 6329/7045 [20:34:10<2:17:44, 11.54s/it] 90%|████████▉ | 6330/7045 [20:34:21<2:16:28, 11.45s/it] {'loss': 1.0859, 'learning_rate': 1.33870360918221e-07, 'epoch': 0.9} + 90%|████████▉ | 6330/7045 [20:34:21<2:16:28, 11.45s/it] 90%|████████▉ | 6331/7045 [20:34:33<2:17:50, 11.58s/it] {'loss': 1.1016, 'learning_rate': 1.3349952677816303e-07, 'epoch': 0.9} + 90%|████████▉ | 6331/7045 [20:34:33<2:17:50, 11.58s/it] 90%|████████▉ | 6332/7045 [20:34:44<2:15:53, 11.44s/it] {'loss': 1.1172, 'learning_rate': 1.331291928839515e-07, 'epoch': 0.9} + 90%|████████▉ | 6332/7045 [20:34:44<2:15:53, 11.44s/it] 90%|████████▉ | 6333/7045 [20:34:56<2:16:38, 11.51s/it] {'loss': 1.1318, 'learning_rate': 1.327593593138707e-07, 'epoch': 0.9} + 90%|████████▉ | 6333/7045 [20:34:56<2:16:38, 11.51s/it] 90%|████████▉ | 6334/7045 [20:35:07<2:16:15, 11.50s/it] {'loss': 1.1104, 'learning_rate': 1.3239002614609865e-07, 'epoch': 0.9} + 90%|████████▉ | 6334/7045 [20:35:07<2:16:15, 11.50s/it] 90%|████████▉ | 6335/7045 [20:35:20<2:21:11, 11.93s/it] {'loss': 1.1021, 'learning_rate': 1.3202119345870678e-07, 'epoch': 0.9} + 90%|████████▉ | 6335/7045 [20:35:20<2:21:11, 11.93s/it] 90%|████████▉ | 6336/7045 [20:35:33<2:23:25, 12.14s/it] {'loss': 1.0786, 'learning_rate': 1.3165286132966164e-07, 'epoch': 0.9} + 90%|████████▉ | 6336/7045 [20:35:33<2:23:25, 12.14s/it] 90%|████████▉ | 6337/7045 [20:35:44<2:19:06, 11.79s/it] {'loss': 1.1118, 'learning_rate': 1.3128502983682345e-07, 'epoch': 0.9} + 90%|████████▉ | 6337/7045 [20:35:44<2:19:06, 11.79s/it] 90%|████████▉ | 6338/7045 [20:35:55<2:18:00, 11.71s/it] {'loss': 1.0752, 'learning_rate': 1.3091769905794637e-07, 'epoch': 0.9} + 90%|████████▉ | 6338/7045 [20:35:55<2:18:00, 11.71s/it] 90%|████████▉ | 6339/7045 [20:36:07<2:17:29, 11.69s/it] {'loss': 1.1123, 'learning_rate': 1.3055086907068e-07, 'epoch': 0.9} + 90%|████████▉ | 6339/7045 [20:36:07<2:17:29, 11.69s/it] 90%|████████▉ | 6340/7045 [20:36:18<2:15:59, 11.57s/it] {'loss': 1.1514, 'learning_rate': 1.3018453995256612e-07, 'epoch': 0.9} + 90%|████████▉ | 6340/7045 [20:36:18<2:15:59, 11.57s/it] 90%|█████████ | 6341/7045 [20:36:31<2:18:46, 11.83s/it] {'loss': 1.124, 'learning_rate': 1.298187117810426e-07, 'epoch': 0.9} + 90%|█████████ | 6341/7045 [20:36:31<2:18:46, 11.83s/it] 90%|█████████ | 6342/7045 [20:36:42<2:16:24, 11.64s/it] {'loss': 1.126, 'learning_rate': 1.294533846334403e-07, 'epoch': 0.9} + 90%|█████████ | 6342/7045 [20:36:42<2:16:24, 11.64s/it] 90%|█████████ | 6343/7045 [20:36:53<2:13:39, 11.42s/it] {'loss': 1.1396, 'learning_rate': 1.290885585869839e-07, 'epoch': 0.9} + 90%|█████████ | 6343/7045 [20:36:53<2:13:39, 11.42s/it] 90%|█████████ | 6344/7045 [20:37:05<2:14:02, 11.47s/it] {'loss': 1.0952, 'learning_rate': 1.2872423371879328e-07, 'epoch': 0.9} + 90%|█████████ | 6344/7045 [20:37:05<2:14:02, 11.47s/it] 90%|█████████ | 6345/7045 [20:37:16<2:12:39, 11.37s/it] {'loss': 1.1069, 'learning_rate': 1.2836041010588107e-07, 'epoch': 0.9} + 90%|█████████ | 6345/7045 [20:37:16<2:12:39, 11.37s/it] 90%|█████████ | 6346/7045 [20:37:29<2:19:01, 11.93s/it] {'loss': 1.106, 'learning_rate': 1.279970878251549e-07, 'epoch': 0.9} + 90%|█████████ | 6346/7045 [20:37:29<2:19:01, 11.93s/it] 90%|█████████ | 6347/7045 [20:37:40<2:16:29, 11.73s/it] {'loss': 1.1074, 'learning_rate': 1.276342669534167e-07, 'epoch': 0.9} + 90%|█████████ | 6347/7045 [20:37:40<2:16:29, 11.73s/it] 90%|█████████ | 6348/7045 [20:37:52<2:15:30, 11.66s/it] {'loss': 1.0957, 'learning_rate': 1.2727194756736117e-07, 'epoch': 0.9} + 90%|█████████ | 6348/7045 [20:37:52<2:15:30, 11.66s/it] 90%|█████████ | 6349/7045 [20:38:03<2:14:23, 11.58s/it] {'loss': 1.0576, 'learning_rate': 1.2691012974357765e-07, 'epoch': 0.9} + 90%|█████████ | 6349/7045 [20:38:03<2:14:23, 11.58s/it] 90%|█████████ | 6350/7045 [20:38:15<2:15:18, 11.68s/it] {'loss': 1.1074, 'learning_rate': 1.2654881355854992e-07, 'epoch': 0.9} + 90%|█████████ | 6350/7045 [20:38:15<2:15:18, 11.68s/it] 90%|█████████ | 6351/7045 [20:38:28<2:20:30, 12.15s/it] {'loss': 1.0557, 'learning_rate': 1.2618799908865498e-07, 'epoch': 0.9} + 90%|█████████ | 6351/7045 [20:38:28<2:20:30, 12.15s/it] 90%|█████████ | 6352/7045 [20:38:40<2:19:45, 12.10s/it] {'loss': 1.0771, 'learning_rate': 1.258276864101643e-07, 'epoch': 0.9} + 90%|█████████ | 6352/7045 [20:38:40<2:19:45, 12.10s/it] 90%|█████████ | 6353/7045 [20:38:53<2:22:00, 12.31s/it] {'loss': 1.0972, 'learning_rate': 1.254678755992428e-07, 'epoch': 0.9} + 90%|█████████ | 6353/7045 [20:38:53<2:22:00, 12.31s/it] 90%|█████████ | 6354/7045 [20:39:04<2:16:51, 11.88s/it] {'loss': 1.1221, 'learning_rate': 1.2510856673194933e-07, 'epoch': 0.9} + 90%|█████████ | 6354/7045 [20:39:04<2:16:51, 11.88s/it] 90%|█████████ | 6355/7045 [20:39:16<2:17:40, 11.97s/it] {'loss': 1.1089, 'learning_rate': 1.2474975988423786e-07, 'epoch': 0.9} + 90%|█████████ | 6355/7045 [20:39:16<2:17:40, 11.97s/it] 90%|█████████ | 6356/7045 [20:39:29<2:19:03, 12.11s/it] {'loss': 1.1348, 'learning_rate': 1.243914551319539e-07, 'epoch': 0.9} + 90%|█████████ | 6356/7045 [20:39:29<2:19:03, 12.11s/it] 90%|█████████ | 6357/7045 [20:39:41<2:20:15, 12.23s/it] {'loss': 1.0713, 'learning_rate': 1.24033652550839e-07, 'epoch': 0.9} + 90%|█████████ | 6357/7045 [20:39:41<2:20:15, 12.23s/it] 90%|█████████ | 6358/7045 [20:39:52<2:16:02, 11.88s/it] {'loss': 1.0938, 'learning_rate': 1.2367635221652797e-07, 'epoch': 0.9} + 90%|█████████ | 6358/7045 [20:39:52<2:16:02, 11.88s/it] 90%|█████████ | 6359/7045 [20:40:05<2:19:06, 12.17s/it] {'loss': 1.0664, 'learning_rate': 1.23319554204549e-07, 'epoch': 0.9} + 90%|█████████ | 6359/7045 [20:40:05<2:19:06, 12.17s/it] 90%|█████████ | 6360/7045 [20:40:16<2:16:24, 11.95s/it] {'loss': 1.0781, 'learning_rate': 1.229632585903237e-07, 'epoch': 0.9} + 90%|█████████ | 6360/7045 [20:40:16<2:16:24, 11.95s/it] 90%|█████████ | 6361/7045 [20:40:30<2:20:31, 12.33s/it] {'loss': 1.0488, 'learning_rate': 1.226074654491688e-07, 'epoch': 0.9} + 90%|█████████ | 6361/7045 [20:40:30<2:20:31, 12.33s/it] 90%|█████████ | 6362/7045 [20:40:42<2:19:36, 12.26s/it] {'loss': 1.1011, 'learning_rate': 1.2225217485629353e-07, 'epoch': 0.9} + 90%|█████████ | 6362/7045 [20:40:42<2:19:36, 12.26s/it] 90%|█████████ | 6363/7045 [20:40:53<2:16:52, 12.04s/it] {'loss': 1.0942, 'learning_rate': 1.218973868868023e-07, 'epoch': 0.9} + 90%|█████████ | 6363/7045 [20:40:53<2:16:52, 12.04s/it] 90%|█████████ | 6364/7045 [20:41:04<2:13:50, 11.79s/it] {'loss': 1.1035, 'learning_rate': 1.2154310161569177e-07, 'epoch': 0.9} + 90%|█████████ | 6364/7045 [20:41:04<2:13:50, 11.79s/it] 90%|█████████ | 6365/7045 [20:41:17<2:15:52, 11.99s/it] {'loss': 1.0996, 'learning_rate': 1.2118931911785315e-07, 'epoch': 0.9} + 90%|█████████ | 6365/7045 [20:41:17<2:15:52, 11.99s/it] 90%|█████████ | 6366/7045 [20:41:28<2:13:50, 11.83s/it] {'loss': 1.1035, 'learning_rate': 1.2083603946807133e-07, 'epoch': 0.9} + 90%|█████████ | 6366/7045 [20:41:28<2:13:50, 11.83s/it] 90%|█████████ | 6367/7045 [20:41:39<2:10:55, 11.59s/it] {'loss': 1.1123, 'learning_rate': 1.2048326274102434e-07, 'epoch': 0.9} + 90%|█████████ | 6367/7045 [20:41:39<2:10:55, 11.59s/it] 90%|█████████ | 6368/7045 [20:41:55<2:25:59, 12.94s/it] {'loss': 1.0874, 'learning_rate': 1.2013098901128506e-07, 'epoch': 0.9} + 90%|█████████ | 6368/7045 [20:41:55<2:25:59, 12.94s/it] 90%|█████████ | 6369/7045 [20:42:08<2:23:12, 12.71s/it] {'loss': 1.1147, 'learning_rate': 1.1977921835331917e-07, 'epoch': 0.9} + 90%|█████████ | 6369/7045 [20:42:08<2:23:12, 12.71s/it] 90%|█████████ | 6370/7045 [20:42:19<2:16:52, 12.17s/it] {'loss': 1.0615, 'learning_rate': 1.1942795084148555e-07, 'epoch': 0.9} + 90%|█████████ | 6370/7045 [20:42:19<2:16:52, 12.17s/it] 90%|█████████ | 6371/7045 [20:42:30<2:13:10, 11.86s/it] {'loss': 1.1523, 'learning_rate': 1.1907718655003787e-07, 'epoch': 0.9} + 90%|█████████ | 6371/7045 [20:42:30<2:13:10, 11.86s/it] 90%|█████████ | 6372/7045 [20:42:40<2:08:48, 11.48s/it] {'loss': 1.0601, 'learning_rate': 1.1872692555312315e-07, 'epoch': 0.9} + 90%|█████████ | 6372/7045 [20:42:40<2:08:48, 11.48s/it] 90%|█████████ | 6373/7045 [20:42:52<2:07:50, 11.41s/it] {'loss': 1.083, 'learning_rate': 1.183771679247811e-07, 'epoch': 0.9} + 90%|█████████ | 6373/7045 [20:42:52<2:07:50, 11.41s/it] 90%|█████████ | 6374/7045 [20:43:03<2:06:51, 11.34s/it] {'loss': 1.0781, 'learning_rate': 1.1802791373894645e-07, 'epoch': 0.9} + 90%|█████████ | 6374/7045 [20:43:03<2:06:51, 11.34s/it] 90%|█████████ | 6375/7045 [20:43:14<2:05:35, 11.25s/it] {'loss': 1.1084, 'learning_rate': 1.1767916306944655e-07, 'epoch': 0.9} + 90%|█████████ | 6375/7045 [20:43:14<2:05:35, 11.25s/it] 91%|█████████ | 6376/7045 [20:43:27<2:11:29, 11.79s/it] {'loss': 1.0459, 'learning_rate': 1.1733091599000184e-07, 'epoch': 0.91} + 91%|█████████ | 6376/7045 [20:43:27<2:11:29, 11.79s/it] 91%|█████████ | 6377/7045 [20:43:38<2:10:54, 11.76s/it] {'loss': 1.1162, 'learning_rate': 1.1698317257422792e-07, 'epoch': 0.91} + 91%|█████████ | 6377/7045 [20:43:38<2:10:54, 11.76s/it] 91%|█████████ | 6378/7045 [20:43:49<2:07:39, 11.48s/it] {'loss': 1.1113, 'learning_rate': 1.1663593289563235e-07, 'epoch': 0.91} + 91%|█████████ | 6378/7045 [20:43:49<2:07:39, 11.48s/it] 91%|█████████ | 6379/7045 [20:44:02<2:10:39, 11.77s/it] {'loss': 1.1035, 'learning_rate': 1.1628919702761726e-07, 'epoch': 0.91} + 91%|█████████ | 6379/7045 [20:44:02<2:10:39, 11.77s/it] 91%|█████████ | 6380/7045 [20:44:14<2:12:15, 11.93s/it] {'loss': 1.0903, 'learning_rate': 1.1594296504347791e-07, 'epoch': 0.91} + 91%|█████████ | 6380/7045 [20:44:14<2:12:15, 11.93s/it] 91%|█████████ | 6381/7045 [20:44:25<2:08:35, 11.62s/it] {'loss': 1.061, 'learning_rate': 1.1559723701640213e-07, 'epoch': 0.91} + 91%|█████████ | 6381/7045 [20:44:25<2:08:35, 11.62s/it] 91%|█████████ | 6382/7045 [20:44:36<2:07:24, 11.53s/it] {'loss': 1.0703, 'learning_rate': 1.1525201301947314e-07, 'epoch': 0.91} + 91%|█████████ | 6382/7045 [20:44:36<2:07:24, 11.53s/it] 91%|█████████ | 6383/7045 [20:44:48<2:08:49, 11.68s/it] {'loss': 1.0811, 'learning_rate': 1.1490729312566618e-07, 'epoch': 0.91} + 91%|█████████ | 6383/7045 [20:44:48<2:08:49, 11.68s/it] 91%|█████████ | 6384/7045 [20:45:00<2:07:26, 11.57s/it] {'loss': 1.1147, 'learning_rate': 1.1456307740785017e-07, 'epoch': 0.91} + 91%|█████████ | 6384/7045 [20:45:00<2:07:26, 11.57s/it] 91%|█████████ | 6385/7045 [20:45:10<2:04:27, 11.31s/it] {'loss': 1.0791, 'learning_rate': 1.1421936593878829e-07, 'epoch': 0.91} + 91%|█████████ | 6385/7045 [20:45:10<2:04:27, 11.31s/it] 91%|█████████ | 6386/7045 [20:45:21<2:03:01, 11.20s/it] {'loss': 1.1094, 'learning_rate': 1.1387615879113578e-07, 'epoch': 0.91} + 91%|█████████ | 6386/7045 [20:45:21<2:03:01, 11.20s/it] 91%|█████████ | 6387/7045 [20:45:34<2:09:29, 11.81s/it] {'loss': 1.1074, 'learning_rate': 1.1353345603744181e-07, 'epoch': 0.91} + 91%|█████████ | 6387/7045 [20:45:35<2:09:29, 11.81s/it] 91%|█████████ | 6388/7045 [20:45:46<2:07:28, 11.64s/it] {'loss': 1.1006, 'learning_rate': 1.1319125775014955e-07, 'epoch': 0.91} + 91%|█████████ | 6388/7045 [20:45:46<2:07:28, 11.64s/it] 91%|█████████ | 6389/7045 [20:45:57<2:06:04, 11.53s/it] {'loss': 1.1182, 'learning_rate': 1.1284956400159503e-07, 'epoch': 0.91} + 91%|█████████ | 6389/7045 [20:45:57<2:06:04, 11.53s/it] 91%|█████████ | 6390/7045 [20:46:09<2:07:46, 11.70s/it] {'loss': 1.1064, 'learning_rate': 1.1250837486400684e-07, 'epoch': 0.91} + 91%|█████████ | 6390/7045 [20:46:09<2:07:46, 11.70s/it] 91%|█████████ | 6391/7045 [20:46:21<2:07:35, 11.71s/it] {'loss': 1.1289, 'learning_rate': 1.121676904095087e-07, 'epoch': 0.91} + 91%|█████████ | 6391/7045 [20:46:21<2:07:35, 11.71s/it] 91%|█████████ | 6392/7045 [20:46:32<2:04:58, 11.48s/it] {'loss': 1.1221, 'learning_rate': 1.1182751071011605e-07, 'epoch': 0.91} + 91%|█████████ | 6392/7045 [20:46:32<2:04:58, 11.48s/it] 91%|█████████ | 6393/7045 [20:46:43<2:02:58, 11.32s/it] {'loss': 1.1064, 'learning_rate': 1.1148783583773886e-07, 'epoch': 0.91} + 91%|█████████ | 6393/7045 [20:46:43<2:02:58, 11.32s/it] 91%|█████████ | 6394/7045 [20:46:54<2:02:30, 11.29s/it] {'loss': 1.0552, 'learning_rate': 1.1114866586417888e-07, 'epoch': 0.91} + 91%|█████████ | 6394/7045 [20:46:54<2:02:30, 11.29s/it] 91%|█████████ | 6395/7045 [20:47:06<2:05:04, 11.55s/it] {'loss': 1.1455, 'learning_rate': 1.1081000086113236e-07, 'epoch': 0.91} + 91%|█████████ | 6395/7045 [20:47:06<2:05:04, 11.55s/it] 91%|█████████ | 6396/7045 [20:47:19<2:09:46, 12.00s/it] {'loss': 1.124, 'learning_rate': 1.104718409001887e-07, 'epoch': 0.91} + 91%|█████████ | 6396/7045 [20:47:19<2:09:46, 12.00s/it] 91%|█████████ | 6397/7045 [20:47:31<2:07:38, 11.82s/it] {'loss': 1.0723, 'learning_rate': 1.1013418605283044e-07, 'epoch': 0.91} + 91%|█████████ | 6397/7045 [20:47:31<2:07:38, 11.82s/it] 91%|█████████ | 6398/7045 [20:47:42<2:05:01, 11.59s/it] {'loss': 1.1094, 'learning_rate': 1.0979703639043244e-07, 'epoch': 0.91} + 91%|█████████ | 6398/7045 [20:47:42<2:05:01, 11.59s/it] 91%|█████████ | 6399/7045 [20:47:54<2:07:22, 11.83s/it] {'loss': 1.0781, 'learning_rate': 1.0946039198426433e-07, 'epoch': 0.91} + 91%|█████████ | 6399/7045 [20:47:54<2:07:22, 11.83s/it] 91%|█████████ | 6400/7045 [20:48:06<2:08:42, 11.97s/it] {'loss': 1.0874, 'learning_rate': 1.0912425290548783e-07, 'epoch': 0.91} + 91%|█████████ | 6400/7045 [20:48:06<2:08:42, 11.97s/it] 91%|█████████ | 6401/7045 [20:48:17<2:04:39, 11.61s/it] {'loss': 1.0884, 'learning_rate': 1.087886192251575e-07, 'epoch': 0.91} + 91%|█████████ | 6401/7045 [20:48:17<2:04:39, 11.61s/it] 91%|█████████ | 6402/7045 [20:48:31<2:10:24, 12.17s/it] {'loss': 1.1104, 'learning_rate': 1.0845349101422292e-07, 'epoch': 0.91} + 91%|█████████ | 6402/7045 [20:48:31<2:10:24, 12.17s/it] 91%|█████████ | 6403/7045 [20:48:44<2:13:33, 12.48s/it] {'loss': 1.0386, 'learning_rate': 1.081188683435247e-07, 'epoch': 0.91} + 91%|█████████ | 6403/7045 [20:48:44<2:13:33, 12.48s/it] 91%|█████████ | 6404/7045 [20:48:55<2:09:27, 12.12s/it] {'loss': 1.124, 'learning_rate': 1.0778475128379789e-07, 'epoch': 0.91} + 91%|█████████ | 6404/7045 [20:48:55<2:09:27, 12.12s/it] 91%|█████████ | 6405/7045 [20:49:06<2:05:24, 11.76s/it] {'loss': 1.1494, 'learning_rate': 1.0745113990567046e-07, 'epoch': 0.91} + 91%|█████████ | 6405/7045 [20:49:06<2:05:24, 11.76s/it] 91%|█████████ | 6406/7045 [20:49:17<2:03:22, 11.59s/it] {'loss': 1.0869, 'learning_rate': 1.0711803427966211e-07, 'epoch': 0.91} + 91%|█████████ | 6406/7045 [20:49:17<2:03:22, 11.59s/it] 91%|█████████ | 6407/7045 [20:49:29<2:03:37, 11.63s/it] {'loss': 1.0762, 'learning_rate': 1.0678543447618845e-07, 'epoch': 0.91} + 91%|█████████ | 6407/7045 [20:49:29<2:03:37, 11.63s/it] 91%|█████████ | 6408/7045 [20:49:41<2:03:30, 11.63s/it] {'loss': 1.082, 'learning_rate': 1.0645334056555573e-07, 'epoch': 0.91} + 91%|█████████ | 6408/7045 [20:49:41<2:03:30, 11.63s/it] 91%|█████████ | 6409/7045 [20:49:52<2:01:50, 11.49s/it] {'loss': 1.0996, 'learning_rate': 1.0612175261796393e-07, 'epoch': 0.91} + 91%|█████████ | 6409/7045 [20:49:52<2:01:50, 11.49s/it] 91%|█████████ | 6410/7045 [20:50:04<2:04:54, 11.80s/it] {'loss': 1.0898, 'learning_rate': 1.0579067070350669e-07, 'epoch': 0.91} + 91%|█████████ | 6410/7045 [20:50:04<2:04:54, 11.80s/it] 91%|█████████ | 6411/7045 [20:50:15<2:02:35, 11.60s/it] {'loss': 1.0781, 'learning_rate': 1.0546009489216996e-07, 'epoch': 0.91} + 91%|█████████ | 6411/7045 [20:50:15<2:02:35, 11.60s/it] 91%|█████████ | 6412/7045 [20:50:29<2:07:27, 12.08s/it] {'loss': 1.0835, 'learning_rate': 1.0513002525383259e-07, 'epoch': 0.91} + 91%|█████████ | 6412/7045 [20:50:29<2:07:27, 12.08s/it] 91%|█████████ | 6413/7045 [20:50:41<2:08:48, 12.23s/it] {'loss': 1.1094, 'learning_rate': 1.0480046185826736e-07, 'epoch': 0.91} + 91%|█████████ | 6413/7045 [20:50:41<2:08:48, 12.23s/it] 91%|█████████ | 6414/7045 [20:50:53<2:07:58, 12.17s/it] {'loss': 1.0972, 'learning_rate': 1.0447140477513883e-07, 'epoch': 0.91} + 91%|█████████ | 6414/7045 [20:50:53<2:07:58, 12.17s/it] 91%|█████████ | 6415/7045 [20:51:04<2:04:09, 11.82s/it] {'loss': 1.1104, 'learning_rate': 1.041428540740061e-07, 'epoch': 0.91} + 91%|█████████ | 6415/7045 [20:51:04<2:04:09, 11.82s/it] 91%|█████████ | 6416/7045 [20:51:15<2:01:19, 11.57s/it] {'loss': 1.0815, 'learning_rate': 1.038148098243194e-07, 'epoch': 0.91} + 91%|█████████ | 6416/7045 [20:51:15<2:01:19, 11.57s/it] 91%|█████████ | 6417/7045 [20:51:26<1:59:35, 11.43s/it] {'loss': 1.1025, 'learning_rate': 1.0348727209542303e-07, 'epoch': 0.91} + 91%|█████████ | 6417/7045 [20:51:26<1:59:35, 11.43s/it] 91%|█████████ | 6418/7045 [20:51:38<1:59:16, 11.41s/it] {'loss': 1.1201, 'learning_rate': 1.0316024095655408e-07, 'epoch': 0.91} + 91%|█████████ | 6418/7045 [20:51:38<1:59:16, 11.41s/it] 91%|█████████ | 6419/7045 [20:51:49<1:59:39, 11.47s/it] {'loss': 1.0718, 'learning_rate': 1.0283371647684282e-07, 'epoch': 0.91} + 91%|█████████ | 6419/7045 [20:51:49<1:59:39, 11.47s/it] 91%|█████████ | 6420/7045 [20:52:01<1:59:47, 11.50s/it] {'loss': 1.1362, 'learning_rate': 1.0250769872531152e-07, 'epoch': 0.91} + 91%|█████████ | 6420/7045 [20:52:01<1:59:47, 11.50s/it] 91%|█████████ | 6421/7045 [20:52:12<1:58:48, 11.42s/it] {'loss': 1.1562, 'learning_rate': 1.0218218777087646e-07, 'epoch': 0.91} + 91%|█████████ | 6421/7045 [20:52:12<1:58:48, 11.42s/it] 91%|█████████ | 6422/7045 [20:52:23<1:57:57, 11.36s/it] {'loss': 1.0703, 'learning_rate': 1.0185718368234565e-07, 'epoch': 0.91} + 91%|█████████ | 6422/7045 [20:52:23<1:57:57, 11.36s/it] 91%|█████████ | 6423/7045 [20:52:35<2:00:17, 11.60s/it] {'loss': 1.1123, 'learning_rate': 1.0153268652842025e-07, 'epoch': 0.91} + 91%|█████████ | 6423/7045 [20:52:35<2:00:17, 11.60s/it] 91%|█████████ | 6424/7045 [20:52:47<1:59:52, 11.58s/it] {'loss': 1.1289, 'learning_rate': 1.0120869637769537e-07, 'epoch': 0.91} + 91%|█████████ | 6424/7045 [20:52:47<1:59:52, 11.58s/it] 91%|█████████ | 6425/7045 [20:53:01<2:07:39, 12.35s/it] {'loss': 1.0688, 'learning_rate': 1.0088521329865763e-07, 'epoch': 0.91} + 91%|█████████ | 6425/7045 [20:53:01<2:07:39, 12.35s/it] 91%|█████████ | 6426/7045 [20:53:12<2:03:53, 12.01s/it] {'loss': 1.1133, 'learning_rate': 1.0056223735968706e-07, 'epoch': 0.91} + 91%|█████████ | 6426/7045 [20:53:12<2:03:53, 12.01s/it] 91%|█████████ | 6427/7045 [20:53:23<2:00:58, 11.74s/it] {'loss': 1.0747, 'learning_rate': 1.0023976862905627e-07, 'epoch': 0.91} + 91%|█████████ | 6427/7045 [20:53:23<2:00:58, 11.74s/it] 91%|█████████ | 6428/7045 [20:53:35<1:59:08, 11.59s/it] {'loss': 1.1367, 'learning_rate': 9.991780717493043e-08, 'epoch': 0.91} + 91%|█████████ | 6428/7045 [20:53:35<1:59:08, 11.59s/it] 91%|█████████▏| 6429/7045 [20:53:46<1:57:24, 11.44s/it] {'loss': 1.1177, 'learning_rate': 9.959635306536875e-08, 'epoch': 0.91} + 91%|█████████▏| 6429/7045 [20:53:46<1:57:24, 11.44s/it] 91%|█████████▏| 6430/7045 [20:53:57<1:56:32, 11.37s/it] {'loss': 1.0728, 'learning_rate': 9.9275406368321e-08, 'epoch': 0.91} + 91%|█████████▏| 6430/7045 [20:53:57<1:56:32, 11.37s/it] 91%|█████████▏| 6431/7045 [20:54:08<1:55:05, 11.25s/it] {'loss': 1.0938, 'learning_rate': 9.895496715163155e-08, 'epoch': 0.91} + 91%|█████████▏| 6431/7045 [20:54:08<1:55:05, 11.25s/it] 91%|█████████▏| 6432/7045 [20:54:19<1:55:56, 11.35s/it] {'loss': 1.125, 'learning_rate': 9.863503548303705e-08, 'epoch': 0.91} + 91%|█████████▏| 6432/7045 [20:54:19<1:55:56, 11.35s/it] 91%|█████████▏| 6433/7045 [20:54:32<1:58:34, 11.62s/it] {'loss': 1.0732, 'learning_rate': 9.831561143016671e-08, 'epoch': 0.91} + 91%|█████████▏| 6433/7045 [20:54:32<1:58:34, 11.62s/it] 91%|█████████▏| 6434/7045 [20:54:43<1:56:45, 11.47s/it] {'loss': 1.1318, 'learning_rate': 9.799669506054182e-08, 'epoch': 0.91} + 91%|█████████▏| 6434/7045 [20:54:43<1:56:45, 11.47s/it] 91%|█████████▏| 6435/7045 [20:54:54<1:56:56, 11.50s/it] {'loss': 1.1328, 'learning_rate': 9.767828644157761e-08, 'epoch': 0.91} + 91%|█████████▏| 6435/7045 [20:54:54<1:56:56, 11.50s/it] 91%|█████████▏| 6436/7045 [20:55:05<1:54:58, 11.33s/it] {'loss': 1.0586, 'learning_rate': 9.736038564058054e-08, 'epoch': 0.91} + 91%|█████████▏| 6436/7045 [20:55:05<1:54:58, 11.33s/it] 91%|█████████▏| 6437/7045 [20:55:16<1:53:54, 11.24s/it] {'loss': 1.1177, 'learning_rate': 9.704299272475154e-08, 'epoch': 0.91} + 91%|█████████▏| 6437/7045 [20:55:16<1:53:54, 11.24s/it] 91%|█████████▏| 6438/7045 [20:55:28<1:53:38, 11.23s/it] {'loss': 1.0923, 'learning_rate': 9.672610776118225e-08, 'epoch': 0.91} + 91%|█████████▏| 6438/7045 [20:55:28<1:53:38, 11.23s/it] 91%|█████████▏| 6439/7045 [20:55:39<1:52:58, 11.19s/it] {'loss': 1.0918, 'learning_rate': 9.640973081685796e-08, 'epoch': 0.91} + 91%|█████████▏| 6439/7045 [20:55:39<1:52:58, 11.19s/it] 91%|█████████▏| 6440/7045 [20:55:50<1:51:44, 11.08s/it] {'loss': 1.1309, 'learning_rate': 9.609386195865683e-08, 'epoch': 0.91} + 91%|█████████▏| 6440/7045 [20:55:50<1:51:44, 11.08s/it] 91%|█████████▏| 6441/7045 [20:56:02<1:56:55, 11.61s/it] {'loss': 1.0742, 'learning_rate': 9.577850125334882e-08, 'epoch': 0.91} + 91%|█████████▏| 6441/7045 [20:56:02<1:56:55, 11.61s/it] 91%|█████████▏| 6442/7045 [20:56:15<1:58:35, 11.80s/it] {'loss': 1.0718, 'learning_rate': 9.546364876759723e-08, 'epoch': 0.91} + 91%|█████████▏| 6442/7045 [20:56:15<1:58:35, 11.80s/it] 91%|█████████▏| 6443/7045 [20:56:26<1:56:34, 11.62s/it] {'loss': 1.1045, 'learning_rate': 9.51493045679569e-08, 'epoch': 0.91} + 91%|█████████▏| 6443/7045 [20:56:26<1:56:34, 11.62s/it] 91%|█████████▏| 6444/7045 [20:56:40<2:03:00, 12.28s/it] {'loss': 1.0967, 'learning_rate': 9.48354687208769e-08, 'epoch': 0.91} + 91%|█████████▏| 6444/7045 [20:56:40<2:03:00, 12.28s/it] 91%|█████████▏| 6445/7045 [20:56:51<2:01:01, 12.10s/it] {'loss': 1.0947, 'learning_rate': 9.452214129269693e-08, 'epoch': 0.91} + 91%|█████████▏| 6445/7045 [20:56:51<2:01:01, 12.10s/it] 91%|█████████▏| 6446/7045 [20:57:04<2:02:55, 12.31s/it] {'loss': 1.1729, 'learning_rate': 9.420932234965068e-08, 'epoch': 0.91} + 91%|█████████▏| 6446/7045 [20:57:04<2:02:55, 12.31s/it] 92%|█████████▏| 6447/7045 [20:57:17<2:03:42, 12.41s/it] {'loss': 1.0991, 'learning_rate': 9.389701195786333e-08, 'epoch': 0.92} + 92%|█████████▏| 6447/7045 [20:57:17<2:03:42, 12.41s/it] 92%|█████████▏| 6448/7045 [20:57:28<2:00:58, 12.16s/it] {'loss': 1.1021, 'learning_rate': 9.358521018335371e-08, 'epoch': 0.92} + 92%|█████████▏| 6448/7045 [20:57:28<2:00:58, 12.16s/it] 92%|█████████▏| 6449/7045 [20:57:40<1:58:35, 11.94s/it] {'loss': 1.1138, 'learning_rate': 9.327391709203187e-08, 'epoch': 0.92} + 92%|█████████▏| 6449/7045 [20:57:40<1:58:35, 11.94s/it] 92%|█████████▏| 6450/7045 [20:57:51<1:56:34, 11.76s/it] {'loss': 1.1104, 'learning_rate': 9.296313274970104e-08, 'epoch': 0.92} + 92%|█████████▏| 6450/7045 [20:57:51<1:56:34, 11.76s/it] 92%|█████████▏| 6451/7045 [20:58:02<1:54:47, 11.59s/it] {'loss': 1.1455, 'learning_rate': 9.265285722205697e-08, 'epoch': 0.92} + 92%|█████████▏| 6451/7045 [20:58:02<1:54:47, 11.59s/it] 92%|█████████▏| 6452/7045 [20:58:14<1:53:48, 11.52s/it] {'loss': 1.1357, 'learning_rate': 9.234309057468722e-08, 'epoch': 0.92} + 92%|█████████▏| 6452/7045 [20:58:14<1:53:48, 11.52s/it] 92%|█████████▏| 6453/7045 [20:58:25<1:52:11, 11.37s/it] {'loss': 1.082, 'learning_rate': 9.203383287307299e-08, 'epoch': 0.92} + 92%|█████████▏| 6453/7045 [20:58:25<1:52:11, 11.37s/it] 92%|█████████▏| 6454/7045 [20:58:36<1:52:42, 11.44s/it] {'loss': 1.1211, 'learning_rate': 9.172508418258675e-08, 'epoch': 0.92} + 92%|█████████▏| 6454/7045 [20:58:36<1:52:42, 11.44s/it] 92%|█████████▏| 6455/7045 [20:58:48<1:52:43, 11.46s/it] {'loss': 1.1289, 'learning_rate': 9.14168445684932e-08, 'epoch': 0.92} + 92%|█████████▏| 6455/7045 [20:58:48<1:52:43, 11.46s/it] 92%|█████████▏| 6456/7045 [20:58:59<1:51:08, 11.32s/it] {'loss': 1.0791, 'learning_rate': 9.110911409595024e-08, 'epoch': 0.92} + 92%|█████████▏| 6456/7045 [20:58:59<1:51:08, 11.32s/it] 92%|█████████▏| 6457/7045 [20:59:11<1:52:17, 11.46s/it] {'loss': 1.1211, 'learning_rate': 9.08018928300089e-08, 'epoch': 0.92} + 92%|█████████▏| 6457/7045 [20:59:11<1:52:17, 11.46s/it] 92%|█████████▏| 6458/7045 [20:59:22<1:50:46, 11.32s/it] {'loss': 1.1025, 'learning_rate': 9.049518083560999e-08, 'epoch': 0.92} + 92%|█████████▏| 6458/7045 [20:59:22<1:50:46, 11.32s/it] 92%|█████████▏| 6459/7045 [20:59:34<1:53:40, 11.64s/it] {'loss': 1.0874, 'learning_rate': 9.018897817758943e-08, 'epoch': 0.92} + 92%|█████████▏| 6459/7045 [20:59:34<1:53:40, 11.64s/it] 92%|█████████▏| 6460/7045 [20:59:45<1:51:35, 11.44s/it] {'loss': 1.0723, 'learning_rate': 8.988328492067405e-08, 'epoch': 0.92} + 92%|█████████▏| 6460/7045 [20:59:45<1:51:35, 11.44s/it] 92%|█████████▏| 6461/7045 [20:59:56<1:51:17, 11.43s/it] {'loss': 1.0757, 'learning_rate': 8.957810112948245e-08, 'epoch': 0.92} + 92%|█████████▏| 6461/7045 [20:59:56<1:51:17, 11.43s/it] 92%|█████████▏| 6462/7045 [21:00:07<1:49:35, 11.28s/it] {'loss': 1.0581, 'learning_rate': 8.927342686852747e-08, 'epoch': 0.92} + 92%|█████████▏| 6462/7045 [21:00:07<1:49:35, 11.28s/it] 92%|█████████▏| 6463/7045 [21:00:19<1:49:37, 11.30s/it] {'loss': 1.1025, 'learning_rate': 8.896926220221202e-08, 'epoch': 0.92} + 92%|█████████▏| 6463/7045 [21:00:19<1:49:37, 11.30s/it] 92%|█████████▏| 6464/7045 [21:00:31<1:51:56, 11.56s/it] {'loss': 1.0757, 'learning_rate': 8.86656071948333e-08, 'epoch': 0.92} + 92%|█████████▏| 6464/7045 [21:00:31<1:51:56, 11.56s/it] 92%|█████████▏| 6465/7045 [21:00:42<1:51:33, 11.54s/it] {'loss': 1.1143, 'learning_rate': 8.836246191057912e-08, 'epoch': 0.92} + 92%|█████████▏| 6465/7045 [21:00:42<1:51:33, 11.54s/it] 92%|█████████▏| 6466/7045 [21:00:54<1:52:16, 11.63s/it] {'loss': 1.0947, 'learning_rate': 8.805982641353017e-08, 'epoch': 0.92} + 92%|█████████▏| 6466/7045 [21:00:54<1:52:16, 11.63s/it] 92%|█████████▏| 6467/7045 [21:01:06<1:52:33, 11.68s/it] {'loss': 1.0938, 'learning_rate': 8.775770076766e-08, 'epoch': 0.92} + 92%|█████████▏| 6467/7045 [21:01:06<1:52:33, 11.68s/it] 92%|█████████▏| 6468/7045 [21:01:17<1:51:31, 11.60s/it] {'loss': 1.124, 'learning_rate': 8.745608503683361e-08, 'epoch': 0.92} + 92%|█████████▏| 6468/7045 [21:01:17<1:51:31, 11.60s/it] 92%|█████████▏| 6469/7045 [21:01:28<1:49:54, 11.45s/it] {'loss': 1.1182, 'learning_rate': 8.715497928480838e-08, 'epoch': 0.92} + 92%|█████████▏| 6469/7045 [21:01:28<1:49:54, 11.45s/it] 92%|█████████▏| 6470/7045 [21:01:39<1:48:29, 11.32s/it] {'loss': 1.1299, 'learning_rate': 8.685438357523445e-08, 'epoch': 0.92} + 92%|█████████▏| 6470/7045 [21:01:39<1:48:29, 11.32s/it] 92%|█████████▏| 6471/7045 [21:01:51<1:48:22, 11.33s/it] {'loss': 1.0659, 'learning_rate': 8.655429797165299e-08, 'epoch': 0.92} + 92%|█████████▏| 6471/7045 [21:01:51<1:48:22, 11.33s/it] 92%|█████████▏| 6472/7045 [21:02:02<1:47:31, 11.26s/it] {'loss': 1.1221, 'learning_rate': 8.625472253749795e-08, 'epoch': 0.92} + 92%|█████████▏| 6472/7045 [21:02:02<1:47:31, 11.26s/it] 92%|█████████▏| 6473/7045 [21:02:14<1:48:55, 11.42s/it] {'loss': 1.127, 'learning_rate': 8.59556573360959e-08, 'epoch': 0.92} + 92%|█████████▏| 6473/7045 [21:02:14<1:48:55, 11.42s/it] 92%|█████████▏| 6474/7045 [21:02:25<1:48:04, 11.36s/it] {'loss': 1.1221, 'learning_rate': 8.565710243066489e-08, 'epoch': 0.92} + 92%|█████████▏| 6474/7045 [21:02:25<1:48:04, 11.36s/it] 92%|█████████▏| 6475/7045 [21:02:36<1:46:55, 11.26s/it] {'loss': 1.0762, 'learning_rate': 8.535905788431554e-08, 'epoch': 0.92} + 92%|█████████▏| 6475/7045 [21:02:36<1:46:55, 11.26s/it] 92%|█████████▏| 6476/7045 [21:02:48<1:50:03, 11.60s/it] {'loss': 1.1045, 'learning_rate': 8.50615237600505e-08, 'epoch': 0.92} + 92%|█████████▏| 6476/7045 [21:02:48<1:50:03, 11.60s/it] 92%|█████████▏| 6477/7045 [21:02:59<1:47:35, 11.37s/it] {'loss': 1.0986, 'learning_rate': 8.476450012076365e-08, 'epoch': 0.92} + 92%|█████████▏| 6477/7045 [21:02:59<1:47:35, 11.37s/it] 92%|█████████▏| 6478/7045 [21:03:10<1:46:51, 11.31s/it] {'loss': 1.126, 'learning_rate': 8.44679870292428e-08, 'epoch': 0.92} + 92%|█████████▏| 6478/7045 [21:03:10<1:46:51, 11.31s/it] 92%|█████████▏| 6479/7045 [21:03:21<1:45:54, 11.23s/it] {'loss': 1.0649, 'learning_rate': 8.417198454816588e-08, 'epoch': 0.92} + 92%|█████████▏| 6479/7045 [21:03:21<1:45:54, 11.23s/it] 92%|█████████▏| 6480/7045 [21:03:34<1:50:32, 11.74s/it] {'loss': 1.0767, 'learning_rate': 8.387649274010396e-08, 'epoch': 0.92} + 92%|█████████▏| 6480/7045 [21:03:34<1:50:32, 11.74s/it] 92%|█████████▏| 6481/7045 [21:03:46<1:51:31, 11.86s/it] {'loss': 1.0845, 'learning_rate': 8.358151166752066e-08, 'epoch': 0.92} + 92%|█████████▏| 6481/7045 [21:03:46<1:51:31, 11.86s/it] 92%|█████████▏| 6482/7045 [21:03:59<1:53:52, 12.14s/it] {'loss': 1.1021, 'learning_rate': 8.328704139277028e-08, 'epoch': 0.92} + 92%|█████████▏| 6482/7045 [21:03:59<1:53:52, 12.14s/it] 92%|█████████▏| 6483/7045 [21:04:10<1:51:09, 11.87s/it] {'loss': 1.061, 'learning_rate': 8.299308197809996e-08, 'epoch': 0.92} + 92%|█████████▏| 6483/7045 [21:04:10<1:51:09, 11.87s/it] 92%|█████████▏| 6484/7045 [21:04:23<1:53:01, 12.09s/it] {'loss': 1.1064, 'learning_rate': 8.269963348564863e-08, 'epoch': 0.92} + 92%|█████████▏| 6484/7045 [21:04:23<1:53:01, 12.09s/it] 92%|█████████▏| 6485/7045 [21:04:34<1:50:50, 11.88s/it] {'loss': 1.1074, 'learning_rate': 8.240669597744722e-08, 'epoch': 0.92} + 92%|█████████▏| 6485/7045 [21:04:34<1:50:50, 11.88s/it] 92%|█████████▏| 6486/7045 [21:04:48<1:55:34, 12.41s/it] {'loss': 1.0562, 'learning_rate': 8.211426951541923e-08, 'epoch': 0.92} + 92%|█████████▏| 6486/7045 [21:04:48<1:55:34, 12.41s/it] 92%|█████████▏| 6487/7045 [21:05:01<1:58:04, 12.70s/it] {'loss': 1.1021, 'learning_rate': 8.182235416137913e-08, 'epoch': 0.92} + 92%|█████████▏| 6487/7045 [21:05:01<1:58:04, 12.70s/it] 92%|█████████▏| 6488/7045 [21:05:12<1:52:20, 12.10s/it] {'loss': 1.1055, 'learning_rate': 8.153094997703365e-08, 'epoch': 0.92} + 92%|█████████▏| 6488/7045 [21:05:12<1:52:20, 12.10s/it] 92%|█████████▏| 6489/7045 [21:05:25<1:54:00, 12.30s/it] {'loss': 1.0425, 'learning_rate': 8.124005702398214e-08, 'epoch': 0.92} + 92%|█████████▏| 6489/7045 [21:05:25<1:54:00, 12.30s/it] 92%|█████████▏| 6490/7045 [21:05:38<1:55:00, 12.43s/it] {'loss': 1.0811, 'learning_rate': 8.094967536371512e-08, 'epoch': 0.92} + 92%|█████████▏| 6490/7045 [21:05:38<1:55:00, 12.43s/it] 92%|█████████▏| 6491/7045 [21:05:49<1:51:03, 12.03s/it] {'loss': 1.0801, 'learning_rate': 8.065980505761517e-08, 'epoch': 0.92} + 92%|█████████▏| 6491/7045 [21:05:49<1:51:03, 12.03s/it] 92%|█████████▏| 6492/7045 [21:06:00<1:48:08, 11.73s/it] {'loss': 1.0771, 'learning_rate': 8.037044616695743e-08, 'epoch': 0.92} + 92%|█████████▏| 6492/7045 [21:06:00<1:48:08, 11.73s/it] 92%|█████████▏| 6493/7045 [21:06:11<1:45:51, 11.51s/it] {'loss': 1.0977, 'learning_rate': 8.0081598752908e-08, 'epoch': 0.92} + 92%|█████████▏| 6493/7045 [21:06:11<1:45:51, 11.51s/it] 92%|█████████▏| 6494/7045 [21:06:23<1:46:15, 11.57s/it] {'loss': 1.1592, 'learning_rate': 7.97932628765255e-08, 'epoch': 0.92} + 92%|█████████▏| 6494/7045 [21:06:23<1:46:15, 11.57s/it] 92%|█████████▏| 6495/7045 [21:06:36<1:51:10, 12.13s/it] {'loss': 1.0728, 'learning_rate': 7.950543859876008e-08, 'epoch': 0.92} + 92%|█████████▏| 6495/7045 [21:06:36<1:51:10, 12.13s/it] 92%|█████████▏| 6496/7045 [21:06:48<1:51:06, 12.14s/it] {'loss': 1.0757, 'learning_rate': 7.921812598045336e-08, 'epoch': 0.92} + 92%|█████████▏| 6496/7045 [21:06:48<1:51:06, 12.14s/it] 92%|█████████▏| 6497/7045 [21:07:00<1:48:51, 11.92s/it] {'loss': 1.1494, 'learning_rate': 7.893132508234036e-08, 'epoch': 0.92} + 92%|█████████▏| 6497/7045 [21:07:00<1:48:51, 11.92s/it] 92%|█████████▏| 6498/7045 [21:07:11<1:46:54, 11.73s/it] {'loss': 1.0713, 'learning_rate': 7.864503596504619e-08, 'epoch': 0.92} + 92%|█████████▏| 6498/7045 [21:07:11<1:46:54, 11.73s/it] 92%|█████████▏| 6499/7045 [21:07:22<1:46:13, 11.67s/it] {'loss': 1.1514, 'learning_rate': 7.835925868908828e-08, 'epoch': 0.92} + 92%|█████████▏| 6499/7045 [21:07:22<1:46:13, 11.67s/it] 92%|█████████▏| 6500/7045 [21:07:34<1:45:06, 11.57s/it] {'loss': 1.1338, 'learning_rate': 7.807399331487664e-08, 'epoch': 0.92} + 92%|█████████▏| 6500/7045 [21:07:34<1:45:06, 11.57s/it] 92%|█████████▏| 6501/7045 [21:07:45<1:43:18, 11.39s/it] {'loss': 1.1255, 'learning_rate': 7.77892399027122e-08, 'epoch': 0.92} + 92%|█████████▏| 6501/7045 [21:07:45<1:43:18, 11.39s/it] 92%|█████████▏| 6502/7045 [21:07:57<1:46:10, 11.73s/it] {'loss': 1.0981, 'learning_rate': 7.750499851278764e-08, 'epoch': 0.92} + 92%|█████████▏| 6502/7045 [21:07:57<1:46:10, 11.73s/it] 92%|█████████▏| 6503/7045 [21:08:08<1:44:21, 11.55s/it] {'loss': 1.1484, 'learning_rate': 7.722126920518769e-08, 'epoch': 0.92} + 92%|█████████▏| 6503/7045 [21:08:08<1:44:21, 11.55s/it] 92%|█████████▏| 6504/7045 [21:08:21<1:48:32, 12.04s/it] {'loss': 1.0415, 'learning_rate': 7.693805203988963e-08, 'epoch': 0.92} + 92%|█████████▏| 6504/7045 [21:08:21<1:48:32, 12.04s/it] 92%|█████████▏| 6505/7045 [21:08:33<1:45:49, 11.76s/it] {'loss': 1.0933, 'learning_rate': 7.665534707676086e-08, 'epoch': 0.92} + 92%|█████████▏| 6505/7045 [21:08:33<1:45:49, 11.76s/it] 92%|█████████▏| 6506/7045 [21:08:44<1:43:55, 11.57s/it] {'loss': 1.1299, 'learning_rate': 7.637315437556192e-08, 'epoch': 0.92} + 92%|█████████▏| 6506/7045 [21:08:44<1:43:55, 11.57s/it] 92%|█████████▏| 6507/7045 [21:08:55<1:43:45, 11.57s/it] {'loss': 1.1592, 'learning_rate': 7.609147399594397e-08, 'epoch': 0.92} + 92%|█████████▏| 6507/7045 [21:08:55<1:43:45, 11.57s/it] 92%|█████████▏| 6508/7045 [21:09:08<1:47:26, 12.00s/it] {'loss': 1.0928, 'learning_rate': 7.581030599745076e-08, 'epoch': 0.92} + 92%|█████████▏| 6508/7045 [21:09:08<1:47:26, 12.00s/it] 92%|█████████▏| 6509/7045 [21:09:20<1:46:48, 11.96s/it] {'loss': 1.1045, 'learning_rate': 7.552965043951754e-08, 'epoch': 0.92} + 92%|█████████▏| 6509/7045 [21:09:20<1:46:48, 11.96s/it] 92%|█████████▏| 6510/7045 [21:09:31<1:43:55, 11.66s/it] {'loss': 1.1299, 'learning_rate': 7.524950738147018e-08, 'epoch': 0.92} + 92%|█████████▏| 6510/7045 [21:09:31<1:43:55, 11.66s/it] 92%|█████████▏| 6511/7045 [21:09:42<1:41:27, 11.40s/it] {'loss': 1.0923, 'learning_rate': 7.496987688252828e-08, 'epoch': 0.92} + 92%|█████████▏| 6511/7045 [21:09:42<1:41:27, 11.40s/it] 92%|█████████▏| 6512/7045 [21:09:54<1:42:45, 11.57s/it] {'loss': 1.0723, 'learning_rate': 7.469075900180117e-08, 'epoch': 0.92} + 92%|█████████▏| 6512/7045 [21:09:54<1:42:45, 11.57s/it] 92%|█████████▏| 6513/7045 [21:10:05<1:42:17, 11.54s/it] {'loss': 1.0879, 'learning_rate': 7.441215379829031e-08, 'epoch': 0.92} + 92%|█████████▏| 6513/7045 [21:10:05<1:42:17, 11.54s/it] 92%|█████████▏| 6514/7045 [21:10:18<1:45:32, 11.93s/it] {'loss': 1.0493, 'learning_rate': 7.413406133088968e-08, 'epoch': 0.92} + 92%|█████████▏| 6514/7045 [21:10:18<1:45:32, 11.93s/it] 92%|█████████▏| 6515/7045 [21:10:30<1:45:47, 11.98s/it] {'loss': 1.0928, 'learning_rate': 7.385648165838393e-08, 'epoch': 0.92} + 92%|█████████▏| 6515/7045 [21:10:30<1:45:47, 11.98s/it] 92%|█████████▏| 6516/7045 [21:10:42<1:44:04, 11.81s/it] {'loss': 1.0923, 'learning_rate': 7.357941483944947e-08, 'epoch': 0.92} + 92%|█████████▏| 6516/7045 [21:10:42<1:44:04, 11.81s/it] 93%|█████████▎| 6517/7045 [21:10:54<1:44:42, 11.90s/it] {'loss': 1.1289, 'learning_rate': 7.33028609326547e-08, 'epoch': 0.93} + 93%|█████████▎| 6517/7045 [21:10:54<1:44:42, 11.90s/it] 93%|█████████▎| 6518/7045 [21:11:05<1:42:41, 11.69s/it] {'loss': 1.1318, 'learning_rate': 7.302681999645927e-08, 'epoch': 0.93} + 93%|█████████▎| 6518/7045 [21:11:05<1:42:41, 11.69s/it] 93%|█████████▎| 6519/7045 [21:11:17<1:42:16, 11.67s/it] {'loss': 1.1396, 'learning_rate': 7.275129208921455e-08, 'epoch': 0.93} + 93%|█████████▎| 6519/7045 [21:11:17<1:42:16, 11.67s/it] 93%|█████████▎| 6520/7045 [21:11:28<1:40:08, 11.44s/it] {'loss': 1.0864, 'learning_rate': 7.24762772691634e-08, 'epoch': 0.93} + 93%|█████████▎| 6520/7045 [21:11:28<1:40:08, 11.44s/it] 93%|█████████▎| 6521/7045 [21:11:38<1:38:32, 11.28s/it] {'loss': 1.0977, 'learning_rate': 7.220177559443959e-08, 'epoch': 0.93} + 93%|█████████▎| 6521/7045 [21:11:38<1:38:32, 11.28s/it] 93%|█████████▎| 6522/7045 [21:11:50<1:38:01, 11.25s/it] {'loss': 1.123, 'learning_rate': 7.192778712306975e-08, 'epoch': 0.93} + 93%|█████████▎| 6522/7045 [21:11:50<1:38:01, 11.25s/it] 93%|█████████▎| 6523/7045 [21:12:01<1:36:57, 11.15s/it] {'loss': 1.0928, 'learning_rate': 7.165431191297117e-08, 'epoch': 0.93} + 93%|█████████▎| 6523/7045 [21:12:01<1:36:57, 11.15s/it] 93%|█████████▎| 6524/7045 [21:12:12<1:36:39, 11.13s/it] {'loss': 1.1016, 'learning_rate': 7.138135002195207e-08, 'epoch': 0.93} + 93%|█████████▎| 6524/7045 [21:12:12<1:36:39, 11.13s/it] 93%|█████████▎| 6525/7045 [21:12:23<1:36:48, 11.17s/it] {'loss': 1.0776, 'learning_rate': 7.110890150771404e-08, 'epoch': 0.93} + 93%|█████████▎| 6525/7045 [21:12:23<1:36:48, 11.17s/it] 93%|█████████▎| 6526/7045 [21:12:35<1:38:12, 11.35s/it] {'loss': 1.1172, 'learning_rate': 7.083696642784771e-08, 'epoch': 0.93} + 93%|█████████▎| 6526/7045 [21:12:35<1:38:12, 11.35s/it] 93%|█████████▎| 6527/7045 [21:12:46<1:38:20, 11.39s/it] {'loss': 1.1338, 'learning_rate': 7.056554483983763e-08, 'epoch': 0.93} + 93%|█████████▎| 6527/7045 [21:12:46<1:38:20, 11.39s/it] 93%|█████████▎| 6528/7045 [21:12:57<1:37:08, 11.27s/it] {'loss': 1.1201, 'learning_rate': 7.029463680105764e-08, 'epoch': 0.93} + 93%|█████████▎| 6528/7045 [21:12:57<1:37:08, 11.27s/it] 93%|█████████▎| 6529/7045 [21:13:09<1:37:18, 11.31s/it] {'loss': 1.0889, 'learning_rate': 7.002424236877442e-08, 'epoch': 0.93} + 93%|█████████▎| 6529/7045 [21:13:09<1:37:18, 11.31s/it] 93%|█████████▎| 6530/7045 [21:13:21<1:40:03, 11.66s/it] {'loss': 1.0977, 'learning_rate': 6.975436160014615e-08, 'epoch': 0.93} + 93%|█████████▎| 6530/7045 [21:13:21<1:40:03, 11.66s/it] 93%|█████████▎| 6531/7045 [21:13:32<1:39:00, 11.56s/it] {'loss': 1.0811, 'learning_rate': 6.948499455222107e-08, 'epoch': 0.93} + 93%|█████████▎| 6531/7045 [21:13:32<1:39:00, 11.56s/it] 93%|█████████▎| 6532/7045 [21:13:43<1:37:08, 11.36s/it] {'loss': 1.0898, 'learning_rate': 6.921614128194004e-08, 'epoch': 0.93} + 93%|█████████▎| 6532/7045 [21:13:43<1:37:08, 11.36s/it] 93%|█████████▎| 6533/7045 [21:13:55<1:37:15, 11.40s/it] {'loss': 1.0874, 'learning_rate': 6.894780184613537e-08, 'epoch': 0.93} + 93%|█████████▎| 6533/7045 [21:13:55<1:37:15, 11.40s/it] 93%|█████████▎| 6534/7045 [21:14:07<1:38:56, 11.62s/it] {'loss': 1.0942, 'learning_rate': 6.867997630152972e-08, 'epoch': 0.93} + 93%|█████████▎| 6534/7045 [21:14:07<1:38:56, 11.62s/it] 93%|█████████▎| 6535/7045 [21:14:19<1:38:55, 11.64s/it] {'loss': 1.1055, 'learning_rate': 6.841266470473783e-08, 'epoch': 0.93} + 93%|█████████▎| 6535/7045 [21:14:19<1:38:55, 11.64s/it] 93%|█████████▎| 6536/7045 [21:14:30<1:37:05, 11.45s/it] {'loss': 1.1035, 'learning_rate': 6.814586711226589e-08, 'epoch': 0.93} + 93%|█████████▎| 6536/7045 [21:14:30<1:37:05, 11.45s/it] 93%|█████████▎| 6537/7045 [21:14:42<1:40:34, 11.88s/it] {'loss': 1.0776, 'learning_rate': 6.787958358051128e-08, 'epoch': 0.93} + 93%|█████████▎| 6537/7045 [21:14:42<1:40:34, 11.88s/it] 93%|█████████▎| 6538/7045 [21:14:54<1:38:42, 11.68s/it] {'loss': 1.1631, 'learning_rate': 6.761381416576285e-08, 'epoch': 0.93} + 93%|█████████▎| 6538/7045 [21:14:54<1:38:42, 11.68s/it] 93%|█████████▎| 6539/7045 [21:15:05<1:37:00, 11.50s/it] {'loss': 1.1104, 'learning_rate': 6.734855892420039e-08, 'epoch': 0.93} + 93%|█████████▎| 6539/7045 [21:15:05<1:37:00, 11.50s/it] 93%|█████████▎| 6540/7045 [21:15:17<1:38:19, 11.68s/it] {'loss': 1.1318, 'learning_rate': 6.708381791189488e-08, 'epoch': 0.93} + 93%|█████████▎| 6540/7045 [21:15:17<1:38:19, 11.68s/it] 93%|█████████▎| 6541/7045 [21:15:29<1:38:45, 11.76s/it] {'loss': 1.0635, 'learning_rate': 6.681959118480963e-08, 'epoch': 0.93} + 93%|█████████▎| 6541/7045 [21:15:29<1:38:45, 11.76s/it] 93%|█████████▎| 6542/7045 [21:15:41<1:40:39, 12.01s/it] {'loss': 1.1074, 'learning_rate': 6.655587879879855e-08, 'epoch': 0.93} + 93%|█████████▎| 6542/7045 [21:15:41<1:40:39, 12.01s/it] 93%|█████████▎| 6543/7045 [21:15:53<1:38:53, 11.82s/it] {'loss': 1.082, 'learning_rate': 6.629268080960621e-08, 'epoch': 0.93} + 93%|█████████▎| 6543/7045 [21:15:53<1:38:53, 11.82s/it] 93%|█████████▎| 6544/7045 [21:16:04<1:36:36, 11.57s/it] {'loss': 1.0801, 'learning_rate': 6.602999727286951e-08, 'epoch': 0.93} + 93%|█████████▎| 6544/7045 [21:16:04<1:36:36, 11.57s/it] 93%|█████████▎| 6545/7045 [21:16:16<1:38:14, 11.79s/it] {'loss': 1.125, 'learning_rate': 6.576782824411627e-08, 'epoch': 0.93} + 93%|█████████▎| 6545/7045 [21:16:16<1:38:14, 11.79s/it] 93%|█████████▎| 6546/7045 [21:16:27<1:36:44, 11.63s/it] {'loss': 1.0708, 'learning_rate': 6.55061737787649e-08, 'epoch': 0.93} + 93%|█████████▎| 6546/7045 [21:16:27<1:36:44, 11.63s/it] 93%|█████████▎| 6547/7045 [21:16:38<1:34:47, 11.42s/it] {'loss': 1.061, 'learning_rate': 6.524503393212617e-08, 'epoch': 0.93} + 93%|█████████▎| 6547/7045 [21:16:38<1:34:47, 11.42s/it] 93%|█████████▎| 6548/7045 [21:16:49<1:33:48, 11.33s/it] {'loss': 1.1113, 'learning_rate': 6.498440875940094e-08, 'epoch': 0.93} + 93%|█████████▎| 6548/7045 [21:16:49<1:33:48, 11.33s/it] 93%|█████████▎| 6549/7045 [21:17:01<1:34:21, 11.41s/it] {'loss': 1.124, 'learning_rate': 6.472429831568234e-08, 'epoch': 0.93} + 93%|█████████▎| 6549/7045 [21:17:01<1:34:21, 11.41s/it] 93%|█████████▎| 6550/7045 [21:17:13<1:36:41, 11.72s/it] {'loss': 1.1016, 'learning_rate': 6.446470265595389e-08, 'epoch': 0.93} + 93%|█████████▎| 6550/7045 [21:17:13<1:36:41, 11.72s/it] 93%|█████████▎| 6551/7045 [21:17:26<1:38:21, 11.95s/it] {'loss': 1.1133, 'learning_rate': 6.420562183509033e-08, 'epoch': 0.93} + 93%|█████████▎| 6551/7045 [21:17:26<1:38:21, 11.95s/it] 93%|█████████▎| 6552/7045 [21:17:39<1:40:21, 12.21s/it] {'loss': 1.1172, 'learning_rate': 6.39470559078581e-08, 'epoch': 0.93} + 93%|█████████▎| 6552/7045 [21:17:39<1:40:21, 12.21s/it] 93%|█████████▎| 6553/7045 [21:17:51<1:40:01, 12.20s/it] {'loss': 1.1094, 'learning_rate': 6.368900492891489e-08, 'epoch': 0.93} + 93%|█████████▎| 6553/7045 [21:17:51<1:40:01, 12.20s/it] 93%|█████████▎| 6554/7045 [21:18:03<1:39:58, 12.22s/it] {'loss': 1.1035, 'learning_rate': 6.343146895280816e-08, 'epoch': 0.93} + 93%|█████████▎| 6554/7045 [21:18:03<1:39:58, 12.22s/it] 93%|█████████▎| 6555/7045 [21:18:16<1:41:40, 12.45s/it] {'loss': 1.0518, 'learning_rate': 6.317444803397855e-08, 'epoch': 0.93} + 93%|█████████▎| 6555/7045 [21:18:16<1:41:40, 12.45s/it] 93%|█████████▎| 6556/7045 [21:18:28<1:38:57, 12.14s/it] {'loss': 1.1016, 'learning_rate': 6.29179422267559e-08, 'epoch': 0.93} + 93%|█████████▎| 6556/7045 [21:18:28<1:38:57, 12.14s/it] 93%|█████████▎| 6557/7045 [21:18:39<1:36:31, 11.87s/it] {'loss': 1.0586, 'learning_rate': 6.266195158536242e-08, 'epoch': 0.93} + 93%|█████████▎| 6557/7045 [21:18:39<1:36:31, 11.87s/it] 93%|█████████▎| 6558/7045 [21:18:51<1:36:58, 11.95s/it] {'loss': 1.0591, 'learning_rate': 6.240647616391121e-08, 'epoch': 0.93} + 93%|█████████▎| 6558/7045 [21:18:51<1:36:58, 11.95s/it] 93%|█████████▎| 6559/7045 [21:19:02<1:34:56, 11.72s/it] {'loss': 1.1084, 'learning_rate': 6.215151601640601e-08, 'epoch': 0.93} + 93%|█████████▎| 6559/7045 [21:19:02<1:34:56, 11.72s/it] 93%|█████████▎| 6560/7045 [21:19:13<1:33:34, 11.58s/it] {'loss': 1.1143, 'learning_rate': 6.189707119674204e-08, 'epoch': 0.93} + 93%|█████████▎| 6560/7045 [21:19:13<1:33:34, 11.58s/it] 93%|█████████▎| 6561/7045 [21:19:25<1:34:29, 11.71s/it] {'loss': 1.0659, 'learning_rate': 6.164314175870545e-08, 'epoch': 0.93} + 93%|█████████▎| 6561/7045 [21:19:25<1:34:29, 11.71s/it] 93%|█████████▎| 6562/7045 [21:19:36<1:32:28, 11.49s/it] {'loss': 1.0684, 'learning_rate': 6.138972775597301e-08, 'epoch': 0.93} + 93%|█████████▎| 6562/7045 [21:19:36<1:32:28, 11.49s/it] 93%|█████████▎| 6563/7045 [21:19:50<1:36:53, 12.06s/it] {'loss': 1.1143, 'learning_rate': 6.113682924211383e-08, 'epoch': 0.93} + 93%|█████████▎| 6563/7045 [21:19:50<1:36:53, 12.06s/it] 93%|█████████▎| 6564/7045 [21:20:03<1:40:40, 12.56s/it] {'loss': 1.022, 'learning_rate': 6.088444627058626e-08, 'epoch': 0.93} + 93%|█████████▎| 6564/7045 [21:20:03<1:40:40, 12.56s/it] 93%|█████████▎| 6565/7045 [21:20:15<1:37:54, 12.24s/it] {'loss': 1.1318, 'learning_rate': 6.063257889474122e-08, 'epoch': 0.93} + 93%|█████████▎| 6565/7045 [21:20:15<1:37:54, 12.24s/it] 93%|█████████▎| 6566/7045 [21:20:26<1:34:25, 11.83s/it] {'loss': 1.1104, 'learning_rate': 6.038122716782002e-08, 'epoch': 0.93} + 93%|█████████▎| 6566/7045 [21:20:26<1:34:25, 11.83s/it] 93%|█████████▎| 6567/7045 [21:20:37<1:32:23, 11.60s/it] {'loss': 1.085, 'learning_rate': 6.013039114295515e-08, 'epoch': 0.93} + 93%|█████████▎| 6567/7045 [21:20:37<1:32:23, 11.60s/it] 93%|█████████▎| 6568/7045 [21:20:49<1:33:29, 11.76s/it] {'loss': 1.0503, 'learning_rate': 5.988007087316894e-08, 'epoch': 0.93} + 93%|█████████▎| 6568/7045 [21:20:49<1:33:29, 11.76s/it] 93%|█████████▎| 6569/7045 [21:21:00<1:31:30, 11.53s/it] {'loss': 1.0938, 'learning_rate': 5.963026641137681e-08, 'epoch': 0.93} + 93%|█████████▎| 6569/7045 [21:21:00<1:31:30, 11.53s/it] 93%|█████████▎| 6570/7045 [21:21:11<1:30:06, 11.38s/it] {'loss': 1.0806, 'learning_rate': 5.938097781038349e-08, 'epoch': 0.93} + 93%|█████████▎| 6570/7045 [21:21:11<1:30:06, 11.38s/it] 93%|█████████▎| 6571/7045 [21:21:22<1:28:48, 11.24s/it] {'loss': 1.0996, 'learning_rate': 5.9132205122885143e-08, 'epoch': 0.93} + 93%|█████████▎| 6571/7045 [21:21:22<1:28:48, 11.24s/it] 93%|█████████▎| 6572/7045 [21:21:33<1:28:14, 11.19s/it] {'loss': 1.0928, 'learning_rate': 5.888394840146944e-08, 'epoch': 0.93} + 93%|█████████▎| 6572/7045 [21:21:33<1:28:14, 11.19s/it] 93%|█████████▎| 6573/7045 [21:21:44<1:28:30, 11.25s/it] {'loss': 1.123, 'learning_rate': 5.863620769861328e-08, 'epoch': 0.93} + 93%|█████████▎| 6573/7045 [21:21:44<1:28:30, 11.25s/it] 93%|█████████▎| 6574/7045 [21:21:56<1:29:17, 11.38s/it] {'loss': 1.0757, 'learning_rate': 5.8388983066687e-08, 'epoch': 0.93} + 93%|█████████▎| 6574/7045 [21:21:56<1:29:17, 11.38s/it] 93%|█████████▎| 6575/7045 [21:22:10<1:35:48, 12.23s/it] {'loss': 1.0557, 'learning_rate': 5.8142274557949907e-08, 'epoch': 0.93} + 93%|█████████▎| 6575/7045 [21:22:10<1:35:48, 12.23s/it] 93%|█████████▎| 6576/7045 [21:22:21<1:33:07, 11.91s/it] {'loss': 1.082, 'learning_rate': 5.789608222455251e-08, 'epoch': 0.93} + 93%|█████████▎| 6576/7045 [21:22:21<1:33:07, 11.91s/it] 93%|█████████▎| 6577/7045 [21:22:33<1:30:59, 11.67s/it] {'loss': 1.0781, 'learning_rate': 5.765040611853678e-08, 'epoch': 0.93} + 93%|█████████▎| 6577/7045 [21:22:33<1:30:59, 11.67s/it] 93%|█████████▎| 6578/7045 [21:22:44<1:30:16, 11.60s/it] {'loss': 1.0933, 'learning_rate': 5.740524629183536e-08, 'epoch': 0.93} + 93%|█████████▎| 6578/7045 [21:22:44<1:30:16, 11.60s/it] 93%|█████████▎| 6579/7045 [21:22:55<1:29:14, 11.49s/it] {'loss': 1.0908, 'learning_rate': 5.716060279627178e-08, 'epoch': 0.93} + 93%|█████████▎| 6579/7045 [21:22:55<1:29:14, 11.49s/it] 93%|█████████▎| 6580/7045 [21:23:07<1:28:39, 11.44s/it] {'loss': 1.1582, 'learning_rate': 5.6916475683560235e-08, 'epoch': 0.93} + 93%|█████████▎| 6580/7045 [21:23:07<1:28:39, 11.44s/it] 93%|█████████▎| 6581/7045 [21:23:19<1:31:40, 11.85s/it] {'loss': 1.0771, 'learning_rate': 5.667286500530556e-08, 'epoch': 0.93} + 93%|█████████▎| 6581/7045 [21:23:19<1:31:40, 11.85s/it] 93%|█████████▎| 6582/7045 [21:23:30<1:29:10, 11.56s/it] {'loss': 1.1211, 'learning_rate': 5.642977081300433e-08, 'epoch': 0.93} + 93%|█████████▎| 6582/7045 [21:23:30<1:29:10, 11.56s/it] 93%|█████████▎| 6583/7045 [21:23:42<1:28:24, 11.48s/it] {'loss': 1.1035, 'learning_rate': 5.618719315804294e-08, 'epoch': 0.93} + 93%|█████████▎| 6583/7045 [21:23:42<1:28:24, 11.48s/it] 93%|█████████▎| 6584/7045 [21:23:55<1:31:53, 11.96s/it] {'loss': 1.1094, 'learning_rate': 5.594513209169872e-08, 'epoch': 0.93} + 93%|█████████▎| 6584/7045 [21:23:55<1:31:53, 11.96s/it] 93%|█████████▎| 6585/7045 [21:24:06<1:29:40, 11.70s/it] {'loss': 1.124, 'learning_rate': 5.5703587665140726e-08, 'epoch': 0.93} + 93%|█████████▎| 6585/7045 [21:24:06<1:29:40, 11.70s/it] 93%|█████████▎| 6586/7045 [21:24:17<1:27:35, 11.45s/it] {'loss': 1.1226, 'learning_rate': 5.546255992942784e-08, 'epoch': 0.93} + 93%|█████████▎| 6586/7045 [21:24:17<1:27:35, 11.45s/it] 93%|█████████▎| 6587/7045 [21:24:28<1:26:59, 11.40s/it] {'loss': 1.1289, 'learning_rate': 5.522204893550959e-08, 'epoch': 0.93} + 93%|█████████▎| 6587/7045 [21:24:28<1:26:59, 11.40s/it] 94%|█████████▎| 6588/7045 [21:24:39<1:26:17, 11.33s/it] {'loss': 1.1699, 'learning_rate': 5.498205473422752e-08, 'epoch': 0.94} + 94%|█████████▎| 6588/7045 [21:24:39<1:26:17, 11.33s/it] 94%|█████████▎| 6589/7045 [21:24:51<1:27:10, 11.47s/it] {'loss': 1.0723, 'learning_rate': 5.4742577376312165e-08, 'epoch': 0.94} + 94%|█████████▎| 6589/7045 [21:24:51<1:27:10, 11.47s/it] 94%|█████████▎| 6590/7045 [21:25:02<1:26:15, 11.38s/it] {'loss': 1.0972, 'learning_rate': 5.450361691238665e-08, 'epoch': 0.94} + 94%|█████████▎| 6590/7045 [21:25:02<1:26:15, 11.38s/it] 94%|█████████▎| 6591/7045 [21:25:14<1:28:32, 11.70s/it] {'loss': 1.0923, 'learning_rate': 5.4265173392963885e-08, 'epoch': 0.94} + 94%|█████████▎| 6591/7045 [21:25:14<1:28:32, 11.70s/it] 94%|█████████▎| 6592/7045 [21:25:28<1:32:02, 12.19s/it] {'loss': 1.0928, 'learning_rate': 5.4027246868446903e-08, 'epoch': 0.94} + 94%|█████████▎| 6592/7045 [21:25:28<1:32:02, 12.19s/it] 94%|█████████▎| 6593/7045 [21:25:39<1:28:56, 11.81s/it] {'loss': 1.1055, 'learning_rate': 5.378983738913074e-08, 'epoch': 0.94} + 94%|█████████▎| 6593/7045 [21:25:39<1:28:56, 11.81s/it] 94%|█████████▎| 6594/7045 [21:25:50<1:28:02, 11.71s/it] {'loss': 1.1191, 'learning_rate': 5.3552945005200255e-08, 'epoch': 0.94} + 94%|█████████▎| 6594/7045 [21:25:50<1:28:02, 11.71s/it] 94%|█████████▎| 6595/7045 [21:26:03<1:30:02, 12.01s/it] {'loss': 1.0894, 'learning_rate': 5.331656976673122e-08, 'epoch': 0.94} + 94%|█████████▎| 6595/7045 [21:26:03<1:30:02, 12.01s/it] 94%|█████████▎| 6596/7045 [21:26:14<1:28:53, 11.88s/it] {'loss': 1.1348, 'learning_rate': 5.308071172369061e-08, 'epoch': 0.94} + 94%|█████████▎| 6596/7045 [21:26:14<1:28:53, 11.88s/it] 94%|█████████▎| 6597/7045 [21:26:26<1:27:33, 11.73s/it] {'loss': 1.1299, 'learning_rate': 5.2845370925934946e-08, 'epoch': 0.94} + 94%|█████████▎| 6597/7045 [21:26:26<1:27:33, 11.73s/it] 94%|█████████▎| 6598/7045 [21:26:37<1:26:32, 11.62s/it] {'loss': 1.0864, 'learning_rate': 5.2610547423211924e-08, 'epoch': 0.94} + 94%|█████████▎| 6598/7045 [21:26:37<1:26:32, 11.62s/it] 94%|█████████▎| 6599/7045 [21:26:49<1:26:10, 11.59s/it] {'loss': 1.1001, 'learning_rate': 5.237624126516072e-08, 'epoch': 0.94} + 94%|█████████▎| 6599/7045 [21:26:49<1:26:10, 11.59s/it] 94%|█████████▎| 6600/7045 [21:27:00<1:25:10, 11.48s/it] {'loss': 1.1147, 'learning_rate': 5.214245250131006e-08, 'epoch': 0.94} + 94%|█████████▎| 6600/7045 [21:27:00<1:25:10, 11.48s/it] 94%|█████████▎| 6601/7045 [21:27:11<1:23:46, 11.32s/it] {'loss': 1.0728, 'learning_rate': 5.190918118107985e-08, 'epoch': 0.94} + 94%|█████████▎| 6601/7045 [21:27:11<1:23:46, 11.32s/it] 94%|█████████▎| 6602/7045 [21:27:22<1:23:05, 11.25s/it] {'loss': 1.1387, 'learning_rate': 5.167642735378037e-08, 'epoch': 0.94} + 94%|█████████▎| 6602/7045 [21:27:22<1:23:05, 11.25s/it] 94%|█████████▎| 6603/7045 [21:27:33<1:23:11, 11.29s/it] {'loss': 1.0874, 'learning_rate': 5.144419106861226e-08, 'epoch': 0.94} + 94%|█████████▎| 6603/7045 [21:27:33<1:23:11, 11.29s/it] 94%|█████████▎| 6604/7045 [21:27:46<1:25:29, 11.63s/it] {'loss': 1.1016, 'learning_rate': 5.121247237466792e-08, 'epoch': 0.94} + 94%|█████████▎| 6604/7045 [21:27:46<1:25:29, 11.63s/it] 94%|█████████▍| 6605/7045 [21:27:58<1:27:27, 11.93s/it] {'loss': 1.0723, 'learning_rate': 5.098127132092901e-08, 'epoch': 0.94} + 94%|█████████▍| 6605/7045 [21:27:58<1:27:27, 11.93s/it] 94%|█████████▍| 6606/7045 [21:28:09<1:25:06, 11.63s/it] {'loss': 1.1172, 'learning_rate': 5.0750587956267805e-08, 'epoch': 0.94} + 94%|█████████▍| 6606/7045 [21:28:09<1:25:06, 11.63s/it] 94%|█████████▍| 6607/7045 [21:28:21<1:25:03, 11.65s/it] {'loss': 1.1143, 'learning_rate': 5.052042232944865e-08, 'epoch': 0.94} + 94%|█████████▍| 6607/7045 [21:28:21<1:25:03, 11.65s/it] 94%|█████████▍| 6608/7045 [21:28:34<1:27:41, 12.04s/it] {'loss': 1.0776, 'learning_rate': 5.029077448912456e-08, 'epoch': 0.94} + 94%|█████████▍| 6608/7045 [21:28:34<1:27:41, 12.04s/it] 94%|█████████▍| 6609/7045 [21:28:45<1:25:17, 11.74s/it] {'loss': 1.0859, 'learning_rate': 5.006164448384032e-08, 'epoch': 0.94} + 94%|█████████▍| 6609/7045 [21:28:45<1:25:17, 11.74s/it] 94%|█████████▍| 6610/7045 [21:28:56<1:24:01, 11.59s/it] {'loss': 1.1045, 'learning_rate': 4.98330323620308e-08, 'epoch': 0.94} + 94%|█████████▍| 6610/7045 [21:28:56<1:24:01, 11.59s/it] 94%|█████████▍| 6611/7045 [21:29:07<1:22:16, 11.38s/it] {'loss': 1.0986, 'learning_rate': 4.96049381720215e-08, 'epoch': 0.94} + 94%|█████████▍| 6611/7045 [21:29:07<1:22:16, 11.38s/it] 94%|█████████▍| 6612/7045 [21:29:18<1:21:28, 11.29s/it] {'loss': 1.1201, 'learning_rate': 4.93773619620283e-08, 'epoch': 0.94} + 94%|█████████▍| 6612/7045 [21:29:18<1:21:28, 11.29s/it] 94%|█████████▍| 6613/7045 [21:29:30<1:22:52, 11.51s/it] {'loss': 1.1138, 'learning_rate': 4.9150303780157725e-08, 'epoch': 0.94} + 94%|█████████▍| 6613/7045 [21:29:30<1:22:52, 11.51s/it] 94%|█████████▍| 6614/7045 [21:29:42<1:22:48, 11.53s/it] {'loss': 1.1172, 'learning_rate': 4.8923763674406934e-08, 'epoch': 0.94} + 94%|█████████▍| 6614/7045 [21:29:42<1:22:48, 11.53s/it] 94%|█████████▍| 6615/7045 [21:29:53<1:21:57, 11.44s/it] {'loss': 1.1016, 'learning_rate': 4.8697741692663446e-08, 'epoch': 0.94} + 94%|█████████▍| 6615/7045 [21:29:53<1:21:57, 11.44s/it] 94%|█████████▍| 6616/7045 [21:30:05<1:21:52, 11.45s/it] {'loss': 1.1152, 'learning_rate': 4.847223788270516e-08, 'epoch': 0.94} + 94%|█████████▍| 6616/7045 [21:30:05<1:21:52, 11.45s/it] 94%|█████████▍| 6617/7045 [21:30:15<1:20:39, 11.31s/it] {'loss': 1.0947, 'learning_rate': 4.824725229220034e-08, 'epoch': 0.94} + 94%|█████████▍| 6617/7045 [21:30:16<1:20:39, 11.31s/it] 94%|█████████▍| 6618/7045 [21:30:27<1:21:55, 11.51s/it] {'loss': 1.0884, 'learning_rate': 4.8022784968707883e-08, 'epoch': 0.94} + 94%|█████████▍| 6618/7045 [21:30:27<1:21:55, 11.51s/it] 94%|█████████▍| 6619/7045 [21:30:40<1:24:33, 11.91s/it] {'loss': 1.1045, 'learning_rate': 4.7798835959677325e-08, 'epoch': 0.94} + 94%|█████████▍| 6619/7045 [21:30:40<1:24:33, 11.91s/it] 94%|█████████▍| 6620/7045 [21:30:52<1:22:57, 11.71s/it] {'loss': 1.1226, 'learning_rate': 4.757540531244803e-08, 'epoch': 0.94} + 94%|█████████▍| 6620/7045 [21:30:52<1:22:57, 11.71s/it] 94%|█████████▍| 6621/7045 [21:31:04<1:23:23, 11.80s/it] {'loss': 1.1094, 'learning_rate': 4.735249307425055e-08, 'epoch': 0.94} + 94%|█████████▍| 6621/7045 [21:31:04<1:23:23, 11.80s/it] 94%|█████████▍| 6622/7045 [21:31:15<1:22:13, 11.66s/it] {'loss': 1.1094, 'learning_rate': 4.713009929220552e-08, 'epoch': 0.94} + 94%|█████████▍| 6622/7045 [21:31:15<1:22:13, 11.66s/it] 94%|█████████▍| 6623/7045 [21:31:26<1:21:11, 11.54s/it] {'loss': 1.0811, 'learning_rate': 4.690822401332368e-08, 'epoch': 0.94} + 94%|█████████▍| 6623/7045 [21:31:26<1:21:11, 11.54s/it] 94%|█████████▍| 6624/7045 [21:31:38<1:21:29, 11.61s/it] {'loss': 1.127, 'learning_rate': 4.668686728450667e-08, 'epoch': 0.94} + 94%|█████████▍| 6624/7045 [21:31:38<1:21:29, 11.61s/it] 94%|█████████▍| 6625/7045 [21:31:49<1:20:25, 11.49s/it] {'loss': 1.1406, 'learning_rate': 4.6466029152545664e-08, 'epoch': 0.94} + 94%|█████████▍| 6625/7045 [21:31:49<1:20:25, 11.49s/it] 94%|█████████▍| 6626/7045 [21:32:01<1:20:12, 11.49s/it] {'loss': 1.0996, 'learning_rate': 4.624570966412362e-08, 'epoch': 0.94} + 94%|█████████▍| 6626/7045 [21:32:01<1:20:12, 11.49s/it] 94%|█████████▍| 6627/7045 [21:32:12<1:19:16, 11.38s/it] {'loss': 1.0967, 'learning_rate': 4.6025908865812986e-08, 'epoch': 0.94} + 94%|█████████▍| 6627/7045 [21:32:12<1:19:16, 11.38s/it] 94%|█████████▍| 6628/7045 [21:32:23<1:18:51, 11.35s/it] {'loss': 1.125, 'learning_rate': 4.580662680407605e-08, 'epoch': 0.94} + 94%|█████████▍| 6628/7045 [21:32:23<1:18:51, 11.35s/it] 94%|█████████▍| 6629/7045 [21:32:34<1:18:05, 11.26s/it] {'loss': 1.1367, 'learning_rate': 4.558786352526684e-08, 'epoch': 0.94} + 94%|█████████▍| 6629/7045 [21:32:34<1:18:05, 11.26s/it] 94%|█████████▍| 6630/7045 [21:32:46<1:18:52, 11.40s/it] {'loss': 1.0957, 'learning_rate': 4.5369619075628365e-08, 'epoch': 0.94} + 94%|█████████▍| 6630/7045 [21:32:46<1:18:52, 11.40s/it] 94%|█████████▍| 6631/7045 [21:32:57<1:18:42, 11.41s/it] {'loss': 1.1025, 'learning_rate': 4.515189350129428e-08, 'epoch': 0.94} + 94%|█████████▍| 6631/7045 [21:32:57<1:18:42, 11.41s/it] 94%|█████████▍| 6632/7045 [21:33:08<1:18:04, 11.34s/it] {'loss': 1.1069, 'learning_rate': 4.4934686848289445e-08, 'epoch': 0.94} + 94%|█████████▍| 6632/7045 [21:33:08<1:18:04, 11.34s/it] 94%|█████████▍| 6633/7045 [21:33:20<1:17:24, 11.27s/it] {'loss': 1.1396, 'learning_rate': 4.471799916252795e-08, 'epoch': 0.94} + 94%|█████████▍| 6633/7045 [21:33:20<1:17:24, 11.27s/it] 94%|█████████▍| 6634/7045 [21:33:32<1:18:40, 11.48s/it] {'loss': 1.0942, 'learning_rate': 4.45018304898151e-08, 'epoch': 0.94} + 94%|█████████▍| 6634/7045 [21:33:32<1:18:40, 11.48s/it] 94%|█████████▍| 6635/7045 [21:33:48<1:28:19, 12.92s/it] {'loss': 1.0898, 'learning_rate': 4.428618087584546e-08, 'epoch': 0.94} + 94%|█████████▍| 6635/7045 [21:33:48<1:28:19, 12.92s/it] 94%|█████████▍| 6636/7045 [21:33:59<1:24:32, 12.40s/it] {'loss': 1.1016, 'learning_rate': 4.407105036620452e-08, 'epoch': 0.94} + 94%|█████████▍| 6636/7045 [21:33:59<1:24:32, 12.40s/it] 94%|█████████▍| 6637/7045 [21:34:11<1:22:31, 12.14s/it] {'loss': 1.1152, 'learning_rate': 4.3856439006367825e-08, 'epoch': 0.94} + 94%|█████████▍| 6637/7045 [21:34:11<1:22:31, 12.14s/it] 94%|█████████▍| 6638/7045 [21:34:22<1:20:17, 11.84s/it] {'loss': 1.0557, 'learning_rate': 4.36423468417016e-08, 'epoch': 0.94} + 94%|█████████▍| 6638/7045 [21:34:22<1:20:17, 11.84s/it] 94%|█████████▍| 6639/7045 [21:34:33<1:19:28, 11.75s/it] {'loss': 1.1201, 'learning_rate': 4.3428773917461866e-08, 'epoch': 0.94} + 94%|█████████▍| 6639/7045 [21:34:33<1:19:28, 11.75s/it] 94%|█████████▍| 6640/7045 [21:34:47<1:23:18, 12.34s/it] {'loss': 1.063, 'learning_rate': 4.321572027879528e-08, 'epoch': 0.94} + 94%|█████████▍| 6640/7045 [21:34:47<1:23:18, 12.34s/it] 94%|█████████▍| 6641/7045 [21:34:58<1:20:46, 12.00s/it] {'loss': 1.1094, 'learning_rate': 4.300318597073805e-08, 'epoch': 0.94} + 94%|█████████▍| 6641/7045 [21:34:58<1:20:46, 12.00s/it] 94%|█████████▍| 6642/7045 [21:35:11<1:22:04, 12.22s/it] {'loss': 1.0532, 'learning_rate': 4.2791171038217e-08, 'epoch': 0.94} + 94%|█████████▍| 6642/7045 [21:35:11<1:22:04, 12.22s/it] 94%|█████████▍| 6643/7045 [21:35:22<1:19:52, 11.92s/it] {'loss': 1.1055, 'learning_rate': 4.2579675526049634e-08, 'epoch': 0.94} + 94%|█████████▍| 6643/7045 [21:35:22<1:19:52, 11.92s/it] 94%|█████████▍| 6644/7045 [21:35:34<1:18:48, 11.79s/it] {'loss': 1.0762, 'learning_rate': 4.236869947894268e-08, 'epoch': 0.94} + 94%|█████████▍| 6644/7045 [21:35:34<1:18:48, 11.79s/it] 94%|█████████▍| 6645/7045 [21:35:46<1:19:09, 11.87s/it] {'loss': 1.1113, 'learning_rate': 4.215824294149434e-08, 'epoch': 0.94} + 94%|█████████▍| 6645/7045 [21:35:46<1:19:09, 11.87s/it] 94%|█████████▍| 6646/7045 [21:35:57<1:17:16, 11.62s/it] {'loss': 1.0718, 'learning_rate': 4.194830595819155e-08, 'epoch': 0.94} + 94%|█████████▍| 6646/7045 [21:35:57<1:17:16, 11.62s/it] 94%|█████████▍| 6647/7045 [21:36:10<1:19:34, 12.00s/it] {'loss': 1.0698, 'learning_rate': 4.173888857341185e-08, 'epoch': 0.94} + 94%|█████████▍| 6647/7045 [21:36:10<1:19:34, 12.00s/it] 94%|█████████▍| 6648/7045 [21:36:22<1:20:11, 12.12s/it] {'loss': 1.1201, 'learning_rate': 4.152999083142428e-08, 'epoch': 0.94} + 94%|█████████▍| 6648/7045 [21:36:22<1:20:11, 12.12s/it] 94%|█████████▍| 6649/7045 [21:36:34<1:20:19, 12.17s/it] {'loss': 1.0903, 'learning_rate': 4.1321612776386025e-08, 'epoch': 0.94} + 94%|█████████▍| 6649/7045 [21:36:34<1:20:19, 12.17s/it] 94%|█████████▍| 6650/7045 [21:36:46<1:19:02, 12.01s/it] {'loss': 1.1016, 'learning_rate': 4.1113754452345724e-08, 'epoch': 0.94} + 94%|█████████▍| 6650/7045 [21:36:46<1:19:02, 12.01s/it] 94%|█████████▍| 6651/7045 [21:36:58<1:18:18, 11.93s/it] {'loss': 1.0811, 'learning_rate': 4.0906415903242126e-08, 'epoch': 0.94} + 94%|█████████▍| 6651/7045 [21:36:58<1:18:18, 11.93s/it] 94%|█████████▍| 6652/7045 [21:37:11<1:21:58, 12.52s/it] {'loss': 1.0649, 'learning_rate': 4.069959717290323e-08, 'epoch': 0.94} + 94%|█████████▍| 6652/7045 [21:37:11<1:21:58, 12.52s/it] 94%|█████████▍| 6653/7045 [21:37:23<1:19:21, 12.15s/it] {'loss': 1.1074, 'learning_rate': 4.0493298305047666e-08, 'epoch': 0.94} + 94%|█████████▍| 6653/7045 [21:37:23<1:19:21, 12.15s/it] 94%|█████████▍| 6654/7045 [21:37:34<1:17:16, 11.86s/it] {'loss': 1.1621, 'learning_rate': 4.028751934328473e-08, 'epoch': 0.94} + 94%|█████████▍| 6654/7045 [21:37:34<1:17:16, 11.86s/it] 94%|█████████▍| 6655/7045 [21:37:45<1:15:23, 11.60s/it] {'loss': 1.1011, 'learning_rate': 4.0082260331112675e-08, 'epoch': 0.94} + 94%|█████████▍| 6655/7045 [21:37:45<1:15:23, 11.60s/it] 94%|█████████▍| 6656/7045 [21:37:56<1:14:34, 11.50s/it] {'loss': 1.1084, 'learning_rate': 3.9877521311920966e-08, 'epoch': 0.94} + 94%|█████████▍| 6656/7045 [21:37:56<1:14:34, 11.50s/it] 94%|█████████▍| 6657/7045 [21:38:09<1:16:11, 11.78s/it] {'loss': 1.0986, 'learning_rate': 3.967330232898858e-08, 'epoch': 0.94} + 94%|█████████▍| 6657/7045 [21:38:09<1:16:11, 11.78s/it] 95%|█████████▍| 6658/7045 [21:38:21<1:17:26, 12.01s/it] {'loss': 1.1006, 'learning_rate': 3.946960342548406e-08, 'epoch': 0.95} + 95%|█████████▍| 6658/7045 [21:38:21<1:17:26, 12.01s/it] 95%|█████████▍| 6659/7045 [21:38:34<1:17:56, 12.12s/it] {'loss': 1.0586, 'learning_rate': 3.9266424644467114e-08, 'epoch': 0.95} + 95%|█████████▍| 6659/7045 [21:38:34<1:17:56, 12.12s/it] 95%|█████████▍| 6660/7045 [21:38:45<1:16:09, 11.87s/it] {'loss': 1.125, 'learning_rate': 3.9063766028886454e-08, 'epoch': 0.95} + 95%|█████████▍| 6660/7045 [21:38:45<1:16:09, 11.87s/it] 95%|█████████▍| 6661/7045 [21:38:58<1:18:38, 12.29s/it] {'loss': 1.0293, 'learning_rate': 3.8861627621581964e-08, 'epoch': 0.95} + 95%|█████████▍| 6661/7045 [21:38:58<1:18:38, 12.29s/it] 95%|█████████▍| 6662/7045 [21:39:09<1:15:59, 11.90s/it] {'loss': 1.0771, 'learning_rate': 3.866000946528253e-08, 'epoch': 0.95} + 95%|█████████▍| 6662/7045 [21:39:09<1:15:59, 11.90s/it] 95%|█████████▍| 6663/7045 [21:39:20<1:14:13, 11.66s/it] {'loss': 1.1191, 'learning_rate': 3.8458911602607954e-08, 'epoch': 0.95} + 95%|█████████▍| 6663/7045 [21:39:20<1:14:13, 11.66s/it] 95%|█████████▍| 6664/7045 [21:39:32<1:13:23, 11.56s/it] {'loss': 1.0986, 'learning_rate': 3.8258334076066713e-08, 'epoch': 0.95} + 95%|█████████▍| 6664/7045 [21:39:32<1:13:23, 11.56s/it] 95%|█████████▍| 6665/7045 [21:39:44<1:14:14, 11.72s/it] {'loss': 1.1289, 'learning_rate': 3.805827692805908e-08, 'epoch': 0.95} + 95%|█████████▍| 6665/7045 [21:39:44<1:14:14, 11.72s/it] 95%|█████████▍| 6666/7045 [21:39:57<1:16:46, 12.16s/it] {'loss': 1.0742, 'learning_rate': 3.785874020087343e-08, 'epoch': 0.95} + 95%|█████████▍| 6666/7045 [21:39:57<1:16:46, 12.16s/it] 95%|█████████▍| 6667/7045 [21:40:08<1:14:48, 11.87s/it] {'loss': 1.061, 'learning_rate': 3.765972393669021e-08, 'epoch': 0.95} + 95%|█████████▍| 6667/7045 [21:40:08<1:14:48, 11.87s/it] 95%|█████████▍| 6668/7045 [21:40:19<1:13:36, 11.71s/it] {'loss': 1.1191, 'learning_rate': 3.746122817757797e-08, 'epoch': 0.95} + 95%|█████████▍| 6668/7045 [21:40:19<1:13:36, 11.71s/it] 95%|█████████▍| 6669/7045 [21:40:31<1:12:56, 11.64s/it] {'loss': 1.1201, 'learning_rate': 3.726325296549621e-08, 'epoch': 0.95} + 95%|█████████▍| 6669/7045 [21:40:31<1:12:56, 11.64s/it] 95%|█████████▍| 6670/7045 [21:40:43<1:13:45, 11.80s/it] {'loss': 1.1035, 'learning_rate': 3.706579834229424e-08, 'epoch': 0.95} + 95%|█████████▍| 6670/7045 [21:40:43<1:13:45, 11.80s/it] 95%|█████████▍| 6671/7045 [21:40:54<1:12:04, 11.56s/it] {'loss': 1.1299, 'learning_rate': 3.6868864349710874e-08, 'epoch': 0.95} + 95%|█████████▍| 6671/7045 [21:40:54<1:12:04, 11.56s/it] 95%|█████████▍| 6672/7045 [21:41:05<1:11:07, 11.44s/it] {'loss': 1.0884, 'learning_rate': 3.667245102937617e-08, 'epoch': 0.95} + 95%|█████████▍| 6672/7045 [21:41:05<1:11:07, 11.44s/it] 95%|█████████▍| 6673/7045 [21:41:16<1:10:08, 11.31s/it] {'loss': 1.1357, 'learning_rate': 3.647655842280828e-08, 'epoch': 0.95} + 95%|█████████▍| 6673/7045 [21:41:16<1:10:08, 11.31s/it] 95%|█████████▍| 6674/7045 [21:41:29<1:12:33, 11.73s/it] {'loss': 1.106, 'learning_rate': 3.6281186571416604e-08, 'epoch': 0.95} + 95%|█████████▍| 6674/7045 [21:41:29<1:12:33, 11.73s/it] 95%|█████████▍| 6675/7045 [21:41:41<1:12:11, 11.71s/it] {'loss': 1.1543, 'learning_rate': 3.608633551650004e-08, 'epoch': 0.95} + 95%|█████████▍| 6675/7045 [21:41:41<1:12:11, 11.71s/it] 95%|█████████▍| 6676/7045 [21:41:52<1:11:41, 11.66s/it] {'loss': 1.1016, 'learning_rate': 3.589200529924758e-08, 'epoch': 0.95} + 95%|█████████▍| 6676/7045 [21:41:52<1:11:41, 11.66s/it] 95%|█████████▍| 6677/7045 [21:42:03<1:10:53, 11.56s/it] {'loss': 1.0835, 'learning_rate': 3.569819596073748e-08, 'epoch': 0.95} + 95%|█████████▍| 6677/7045 [21:42:03<1:10:53, 11.56s/it] 95%|█████████▍| 6678/7045 [21:42:16<1:12:08, 11.80s/it] {'loss': 1.1133, 'learning_rate': 3.550490754193919e-08, 'epoch': 0.95} + 95%|█████████▍| 6678/7045 [21:42:16<1:12:08, 11.80s/it] 95%|█████████▍| 6679/7045 [21:42:27<1:10:44, 11.60s/it] {'loss': 1.1279, 'learning_rate': 3.531214008371087e-08, 'epoch': 0.95} + 95%|█████████▍| 6679/7045 [21:42:27<1:10:44, 11.60s/it] 95%|█████████▍| 6680/7045 [21:42:38<1:10:22, 11.57s/it] {'loss': 1.0957, 'learning_rate': 3.5119893626800737e-08, 'epoch': 0.95} + 95%|█████████▍| 6680/7045 [21:42:38<1:10:22, 11.57s/it] 95%|█████████▍| 6681/7045 [21:42:50<1:09:23, 11.44s/it] {'loss': 1.1191, 'learning_rate': 3.4928168211847136e-08, 'epoch': 0.95} + 95%|█████████▍| 6681/7045 [21:42:50<1:09:23, 11.44s/it] 95%|█████████▍| 6682/7045 [21:43:01<1:09:13, 11.44s/it] {'loss': 1.1641, 'learning_rate': 3.473696387937819e-08, 'epoch': 0.95} + 95%|█████████▍| 6682/7045 [21:43:01<1:09:13, 11.44s/it] 95%|█████████▍| 6683/7045 [21:43:14<1:11:43, 11.89s/it] {'loss': 1.103, 'learning_rate': 3.4546280669812135e-08, 'epoch': 0.95} + 95%|█████████▍| 6683/7045 [21:43:14<1:11:43, 11.89s/it] 95%|█████████▍| 6684/7045 [21:43:25<1:10:01, 11.64s/it] {'loss': 1.1182, 'learning_rate': 3.435611862345645e-08, 'epoch': 0.95} + 95%|█████████▍| 6684/7045 [21:43:25<1:10:01, 11.64s/it] 95%|█████████▍| 6685/7045 [21:43:36<1:08:37, 11.44s/it] {'loss': 1.0693, 'learning_rate': 3.4166477780508964e-08, 'epoch': 0.95} + 95%|█████████▍| 6685/7045 [21:43:36<1:08:37, 11.44s/it] 95%|█████████▍| 6686/7045 [21:43:48<1:09:19, 11.59s/it] {'loss': 1.0913, 'learning_rate': 3.397735818105707e-08, 'epoch': 0.95} + 95%|█████████▍| 6686/7045 [21:43:48<1:09:19, 11.59s/it] 95%|█████████▍| 6687/7045 [21:44:00<1:09:13, 11.60s/it] {'loss': 1.0757, 'learning_rate': 3.3788759865078235e-08, 'epoch': 0.95} + 95%|█████████▍| 6687/7045 [21:44:00<1:09:13, 11.60s/it] 95%|█████████▍| 6688/7045 [21:44:11<1:09:26, 11.67s/it] {'loss': 1.1113, 'learning_rate': 3.360068287243945e-08, 'epoch': 0.95} + 95%|█████████▍| 6688/7045 [21:44:11<1:09:26, 11.67s/it] 95%|█████████▍| 6689/7045 [21:44:25<1:12:16, 12.18s/it] {'loss': 1.1318, 'learning_rate': 3.34131272428978e-08, 'epoch': 0.95} + 95%|█████████▍| 6689/7045 [21:44:25<1:12:16, 12.18s/it] 95%|█████████▍| 6690/7045 [21:44:36<1:11:07, 12.02s/it] {'loss': 1.1069, 'learning_rate': 3.322609301609964e-08, 'epoch': 0.95} + 95%|█████████▍| 6690/7045 [21:44:36<1:11:07, 12.02s/it] 95%|█████████▍| 6691/7045 [21:44:48<1:10:16, 11.91s/it] {'loss': 1.041, 'learning_rate': 3.3039580231581655e-08, 'epoch': 0.95} + 95%|█████████▍| 6691/7045 [21:44:48<1:10:16, 11.91s/it] 95%|█████████▍| 6692/7045 [21:45:00<1:09:57, 11.89s/it] {'loss': 1.0986, 'learning_rate': 3.285358892877011e-08, 'epoch': 0.95} + 95%|█████████▍| 6692/7045 [21:45:00<1:09:57, 11.89s/it] 95%|█████████▌| 6693/7045 [21:45:11<1:08:23, 11.66s/it] {'loss': 1.1309, 'learning_rate': 3.2668119146981035e-08, 'epoch': 0.95} + 95%|█████████▌| 6693/7045 [21:45:11<1:08:23, 11.66s/it] 95%|█████████▌| 6694/7045 [21:45:22<1:06:55, 11.44s/it] {'loss': 1.1309, 'learning_rate': 3.2483170925420025e-08, 'epoch': 0.95} + 95%|█████████▌| 6694/7045 [21:45:22<1:06:55, 11.44s/it] 95%|█████████▌| 6695/7045 [21:45:34<1:07:54, 11.64s/it] {'loss': 1.1177, 'learning_rate': 3.229874430318275e-08, 'epoch': 0.95} + 95%|█████████▌| 6695/7045 [21:45:34<1:07:54, 11.64s/it] 95%|█████████▌| 6696/7045 [21:45:45<1:06:47, 11.48s/it] {'loss': 1.1006, 'learning_rate': 3.211483931925441e-08, 'epoch': 0.95} + 95%|█████████▌| 6696/7045 [21:45:45<1:06:47, 11.48s/it] 95%|█████████▌| 6697/7045 [21:45:56<1:05:54, 11.36s/it] {'loss': 1.1006, 'learning_rate': 3.1931456012510296e-08, 'epoch': 0.95} + 95%|█████████▌| 6697/7045 [21:45:56<1:05:54, 11.36s/it] 95%|█████████▌| 6698/7045 [21:46:07<1:05:12, 11.28s/it] {'loss': 1.125, 'learning_rate': 3.174859442171469e-08, 'epoch': 0.95} + 95%|█████████▌| 6698/7045 [21:46:07<1:05:12, 11.28s/it] 95%|█████████▌| 6699/7045 [21:46:18<1:04:19, 11.15s/it] {'loss': 1.1143, 'learning_rate': 3.1566254585522206e-08, 'epoch': 0.95} + 95%|█████████▌| 6699/7045 [21:46:18<1:04:19, 11.15s/it] 95%|█████████▌| 6700/7045 [21:46:29<1:03:55, 11.12s/it] {'loss': 1.1216, 'learning_rate': 3.138443654247703e-08, 'epoch': 0.95} + 95%|█████████▌| 6700/7045 [21:46:29<1:03:55, 11.12s/it] 95%|█████████▌| 6701/7045 [21:46:42<1:06:55, 11.67s/it] {'loss': 1.0747, 'learning_rate': 3.120314033101313e-08, 'epoch': 0.95} + 95%|█████████▌| 6701/7045 [21:46:42<1:06:55, 11.67s/it] 95%|█████████▌| 6702/7045 [21:46:54<1:06:39, 11.66s/it] {'loss': 1.1426, 'learning_rate': 3.102236598945402e-08, 'epoch': 0.95} + 95%|█████████▌| 6702/7045 [21:46:54<1:06:39, 11.66s/it] 95%|█████████▌| 6703/7045 [21:47:06<1:07:21, 11.82s/it] {'loss': 1.1113, 'learning_rate': 3.084211355601274e-08, 'epoch': 0.95} + 95%|█████████▌| 6703/7045 [21:47:06<1:07:21, 11.82s/it] 95%|█████████▌| 6704/7045 [21:47:17<1:06:27, 11.69s/it] {'loss': 1.083, 'learning_rate': 3.066238306879216e-08, 'epoch': 0.95} + 95%|█████████▌| 6704/7045 [21:47:17<1:06:27, 11.69s/it] 95%|█████████▌| 6705/7045 [21:47:28<1:04:56, 11.46s/it] {'loss': 1.1084, 'learning_rate': 3.0483174565784926e-08, 'epoch': 0.95} + 95%|█████████▌| 6705/7045 [21:47:28<1:04:56, 11.46s/it] 95%|█████████▌| 6706/7045 [21:47:39<1:04:03, 11.34s/it] {'loss': 1.1201, 'learning_rate': 3.0304488084873517e-08, 'epoch': 0.95} + 95%|█████████▌| 6706/7045 [21:47:39<1:04:03, 11.34s/it] 95%|█████████▌| 6707/7045 [21:47:51<1:03:38, 11.30s/it] {'loss': 1.1045, 'learning_rate': 3.012632366382939e-08, 'epoch': 0.95} + 95%|█████████▌| 6707/7045 [21:47:51<1:03:38, 11.30s/it] 95%|█████████▌| 6708/7045 [21:48:03<1:05:21, 11.64s/it] {'loss': 1.0952, 'learning_rate': 2.9948681340314366e-08, 'epoch': 0.95} + 95%|█████████▌| 6708/7045 [21:48:03<1:05:21, 11.64s/it] 95%|█████████▌| 6709/7045 [21:48:14<1:04:14, 11.47s/it] {'loss': 1.0986, 'learning_rate': 2.9771561151879515e-08, 'epoch': 0.95} + 95%|█████████▌| 6709/7045 [21:48:14<1:04:14, 11.47s/it] 95%|█████████▌| 6710/7045 [21:48:25<1:02:58, 11.28s/it] {'loss': 1.0781, 'learning_rate': 2.9594963135965437e-08, 'epoch': 0.95} + 95%|█████████▌| 6710/7045 [21:48:25<1:02:58, 11.28s/it] 95%|█████████▌| 6711/7045 [21:48:36<1:02:40, 11.26s/it] {'loss': 1.1006, 'learning_rate': 2.9418887329902833e-08, 'epoch': 0.95} + 95%|█████████▌| 6711/7045 [21:48:36<1:02:40, 11.26s/it] 95%|█████████▌| 6712/7045 [21:48:47<1:02:28, 11.26s/it] {'loss': 1.1318, 'learning_rate': 2.9243333770911654e-08, 'epoch': 0.95} + 95%|█████████▌| 6712/7045 [21:48:47<1:02:28, 11.26s/it] 95%|█████████▌| 6713/7045 [21:48:58<1:02:02, 11.21s/it] {'loss': 1.084, 'learning_rate': 2.9068302496101373e-08, 'epoch': 0.95} + 95%|█████████▌| 6713/7045 [21:48:58<1:02:02, 11.21s/it] 95%|█████████▌| 6714/7045 [21:49:10<1:03:02, 11.43s/it] {'loss': 1.125, 'learning_rate': 2.889379354247157e-08, 'epoch': 0.95} + 95%|█████████▌| 6714/7045 [21:49:10<1:03:02, 11.43s/it] 95%|█████████▌| 6715/7045 [21:49:21<1:02:04, 11.29s/it] {'loss': 1.1143, 'learning_rate': 2.871980694691079e-08, 'epoch': 0.95} + 95%|█████████▌| 6715/7045 [21:49:21<1:02:04, 11.29s/it] 95%|█████████▌| 6716/7045 [21:49:33<1:02:00, 11.31s/it] {'loss': 1.1357, 'learning_rate': 2.854634274619711e-08, 'epoch': 0.95} + 95%|█████████▌| 6716/7045 [21:49:33<1:02:00, 11.31s/it] 95%|█████████▌| 6717/7045 [21:49:44<1:01:56, 11.33s/it] {'loss': 1.0391, 'learning_rate': 2.8373400976998977e-08, 'epoch': 0.95} + 95%|█████████▌| 6717/7045 [21:49:44<1:01:56, 11.33s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Truncated File Read + warnings.warn(str(msg)) + 95%|█████████▌| 6718/7045 [21:49:56<1:03:23, 11.63s/it] {'loss': 1.0581, 'learning_rate': 2.8200981675873816e-08, 'epoch': 0.95} + 95%|█████████▌| 6718/7045 [21:49:56<1:03:23, 11.63s/it] 95%|█████████▌| 6719/7045 [21:50:09<1:04:51, 11.94s/it] {'loss': 1.1318, 'learning_rate': 2.8029084879268585e-08, 'epoch': 0.95} + 95%|█████████▌| 6719/7045 [21:50:09<1:04:51, 11.94s/it] 95%|█████████▌| 6720/7045 [21:50:20<1:03:15, 11.68s/it] {'loss': 1.0898, 'learning_rate': 2.785771062352005e-08, 'epoch': 0.95} + 95%|█████████▌| 6720/7045 [21:50:20<1:03:15, 11.68s/it] 95%|█████████▌| 6721/7045 [21:50:32<1:03:28, 11.76s/it] {'loss': 1.0938, 'learning_rate': 2.7686858944853956e-08, 'epoch': 0.95} + 95%|█████████▌| 6721/7045 [21:50:32<1:03:28, 11.76s/it] 95%|█████████▌| 6722/7045 [21:50:43<1:02:07, 11.54s/it] {'loss': 1.1387, 'learning_rate': 2.7516529879386687e-08, 'epoch': 0.95} + 95%|█████████▌| 6722/7045 [21:50:43<1:02:07, 11.54s/it] 95%|█████████▌| 6723/7045 [21:50:55<1:01:50, 11.52s/it] {'loss': 1.1187, 'learning_rate': 2.734672346312306e-08, 'epoch': 0.95} + 95%|█████████▌| 6723/7045 [21:50:55<1:01:50, 11.52s/it] 95%|█████████▌| 6724/7045 [21:51:06<1:00:59, 11.40s/it] {'loss': 1.1758, 'learning_rate': 2.7177439731957966e-08, 'epoch': 0.95} + 95%|█████████▌| 6724/7045 [21:51:06<1:00:59, 11.40s/it] 95%|█████████▌| 6725/7045 [21:51:17<1:00:46, 11.40s/it] {'loss': 1.0967, 'learning_rate': 2.7008678721675562e-08, 'epoch': 0.95} + 95%|█████████▌| 6725/7045 [21:51:17<1:00:46, 11.40s/it] 95%|█████████▌| 6726/7045 [21:51:28<59:41, 11.23s/it] {'loss': 1.0908, 'learning_rate': 2.6840440467949812e-08, 'epoch': 0.95} + 95%|█████████▌| 6726/7045 [21:51:28<59:41, 11.23s/it] 95%|█████████▌| 6727/7045 [21:51:40<1:00:51, 11.48s/it] {'loss': 1.1045, 'learning_rate': 2.6672725006343934e-08, 'epoch': 0.95} + 95%|█████████▌| 6727/7045 [21:51:40<1:00:51, 11.48s/it] 96%|█████████▌| 6728/7045 [21:51:51<1:00:24, 11.43s/it] {'loss': 1.0625, 'learning_rate': 2.6505532372310406e-08, 'epoch': 0.96} + 96%|█████████▌| 6728/7045 [21:51:51<1:00:24, 11.43s/it] 96%|█████████▌| 6729/7045 [21:52:04<1:02:50, 11.93s/it] {'loss': 1.0615, 'learning_rate': 2.6338862601191783e-08, 'epoch': 0.96} + 96%|█████████▌| 6729/7045 [21:52:04<1:02:50, 11.93s/it] 96%|█████████▌| 6730/7045 [21:52:19<1:06:28, 12.66s/it] {'loss': 1.1089, 'learning_rate': 2.617271572821961e-08, 'epoch': 0.96} + 96%|█████████▌| 6730/7045 [21:52:19<1:06:28, 12.66s/it] 96%|█████████▌| 6731/7045 [21:52:30<1:03:29, 12.13s/it] {'loss': 1.085, 'learning_rate': 2.6007091788515238e-08, 'epoch': 0.96} + 96%|█████████▌| 6731/7045 [21:52:30<1:03:29, 12.13s/it] 96%|█████████▌| 6732/7045 [21:52:41<1:02:01, 11.89s/it] {'loss': 1.1006, 'learning_rate': 2.584199081708899e-08, 'epoch': 0.96} + 96%|█████████▌| 6732/7045 [21:52:41<1:02:01, 11.89s/it] 96%|█████████▌| 6733/7045 [21:52:52<1:00:53, 11.71s/it] {'loss': 1.1089, 'learning_rate': 2.567741284884101e-08, 'epoch': 0.96} + 96%|█████████▌| 6733/7045 [21:52:52<1:00:53, 11.71s/it] 96%|█████████▌| 6734/7045 [21:53:03<59:33, 11.49s/it] {'loss': 1.127, 'learning_rate': 2.5513357918561243e-08, 'epoch': 0.96} + 96%|█████████▌| 6734/7045 [21:53:03<59:33, 11.49s/it] 96%|█████████▌| 6735/7045 [21:53:15<59:04, 11.43s/it] {'loss': 1.0898, 'learning_rate': 2.5349826060928062e-08, 'epoch': 0.96} + 96%|█████████▌| 6735/7045 [21:53:15<59:04, 11.43s/it] 96%|█████████▌| 6736/7045 [21:53:28<1:02:04, 12.05s/it] {'loss': 1.0913, 'learning_rate': 2.5186817310510758e-08, 'epoch': 0.96} + 96%|█████████▌| 6736/7045 [21:53:28<1:02:04, 12.05s/it] 96%|█████████▌| 6737/7045 [21:53:39<1:00:31, 11.79s/it] {'loss': 1.1201, 'learning_rate': 2.5024331701766214e-08, 'epoch': 0.96} + 96%|█████████▌| 6737/7045 [21:53:39<1:00:31, 11.79s/it] 96%|█████████▌| 6738/7045 [21:53:51<1:00:04, 11.74s/it] {'loss': 1.0869, 'learning_rate': 2.4862369269041953e-08, 'epoch': 0.96} + 96%|█████████▌| 6738/7045 [21:53:51<1:00:04, 11.74s/it] 96%|█████████▌| 6739/7045 [21:54:02<59:07, 11.59s/it] {'loss': 1.1025, 'learning_rate': 2.4700930046574757e-08, 'epoch': 0.96} + 96%|█████████▌| 6739/7045 [21:54:02<59:07, 11.59s/it] 96%|█████████▌| 6740/7045 [21:54:13<57:47, 11.37s/it] {'loss': 1.1094, 'learning_rate': 2.454001406849066e-08, 'epoch': 0.96} + 96%|█████████▌| 6740/7045 [21:54:13<57:47, 11.37s/it] 96%|█████████▌| 6741/7045 [21:54:24<57:03, 11.26s/it] {'loss': 1.0952, 'learning_rate': 2.4379621368805228e-08, 'epoch': 0.96} + 96%|█████████▌| 6741/7045 [21:54:24<57:03, 11.26s/it] 96%|█████████▌| 6742/7045 [21:54:35<56:33, 11.20s/it] {'loss': 1.1172, 'learning_rate': 2.421975198142301e-08, 'epoch': 0.96} + 96%|█████████▌| 6742/7045 [21:54:35<56:33, 11.20s/it] 96%|█████████▌| 6743/7045 [21:54:47<57:10, 11.36s/it] {'loss': 1.085, 'learning_rate': 2.4060405940138086e-08, 'epoch': 0.96} + 96%|█████████▌| 6743/7045 [21:54:47<57:10, 11.36s/it] 96%|█████████▌| 6744/7045 [21:55:00<59:18, 11.82s/it] {'loss': 1.0986, 'learning_rate': 2.3901583278634344e-08, 'epoch': 0.96} + 96%|█████████▌| 6744/7045 [21:55:00<59:18, 11.82s/it] 96%|█████████▌| 6745/7045 [21:55:11<58:42, 11.74s/it] {'loss': 1.1191, 'learning_rate': 2.3743284030484647e-08, 'epoch': 0.96} + 96%|█████████▌| 6745/7045 [21:55:11<58:42, 11.74s/it] 96%|█████████▌| 6746/7045 [21:55:24<59:58, 12.04s/it] {'loss': 1.0791, 'learning_rate': 2.35855082291514e-08, 'epoch': 0.96} + 96%|█████████▌| 6746/7045 [21:55:24<59:58, 12.04s/it] 96%|█████████▌| 6747/7045 [21:55:35<58:20, 11.75s/it] {'loss': 1.1025, 'learning_rate': 2.3428255907985976e-08, 'epoch': 0.96} + 96%|█████████▌| 6747/7045 [21:55:35<58:20, 11.75s/it] 96%|█████████▌| 6748/7045 [21:55:46<57:33, 11.63s/it] {'loss': 1.0713, 'learning_rate': 2.3271527100229564e-08, 'epoch': 0.96} + 96%|█████████▌| 6748/7045 [21:55:46<57:33, 11.63s/it] 96%|█████████▌| 6749/7045 [21:55:58<57:01, 11.56s/it] {'loss': 1.0928, 'learning_rate': 2.3115321839012328e-08, 'epoch': 0.96} + 96%|█████████▌| 6749/7045 [21:55:58<57:01, 11.56s/it] 96%|█████████▌| 6750/7045 [21:56:11<59:31, 12.11s/it] {'loss': 1.1064, 'learning_rate': 2.2959640157354246e-08, 'epoch': 0.96} + 96%|█████████▌| 6750/7045 [21:56:11<59:31, 12.11s/it] 96%|█████████▌| 6751/7045 [21:56:24<1:00:18, 12.31s/it] {'loss': 1.082, 'learning_rate': 2.2804482088163992e-08, 'epoch': 0.96} + 96%|█████████▌| 6751/7045 [21:56:24<1:00:18, 12.31s/it] 96%|█████████▌| 6752/7045 [21:56:35<58:00, 11.88s/it] {'loss': 1.1074, 'learning_rate': 2.2649847664240053e-08, 'epoch': 0.96} + 96%|█████████▌| 6752/7045 [21:56:35<58:00, 11.88s/it] 96%|█████████▌| 6753/7045 [21:56:46<56:21, 11.58s/it] {'loss': 1.0796, 'learning_rate': 2.2495736918269894e-08, 'epoch': 0.96} + 96%|█████████▌| 6753/7045 [21:56:46<56:21, 11.58s/it] 96%|█████████▌| 6754/7045 [21:56:57<55:07, 11.36s/it] {'loss': 1.0898, 'learning_rate': 2.2342149882830512e-08, 'epoch': 0.96} + 96%|█████████▌| 6754/7045 [21:56:57<55:07, 11.36s/it] 96%|█████████▌| 6755/7045 [21:57:08<54:29, 11.27s/it] {'loss': 1.0947, 'learning_rate': 2.2189086590387876e-08, 'epoch': 0.96} + 96%|█████████▌| 6755/7045 [21:57:08<54:29, 11.27s/it] 96%|█████████▌| 6756/7045 [21:57:19<53:43, 11.15s/it] {'loss': 1.1211, 'learning_rate': 2.2036547073297776e-08, 'epoch': 0.96} + 96%|█████████▌| 6756/7045 [21:57:19<53:43, 11.15s/it] 96%|█████████▌| 6757/7045 [21:57:30<54:17, 11.31s/it] {'loss': 1.1069, 'learning_rate': 2.1884531363804972e-08, 'epoch': 0.96} + 96%|█████████▌| 6757/7045 [21:57:30<54:17, 11.31s/it] 96%|█████████▌| 6758/7045 [21:57:42<54:50, 11.46s/it] {'loss': 1.0908, 'learning_rate': 2.1733039494043206e-08, 'epoch': 0.96} + 96%|█████████▌| 6758/7045 [21:57:42<54:50, 11.46s/it] 96%|█████████▌| 6759/7045 [21:57:53<54:26, 11.42s/it] {'loss': 1.127, 'learning_rate': 2.1582071496036027e-08, 'epoch': 0.96} + 96%|█████████▌| 6759/7045 [21:57:53<54:26, 11.42s/it] 96%|█████████▌| 6760/7045 [21:58:06<55:34, 11.70s/it] {'loss': 1.0967, 'learning_rate': 2.1431627401695965e-08, 'epoch': 0.96} + 96%|█████████▌| 6760/7045 [21:58:06<55:34, 11.70s/it] 96%|█████████▌| 6761/7045 [21:58:17<54:17, 11.47s/it] {'loss': 1.1289, 'learning_rate': 2.1281707242824802e-08, 'epoch': 0.96} + 96%|█████████▌| 6761/7045 [21:58:17<54:17, 11.47s/it] 96%|█████████▌| 6762/7045 [21:58:29<55:32, 11.78s/it] {'loss': 1.063, 'learning_rate': 2.1132311051113575e-08, 'epoch': 0.96} + 96%|█████████▌| 6762/7045 [21:58:29<55:32, 11.78s/it] 96%|█████████▌| 6763/7045 [21:58:42<57:03, 12.14s/it] {'loss': 1.0898, 'learning_rate': 2.0983438858142857e-08, 'epoch': 0.96} + 96%|█████████▌| 6763/7045 [21:58:42<57:03, 12.14s/it] 96%|█████████▌| 6764/7045 [21:58:53<55:14, 11.80s/it] {'loss': 1.0908, 'learning_rate': 2.0835090695381922e-08, 'epoch': 0.96} + 96%|█████████▌| 6764/7045 [21:58:53<55:14, 11.80s/it] 96%|█████████▌| 6765/7045 [21:59:04<54:12, 11.62s/it] {'loss': 1.1123, 'learning_rate': 2.068726659418957e-08, 'epoch': 0.96} + 96%|█████████▌| 6765/7045 [21:59:04<54:12, 11.62s/it] 96%|█████████▌| 6766/7045 [21:59:15<53:18, 11.46s/it] {'loss': 1.0674, 'learning_rate': 2.053996658581414e-08, 'epoch': 0.96} + 96%|█████████▌| 6766/7045 [21:59:15<53:18, 11.46s/it] 96%|█████████▌| 6767/7045 [21:59:26<52:22, 11.30s/it] {'loss': 1.0825, 'learning_rate': 2.0393190701392117e-08, 'epoch': 0.96} + 96%|█████████▌| 6767/7045 [21:59:26<52:22, 11.30s/it] 96%|█████████▌| 6768/7045 [21:59:39<54:04, 11.71s/it] {'loss': 1.127, 'learning_rate': 2.0246938971950626e-08, 'epoch': 0.96} + 96%|█████████▌| 6768/7045 [21:59:39<54:04, 11.71s/it] 96%|█████████▌| 6769/7045 [21:59:50<53:08, 11.55s/it] {'loss': 1.103, 'learning_rate': 2.0101211428404933e-08, 'epoch': 0.96} + 96%|█████████▌| 6769/7045 [21:59:50<53:08, 11.55s/it] 96%|█████████▌| 6770/7045 [22:00:01<52:21, 11.42s/it] {'loss': 1.1025, 'learning_rate': 1.9956008101559842e-08, 'epoch': 0.96} + 96%|█████████▌| 6770/7045 [22:00:01<52:21, 11.42s/it] 96%|█████████▌| 6771/7045 [22:00:12<51:23, 11.25s/it] {'loss': 1.1064, 'learning_rate': 1.9811329022109416e-08, 'epoch': 0.96} + 96%|█████████▌| 6771/7045 [22:00:12<51:23, 11.25s/it] 96%|█████████▌| 6772/7045 [22:00:23<50:48, 11.17s/it] {'loss': 1.0518, 'learning_rate': 1.9667174220636964e-08, 'epoch': 0.96} + 96%|█████████▌| 6772/7045 [22:00:23<50:48, 11.17s/it] 96%|█████████▌| 6773/7045 [22:00:35<51:01, 11.25s/it] {'loss': 1.1221, 'learning_rate': 1.952354372761478e-08, 'epoch': 0.96} + 96%|█████████▌| 6773/7045 [22:00:35<51:01, 11.25s/it] 96%|█████████▌| 6774/7045 [22:00:46<51:24, 11.38s/it] {'loss': 1.0591, 'learning_rate': 1.938043757340441e-08, 'epoch': 0.96} + 96%|█████████▌| 6774/7045 [22:00:46<51:24, 11.38s/it] 96%|█████████▌| 6775/7045 [22:00:58<51:22, 11.42s/it] {'loss': 1.0649, 'learning_rate': 1.9237855788256378e-08, 'epoch': 0.96} + 96%|█████████▌| 6775/7045 [22:00:58<51:22, 11.42s/it] 96%|█████████▌| 6776/7045 [22:01:09<50:37, 11.29s/it] {'loss': 1.1426, 'learning_rate': 1.909579840231046e-08, 'epoch': 0.96} + 96%|█████████▌| 6776/7045 [22:01:09<50:37, 11.29s/it] 96%|█████████▌| 6777/7045 [22:01:20<50:18, 11.26s/it] {'loss': 1.124, 'learning_rate': 1.8954265445595698e-08, 'epoch': 0.96} + 96%|█████████▌| 6777/7045 [22:01:20<50:18, 11.26s/it] 96%|█████████▌| 6778/7045 [22:01:33<52:13, 11.74s/it] {'loss': 1.1372, 'learning_rate': 1.8813256948030654e-08, 'epoch': 0.96} + 96%|█████████▌| 6778/7045 [22:01:33<52:13, 11.74s/it] 96%|█████████▌| 6779/7045 [22:01:44<51:04, 11.52s/it] {'loss': 1.1133, 'learning_rate': 1.8672772939422045e-08, 'epoch': 0.96} + 96%|█████████▌| 6779/7045 [22:01:44<51:04, 11.52s/it] 96%|█████████▌| 6780/7045 [22:01:55<50:04, 11.34s/it] {'loss': 1.0703, 'learning_rate': 1.853281344946667e-08, 'epoch': 0.96} + 96%|█████████▌| 6780/7045 [22:01:55<50:04, 11.34s/it] 96%|█████████▋| 6781/7045 [22:02:07<50:59, 11.59s/it] {'loss': 1.1001, 'learning_rate': 1.839337850774947e-08, 'epoch': 0.96} + 96%|█████████▋| 6781/7045 [22:02:07<50:59, 11.59s/it] 96%|█████████▋| 6782/7045 [22:02:19<51:43, 11.80s/it] {'loss': 1.0947, 'learning_rate': 1.825446814374604e-08, 'epoch': 0.96} + 96%|█████████▋| 6782/7045 [22:02:19<51:43, 11.80s/it] 96%|█████████▋| 6783/7045 [22:02:33<54:17, 12.43s/it] {'loss': 1.0581, 'learning_rate': 1.811608238681928e-08, 'epoch': 0.96} + 96%|█████████▋| 6783/7045 [22:02:33<54:17, 12.43s/it] 96%|█████████▋| 6784/7045 [22:02:45<53:42, 12.35s/it] {'loss': 1.0625, 'learning_rate': 1.7978221266222172e-08, 'epoch': 0.96} + 96%|█████████▋| 6784/7045 [22:02:45<53:42, 12.35s/it] 96%|█████████▋| 6785/7045 [22:02:56<51:58, 11.99s/it] {'loss': 1.1035, 'learning_rate': 1.784088481109697e-08, 'epoch': 0.96} + 96%|█████████▋| 6785/7045 [22:02:56<51:58, 11.99s/it] 96%|█████████▋| 6786/7045 [22:03:09<53:01, 12.29s/it] {'loss': 1.0781, 'learning_rate': 1.770407305047461e-08, 'epoch': 0.96} + 96%|█████████▋| 6786/7045 [22:03:09<53:01, 12.29s/it] 96%|█████████▋| 6787/7045 [22:03:21<51:30, 11.98s/it] {'loss': 1.1113, 'learning_rate': 1.75677860132753e-08, 'epoch': 0.96} + 96%|█████████▋| 6787/7045 [22:03:21<51:30, 11.98s/it] 96%|█████████▋| 6788/7045 [22:03:33<52:09, 12.18s/it] {'loss': 1.0737, 'learning_rate': 1.743202372830821e-08, 'epoch': 0.96} + 96%|█████████▋| 6788/7045 [22:03:33<52:09, 12.18s/it] 96%|█████████▋| 6789/7045 [22:03:45<50:59, 11.95s/it] {'loss': 1.1328, 'learning_rate': 1.72967862242715e-08, 'epoch': 0.96} + 96%|█████████▋| 6789/7045 [22:03:45<50:59, 11.95s/it] 96%|█████████▋| 6790/7045 [22:03:56<50:09, 11.80s/it] {'loss': 1.0693, 'learning_rate': 1.7162073529752575e-08, 'epoch': 0.96} + 96%|█████████▋| 6790/7045 [22:03:56<50:09, 11.80s/it] 96%|█████████▋| 6791/7045 [22:04:09<50:50, 12.01s/it] {'loss': 1.127, 'learning_rate': 1.7027885673228105e-08, 'epoch': 0.96} + 96%|█████████▋| 6791/7045 [22:04:09<50:50, 12.01s/it] 96%|█████████▋| 6792/7045 [22:04:20<49:30, 11.74s/it] {'loss': 1.1284, 'learning_rate': 1.68942226830629e-08, 'epoch': 0.96} + 96%|█████████▋| 6792/7045 [22:04:20<49:30, 11.74s/it] 96%|█████████▋| 6793/7045 [22:04:31<48:49, 11.62s/it] {'loss': 1.0547, 'learning_rate': 1.676108458751241e-08, 'epoch': 0.96} + 96%|█████████▋| 6793/7045 [22:04:31<48:49, 11.62s/it] 96%|█████████▋| 6794/7045 [22:04:44<50:44, 12.13s/it] {'loss': 1.0527, 'learning_rate': 1.6628471414719682e-08, 'epoch': 0.96} + 96%|█████████▋| 6794/7045 [22:04:44<50:44, 12.13s/it] 96%|█████████▋| 6795/7045 [22:04:55<48:57, 11.75s/it] {'loss': 1.1045, 'learning_rate': 1.649638319271729e-08, 'epoch': 0.96} + 96%|█████████▋| 6795/7045 [22:04:55<48:57, 11.75s/it] 96%|█████████▋| 6796/7045 [22:05:06<47:57, 11.56s/it] {'loss': 1.085, 'learning_rate': 1.636481994942679e-08, 'epoch': 0.96} + 96%|█████████▋| 6796/7045 [22:05:06<47:57, 11.56s/it] 96%|█████████▋| 6797/7045 [22:05:18<47:23, 11.46s/it] {'loss': 1.1201, 'learning_rate': 1.6233781712659546e-08, 'epoch': 0.96} + 96%|█████████▋| 6797/7045 [22:05:18<47:23, 11.46s/it] 96%|█████████▋| 6798/7045 [22:05:31<49:03, 11.92s/it] {'loss': 1.0742, 'learning_rate': 1.6103268510114235e-08, 'epoch': 0.96} + 96%|█████████▋| 6798/7045 [22:05:31<49:03, 11.92s/it] 97%|█████████▋| 6799/7045 [22:05:42<48:16, 11.78s/it] {'loss': 1.0986, 'learning_rate': 1.597328036938045e-08, 'epoch': 0.97} + 97%|█████████▋| 6799/7045 [22:05:42<48:16, 11.78s/it] 97%|█████████▋| 6800/7045 [22:05:53<47:04, 11.53s/it] {'loss': 1.126, 'learning_rate': 1.5843817317935382e-08, 'epoch': 0.97} + 97%|█████████▋| 6800/7045 [22:05:53<47:04, 11.53s/it] 97%|█████████▋| 6801/7045 [22:06:04<46:37, 11.47s/it] {'loss': 1.083, 'learning_rate': 1.571487938314603e-08, 'epoch': 0.97} + 97%|█████████▋| 6801/7045 [22:06:04<46:37, 11.47s/it] 97%|█████████▋| 6802/7045 [22:06:16<46:05, 11.38s/it] {'loss': 1.1006, 'learning_rate': 1.558646659226781e-08, 'epoch': 0.97} + 97%|█████████▋| 6802/7045 [22:06:16<46:05, 11.38s/it] 97%|█████████▋| 6803/7045 [22:06:26<45:22, 11.25s/it] {'loss': 1.0977, 'learning_rate': 1.5458578972445682e-08, 'epoch': 0.97} + 97%|█████████▋| 6803/7045 [22:06:26<45:22, 11.25s/it] 97%|█████████▋| 6804/7045 [22:06:39<46:20, 11.54s/it] {'loss': 1.0942, 'learning_rate': 1.533121655071329e-08, 'epoch': 0.97} + 97%|█████████▋| 6804/7045 [22:06:39<46:20, 11.54s/it] 97%|█████████▋| 6805/7045 [22:06:50<45:48, 11.45s/it] {'loss': 1.0771, 'learning_rate': 1.5204379353993548e-08, 'epoch': 0.97} + 97%|█████████▋| 6805/7045 [22:06:50<45:48, 11.45s/it] 97%|█████████▋| 6806/7045 [22:07:01<45:33, 11.44s/it] {'loss': 1.1025, 'learning_rate': 1.5078067409097507e-08, 'epoch': 0.97} + 97%|█████████▋| 6806/7045 [22:07:01<45:33, 11.44s/it] 97%|█████████▋| 6807/7045 [22:07:12<44:49, 11.30s/it] {'loss': 1.1094, 'learning_rate': 1.495228074272603e-08, 'epoch': 0.97} + 97%|█████████▋| 6807/7045 [22:07:12<44:49, 11.30s/it] 97%|█████████▋| 6808/7045 [22:07:26<47:26, 12.01s/it] {'loss': 1.082, 'learning_rate': 1.4827019381468966e-08, 'epoch': 0.97} + 97%|█████████▋| 6808/7045 [22:07:26<47:26, 12.01s/it] 97%|█████████▋| 6809/7045 [22:07:39<49:00, 12.46s/it] {'loss': 1.0767, 'learning_rate': 1.4702283351804569e-08, 'epoch': 0.97} + 97%|█████████▋| 6809/7045 [22:07:39<49:00, 12.46s/it] 97%|█████████▋| 6810/7045 [22:07:52<48:51, 12.48s/it] {'loss': 1.0713, 'learning_rate': 1.4578072680100642e-08, 'epoch': 0.97} + 97%|█████████▋| 6810/7045 [22:07:52<48:51, 12.48s/it] 97%|█████████▋| 6811/7045 [22:08:05<48:55, 12.55s/it] {'loss': 1.1104, 'learning_rate': 1.44543873926134e-08, 'epoch': 0.97} + 97%|█████████▋| 6811/7045 [22:08:05<48:55, 12.55s/it] 97%|█████████▋| 6812/7045 [22:08:18<49:19, 12.70s/it] {'loss': 1.1172, 'learning_rate': 1.4331227515488045e-08, 'epoch': 0.97} + 97%|█████████▋| 6812/7045 [22:08:18<49:19, 12.70s/it] 97%|█████████▋| 6813/7045 [22:08:29<47:19, 12.24s/it] {'loss': 1.1064, 'learning_rate': 1.4208593074759303e-08, 'epoch': 0.97} + 97%|█████████▋| 6813/7045 [22:08:29<47:19, 12.24s/it] 97%|█████████▋| 6814/7045 [22:08:41<47:17, 12.29s/it] {'loss': 1.1611, 'learning_rate': 1.408648409635005e-08, 'epoch': 0.97} + 97%|█████████▋| 6814/7045 [22:08:41<47:17, 12.29s/it] 97%|█████████▋| 6815/7045 [22:08:54<47:31, 12.40s/it] {'loss': 1.0732, 'learning_rate': 1.3964900606072696e-08, 'epoch': 0.97} + 97%|█████████▋| 6815/7045 [22:08:54<47:31, 12.40s/it] 97%|█████████▋| 6816/7045 [22:09:07<47:59, 12.57s/it] {'loss': 1.1001, 'learning_rate': 1.3843842629628346e-08, 'epoch': 0.97} + 97%|█████████▋| 6816/7045 [22:09:07<47:59, 12.57s/it] 97%|█████████▋| 6817/7045 [22:09:18<46:17, 12.18s/it] {'loss': 1.1006, 'learning_rate': 1.3723310192606532e-08, 'epoch': 0.97} + 97%|█████████▋| 6817/7045 [22:09:18<46:17, 12.18s/it] 97%|█████████▋| 6818/7045 [22:09:29<44:45, 11.83s/it] {'loss': 1.1113, 'learning_rate': 1.3603303320486872e-08, 'epoch': 0.97} + 97%|█████████▋| 6818/7045 [22:09:29<44:45, 11.83s/it] 97%|█████████▋| 6819/7045 [22:09:40<43:39, 11.59s/it] {'loss': 1.1182, 'learning_rate': 1.3483822038636574e-08, 'epoch': 0.97} + 97%|█████████▋| 6819/7045 [22:09:40<43:39, 11.59s/it] 97%|█████████▋| 6820/7045 [22:09:51<42:54, 11.44s/it] {'loss': 1.0972, 'learning_rate': 1.3364866372312657e-08, 'epoch': 0.97} + 97%|█████████▋| 6820/7045 [22:09:51<42:54, 11.44s/it] 97%|█████████▋| 6821/7045 [22:10:03<43:21, 11.61s/it] {'loss': 1.103, 'learning_rate': 1.324643634666084e-08, 'epoch': 0.97} + 97%|█████████▋| 6821/7045 [22:10:03<43:21, 11.61s/it] 97%|█████████▋| 6822/7045 [22:10:14<42:26, 11.42s/it] {'loss': 1.127, 'learning_rate': 1.3128531986715265e-08, 'epoch': 0.97} + 97%|█████████▋| 6822/7045 [22:10:14<42:26, 11.42s/it] 97%|█████████▋| 6823/7045 [22:10:26<42:52, 11.59s/it] {'loss': 1.1055, 'learning_rate': 1.3011153317399606e-08, 'epoch': 0.97} + 97%|█████████▋| 6823/7045 [22:10:26<42:52, 11.59s/it] 97%|█████████▋| 6824/7045 [22:10:39<44:05, 11.97s/it] {'loss': 1.0649, 'learning_rate': 1.2894300363526236e-08, 'epoch': 0.97} + 97%|█████████▋| 6824/7045 [22:10:39<44:05, 11.97s/it] 97%|█████████▋| 6825/7045 [22:10:51<44:02, 12.01s/it] {'loss': 1.1133, 'learning_rate': 1.2777973149795676e-08, 'epoch': 0.97} + 97%|█████████▋| 6825/7045 [22:10:51<44:02, 12.01s/it] 97%|█████████▋| 6826/7045 [22:11:03<43:03, 11.80s/it] {'loss': 1.1377, 'learning_rate': 1.2662171700798254e-08, 'epoch': 0.97} + 97%|█████████▋| 6826/7045 [22:11:03<43:03, 11.80s/it] 97%|█████████▋| 6827/7045 [22:11:14<42:18, 11.65s/it] {'loss': 1.1064, 'learning_rate': 1.2546896041013001e-08, 'epoch': 0.97} + 97%|█████████▋| 6827/7045 [22:11:14<42:18, 11.65s/it] 97%|█████████▋| 6828/7045 [22:11:25<41:45, 11.55s/it] {'loss': 1.1016, 'learning_rate': 1.2432146194807093e-08, 'epoch': 0.97} + 97%|█████████▋| 6828/7045 [22:11:25<41:45, 11.55s/it] 97%|█████████▋| 6829/7045 [22:11:37<41:23, 11.50s/it] {'loss': 1.1348, 'learning_rate': 1.2317922186437791e-08, 'epoch': 0.97} + 97%|█████████▋| 6829/7045 [22:11:37<41:23, 11.50s/it] 97%|█████████▋| 6830/7045 [22:11:48<40:40, 11.35s/it] {'loss': 1.126, 'learning_rate': 1.220422404004995e-08, 'epoch': 0.97} + 97%|█████████▋| 6830/7045 [22:11:48<40:40, 11.35s/it] 97%|█████████▋| 6831/7045 [22:11:59<40:15, 11.29s/it] {'loss': 1.1455, 'learning_rate': 1.2091051779677953e-08, 'epoch': 0.97} + 97%|█████████▋| 6831/7045 [22:11:59<40:15, 11.29s/it] 97%|█████████▋| 6832/7045 [22:12:10<39:33, 11.14s/it] {'loss': 1.0791, 'learning_rate': 1.1978405429244888e-08, 'epoch': 0.97} + 97%|█████████▋| 6832/7045 [22:12:10<39:33, 11.14s/it] 97%|█████████▋| 6833/7045 [22:12:22<40:18, 11.41s/it] {'loss': 1.1167, 'learning_rate': 1.186628501256254e-08, 'epoch': 0.97} + 97%|█████████▋| 6833/7045 [22:12:22<40:18, 11.41s/it] 97%|█████████▋| 6834/7045 [22:12:33<39:50, 11.33s/it] {'loss': 1.0693, 'learning_rate': 1.1754690553331672e-08, 'epoch': 0.97} + 97%|█████████▋| 6834/7045 [22:12:33<39:50, 11.33s/it] 97%|█████████▋| 6835/7045 [22:12:46<41:45, 11.93s/it] {'loss': 1.0596, 'learning_rate': 1.1643622075142303e-08, 'epoch': 0.97} + 97%|█████████▋| 6835/7045 [22:12:46<41:45, 11.93s/it] 97%|█████████▋| 6836/7045 [22:12:58<41:06, 11.80s/it] {'loss': 1.1348, 'learning_rate': 1.1533079601472041e-08, 'epoch': 0.97} + 97%|█████████▋| 6836/7045 [22:12:58<41:06, 11.80s/it] 97%|█████████▋| 6837/7045 [22:13:09<40:45, 11.76s/it] {'loss': 1.1895, 'learning_rate': 1.1423063155688586e-08, 'epoch': 0.97} + 97%|█████████▋| 6837/7045 [22:13:09<40:45, 11.76s/it] 97%|█████████▋| 6838/7045 [22:13:21<40:36, 11.77s/it] {'loss': 1.085, 'learning_rate': 1.1313572761047498e-08, 'epoch': 0.97} + 97%|█████████▋| 6838/7045 [22:13:21<40:36, 11.77s/it] 97%|█████████▋| 6839/7045 [22:13:35<42:28, 12.37s/it] {'loss': 1.1426, 'learning_rate': 1.1204608440693876e-08, 'epoch': 0.97} + 97%|█████████▋| 6839/7045 [22:13:35<42:28, 12.37s/it] 97%|█████████▋| 6840/7045 [22:13:46<41:34, 12.17s/it] {'loss': 1.0742, 'learning_rate': 1.109617021766124e-08, 'epoch': 0.97} + 97%|█████████▋| 6840/7045 [22:13:46<41:34, 12.17s/it] 97%|█████████▋| 6841/7045 [22:13:58<40:16, 11.84s/it] {'loss': 1.0889, 'learning_rate': 1.0988258114871808e-08, 'epoch': 0.97} + 97%|█████████▋| 6841/7045 [22:13:58<40:16, 11.84s/it] 97%|█████████▋| 6842/7045 [22:14:09<39:34, 11.70s/it] {'loss': 1.1309, 'learning_rate': 1.088087215513678e-08, 'epoch': 0.97} + 97%|█████████▋| 6842/7045 [22:14:09<39:34, 11.70s/it] 97%|█████████▋| 6843/7045 [22:14:20<38:45, 11.51s/it] {'loss': 1.1172, 'learning_rate': 1.0774012361156328e-08, 'epoch': 0.97} + 97%|█████████▋| 6843/7045 [22:14:20<38:45, 11.51s/it] 97%|█████████▋| 6844/7045 [22:14:31<38:12, 11.41s/it] {'loss': 1.1328, 'learning_rate': 1.0667678755518495e-08, 'epoch': 0.97} + 97%|█████████▋| 6844/7045 [22:14:31<38:12, 11.41s/it] 97%|█████████▋| 6845/7045 [22:14:42<37:55, 11.38s/it] {'loss': 1.063, 'learning_rate': 1.0561871360701414e-08, 'epoch': 0.97} + 97%|█████████▋| 6845/7045 [22:14:42<37:55, 11.38s/it] 97%|█████████▋| 6846/7045 [22:14:55<38:26, 11.59s/it] {'loss': 1.1172, 'learning_rate': 1.045659019907136e-08, 'epoch': 0.97} + 97%|█████████▋| 6846/7045 [22:14:55<38:26, 11.59s/it] 97%|█████████▋| 6847/7045 [22:15:06<38:07, 11.55s/it] {'loss': 1.0898, 'learning_rate': 1.035183529288275e-08, 'epoch': 0.97} + 97%|█████████▋| 6847/7045 [22:15:06<38:07, 11.55s/it] 97%|█████████▋| 6848/7045 [22:15:19<39:37, 12.07s/it] {'loss': 1.0967, 'learning_rate': 1.0247606664279819e-08, 'epoch': 0.97} + 97%|█████████▋| 6848/7045 [22:15:19<39:37, 12.07s/it] 97%|█████████▋| 6849/7045 [22:15:30<38:09, 11.68s/it] {'loss': 1.1084, 'learning_rate': 1.0143904335294662e-08, 'epoch': 0.97} + 97%|█████████▋| 6849/7045 [22:15:30<38:09, 11.68s/it] 97%|█████████▋| 6850/7045 [22:15:42<37:49, 11.64s/it] {'loss': 1.0913, 'learning_rate': 1.0040728327848914e-08, 'epoch': 0.97} + 97%|█████████▋| 6850/7045 [22:15:42<37:49, 11.64s/it] 97%|█████████▋| 6851/7045 [22:15:53<37:39, 11.65s/it] {'loss': 1.1123, 'learning_rate': 9.938078663752626e-09, 'epoch': 0.97} + 97%|█████████▋| 6851/7045 [22:15:53<37:39, 11.65s/it] 97%|█████████▋| 6852/7045 [22:16:05<37:56, 11.79s/it] {'loss': 1.1084, 'learning_rate': 9.835955364703997e-09, 'epoch': 0.97} + 97%|█████████▋| 6852/7045 [22:16:05<37:56, 11.79s/it] 97%|█████████▋| 6853/7045 [22:16:17<37:26, 11.70s/it] {'loss': 1.125, 'learning_rate': 9.734358452291038e-09, 'epoch': 0.97} + 97%|█████████▋| 6853/7045 [22:16:17<37:26, 11.70s/it] 97%|█████████▋| 6854/7045 [22:16:28<37:06, 11.65s/it] {'loss': 1.0972, 'learning_rate': 9.6332879479899e-09, 'epoch': 0.97} + 97%|█████████▋| 6854/7045 [22:16:28<37:06, 11.65s/it] 97%|█████████▋| 6855/7045 [22:16:42<38:25, 12.13s/it] {'loss': 1.1504, 'learning_rate': 9.532743873165162e-09, 'epoch': 0.97} + 97%|█████████▋| 6855/7045 [22:16:42<38:25, 12.13s/it] 97%|█████████▋| 6856/7045 [22:16:53<37:14, 11.82s/it] {'loss': 1.0972, 'learning_rate': 9.432726249070656e-09, 'epoch': 0.97} + 97%|█████████▋| 6856/7045 [22:16:53<37:14, 11.82s/it] 97%|█████████▋| 6857/7045 [22:17:04<36:45, 11.73s/it] {'loss': 1.1128, 'learning_rate': 9.333235096848636e-09, 'epoch': 0.97} + 97%|█████████▋| 6857/7045 [22:17:04<36:45, 11.73s/it] 97%|█████████▋| 6858/7045 [22:17:15<35:57, 11.54s/it] {'loss': 1.1094, 'learning_rate': 9.234270437530335e-09, 'epoch': 0.97} + 97%|██���██████▋| 6858/7045 [22:17:15<35:57, 11.54s/it] 97%|█████████▋| 6859/7045 [22:17:28<36:45, 11.86s/it] {'loss': 1.0732, 'learning_rate': 9.135832292035684e-09, 'epoch': 0.97} + 97%|█████████▋| 6859/7045 [22:17:28<36:45, 11.86s/it] 97%|█████████▋| 6860/7045 [22:17:39<36:07, 11.72s/it] {'loss': 1.126, 'learning_rate': 9.037920681172763e-09, 'epoch': 0.97} + 97%|█████████▋| 6860/7045 [22:17:39<36:07, 11.72s/it] 97%|█████████▋| 6861/7045 [22:17:51<35:29, 11.57s/it] {'loss': 1.1064, 'learning_rate': 8.940535625639179e-09, 'epoch': 0.97} + 97%|█████████▋| 6861/7045 [22:17:51<35:29, 11.57s/it] 97%|█████████▋| 6862/7045 [22:18:02<34:58, 11.47s/it] {'loss': 1.0732, 'learning_rate': 8.843677146020413e-09, 'epoch': 0.97} + 97%|█████████▋| 6862/7045 [22:18:02<34:58, 11.47s/it] 97%|█████████▋| 6863/7045 [22:18:13<34:28, 11.37s/it] {'loss': 1.1045, 'learning_rate': 8.747345262790918e-09, 'epoch': 0.97} + 97%|█████████▋| 6863/7045 [22:18:13<34:28, 11.37s/it] 97%|█████████▋| 6864/7045 [22:18:24<34:23, 11.40s/it] {'loss': 1.1201, 'learning_rate': 8.651539996314685e-09, 'epoch': 0.97} + 97%|█████████▋| 6864/7045 [22:18:24<34:23, 11.40s/it] 97%|█████████▋| 6865/7045 [22:18:37<35:06, 11.70s/it] {'loss': 1.1143, 'learning_rate': 8.556261366842733e-09, 'epoch': 0.97} + 97%|█████████▋| 6865/7045 [22:18:37<35:06, 11.70s/it] 97%|█████████▋| 6866/7045 [22:18:49<35:26, 11.88s/it] {'loss': 1.1035, 'learning_rate': 8.461509394516177e-09, 'epoch': 0.97} + 97%|█████████▋| 6866/7045 [22:18:49<35:26, 11.88s/it] 97%|█████████▋| 6867/7045 [22:19:00<34:26, 11.61s/it] {'loss': 1.1504, 'learning_rate': 8.36728409936427e-09, 'epoch': 0.97} + 97%|█████████▋| 6867/7045 [22:19:00<34:26, 11.61s/it] 97%|█████████▋| 6868/7045 [22:19:11<33:50, 11.47s/it] {'loss': 1.0801, 'learning_rate': 8.27358550130497e-09, 'epoch': 0.97} + 97%|█████████▋| 6868/7045 [22:19:11<33:50, 11.47s/it] 98%|█████████▊| 6869/7045 [22:19:24<34:47, 11.86s/it] {'loss': 1.1104, 'learning_rate': 8.180413620144378e-09, 'epoch': 0.98} + 98%|█████████▊| 6869/7045 [22:19:24<34:47, 11.86s/it] 98%|█████████▊| 6870/7045 [22:19:35<33:42, 11.56s/it] {'loss': 1.127, 'learning_rate': 8.087768475578684e-09, 'epoch': 0.98} + 98%|█████████▊| 6870/7045 [22:19:35<33:42, 11.56s/it] 98%|█████████▊| 6871/7045 [22:19:46<33:04, 11.41s/it] {'loss': 1.0928, 'learning_rate': 7.995650087190832e-09, 'epoch': 0.98} + 98%|█████████▊| 6871/7045 [22:19:46<33:04, 11.41s/it] 98%|█████████▊| 6872/7045 [22:19:57<32:21, 11.22s/it] {'loss': 1.1504, 'learning_rate': 7.904058474454134e-09, 'epoch': 0.98} + 98%|█████████▊| 6872/7045 [22:19:57<32:21, 11.22s/it] 98%|█████████▊| 6873/7045 [22:20:10<33:43, 11.76s/it] {'loss': 1.0986, 'learning_rate': 7.812993656729218e-09, 'epoch': 0.98} + 98%|█████████▊| 6873/7045 [22:20:10<33:43, 11.76s/it] 98%|█████████▊| 6874/7045 [22:20:22<33:34, 11.78s/it] {'loss': 1.0693, 'learning_rate': 7.722455653266515e-09, 'epoch': 0.98} + 98%|█████████▊| 6874/7045 [22:20:22<33:34, 11.78s/it] 98%|█████████▊| 6875/7045 [22:20:33<32:52, 11.61s/it] {'loss': 1.1323, 'learning_rate': 7.63244448320405e-09, 'epoch': 0.98} + 98%|█████████▊| 6875/7045 [22:20:33<32:52, 11.61s/it] 98%|█████████▊| 6876/7045 [22:20:44<32:17, 11.46s/it] {'loss': 1.0645, 'learning_rate': 7.54296016556938e-09, 'epoch': 0.98} + 98%|█████████▊| 6876/7045 [22:20:44<32:17, 11.46s/it] 98%|█████████▊| 6877/7045 [22:20:55<31:51, 11.38s/it] {'loss': 1.1113, 'learning_rate': 7.454002719277654e-09, 'epoch': 0.98} + 98%|█████████▊| 6877/7045 [22:20:55<31:51, 11.38s/it] 98%|█████████▊| 6878/7045 [22:21:06<31:35, 11.35s/it] {'loss': 1.1201, 'learning_rate': 7.365572163133827e-09, 'epoch': 0.98} + 98%|█████████▊| 6878/7045 [22:21:06<31:35, 11.35s/it] 98%|█████████▊| 6879/7045 [22:21:19<32:32, 11.76s/it] {'loss': 1.1113, 'learning_rate': 7.277668515830727e-09, 'epoch': 0.98} + 98%|█████████▊| 6879/7045 [22:21:19<32:32, 11.76s/it] 98%|█████████▊| 6880/7045 [22:21:30<31:49, 11.57s/it] {'loss': 1.1094, 'learning_rate': 7.190291795949878e-09, 'epoch': 0.98} + 98%|█████████▊| 6880/7045 [22:21:30<31:49, 11.57s/it] 98%|█████████▊| 6881/7045 [22:21:42<31:30, 11.53s/it] {'loss': 1.1523, 'learning_rate': 7.103442021961504e-09, 'epoch': 0.98} + 98%|█████████▊| 6881/7045 [22:21:42<31:30, 11.53s/it] 98%|█████████▊| 6882/7045 [22:21:55<33:09, 12.21s/it] {'loss': 1.0913, 'learning_rate': 7.01711921222481e-09, 'epoch': 0.98} + 98%|█████████▊| 6882/7045 [22:21:55<33:09, 12.21s/it] 98%|█████████▊| 6883/7045 [22:22:07<32:17, 11.96s/it] {'loss': 1.1182, 'learning_rate': 6.931323384986865e-09, 'epoch': 0.98} + 98%|█████████▊| 6883/7045 [22:22:07<32:17, 11.96s/it] 98%|█████████▊| 6884/7045 [22:22:19<32:33, 12.13s/it] {'loss': 1.0708, 'learning_rate': 6.846054558383996e-09, 'epoch': 0.98} + 98%|█████████▊| 6884/7045 [22:22:19<32:33, 12.13s/it] 98%|█████████▊| 6885/7045 [22:22:31<32:07, 12.04s/it] {'loss': 1.1592, 'learning_rate': 6.7613127504406736e-09, 'epoch': 0.98} + 98%|█████████▊| 6885/7045 [22:22:31<32:07, 12.04s/it] 98%|█████████▊| 6886/7045 [22:22:43<31:50, 12.01s/it] {'loss': 1.0898, 'learning_rate': 6.677097979070346e-09, 'epoch': 0.98} + 98%|█████████▊| 6886/7045 [22:22:43<31:50, 12.01s/it] 98%|█████████▊| 6887/7045 [22:22:54<31:04, 11.80s/it] {'loss': 1.1201, 'learning_rate': 6.593410262074884e-09, 'epoch': 0.98} + 98%|█████████▊| 6887/7045 [22:22:54<31:04, 11.80s/it] 98%|█████████▊| 6888/7045 [22:23:06<30:33, 11.68s/it] {'loss': 1.1782, 'learning_rate': 6.510249617144304e-09, 'epoch': 0.98} + 98%|█████████▊| 6888/7045 [22:23:06<30:33, 11.68s/it] 98%|█████████▊| 6889/7045 [22:23:17<29:52, 11.49s/it] {'loss': 1.0957, 'learning_rate': 6.427616061858155e-09, 'epoch': 0.98} + 98%|█████████▊| 6889/7045 [22:23:17<29:52, 11.49s/it] 98%|█████████▊| 6890/7045 [22:23:30<30:50, 11.94s/it] {'loss': 1.1328, 'learning_rate': 6.345509613683576e-09, 'epoch': 0.98} + 98%|█████████▊| 6890/7045 [22:23:30<30:50, 11.94s/it] 98%|█████████▊| 6891/7045 [22:23:43<31:47, 12.38s/it] {'loss': 1.1094, 'learning_rate': 6.263930289976961e-09, 'epoch': 0.98} + 98%|█████████▊| 6891/7045 [22:23:43<31:47, 12.38s/it] 98%|█████████▊| 6892/7045 [22:23:55<31:10, 12.23s/it] {'loss': 1.1289, 'learning_rate': 6.182878107983404e-09, 'epoch': 0.98} + 98%|█████████▊| 6892/7045 [22:23:55<31:10, 12.23s/it] 98%|█████████▊| 6893/7045 [22:24:07<30:46, 12.15s/it] {'loss': 1.0791, 'learning_rate': 6.102353084835588e-09, 'epoch': 0.98} + 98%|█████████▊| 6893/7045 [22:24:07<30:46, 12.15s/it] 98%|█████████▊| 6894/7045 [22:24:20<31:07, 12.37s/it] {'loss': 1.0894, 'learning_rate': 6.02235523755601e-09, 'epoch': 0.98} + 98%|█████████▊| 6894/7045 [22:24:20<31:07, 12.37s/it] 98%|█████████▊| 6895/7045 [22:24:32<30:59, 12.40s/it] {'loss': 1.0557, 'learning_rate': 5.942884583054753e-09, 'epoch': 0.98} + 98%|█████████▊| 6895/7045 [22:24:32<30:59, 12.40s/it] 98%|█████████▊| 6896/7045 [22:24:43<29:45, 11.98s/it] {'loss': 1.0786, 'learning_rate': 5.8639411381308796e-09, 'epoch': 0.98} + 98%|█████████▊| 6896/7045 [22:24:43<29:45, 11.98s/it] 98%|█████████▊| 6897/7045 [22:24:55<28:58, 11.75s/it] {'loss': 1.127, 'learning_rate': 5.785524919471874e-09, 'epoch': 0.98} + 98%|█████████▊| 6897/7045 [22:24:55<28:58, 11.75s/it] 98%|█████████▊| 6898/7045 [22:25:06<28:25, 11.60s/it] {'loss': 1.167, 'learning_rate': 5.7076359436542e-09, 'epoch': 0.98} + 98%|█████████▊| 6898/7045 [22:25:06<28:25, 11.60s/it] 98%|█████████▊| 6899/7045 [22:25:17<28:09, 11.57s/it] {'loss': 1.1084, 'learning_rate': 5.630274227141907e-09, 'epoch': 0.98} + 98%|█████████▊| 6899/7045 [22:25:17<28:09, 11.57s/it] 98%|█████████▊| 6900/7045 [22:25:28<27:33, 11.40s/it] {'loss': 1.0742, 'learning_rate': 5.5534397862888615e-09, 'epoch': 0.98} + 98%|█████████▊| 6900/7045 [22:25:28<27:33, 11.40s/it] 98%|█████████▊| 6901/7045 [22:25:41<28:00, 11.67s/it] {'loss': 1.1255, 'learning_rate': 5.4771326373367905e-09, 'epoch': 0.98} + 98%|█████████▊| 6901/7045 [22:25:41<28:00, 11.67s/it] 98%|█████████▊| 6902/7045 [22:25:52<27:11, 11.41s/it] {'loss': 1.0996, 'learning_rate': 5.4013527964152935e-09, 'epoch': 0.98} + 98%|█████████▊| 6902/7045 [22:25:52<27:11, 11.41s/it] 98%|█████████▊| 6903/7045 [22:26:02<26:35, 11.23s/it] {'loss': 1.1318, 'learning_rate': 5.326100279543778e-09, 'epoch': 0.98} + 98%|█████████▊| 6903/7045 [22:26:02<26:35, 11.23s/it] 98%|█████████▊| 6904/7045 [22:26:13<26:13, 11.16s/it] {'loss': 1.1055, 'learning_rate': 5.251375102629519e-09, 'epoch': 0.98} + 98%|█████████▊| 6904/7045 [22:26:13<26:13, 11.16s/it] 98%|█████████▊| 6905/7045 [22:26:24<25:52, 11.09s/it] {'loss': 1.1748, 'learning_rate': 5.177177281468215e-09, 'epoch': 0.98} + 98%|█████████▊| 6905/7045 [22:26:24<25:52, 11.09s/it] 98%|█████████▊| 6906/7045 [22:26:35<25:39, 11.07s/it] {'loss': 1.126, 'learning_rate': 5.103506831744265e-09, 'epoch': 0.98} + 98%|█████████▊| 6906/7045 [22:26:35<25:39, 11.07s/it] 98%|█████████▊| 6907/7045 [22:26:48<26:29, 11.52s/it] {'loss': 1.1104, 'learning_rate': 5.030363769031044e-09, 'epoch': 0.98} + 98%|█████████▊| 6907/7045 [22:26:48<26:29, 11.52s/it] 98%|█████████▊| 6908/7045 [22:26:59<26:02, 11.40s/it] {'loss': 1.1128, 'learning_rate': 4.957748108789517e-09, 'epoch': 0.98} + 98%|█████████▊| 6908/7045 [22:26:59<26:02, 11.40s/it] 98%|█████████▊| 6909/7045 [22:27:10<25:28, 11.24s/it] {'loss': 1.0698, 'learning_rate': 4.8856598663696295e-09, 'epoch': 0.98} + 98%|█████████▊| 6909/7045 [22:27:10<25:28, 11.24s/it] 98%|█████████▊| 6910/7045 [22:27:21<25:17, 11.24s/it] {'loss': 1.0928, 'learning_rate': 4.814099057010302e-09, 'epoch': 0.98} + 98%|█████████▊| 6910/7045 [22:27:21<25:17, 11.24s/it] 98%|█████████▊| 6911/7045 [22:27:32<25:01, 11.21s/it] {'loss': 1.1484, 'learning_rate': 4.743065695838045e-09, 'epoch': 0.98} + 98%|█████████▊| 6911/7045 [22:27:32<25:01, 11.21s/it] 98%|█████████▊| 6912/7045 [22:27:44<25:02, 11.29s/it] {'loss': 1.1108, 'learning_rate': 4.672559797868348e-09, 'epoch': 0.98} + 98%|█████████▊| 6912/7045 [22:27:44<25:02, 11.29s/it] 98%|█████████▊| 6913/7045 [22:27:56<25:44, 11.70s/it] {'loss': 1.0771, 'learning_rate': 4.6025813780056775e-09, 'epoch': 0.98} + 98%|█████████���| 6913/7045 [22:27:56<25:44, 11.70s/it] 98%|█████████▊| 6914/7045 [22:28:08<25:10, 11.53s/it] {'loss': 1.1299, 'learning_rate': 4.533130451042089e-09, 'epoch': 0.98} + 98%|█████████▊| 6914/7045 [22:28:08<25:10, 11.53s/it] 98%|█████████▊| 6915/7045 [22:28:19<24:48, 11.45s/it] {'loss': 1.106, 'learning_rate': 4.464207031658618e-09, 'epoch': 0.98} + 98%|█████████▊| 6915/7045 [22:28:19<24:48, 11.45s/it] 98%|█████████▊| 6916/7045 [22:28:30<24:34, 11.43s/it] {'loss': 1.123, 'learning_rate': 4.395811134424721e-09, 'epoch': 0.98} + 98%|█████████▊| 6916/7045 [22:28:30<24:34, 11.43s/it] 98%|█████████▊| 6917/7045 [22:28:43<25:26, 11.93s/it] {'loss': 1.0908, 'learning_rate': 4.327942773798554e-09, 'epoch': 0.98} + 98%|█████████▊| 6917/7045 [22:28:43<25:26, 11.93s/it] 98%|█████████▊| 6918/7045 [22:28:54<24:45, 11.70s/it] {'loss': 1.1191, 'learning_rate': 4.260601964126421e-09, 'epoch': 0.98} + 98%|█████████▊| 6918/7045 [22:28:54<24:45, 11.70s/it] 98%|█████████▊| 6919/7045 [22:29:06<24:16, 11.56s/it] {'loss': 1.0938, 'learning_rate': 4.193788719643044e-09, 'epoch': 0.98} + 98%|█████████▊| 6919/7045 [22:29:06<24:16, 11.56s/it] 98%|█████████▊| 6920/7045 [22:29:17<24:09, 11.60s/it] {'loss': 1.1221, 'learning_rate': 4.1275030544721266e-09, 'epoch': 0.98} + 98%|█████████▊| 6920/7045 [22:29:17<24:09, 11.60s/it] 98%|█████████▊| 6921/7045 [22:29:28<23:39, 11.45s/it] {'loss': 1.0376, 'learning_rate': 4.0617449826255154e-09, 'epoch': 0.98} + 98%|█████████▊| 6921/7045 [22:29:28<23:39, 11.45s/it] 98%|█████████▊| 6922/7045 [22:29:40<23:41, 11.55s/it] {'loss': 1.106, 'learning_rate': 3.996514518003758e-09, 'epoch': 0.98} + 98%|█████████▊| 6922/7045 [22:29:40<23:41, 11.55s/it] 98%|█████████▊| 6923/7045 [22:29:52<23:22, 11.49s/it] {'loss': 1.1152, 'learning_rate': 3.931811674394992e-09, 'epoch': 0.98} + 98%|█████████▊| 6923/7045 [22:29:52<23:22, 11.49s/it] 98%|█████████▊| 6924/7045 [22:30:03<23:04, 11.44s/it] {'loss': 1.0908, 'learning_rate': 3.867636465477165e-09, 'epoch': 0.98} + 98%|█████████▊| 6924/7045 [22:30:03<23:04, 11.44s/it] 98%|█████████▊| 6925/7045 [22:30:14<22:47, 11.40s/it] {'loss': 1.1421, 'learning_rate': 3.8039889048160915e-09, 'epoch': 0.98} + 98%|█████████▊| 6925/7045 [22:30:14<22:47, 11.40s/it] 98%|█████████▊| 6926/7045 [22:30:25<22:23, 11.29s/it] {'loss': 1.1084, 'learning_rate': 3.740869005865455e-09, 'epoch': 0.98} + 98%|█████████▊| 6926/7045 [22:30:25<22:23, 11.29s/it] 98%|█████████▊| 6927/7045 [22:30:38<23:09, 11.77s/it] {'loss': 1.0698, 'learning_rate': 3.678276781968193e-09, 'epoch': 0.98} + 98%|█████████▊| 6927/7045 [22:30:38<23:09, 11.77s/it] 98%|█████████▊| 6928/7045 [22:30:50<22:43, 11.65s/it] {'loss': 1.123, 'learning_rate': 3.616212246355388e-09, 'epoch': 0.98} + 98%|█████████▊| 6928/7045 [22:30:50<22:43, 11.65s/it] 98%|█████████▊| 6929/7045 [22:31:01<22:24, 11.59s/it] {'loss': 1.0791, 'learning_rate': 3.5546754121471015e-09, 'epoch': 0.98} + 98%|█████████▊| 6929/7045 [22:31:01<22:24, 11.59s/it] 98%|█████████▊| 6930/7045 [22:31:14<23:11, 12.10s/it] {'loss': 1.106, 'learning_rate': 3.493666292350706e-09, 'epoch': 0.98} + 98%|█████████▊| 6930/7045 [22:31:14<23:11, 12.10s/it] 98%|█████████▊| 6931/7045 [22:31:25<22:29, 11.84s/it] {'loss': 1.1162, 'learning_rate': 3.433184899863107e-09, 'epoch': 0.98} + 98%|█████████▊| 6931/7045 [22:31:25<22:29, 11.84s/it] 98%|█████████▊| 6932/7045 [22:31:37<22:11, 11.78s/it] {'loss': 1.0723, 'learning_rate': 3.3732312474688e-09, 'epoch': 0.98} + 98%|█████████▊| 6932/7045 [22:31:37<22:11, 11.78s/it] 98%|█████████▊| 6933/7045 [22:31:48<21:33, 11.55s/it] {'loss': 1.0884, 'learning_rate': 3.313805347841814e-09, 'epoch': 0.98} + 98%|█████████▊| 6933/7045 [22:31:48<21:33, 11.55s/it] 98%|█████████▊| 6934/7045 [22:31:59<21:06, 11.41s/it] {'loss': 1.1396, 'learning_rate': 3.2549072135437675e-09, 'epoch': 0.98} + 98%|█████████▊| 6934/7045 [22:31:59<21:06, 11.41s/it] 98%|█████████▊| 6935/7045 [22:32:12<21:26, 11.70s/it] {'loss': 1.0664, 'learning_rate': 3.196536857024701e-09, 'epoch': 0.98} + 98%|█████████▊| 6935/7045 [22:32:12<21:26, 11.70s/it] 98%|█████████▊| 6936/7045 [22:32:23<20:57, 11.54s/it] {'loss': 1.1201, 'learning_rate': 3.138694290623356e-09, 'epoch': 0.98} + 98%|█████████▊| 6936/7045 [22:32:23<20:57, 11.54s/it] 98%|█████████▊| 6937/7045 [22:32:34<20:30, 11.39s/it] {'loss': 1.1079, 'learning_rate': 3.0813795265668966e-09, 'epoch': 0.98} + 98%|█████████▊| 6937/7045 [22:32:34<20:30, 11.39s/it] 98%|█████████▊| 6938/7045 [22:32:45<20:13, 11.35s/it] {'loss': 1.1299, 'learning_rate': 3.0245925769709086e-09, 'epoch': 0.98} + 98%|█████████▊| 6938/7045 [22:32:45<20:13, 11.35s/it] 98%|█████████▊| 6939/7045 [22:32:58<20:42, 11.72s/it] {'loss': 1.0547, 'learning_rate': 2.9683334538394003e-09, 'epoch': 0.98} + 98%|█████████▊| 6939/7045 [22:32:58<20:42, 11.72s/it] 99%|█████████▊| 6940/7045 [22:33:09<20:12, 11.55s/it] {'loss': 1.1533, 'learning_rate': 2.9126021690645266e-09, 'epoch': 0.99} + 99%|█████████▊| 6940/7045 [22:33:09<20:12, 11.55s/it] 99%|█████████▊| 6941/7045 [22:33:21<20:14, 11.68s/it] {'loss': 1.0967, 'learning_rate': 2.8573987344276964e-09, 'epoch': 0.99} + 99%|█████████▊| 6941/7045 [22:33:21<20:14, 11.68s/it] 99%|█████████▊| 6942/7045 [22:33:32<19:52, 11.58s/it] {'loss': 1.1201, 'learning_rate': 2.8027231615976316e-09, 'epoch': 0.99} + 99%|█████████▊| 6942/7045 [22:33:32<19:52, 11.58s/it] 99%|█████████▊| 6943/7045 [22:33:44<19:50, 11.68s/it] {'loss': 1.0811, 'learning_rate': 2.7485754621320304e-09, 'epoch': 0.99} + 99%|█████████▊| 6943/7045 [22:33:44<19:50, 11.68s/it] 99%|█████████▊| 6944/7045 [22:33:55<19:14, 11.43s/it] {'loss': 1.1279, 'learning_rate': 2.694955647477293e-09, 'epoch': 0.99} + 99%|█████████▊| 6944/7045 [22:33:55<19:14, 11.43s/it] 99%|█████████▊| 6945/7045 [22:34:06<19:05, 11.46s/it] {'loss': 1.0718, 'learning_rate': 2.6418637289679636e-09, 'epoch': 0.99} + 99%|█████████▊| 6945/7045 [22:34:06<19:05, 11.46s/it] 99%|█████████▊| 6946/7045 [22:34:18<18:50, 11.42s/it] {'loss': 1.1406, 'learning_rate': 2.5892997178264545e-09, 'epoch': 0.99} + 99%|█████████▊| 6946/7045 [22:34:18<18:50, 11.42s/it] 99%|█████████▊| 6947/7045 [22:34:31<19:25, 11.89s/it] {'loss': 1.0996, 'learning_rate': 2.5372636251644322e-09, 'epoch': 0.99} + 99%|█████████▊| 6947/7045 [22:34:31<19:25, 11.89s/it] 99%|█████████▊| 6948/7045 [22:34:42<19:01, 11.77s/it] {'loss': 1.0884, 'learning_rate': 2.4857554619814317e-09, 'epoch': 0.99} + 99%|█████████▊| 6948/7045 [22:34:42<19:01, 11.77s/it] 99%|█████████▊| 6949/7045 [22:34:54<18:43, 11.71s/it] {'loss': 1.1348, 'learning_rate': 2.4347752391656875e-09, 'epoch': 0.99} + 99%|█████████▊| 6949/7045 [22:34:54<18:43, 11.71s/it] 99%|█████████▊| 6950/7045 [22:35:05<18:17, 11.55s/it] {'loss': 1.1074, 'learning_rate': 2.384322967493857e-09, 'epoch': 0.99} + 99%|█████████▊| 6950/7045 [22:35:05<18:17, 11.55s/it] 99%|█████████▊| 6951/7045 [22:35:17<18:32, 11.83s/it] {'loss': 1.0996, 'learning_rate': 2.3343986576307433e-09, 'epoch': 0.99} + 99%|█████████▊| 6951/7045 [22:35:17<18:32, 11.83s/it] 99%|█████████▊| 6952/7045 [22:35:31<18:57, 12.24s/it] {'loss': 1.083, 'learning_rate': 2.2850023201295703e-09, 'epoch': 0.99} + 99%|█████████▊| 6952/7045 [22:35:31<18:57, 12.24s/it] 99%|█████████▊| 6953/7045 [22:35:42<18:17, 11.93s/it] {'loss': 1.124, 'learning_rate': 2.236133965431986e-09, 'epoch': 0.99} + 99%|█████████▊| 6953/7045 [22:35:42<18:17, 11.93s/it] 99%|█████████▊| 6954/7045 [22:35:55<18:29, 12.19s/it] {'loss': 1.1191, 'learning_rate': 2.1877936038683377e-09, 'epoch': 0.99} + 99%|█████████▊| 6954/7045 [22:35:55<18:29, 12.19s/it] 99%|█████████▊| 6955/7045 [22:36:06<17:54, 11.94s/it] {'loss': 1.0771, 'learning_rate': 2.139981245657119e-09, 'epoch': 0.99} + 99%|█████████▊| 6955/7045 [22:36:06<17:54, 11.94s/it] 99%|█████████▊| 6956/7045 [22:36:18<17:33, 11.84s/it] {'loss': 1.0972, 'learning_rate': 2.092696900904967e-09, 'epoch': 0.99} + 99%|█████████▊| 6956/7045 [22:36:18<17:33, 11.84s/it] 99%|█████████▉| 6957/7045 [22:36:30<17:39, 12.04s/it] {'loss': 1.0854, 'learning_rate': 2.0459405796072197e-09, 'epoch': 0.99} + 99%|█████████▉| 6957/7045 [22:36:30<17:39, 12.04s/it] 99%|█████████▉| 6958/7045 [22:36:41<17:00, 11.73s/it] {'loss': 1.1152, 'learning_rate': 1.9997122916476375e-09, 'epoch': 0.99} + 99%|█████████▉| 6958/7045 [22:36:41<17:00, 11.73s/it] 99%|█████████▉| 6959/7045 [22:36:53<17:00, 11.86s/it] {'loss': 1.0835, 'learning_rate': 1.954012046798126e-09, 'epoch': 0.99} + 99%|█████████▉| 6959/7045 [22:36:53<17:00, 11.86s/it] 99%|█████████▉| 6960/7045 [22:37:05<16:35, 11.71s/it] {'loss': 1.083, 'learning_rate': 1.908839854719291e-09, 'epoch': 0.99} + 99%|█████████▉| 6960/7045 [22:37:05<16:35, 11.71s/it] 99%|█████████▉| 6961/7045 [22:37:16<16:24, 11.72s/it] {'loss': 1.1055, 'learning_rate': 1.8641957249596056e-09, 'epoch': 0.99} + 99%|█████████▉| 6961/7045 [22:37:16<16:24, 11.72s/it] 99%|█████████▉| 6962/7045 [22:37:28<16:14, 11.74s/it] {'loss': 1.0708, 'learning_rate': 1.8200796669565202e-09, 'epoch': 0.99} + 99%|█████████▉| 6962/7045 [22:37:28<16:14, 11.74s/it] 99%|█████████▉| 6963/7045 [22:37:42<16:50, 12.33s/it] {'loss': 1.0869, 'learning_rate': 1.7764916900353535e-09, 'epoch': 0.99} + 99%|█████████▉| 6963/7045 [22:37:42<16:50, 12.33s/it] 99%|█████████▉| 6964/7045 [22:37:53<16:08, 11.95s/it] {'loss': 1.1094, 'learning_rate': 1.7334318034101239e-09, 'epoch': 0.99} + 99%|█████████▉| 6964/7045 [22:37:53<16:08, 11.95s/it] 99%|█████████▉| 6965/7045 [22:38:05<16:05, 12.07s/it] {'loss': 1.0889, 'learning_rate': 1.6909000161832723e-09, 'epoch': 0.99} + 99%|█████████▉| 6965/7045 [22:38:05<16:05, 12.07s/it] 99%|█████████▉| 6966/7045 [22:38:16<15:30, 11.78s/it] {'loss': 1.1113, 'learning_rate': 1.6488963373448296e-09, 'epoch': 0.99} + 99%|█████████▉| 6966/7045 [22:38:16<15:30, 11.78s/it] 99%|█████████▉| 6967/7045 [22:38:29<15:35, 11.99s/it] {'loss': 1.0996, 'learning_rate': 1.6074207757746373e-09, 'epoch': 0.99} + 99%|█████████▉| 6967/7045 [22:38:29<15:35, 11.99s/it] 99%|█████████▉| 6968/7045 [22:38:40<15:10, 11.82s/it] {'loss': 1.0918, 'learning_rate': 1.566473340239294e-09, 'epoch': 0.99} + 99%|█████████▉| 6968/7045 [22:38:40<15:10, 11.82s/it] 99%|█████████▉| 6969/7045 [22:38:51<14:44, 11.63s/it] {'loss': 1.0996, 'learning_rate': 1.5260540393952083e-09, 'epoch': 0.99} + 99%|█████████▉| 6969/7045 [22:38:51<14:44, 11.63s/it] 99%|█████████▉| 6970/7045 [22:39:03<14:28, 11.58s/it] {'loss': 1.1104, 'learning_rate': 1.4861628817858242e-09, 'epoch': 0.99} + 99%|█████████▉| 6970/7045 [22:39:03<14:28, 11.58s/it] 99%|█████████▉| 6971/7045 [22:39:16<14:38, 11.88s/it] {'loss': 1.1201, 'learning_rate': 1.4467998758441181e-09, 'epoch': 0.99} + 99%|█████████▉| 6971/7045 [22:39:16<14:38, 11.88s/it] 99%|█████████▉| 6972/7045 [22:39:28<14:30, 11.92s/it] {'loss': 1.1348, 'learning_rate': 1.4079650298906567e-09, 'epoch': 0.99} + 99%|█████████▉| 6972/7045 [22:39:28<14:30, 11.92s/it] 99%|█████████▉| 6973/7045 [22:39:39<14:15, 11.88s/it] {'loss': 1.1172, 'learning_rate': 1.3696583521344286e-09, 'epoch': 0.99} + 99%|█████████▉| 6973/7045 [22:39:39<14:15, 11.88s/it] 99%|█████████▉| 6974/7045 [22:39:50<13:46, 11.65s/it] {'loss': 1.0942, 'learning_rate': 1.331879850673401e-09, 'epoch': 0.99} + 99%|█████████▉| 6974/7045 [22:39:50<13:46, 11.65s/it] 99%|█████████▉| 6975/7045 [22:40:03<14:00, 12.01s/it] {'loss': 1.0938, 'learning_rate': 1.2946295334928527e-09, 'epoch': 0.99} + 99%|█████████▉| 6975/7045 [22:40:03<14:00, 12.01s/it] 99%|█████████▉| 6976/7045 [22:40:15<13:34, 11.81s/it] {'loss': 1.0928, 'learning_rate': 1.2579074084673182e-09, 'epoch': 0.99} + 99%|█████████▉| 6976/7045 [22:40:15<13:34, 11.81s/it] 99%|█████████▉| 6977/7045 [22:40:26<13:14, 11.69s/it] {'loss': 1.0703, 'learning_rate': 1.221713483359477e-09, 'epoch': 0.99} + 99%|█████████▉| 6977/7045 [22:40:26<13:14, 11.69s/it] 99%|█████████▉| 6978/7045 [22:40:37<12:47, 11.46s/it] {'loss': 1.1006, 'learning_rate': 1.186047765820153e-09, 'epoch': 0.99} + 99%|█████████▉| 6978/7045 [22:40:37<12:47, 11.46s/it] 99%|█████████▉| 6979/7045 [22:40:48<12:32, 11.40s/it] {'loss': 1.1377, 'learning_rate': 1.1509102633883163e-09, 'epoch': 0.99} + 99%|█████████▉| 6979/7045 [22:40:48<12:32, 11.40s/it] 99%|█████████▉| 6980/7045 [22:41:00<12:20, 11.39s/it] {'loss': 1.1016, 'learning_rate': 1.1163009834919136e-09, 'epoch': 0.99} + 99%|█████████▉| 6980/7045 [22:41:00<12:20, 11.39s/it] 99%|█████████▉| 6981/7045 [22:41:11<12:01, 11.28s/it] {'loss': 1.1299, 'learning_rate': 1.0822199334464823e-09, 'epoch': 0.99} + 99%|█████████▉| 6981/7045 [22:41:11<12:01, 11.28s/it] 99%|█████████▉| 6982/7045 [22:41:23<12:11, 11.61s/it] {'loss': 1.0854, 'learning_rate': 1.0486671204568145e-09, 'epoch': 0.99} + 99%|█████████▉| 6982/7045 [22:41:23<12:11, 11.61s/it] 99%|█████████▉| 6983/7045 [22:41:38<12:56, 12.53s/it] {'loss': 1.1006, 'learning_rate': 1.0156425516150147e-09, 'epoch': 0.99} + 99%|█████████▉| 6983/7045 [22:41:38<12:56, 12.53s/it] 99%|█████████▉| 6984/7045 [22:41:51<13:04, 12.85s/it] {'loss': 1.0928, 'learning_rate': 9.831462339021658e-10, 'epoch': 0.99} + 99%|█████████▉| 6984/7045 [22:41:51<13:04, 12.85s/it] 99%|█████████▉| 6985/7045 [22:42:02<12:18, 12.30s/it] {'loss': 1.083, 'learning_rate': 9.511781741877724e-10, 'epoch': 0.99} + 99%|█████████▉| 6985/7045 [22:42:02<12:18, 12.30s/it] 99%|█████████▉| 6986/7045 [22:42:14<11:47, 12.00s/it] {'loss': 1.1055, 'learning_rate': 9.197383792292069e-10, 'epoch': 0.99} + 99%|█████████▉| 6986/7045 [22:42:14<11:47, 12.00s/it] 99%|█████████▉| 6987/7045 [22:42:25<11:21, 11.75s/it] {'loss': 1.0967, 'learning_rate': 8.888268556725422e-10, 'epoch': 0.99} + 99%|█████████▉| 6987/7045 [22:42:25<11:21, 11.75s/it] 99%|█████████▉| 6988/7045 [22:42:36<10:56, 11.52s/it] {'loss': 1.1274, 'learning_rate': 8.584436100519955e-10, 'epoch': 0.99} + 99%|█████████▉| 6988/7045 [22:42:36<10:56, 11.52s/it] 99%|█████████▉| 6989/7045 [22:42:49<11:19, 12.14s/it] {'loss': 1.0962, 'learning_rate': 8.285886487902073e-10, 'epoch': 0.99} + 99%|█████████▉| 6989/7045 [22:42:49<11:19, 12.14s/it] 99%|█████████▉| 6990/7045 [22:43:02<11:14, 12.27s/it] {'loss': 1.0508, 'learning_rate': 7.992619781982403e-10, 'epoch': 0.99} + 99%|█████████▉| 6990/7045 [22:43:02<11:14, 12.27s/it] 99%|█████████▉| 6991/7045 [22:43:13<10:43, 11.91s/it] {'loss': 1.1016, 'learning_rate': 7.704636044747471e-10, 'epoch': 0.99} + 99%|█████████▉| 6991/7045 [22:43:13<10:43, 11.91s/it] 99%|█████████▉| 6992/7045 [22:43:24<10:20, 11.70s/it] {'loss': 1.125, 'learning_rate': 7.421935337081909e-10, 'epoch': 0.99} + 99%|█████████▉| 6992/7045 [22:43:24<10:20, 11.70s/it] 99%|█████████▉| 6993/7045 [22:43:36<10:06, 11.67s/it] {'loss': 1.1104, 'learning_rate': 7.14451771873792e-10, 'epoch': 0.99} + 99%|█████████▉| 6993/7045 [22:43:36<10:06, 11.67s/it] 99%|█████████▉| 6994/7045 [22:43:47<09:45, 11.47s/it] {'loss': 1.1328, 'learning_rate': 6.872383248360259e-10, 'epoch': 0.99} + 99%|█████████▉| 6994/7045 [22:43:47<09:45, 11.47s/it] 99%|█████████▉| 6995/7045 [22:43:58<09:24, 11.29s/it] {'loss': 1.0889, 'learning_rate': 6.605531983475133e-10, 'epoch': 0.99} + 99%|█████████▉| 6995/7045 [22:43:58<09:24, 11.29s/it]Token indices sequence length is longer than the specified maximum sequence length for this model (2750 > 2048). Running this sequence through the model will result in indexing errors + 99%|█████████▉| 6996/7045 [22:44:09<09:10, 11.24s/it] {'loss': 1.1064, 'learning_rate': 6.343963980490198e-10, 'epoch': 0.99} + 99%|█████████▉| 6996/7045 [22:44:09<09:10, 11.24s/it] 99%|█████████▉| 6997/7045 [22:44:20<09:05, 11.37s/it] {'loss': 1.125, 'learning_rate': 6.087679294697335e-10, 'epoch': 0.99} + 99%|█████████▉| 6997/7045 [22:44:20<09:05, 11.37s/it] 99%|█████████▉| 6998/7045 [22:44:32<08:54, 11.38s/it] {'loss': 1.124, 'learning_rate': 5.836677980272654e-10, 'epoch': 0.99} + 99%|█████████▉| 6998/7045 [22:44:32<08:54, 11.38s/it] 99%|█████████▉| 6999/7045 [22:44:43<08:38, 11.27s/it] {'loss': 1.1143, 'learning_rate': 5.590960090273711e-10, 'epoch': 0.99} + 99%|█████████▉| 6999/7045 [22:44:43<08:38, 11.27s/it] 99%|█████████▉| 7000/7045 [22:44:56<08:50, 11.79s/it] {'loss': 1.0967, 'learning_rate': 5.350525676642293e-10, 'epoch': 0.99} + 99%|█████████▉| 7000/7045 [22:44:56<08:50, 11.79s/it] 99%|█████████▉| 7001/7045 [22:45:07<08:30, 11.59s/it] {'loss': 1.1006, 'learning_rate': 5.115374790201632e-10, 'epoch': 0.99} + 99%|█████████▉| 7001/7045 [22:45:07<08:30, 11.59s/it] 99%|█████████▉| 7002/7045 [22:45:19<08:27, 11.81s/it] {'loss': 1.1201, 'learning_rate': 4.885507480661967e-10, 'epoch': 0.99} + 99%|█████████▉| 7002/7045 [22:45:19<08:27, 11.81s/it] 99%|█████████▉| 7003/7045 [22:45:32<08:29, 12.13s/it] {'loss': 1.0625, 'learning_rate': 4.660923796612205e-10, 'epoch': 0.99} + 99%|█████████▉| 7003/7045 [22:45:32<08:29, 12.13s/it] 99%|█████████▉| 7004/7045 [22:45:44<08:07, 11.89s/it] {'loss': 1.1182, 'learning_rate': 4.441623785525484e-10, 'epoch': 0.99} + 99%|█████████���| 7004/7045 [22:45:44<08:07, 11.89s/it] 99%|█████████▉| 7005/7045 [22:45:57<08:12, 12.32s/it] {'loss': 1.126, 'learning_rate': 4.227607493759167e-10, 'epoch': 0.99} + 99%|█████████▉| 7005/7045 [22:45:57<08:12, 12.32s/it] 99%|█████████▉| 7006/7045 [22:46:09<07:55, 12.19s/it] {'loss': 1.0435, 'learning_rate': 4.0188749665576175e-10, 'epoch': 0.99} + 99%|█████████▉| 7006/7045 [22:46:09<07:55, 12.19s/it] 99%|█████████▉| 7007/7045 [22:46:20<07:32, 11.91s/it] {'loss': 1.0864, 'learning_rate': 3.8154262480383233e-10, 'epoch': 0.99} + 99%|█████████▉| 7007/7045 [22:46:20<07:32, 11.91s/it] 99%|█████████▉| 7008/7045 [22:46:33<07:34, 12.28s/it] {'loss': 1.0933, 'learning_rate': 3.617261381208548e-10, 'epoch': 0.99} + 99%|█████████▉| 7008/7045 [22:46:33<07:34, 12.28s/it] 99%|█████████▉| 7009/7045 [22:46:45<07:13, 12.03s/it] {'loss': 1.127, 'learning_rate': 3.424380407959782e-10, 'epoch': 0.99} + 99%|█████████▉| 7009/7045 [22:46:45<07:13, 12.03s/it] 100%|█████████▉| 7010/7045 [22:46:55<06:49, 11.69s/it] {'loss': 1.0938, 'learning_rate': 3.236783369064966e-10, 'epoch': 1.0} + 100%|█████████▉| 7010/7045 [22:46:55<06:49, 11.69s/it] 100%|█████████▉| 7011/7045 [22:47:07<06:32, 11.56s/it] {'loss': 1.0776, 'learning_rate': 3.054470304175716e-10, 'epoch': 1.0} + 100%|█████████▉| 7011/7045 [22:47:07<06:32, 11.56s/it] 100%|█████████▉| 7012/7045 [22:47:18<06:15, 11.39s/it] {'loss': 1.0947, 'learning_rate': 2.8774412518334236e-10, 'epoch': 1.0} + 100%|█████████▉| 7012/7045 [22:47:18<06:15, 11.39s/it] 100%|█████████▉| 7013/7045 [22:47:36<07:12, 13.51s/it] {'loss': 1.1572, 'learning_rate': 2.7056962494609317e-10, 'epoch': 1.0} + 100%|█████████▉| 7013/7045 [22:47:36<07:12, 13.51s/it] 100%|█████████▉| 7014/7045 [22:47:47<06:35, 12.77s/it] {'loss': 1.1377, 'learning_rate': 2.5392353333597577e-10, 'epoch': 1.0} + 100%|█████████▉| 7014/7045 [22:47:47<06:35, 12.77s/it] 100%|█████████▉| 7015/7045 [22:47:59<06:17, 12.59s/it] {'loss': 1.1152, 'learning_rate': 2.37805853871842e-10, 'epoch': 1.0} + 100%|█████████▉| 7015/7045 [22:47:59<06:17, 12.59s/it] 100%|█████████▉| 7016/7045 [22:48:10<05:50, 12.08s/it] {'loss': 1.1504, 'learning_rate': 2.2221658996068872e-10, 'epoch': 1.0} + 100%|█████████▉| 7016/7045 [22:48:10<05:50, 12.08s/it] 100%|█████████▉| 7017/7045 [22:48:21<05:30, 11.82s/it] {'loss': 1.0854, 'learning_rate': 2.071557448982131e-10, 'epoch': 1.0} + 100%|█████████▉| 7017/7045 [22:48:21<05:30, 11.82s/it] 100%|█████████▉| 7018/7045 [22:48:35<05:29, 12.20s/it] {'loss': 1.0576, 'learning_rate': 1.926233218674245e-10, 'epoch': 1.0} + 100%|█████████▉| 7018/7045 [22:48:35<05:29, 12.20s/it] 100%|█████████▉| 7019/7045 [22:48:46<05:09, 11.91s/it] {'loss': 1.083, 'learning_rate': 1.7861932394114268e-10, 'epoch': 1.0} + 100%|█████████▉| 7019/7045 [22:48:46<05:09, 11.91s/it] 100%|█████████▉| 7020/7045 [22:48:59<05:04, 12.19s/it] {'loss': 1.1104, 'learning_rate': 1.6514375407866712e-10, 'epoch': 1.0} + 100%|█████████▉| 7020/7045 [22:48:59<05:04, 12.19s/it] 100%|█████████▉| 7021/7045 [22:49:11<04:52, 12.17s/it] {'loss': 1.0776, 'learning_rate': 1.521966151293852e-10, 'epoch': 1.0} + 100%|█████████▉| 7021/7045 [22:49:11<04:52, 12.17s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 4 bytes but only got 0. + warnings.warn(str(msg)) + 100%|█████████▉| 7022/7045 [22:49:23<04:37, 12.08s/it] {'loss': 1.1064, 'learning_rate': 1.3977790982971916e-10, 'epoch': 1.0} + 100%|█████████▉| 7022/7045 [22:49:23<04:37, 12.08s/it] 100%|█████████▉| 7023/7045 [22:49:34<04:23, 12.00s/it] {'loss': 1.0923, 'learning_rate': 1.2788764080479133e-10, 'epoch': 1.0} + 100%|█████████▉| 7023/7045 [22:49:34<04:23, 12.00s/it] 100%|█████████▉| 7024/7045 [22:49:46<04:06, 11.76s/it] {'loss': 1.1367, 'learning_rate': 1.165258105681466e-10, 'epoch': 1.0} + 100%|█████████▉| 7024/7045 [22:49:46<04:06, 11.76s/it]/usr/local/lib/python3.9/dist-packages/PIL/TiffImagePlugin.py:850: UserWarning: Corrupt EXIF data. Expecting to read 4 bytes but only got 0. + warnings.warn(str(msg)) + 100%|█████████▉| 7025/7045 [22:49:58<03:57, 11.86s/it] {'loss': 1.0547, 'learning_rate': 1.0569242152147497e-10, 'epoch': 1.0} + 100%|█████████▉| 7025/7045 [22:49:58<03:57, 11.86s/it] 100%|█████████▉| 7026/7045 [22:50:11<03:53, 12.28s/it] {'loss': 1.0947, 'learning_rate': 9.538747595516651e-11, 'epoch': 1.0} + 100%|█████████▉| 7026/7045 [22:50:11<03:53, 12.28s/it] 100%|█████████▉| 7027/7045 [22:50:23<03:40, 12.24s/it] {'loss': 1.1475, 'learning_rate': 8.56109760469237e-11, 'epoch': 1.0} + 100%|█████████▉| 7027/7045 [22:50:23<03:40, 12.24s/it] 100%|█████████▉| 7028/7045 [22:50:36<03:31, 12.45s/it] {'loss': 1.0493, 'learning_rate': 7.636292386370425e-11, 'epoch': 1.0} + 100%|█████████▉| 7028/7045 [22:50:36<03:31, 12.45s/it] 100%|█████████▉| 7029/7045 [22:50:48<03:17, 12.33s/it] {'loss': 1.0513, 'learning_rate': 6.764332136033336e-11, 'epoch': 1.0} + 100%|█████████▉| 7029/7045 [22:50:48<03:17, 12.33s/it] 100%|█████████▉| 7030/7045 [22:51:00<03:02, 12.14s/it] {'loss': 1.1543, 'learning_rate': 5.945217038005879e-11, 'epoch': 1.0} + 100%|█████████▉| 7030/7045 [22:51:00<03:02, 12.14s/it] 100%|█████████▉| 7031/7045 [22:51:14<02:57, 12.69s/it] {'loss': 1.0889, 'learning_rate': 5.178947265455092e-11, 'epoch': 1.0} + 100%|█████████▉| 7031/7045 [22:51:14<02:57, 12.69s/it] 100%|█████████▉| 7032/7045 [22:51:26<02:44, 12.64s/it] {'loss': 1.0991, 'learning_rate': 4.465522980307002e-11, 'epoch': 1.0} + 100%|█████████▉| 7032/7045 [22:51:26<02:44, 12.64s/it] 100%|█████████▉| 7033/7045 [22:51:39<02:31, 12.61s/it] {'loss': 1.0898, 'learning_rate': 3.804944333440919e-11, 'epoch': 1.0} + 100%|█████████▉| 7033/7045 [22:51:39<02:31, 12.61s/it] 100%|█████████▉| 7034/7045 [22:51:50<02:14, 12.19s/it] {'loss': 1.0796, 'learning_rate': 3.1972114644396315e-11, 'epoch': 1.0} + 100%|█████████▉| 7034/7045 [22:51:50<02:14, 12.19s/it] 100%|█████████▉| 7035/7045 [22:52:01<01:59, 11.95s/it] {'loss': 1.1108, 'learning_rate': 2.642324501811455e-11, 'epoch': 1.0} + 100%|█████████▉| 7035/7045 [22:52:01<01:59, 11.95s/it] 100%|█████████▉| 7036/7045 [22:52:13<01:46, 11.83s/it] {'loss': 1.0898, 'learning_rate': 2.1402835627959416e-11, 'epoch': 1.0} + 100%|█████████▉| 7036/7045 [22:52:13<01:46, 11.83s/it] 100%|█████████▉| 7037/7045 [22:52:24<01:31, 11.50s/it] {'loss': 1.1572, 'learning_rate': 1.6910887535581676e-11, 'epoch': 1.0} + 100%|█████████▉| 7037/7045 [22:52:24<01:31, 11.50s/it] 100%|█████████▉| 7038/7045 [22:52:35<01:20, 11.46s/it] {'loss': 1.1587, 'learning_rate': 1.2947401690777129e-11, 'epoch': 1.0} + 100%|█████████▉| 7038/7045 [22:52:35<01:20, 11.46s/it] 100%|█████████▉| 7039/7045 [22:52:47<01:08, 11.45s/it] {'loss': 1.0854, 'learning_rate': 9.512378930653932e-12, 'epoch': 1.0} + 100%|█████████▉| 7039/7045 [22:52:47<01:08, 11.45s/it] 100%|█████████▉| 7040/7045 [22:52:58<00:56, 11.40s/it] {'loss': 1.0908, 'learning_rate': 6.605819981853057e-12, 'epoch': 1.0} + 100%|█████████▉| 7040/7045 [22:52:58<00:56, 11.40s/it] 100%|█████████▉| 7041/7045 [22:53:09<00:45, 11.35s/it] {'loss': 1.0737, 'learning_rate': 4.227725458605392e-12, 'epoch': 1.0} + 100%|█████████▉| 7041/7045 [22:53:09<00:45, 11.35s/it] 100%|█████████▉| 7042/7045 [22:53:21<00:34, 11.39s/it] {'loss': 1.0996, 'learning_rate': 2.378095863841967e-12, 'epoch': 1.0} + 100%|█████████▉| 7042/7045 [22:53:21<00:34, 11.39s/it] 100%|█████████▉| 7043/7045 [22:53:32<00:22, 11.39s/it] {'loss': 1.1064, 'learning_rate': 1.0569315880837316e-12, 'epoch': 1.0} + 100%|█████████▉| 7043/7045 [22:53:32<00:22, 11.39s/it] 100%|█████████▉| 7044/7045 [22:53:43<00:11, 11.28s/it] {'loss': 1.1108, 'learning_rate': 2.6423291110688754e-13, 'epoch': 1.0} + 100%|█████████▉| 7044/7045 [22:53:43<00:11, 11.28s/it] 100%|██████████| 7045/7045 [22:53:57<00:00, 12.13s/it] {'loss': 1.0869, 'learning_rate': 0.0, 'epoch': 1.0} + 100%|██████████| 7045/7045 [22:53:57<00:00, 12.13s/it] {'train_runtime': 82446.7613, 'train_samples_per_second': 10.937, 'train_steps_per_second': 0.085, 'train_loss': 1.1225040476401704, 'epoch': 1.0} + 100%|██████████| 7045/7045 [22:53:57<00:00, 12.13s/it] 100%|██████████| 7045/7045 [22:53:57<00:00, 11.70s/it] +2024-03-10 05:19:42.110 n213-017-210:2252780:2254222 [4] NCCL INFO [Service thread] Connection closed by localRank 5 +2024-03-10 05:19:42.110 n213-017-210:2252782:2254221 [6] NCCL INFO [Service thread] Connection closed by localRank 5 +2024-03-10 05:19:42.110 n213-017-210:2252781:2254223 [5] NCCL INFO [Service thread] Connection closed by localRank 5 +2024-03-10 05:19:42.299 n213-017-210:2252782:2254221 [6] NCCL INFO [Service thread] Connection closed by localRank 7 +2024-03-10 05:19:42.299 n213-017-210:2252783:2254220 [7] NCCL INFO [Service thread] Connection closed by localRank 7 +2024-03-10 05:19:42.299 n213-017-210:2252776:2254227 [0] NCCL INFO [Service thread] Connection closed by localRank 7 +2024-03-10 05:19:42.658 n213-017-210:2252780:2254222 [4] NCCL INFO [Service thread] Connection closed by localRank 3 +2024-03-10 05:19:42.658 n213-017-210:2252778:2254226 [2] NCCL INFO [Service thread] Connection closed by localRank 3 +2024-03-10 05:19:42.658 n213-017-210:2252779:2254225 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +2024-03-10 05:19:42.658 n213-017-210:2252778:2254226 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +2024-03-10 05:19:42.658 n213-017-210:2252779:2254225 [3] NCCL INFO [Service thread] Connection closed by localRank 2 +2024-03-10 05:19:42.658 n213-017-210:2252777:2254224 [1] NCCL INFO [Service thread] Connection closed by localRank 2 +2024-03-10 05:19:42.683 n213-017-210:2252778:2254226 [2] NCCL INFO [Service thread] Connection closed by localRank 1 +2024-03-10 05:19:42.683 n213-017-210:2252777:2254224 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +2024-03-10 05:19:42.683 n213-017-210:2252776:2254227 [0] NCCL INFO [Service thread] Connection closed by localRank 1 +2024-03-10 05:19:42.735 n213-017-210:2252781:2254223 [5] NCCL INFO [Service thread] Connection closed by localRank 6 +2024-03-10 05:19:42.735 n213-017-210:2252782:2254221 [6] NCCL INFO [Service thread] Connection closed by localRank 6 +2024-03-10 05:19:42.735 n213-017-210:2252783:2254220 [7] NCCL INFO [Service thread] Connection closed by localRank 6 +2024-03-10 05:19:42.786 n213-017-210:2252779:2254225 [3] NCCL INFO [Service thread] Connection closed by localRank 4 +2024-03-10 05:19:42.786 n213-017-210:2252780:2254222 [4] NCCL INFO [Service thread] Connection closed by localRank 4 +2024-03-10 05:19:42.786 n213-017-210:2252781:2254223 [5] NCCL INFO [Service thread] Connection closed by localRank 4 +2024-03-10 05:19:43.052 n213-017-210:2252778:2252778 [2] NCCL INFO comm 0x6f970a00 rank 2 nranks 8 cudaDev 2 busId 4a000 - Abort COMPLETE +2024-03-10 05:19:43.117 n213-017-210:2252781:2252781 [5] NCCL INFO comm 0x6f5eb560 rank 5 nranks 8 cudaDev 5 busId 8e000 - Abort COMPLETE +2024-03-10 05:19:43.119 n213-017-210:2252779:2252779 [3] NCCL INFO comm 0x70282cc0 rank 3 nranks 8 cudaDev 3 busId 4e000 - Abort COMPLETE +2024-03-10 05:19:43.524 n213-017-210:2252780:2252780 [4] NCCL INFO comm 0x6fd97400 rank 4 nranks 8 cudaDev 4 busId 89000 - Abort COMPLETE +2024-03-10 05:19:43.525 n213-017-210:2252782:2252782 [6] NCCL INFO comm 0x6f5c01c0 rank 6 nranks 8 cudaDev 6 busId c5000 - Abort COMPLETE +2024-03-10 05:19:46.932 n213-017-210:2252783:2252783 [7] NCCL INFO comm 0x6f584bc0 rank 7 nranks 8 cudaDev 7 busId c9000 - Abort COMPLETE +2024-03-10 05:19:46.938 n213-017-210:2252777:2252777 [1] NCCL INFO comm 0x6ece5cc0 rank 1 nranks 8 cudaDev 1 busId 16000 - Abort COMPLETE +2024-03-10 05:19:46.964 n213-017-210:2252780:2253716 [4] NCCL INFO [Service thread] Connection closed by localRank 5 +2024-03-10 05:19:47.077 n213-017-210:2252780:2253716 [4] NCCL INFO [Service thread] Connection closed by localRank 3 +2024-03-10 05:19:47.302 n213-017-210:2252777:2253722 [1] NCCL INFO [Service thread] Connection closed by localRank 2 +2024-03-10 05:19:47.563 n213-017-210:2252783:2253719 [7] NCCL INFO [Service thread] Connection closed by localRank 6 +2024-03-10 05:19:51.118 n213-017-210:2252776:2253723 [0] NCCL INFO [Service thread] Connection closed by localRank 7 +2024-03-10 05:19:51.120 n213-017-210:2252776:2253723 [0] NCCL INFO [Service thread] Connection closed by localRank 1 +wandb: Waiting for W&B process to finish... (success). +wandb: +wandb: Run history: +wandb: train/epoch ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███ +wandb: train/global_step ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███ +wandb: train/learning_rate ▃▇██████▇▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁ +wandb: train/loss █▇▄▄▅▅▄▃▃▅▃▃▂▄▄▃▃▁▄▃▄▂▂▂▂▃▃▅▁▅▂▁▄▂▃▃▁▃▂▂ +wandb: train/total_flos ▁ +wandb: train/train_loss ▁ +wandb: train/train_runtime ▁ +wandb: train/train_samples_per_second ▁ +wandb: train/train_steps_per_second ▁ +wandb: +wandb: Run summary: +wandb: train/epoch 1.0 +wandb: train/global_step 7045 +wandb: train/learning_rate 0.0 +wandb: train/loss 1.0869 +wandb: train/total_flos 1.4118353085990437e+19 +wandb: train/train_loss 1.1225 +wandb: train/train_runtime 82446.7613 +wandb: train/train_samples_per_second 10.937 +wandb: train/train_steps_per_second 0.085 +wandb: +wandb: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s) +wandb: Find logs at: ./wandb/run-20240309_062525-run_20240309_98cb39ab/logs +2024-03-10 05:20:07.273 n213-017-210:2252776:2254227 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +2024-03-10 05:20:07.911 n213-017-210:2252776:2252776 [0] NCCL INFO comm 0x19413970 rank 0 nranks 8 cudaDev 0 busId 10000 - Abort COMPLETE