{
  "audio": {
    "max_len": 1920000,
    "max_time": 120,
    "normalize": true,
    "random_sampling": true,
    "sample_rate": 16000,
    "skip_time": false
  },
  "augment": {
    "freq_mask_param": 8,
    "mixup_alpha": 2.5,
    "mixup_p": 0.5,
    "n_freq_masks": 1,
    "n_time_masks": 2,
    "time_freq_mask_p": 0.5,
    "time_mask_param": 8
  },
  "dataset": {
    "test_dataframe": "test.csv",
    "train_dataframe": "train.csv",
    "valid_dataframe": "valid.csv"
  },
  "environment": {
    "mixed_precision": true,
    "num_workers": 2,
    "seed": 42
  },
  "experiment_name": "spectttra_beta-t=120",
  "logger": {
    "primary_metric": "f1",
    "project": "sonics"
  },
  "loss": {
    "label_smoothing": 0.02,
    "name": "BCEWithLogitsLoss"
  },
  "melspec": {
    "f_max": 8000,
    "f_min": 20,
    "hop_length": 512,
    "n_fft": 2048,
    "n_mels": 128,
    "norm": "mean_std",
    "power": 2,
    "top_db": 80,
    "win_length": 2048
  },
  "model": {
    "attn_drop_rate": 0.1,
    "embed_dim": 384,
    "f_clip": 3,
    "input_shape": [128, 3744],
    "mlp_ratio": 2.67,
    "name": "SpecTTTra",
    "num_heads": 6,
    "num_layers": 12,
    "pe_learnable": true,
    "pos_drop_rate": 0.1,
    "pre_norm": true,
    "proj_drop_rate": 0.0,
    "resume": null,
    "t_clip": 5,
    "use_init_weights": false
  },
  "num_classes": 1,
  "optimizer": {
    "clip_grad_norm": 5.0,
    "grad_accum_steps": 1,
    "momentum": 0.9,
    "opt": "adamw",
    "opt_betas": [0.9, 0.999],
    "opt_eps": 1e-08,
    "weight_decay": 0.05
  },
  "scheduler": {
    "decay_rate": 0.1,
    "lr": 0.0008,
    "lr_base": 0.001,
    "lr_base_scale": "linear",
    "lr_base_size": 256,
    "min_lr": 0.0,
    "sched": "cosine",
    "warmup_epochs": 5,
    "warmup_lr": 1e-06
  },
  "training": {
    "batch_size": 128,
    "epochs": 50
  },
  "validation": {
    "batch_size": 128
  }
}