{
    "architectures": [
        "YolosForObjectDetection"
    ],
    "attention_probs_dropout_prob": 0.0,
    "auxiliary_loss": false,
    "bbox_cost": 5,
    "bbox_loss_coefficient": 5,
    "class_cost": 1,
    "eos_coefficient": 0.1,
    "giou_cost": 2,
    "giou_loss_coefficient": 2,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.0,
    "hidden_size": 192,
    "id2label": {
        "0": "ba",
        "1": "ca",
        "2": "da",
        "3": "dha",
        "4": "ga",
        "5": "ha",
        "6": "ja",
        "7": "ka",
        "8": "la",
        "9": "ma",
        "10": "na",
        "11": "nga",
        "12": "nya",
        "13": "pa",
        "14": "ra",
        "15": "sa",
        "16": "ta",
        "17": "tha",
        "18": "wa",
        "19": "ya"
    },
    "image_size": [
        800,
        1333
    ],
    "initializer_range": 0.02,
    "intermediate_size": 768,
    "label2id": {
        "ba": 0,
        "ca": 1,
        "da": 2,
        "dha": 3,
        "ga": 4,
        "ha": 5,
        "ja": 6,
        "ka": 7,
        "la": 8,
        "ma": 9,
        "na": 10,
        "nga": 11,
        "nya": 12,
        "pa": 13,
        "ra": 14,
        "sa": 15,
        "ta": 16,
        "tha": 17,
        "wa": 18,
        "ya": 19
    },
    "layer_norm_eps": 1e-12,
    "model_type": "yolos",
    "num_attention_heads": 3,
    "num_channels": 3,
    "num_detection_tokens": 100,
    "num_hidden_layers": 12,
    "patch_size": 16,
    "qkv_bias": true,
    "torch_dtype": "float32",
    "transformers_version": "4.19.0.dev0",
    "use_mid_position_embeddings": false
}