vivqa-model / config.json
ngocson2002's picture
Update model
86614a8
{
"activation_dropout": 0.0,
"activation_fn": "gelu",
"architectures": [
"BEiT3ForVietnameseVisualQuestionAnswering"
],
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration_vivqa.ViVQAConfig",
"AutoModel": "modeling_vivqa.BEiT3ForVietnameseVisualQuestionAnswering"
},
"bert_init": false,
"checkpoint_activations": false,
"ddp_rank": 0,
"deepnorm": false,
"drop_path_rate": 0.0,
"dropout": 0.0,
"encoder_attention_heads": 6,
"encoder_embed_dim": 768,
"encoder_ffn_embed_dim": 3072,
"encoder_layers": 6,
"encoder_normalize_before": true,
"fsdp": false,
"id2label": {
"0": "hai",
"1": "ba",
"2": "b\u1ed1n",
"3": "m\u00e0u tr\u1eafng",
"4": "m\u00e0u \u0111\u1ecf",
"5": "m\u00e0u xanh d\u01b0\u01a1ng",
"6": "m\u00e0u \u0111en",
"7": "m\u00e0u xanh l\u00e1",
"8": "ph\u00f2ng",
"9": "m\u00e0u v\u00e0ng",
"10": "ph\u00f2ng b\u1ebfp",
"11": "m\u00e0u n\u00e2u",
"12": "ph\u00f2ng t\u1eafm",
"13": "m\u00e0u cam",
"14": "gi\u01b0\u1eddng",
"15": "con m\u00e8o",
"16": "h\u01b0\u01a1u cao c\u1ed5",
"17": "m\u00e1y bay",
"18": "g\u01b0\u01a1ng",
"19": "n\u0103m",
"20": "con chim",
"21": "m\u00e0u x\u00e1m",
"22": "m\u00e0u t\u00eda",
"23": "con ch\u00f3",
"24": "con thuy\u1ec1n",
"25": "g\u1ea5u",
"26": "xe \u00f4 t\u00f4",
"27": "l\u1ecd c\u1eafm hoa",
"28": "con voi",
"29": "m\u1ed9t",
"30": "con ng\u1ef1a",
"31": "c\u00e1i gh\u1ebf",
"32": "xe m\u00e1y",
"33": "xe t\u1ea3i",
"34": "t\u00e0u h\u1ecfa",
"35": "xe bu\u00fdt",
"36": "\u0111\u01b0\u1eddng ph\u1ed1",
"37": "ch\u1eadu",
"38": "h\u1ed9p",
"39": "b\u00e1t",
"40": "pizza",
"41": "xe \u0111\u1ea1p",
"42": "chu\u1ed3ng",
"43": "con b\u00f2",
"44": "vali",
"45": "b\u00e1nh",
"46": "\u0111\u1ed3ng h\u1ed3",
"47": "s\u00e1u",
"48": "di\u1ec1u",
"49": "b\u0103ng gh\u1ebf",
"50": "donut",
"51": "nh\u00e0 v\u1ec7 sinh",
"52": "l\u00f2 vi s\u00f3ng",
"53": "sandwich",
"54": "ng\u1ef1a v\u1eb1n",
"55": "tr\u1ea1m",
"56": "chi\u1ebfc \u00f4",
"57": "ph\u00f2ng ng\u1ee7",
"58": "ng\u1ef1a r\u1eb1n",
"59": "\u0111\u0129a \u0103n",
"60": "v\u00f2i",
"61": "\u0111i\u1ec7n tho\u1ea1i",
"62": "con c\u1eebu",
"63": "t\u00f2a nh\u00e0",
"64": "v\u00e1n tr\u01b0\u1ee3t",
"65": "c\u1eeda s\u1ed5",
"66": "c\u1eeda h\u00e0ng",
"67": "t\u00f2a th\u00e1p",
"68": "b\u1ed3n t\u1eafm",
"69": "c\u00e1i r\u1ed5",
"70": "c\u00e2y",
"71": "m\u00e1y vi t\u00ednh",
"72": "qu\u00e1n \u0103n",
"73": "ga ra",
"74": "ch\u1ea3o",
"75": "v\u01b0\u1eddn b\u00e1ch th\u00fa",
"76": "nh\u00e0 \u1edf",
"77": "xe \u0111\u1ea9y",
"78": "laptop",
"79": "xe l\u1eeda",
"80": "b\u00f4ng hoa",
"81": "v\u00e1n l\u01b0\u1edbt s\u00f3ng",
"82": "c\u00e1i t\u00fai",
"83": "t\u1ee7 \u0111\u00e1",
"84": "qu\u1ea3 b\u00f3ng",
"85": "chu\u1ed1i",
"86": "s\u00e2n bay",
"87": "v\u0103n ph\u00f2ng",
"88": "th\u00f9ng ch\u1ee9a",
"89": "n\u00fai",
"90": "c\u00e1i b\u00e0n",
"91": "tr\u01b0\u1ee3t tuy\u1ebft",
"92": "c\u00e0 v\u1ea1t",
"93": "h\u1ed3 b\u01a1i",
"94": "b\u00e3i c\u1ecf",
"95": "b\u1ea3y",
"96": "m\u00f3n \u0103n",
"97": "\u0111\u01b0\u1eddng b\u1ed9",
"98": "xe",
"99": "n\u00f3n",
"100": "\u0111\u1ed9ng c\u01a1",
"101": "c\u00e1i m\u00e2m",
"102": "g\u1eady",
"103": "g\u1ea5u tr\u00fac",
"104": "c\u1eeda ti\u1ec7m",
"105": "con v\u1ecbt",
"106": "l\u1ed3ng",
"107": "t\u01b0\u1eddng",
"108": "c\u00e1i n\u1ed3i",
"109": "t\u1ee7 l\u1ea1nh",
"110": "c\u1eeda",
"111": "t\u00e1ch",
"112": "b\u1ee9c \u1ea3nh",
"113": "s\u00e2n v\u01b0\u1eddn",
"114": "\u0111\u1ed3i",
"115": "b\u1eefa \u0103n",
"116": "s\u00e2n v\u1eadn \u0111\u1ed9ng",
"117": "d\u0129a nh\u1ef1a",
"118": "ph\u01b0\u01a1ng ti\u1ec7n giao th\u00f4ng",
"119": "m\u00e1y xay",
"120": "\u0111\u1ed3 ch\u01a1i",
"121": "m\u0169",
"122": "rau",
"123": "\u00e1o vest",
"124": "v\u00f2i hoa sen",
"125": "b\u00e0n ch\u1ea3i",
"126": "c\u00e1i k\u1ec7",
"127": "\u0111\u01b0\u1eddng",
"128": "xe l\u0103n",
"129": "c\u00e0 r\u1ed1t",
"130": "xe c\u1ed9",
"131": "th\u00e2n c\u00e2y",
"132": "m\u00e1y \u1ea3nh",
"133": "chai",
"134": "\u00f4 c\u1eeda",
"135": "s\u00e2n",
"136": "b\u1ebfn du thuy\u1ec1n",
"137": "dao",
"138": "xe tay ga",
"139": "qu\u00e1n bar",
"140": "th\u01b0 vi\u1ec7n",
"141": "h\u00e0nh l\u00fd",
"142": "b\u1edd bi\u1ec3n",
"143": "t\u00e1m",
"144": "c\u00e1i l\u1ecd",
"145": "m\u1eb7t tr\u1eddi",
"146": "\u00e1o s\u01a1 mi",
"147": "qu\u1ea7y t\u00ednh ti\u1ec1n",
"148": "\u0111\u01b0\u1eddng s\u1eaft",
"149": "b\u1ea7u tr\u1eddi",
"150": "chu\u1ed9t",
"151": "r\u00e0o ch\u1eafn",
"152": "\u1ea3nh ch\u1ee5p",
"153": "balo",
"154": "b\u1ea3o t\u00e0ng",
"155": "qu\u1ea3 t\u00e1o",
"156": "hoa qu\u1ea3",
"157": "b\u1ee9c t\u01b0\u1ee3ng",
"158": "m\u00e1y t\u00ednh",
"159": "c\u00e1c t\u00f2a nh\u00e0",
"160": "ch\u00e9n \u0111\u0129a",
"161": "m\u01b0\u1eddi",
"162": "ch\u00edn",
"163": "gi\u1ea5y b\u1ea1c",
"164": "s\u00e0n nh\u00e0",
"165": "chu\u1ed3ng tr\u1ea1i",
"166": "l\u1edbp h\u1ecdc",
"167": "kho",
"168": "b\u1ebfp",
"169": "b\u1ea3ng",
"170": "gia s\u00fac",
"171": "th\u1ecbt",
"172": "b\u1ed3n ti\u1ec3u",
"173": "t\u1ea1p d\u1ec1",
"174": "c\u00e1i l\u1ec1u",
"175": "g\u0103ng tay",
"176": "h\u00e0nh lang",
"177": "l\u00e1",
"178": "t\u00fai",
"179": "h\u1ea3i \u00e2u",
"180": "v\u1ee3t",
"181": "b\u00e0n ph\u00edm",
"182": "s\u00f4 c\u00f4 la",
"183": "r\u01b0\u1ee3u",
"184": "t\u00e1o",
"185": "gian h\u00e0ng",
"186": "xe \u0111i\u1ec7n ng\u1ea7m",
"187": "m\u00e1y s\u1ea5y kh\u00f4",
"188": "toa xe",
"189": "trang thi\u1ebft b\u1ecb",
"190": "c\u1ed7 m\u00e1y",
"191": "n\u01b0\u1edbc",
"192": "c\u00e2y k\u00e9o",
"193": "ng\u0103n k\u00e9o",
"194": "v\u1ea1ch k\u1ebb \u0111\u01b0\u1eddng",
"195": "b\u00e1nh ng\u1ecdt",
"196": "l\u1ed1i \u0111i",
"197": "t\u00e0u",
"198": "\u0111\u01b0\u1eddng \u0111i b\u1ed9",
"199": "d\u0129a",
"200": "con v\u1eb9t",
"201": "l\u00e1 c\u1edd",
"202": "kh\u0103n",
"203": "chung c\u01b0",
"204": "h\u1ed3",
"205": "ca n\u00f4",
"206": "gi\u00e1 \u0111\u1ee1",
"207": "nh\u1eefng qu\u1ea3 cam",
"208": "b\u1eefa tr\u01b0a",
"209": "k\u00ednh \u0111eo",
"210": "cupcake",
"211": "\u0111\u01b0\u1eddng ray",
"212": "b\u1ed9 \u0111\u1ed3",
"213": "h\u00e0ng ho\u00e1",
"214": "nh\u1eefng b\u1ee9c \u1ea3nh",
"215": "c\u00e1i v\u00ed",
"216": "c\u1eebu",
"217": "ng\u01b0\u1eddi gi\u1eef",
"218": "b\u1ee9c tranh",
"219": "c\u1ea7u",
"220": "nhi\u1ec1u c\u00e1i gh\u1ebf",
"221": "b\u00f4ng c\u1ea3i xanh",
"222": "b\u1eefa \u0103n t\u1ed1i",
"223": "v\u1ebd tranh l\u00ean t\u01b0\u1eddng",
"224": "thuy\u1ec1n bu\u1ed3m",
"225": "\u0111i v\u0103ng",
"226": "s\u00e2n kh\u1ea5u",
"227": "n\u1ebfn",
"228": "bu\u1ed3ng",
"229": "c\u00e1i th\u00eca",
"230": "c\u1ecf kh\u00f4",
"231": "con kh\u1ec9",
"232": "t\u01b0\u1ee3ng \u0111\u00e0i",
"233": "t\u1ee7 \u0111\u00f4ng",
"234": "hoa h\u1ed3ng",
"235": "chim b\u1ed3 c\u00e2u",
"236": "hay",
"237": "g\u1ea7u m\u00fac",
"238": "b\u00fai t\u00f3c",
"239": "m\u00f3ng vu\u1ed1t",
"240": "xe \u0111i\u1ec7n",
"241": "\u0111\u0129a",
"242": "m\u00e0n",
"243": "\u00e1o kho\u00e1c",
"244": "m\u1eb7t n\u1ea1",
"245": "\u0111\u1ed3 u\u1ed1ng",
"246": "b\u00f2 \u0111\u1ef1c",
"247": "c\u00e1i n\u0129a",
"248": "\u0111\u01b0\u1eddng \u1ed1ng",
"249": "n\u01b0\u1edbc ti\u1ec3u",
"250": "ly",
"251": "\u0111\u00e8n \u0111\u1ec3 b\u00e0n",
"252": "\u0111\u1ed3 n\u1ed9i th\u1ea5t",
"253": "m\u00e1i ch\u00e8o",
"254": "\u0111\u1ea7u m\u00e1y",
"255": "\u0111\u1ea7m",
"256": "m\u0169 l\u01b0\u1ee1i trai",
"257": "truy\u1ec1n h\u00ecnh",
"258": "ph\u00f4 mai",
"259": "c\u00e0 ph\u00ea",
"260": "b\u1ebfn t\u00e0u",
"261": "con d\u00ea",
"262": "c\u1eeda ra v\u00e0o",
"263": "k\u00fd t\u00ean",
"264": "thi\u1ebft b\u1ecb",
"265": "b\u00ecnh hoa",
"266": "bia",
"267": "con d\u1ed1c",
"268": "\u00e1o cho\u00e0ng",
"269": "m\u00f3n tr\u00e1ng mi\u1ec7ng",
"270": "c\u00e2y s\u00e0o",
"271": "thu\u1ed1c l\u00e1",
"272": "m\u1eb7t",
"273": "k\u00ednh r\u00e2m",
"274": "\u0111i\u00eau kh\u1eafc",
"275": "nh\u00e0",
"276": "rau qu\u1ea3",
"277": "tr\u00e1i c\u00e2y",
"278": "qu\u1ea3 cam",
"279": "\u0111\u0129a n\u00e9m",
"280": "ba lan",
"281": "c\u00e2y g\u1eady",
"282": "s\u1eefa",
"283": "h\u1ed9p \u0111\u1ef1ng",
"284": "khung",
"285": "ngo\u00e0i tr\u1eddi",
"286": "\u0111o\u1ea1n phim gi\u1edbi thi\u1ec7u",
"287": "c\u1edd",
"288": "th\u00f9ng",
"289": "l\u00f2 s\u01b0\u1edfi",
"290": "l\u00e1t c\u1eaft",
"291": "b\u1eafp ch\u00e2n",
"292": "c\u00fan y\u00eau",
"293": "ng\u00e2n h\u00e0ng",
"294": "rau x\u00e0 l\u00e1ch",
"295": "xa l\u1ed9",
"296": "g\u00e0",
"297": "qu\u1ea7n short",
"298": "v\u00f2i n\u01b0\u1edbc",
"299": "m\u0169 b\u1ea3o hi\u1ec3m",
"300": "c\u00f4ng c\u1ee5",
"301": "qu\u1ea3 cam ",
"302": "v\u00e1n tr\u01b0\u1ee3t tuy\u1ebft",
"303": "g\u1ea1ch",
"304": "ch\u00ecm xu\u1ed1ng",
"305": "kh\u0103n t\u1eafm",
"306": "l\u00e1t g\u1ea1ch",
"307": "ng\u0103n",
"308": "b\u1ea3ng hi\u1ec7u",
"309": "l\u0103n tr\u00f2n",
"310": "hotdog",
"311": "c\u1ecf",
"312": "b\u00ecnh",
"313": "b\u00ean",
"314": "t\u00e0u ho\u1ea3",
"315": "b\u00e1nh xe",
"316": "lon",
"317": "nh\u00e0 t\u1eafm",
"318": "\u0111\u01b0\u1eddng \u0111ua",
"319": "m\u00e0u s\u1eafc",
"320": "bao b\u00ec",
"321": "th\u00e0nh ph\u1ea7n",
"322": "chim \u01b0ng",
"323": "\u0111i\u1ec3m t\u00e2m",
"324": "d\u0129a ",
"325": "b\u00e0n ch\u1ea3i \u0111\u00e1nh r\u0103ng",
"326": "h\u00e0ng h\u00f3a",
"327": "pug",
"328": "h\u1ed9p s\u1ed1",
"329": "c\u00e1",
"330": "gi\u1ecf",
"331": "gh\u1ebf s\u00f4 pha",
"332": "qu\u1ea7n \u00e1o",
"333": "tr\u01b0\u1eddng h\u1ee3p",
"334": "b\u00f2",
"335": "v\u00f4 tuy\u1ebfn",
"336": "con thoi",
"337": "theo d\u00f5i",
"338": "\u00e1o ba l\u1ed7",
"339": "d\u00f2ng s\u00f4ng",
"340": "g\u00e0 t\u00e2y",
"341": "d\u1ea5u hi\u1ec7u",
"342": "m\u00e8o con",
"343": "m\u1eaft",
"344": "\u0111\u01b0a \u0111\u00f3n",
"345": "con heo",
"346": "ngo\u00e0i",
"347": "\u0111\u1ed3ng ph\u1ee5c",
"348": "m\u00e1y bay tr\u1ef1c th\u0103ng",
"349": "\u0111\u1ea1i d\u01b0\u01a1ng",
"350": "b\u1ee9c m\u00e0n",
"351": "cam",
"352": "b\u00e1nh hamburger"
},
"img_size": 224,
"in_chans": 3,
"label2id": null,
"layernorm_embedding": false,
"layernorm_eps": 1e-05,
"max_rel_pos": 0,
"max_source_positions": 1024,
"model_type": "vivqa",
"moe_eval_capacity_token_fraction": 0.25,
"moe_expert_count": 0,
"moe_freq": 0,
"moe_gating_use_fp32": true,
"moe_normalize_gate_prob_before_dropping": false,
"moe_second_expert_policy": "random",
"moe_top1_expert": false,
"multiway": true,
"no_output_layer": true,
"no_scale_embedding": true,
"normalize_output": true,
"patch_size": 16,
"rel_pos_buckets": 0,
"share_encoder_input_output_embed": false,
"subln": true,
"torch_dtype": "float32",
"transformers_version": "4.36.2",
"use_xmoe": false,
"vocab_size": -1,
"xpos_rel_pos": false,
"xpos_scale_base": 512
}