{
  "activation_dropout": 0.0,
  "activation_fn": "gelu",
  "architectures": [
    "BEiT3ForVietnameseVisualQuestionAnswering"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_vivqa.ViVQAConfig",
    "AutoModel": "modeling_vivqa.BEiT3ForVietnameseVisualQuestionAnswering"
  },
  "bert_init": false,
  "checkpoint_activations": false,
  "ddp_rank": 0,
  "deepnorm": false,
  "drop_path_rate": 0.0,
  "dropout": 0.0,
  "encoder_attention_heads": 6,
  "encoder_embed_dim": 768,
  "encoder_ffn_embed_dim": 3072,
  "encoder_layers": 6,
  "encoder_normalize_before": true,
  "fsdp": false,
  "id2label": {
    "0": "hai",
    "1": "ba",
    "2": "b\u1ed1n",
    "3": "m\u00e0u tr\u1eafng",
    "4": "m\u00e0u \u0111\u1ecf",
    "5": "m\u00e0u xanh d\u01b0\u01a1ng",
    "6": "m\u00e0u \u0111en",
    "7": "m\u00e0u xanh l\u00e1",
    "8": "ph\u00f2ng",
    "9": "m\u00e0u v\u00e0ng",
    "10": "ph\u00f2ng b\u1ebfp",
    "11": "m\u00e0u n\u00e2u",
    "12": "ph\u00f2ng t\u1eafm",
    "13": "m\u00e0u cam",
    "14": "gi\u01b0\u1eddng",
    "15": "con m\u00e8o",
    "16": "h\u01b0\u01a1u cao c\u1ed5",
    "17": "m\u00e1y bay",
    "18": "g\u01b0\u01a1ng",
    "19": "n\u0103m",
    "20": "con chim",
    "21": "m\u00e0u x\u00e1m",
    "22": "m\u00e0u t\u00eda",
    "23": "con ch\u00f3",
    "24": "con thuy\u1ec1n",
    "25": "g\u1ea5u",
    "26": "xe \u00f4 t\u00f4",
    "27": "l\u1ecd c\u1eafm hoa",
    "28": "con voi",
    "29": "m\u1ed9t",
    "30": "con ng\u1ef1a",
    "31": "c\u00e1i gh\u1ebf",
    "32": "xe m\u00e1y",
    "33": "xe t\u1ea3i",
    "34": "t\u00e0u h\u1ecfa",
    "35": "xe bu\u00fdt",
    "36": "\u0111\u01b0\u1eddng ph\u1ed1",
    "37": "ch\u1eadu",
    "38": "h\u1ed9p",
    "39": "b\u00e1t",
    "40": "pizza",
    "41": "xe \u0111\u1ea1p",
    "42": "chu\u1ed3ng",
    "43": "con b\u00f2",
    "44": "vali",
    "45": "b\u00e1nh",
    "46": "\u0111\u1ed3ng h\u1ed3",
    "47": "s\u00e1u",
    "48": "di\u1ec1u",
    "49": "b\u0103ng gh\u1ebf",
    "50": "donut",
    "51": "nh\u00e0 v\u1ec7 sinh",
    "52": "l\u00f2 vi s\u00f3ng",
    "53": "sandwich",
    "54": "ng\u1ef1a v\u1eb1n",
    "55": "tr\u1ea1m",
    "56": "chi\u1ebfc \u00f4",
    "57": "ph\u00f2ng ng\u1ee7",
    "58": "ng\u1ef1a r\u1eb1n",
    "59": "\u0111\u0129a \u0103n",
    "60": "v\u00f2i",
    "61": "\u0111i\u1ec7n tho\u1ea1i",
    "62": "con c\u1eebu",
    "63": "t\u00f2a nh\u00e0",
    "64": "v\u00e1n tr\u01b0\u1ee3t",
    "65": "c\u1eeda s\u1ed5",
    "66": "c\u1eeda h\u00e0ng",
    "67": "t\u00f2a th\u00e1p",
    "68": "b\u1ed3n t\u1eafm",
    "69": "c\u00e1i r\u1ed5",
    "70": "c\u00e2y",
    "71": "m\u00e1y vi t\u00ednh",
    "72": "qu\u00e1n \u0103n",
    "73": "ga ra",
    "74": "ch\u1ea3o",
    "75": "v\u01b0\u1eddn b\u00e1ch th\u00fa",
    "76": "nh\u00e0 \u1edf",
    "77": "xe \u0111\u1ea9y",
    "78": "laptop",
    "79": "xe l\u1eeda",
    "80": "b\u00f4ng hoa",
    "81": "v\u00e1n l\u01b0\u1edbt s\u00f3ng",
    "82": "c\u00e1i t\u00fai",
    "83": "t\u1ee7 \u0111\u00e1",
    "84": "qu\u1ea3 b\u00f3ng",
    "85": "chu\u1ed1i",
    "86": "s\u00e2n bay",
    "87": "v\u0103n ph\u00f2ng",
    "88": "th\u00f9ng ch\u1ee9a",
    "89": "n\u00fai",
    "90": "c\u00e1i b\u00e0n",
    "91": "tr\u01b0\u1ee3t tuy\u1ebft",
    "92": "c\u00e0 v\u1ea1t",
    "93": "h\u1ed3 b\u01a1i",
    "94": "b\u00e3i c\u1ecf",
    "95": "b\u1ea3y",
    "96": "m\u00f3n \u0103n",
    "97": "\u0111\u01b0\u1eddng b\u1ed9",
    "98": "xe",
    "99": "n\u00f3n",
    "100": "\u0111\u1ed9ng c\u01a1",
    "101": "c\u00e1i m\u00e2m",
    "102": "g\u1eady",
    "103": "g\u1ea5u tr\u00fac",
    "104": "c\u1eeda ti\u1ec7m",
    "105": "con v\u1ecbt",
    "106": "l\u1ed3ng",
    "107": "t\u01b0\u1eddng",
    "108": "c\u00e1i n\u1ed3i",
    "109": "t\u1ee7 l\u1ea1nh",
    "110": "c\u1eeda",
    "111": "t\u00e1ch",
    "112": "b\u1ee9c \u1ea3nh",
    "113": "s\u00e2n v\u01b0\u1eddn",
    "114": "\u0111\u1ed3i",
    "115": "b\u1eefa \u0103n",
    "116": "s\u00e2n v\u1eadn \u0111\u1ed9ng",
    "117": "d\u0129a nh\u1ef1a",
    "118": "ph\u01b0\u01a1ng ti\u1ec7n giao th\u00f4ng",
    "119": "m\u00e1y xay",
    "120": "\u0111\u1ed3 ch\u01a1i",
    "121": "m\u0169",
    "122": "rau",
    "123": "\u00e1o vest",
    "124": "v\u00f2i hoa sen",
    "125": "b\u00e0n ch\u1ea3i",
    "126": "c\u00e1i k\u1ec7",
    "127": "\u0111\u01b0\u1eddng",
    "128": "xe l\u0103n",
    "129": "c\u00e0 r\u1ed1t",
    "130": "xe c\u1ed9",
    "131": "th\u00e2n c\u00e2y",
    "132": "m\u00e1y \u1ea3nh",
    "133": "chai",
    "134": "\u00f4 c\u1eeda",
    "135": "s\u00e2n",
    "136": "b\u1ebfn du thuy\u1ec1n",
    "137": "dao",
    "138": "xe tay ga",
    "139": "qu\u00e1n bar",
    "140": "th\u01b0 vi\u1ec7n",
    "141": "h\u00e0nh l\u00fd",
    "142": "b\u1edd bi\u1ec3n",
    "143": "t\u00e1m",
    "144": "c\u00e1i l\u1ecd",
    "145": "m\u1eb7t tr\u1eddi",
    "146": "\u00e1o s\u01a1 mi",
    "147": "qu\u1ea7y t\u00ednh ti\u1ec1n",
    "148": "\u0111\u01b0\u1eddng s\u1eaft",
    "149": "b\u1ea7u tr\u1eddi",
    "150": "chu\u1ed9t",
    "151": "r\u00e0o ch\u1eafn",
    "152": "\u1ea3nh ch\u1ee5p",
    "153": "balo",
    "154": "b\u1ea3o t\u00e0ng",
    "155": "qu\u1ea3 t\u00e1o",
    "156": "hoa qu\u1ea3",
    "157": "b\u1ee9c t\u01b0\u1ee3ng",
    "158": "m\u00e1y t\u00ednh",
    "159": "c\u00e1c t\u00f2a nh\u00e0",
    "160": "ch\u00e9n \u0111\u0129a",
    "161": "m\u01b0\u1eddi",
    "162": "ch\u00edn",
    "163": "gi\u1ea5y b\u1ea1c",
    "164": "s\u00e0n nh\u00e0",
    "165": "chu\u1ed3ng tr\u1ea1i",
    "166": "l\u1edbp h\u1ecdc",
    "167": "kho",
    "168": "b\u1ebfp",
    "169": "b\u1ea3ng",
    "170": "gia s\u00fac",
    "171": "th\u1ecbt",
    "172": "b\u1ed3n ti\u1ec3u",
    "173": "t\u1ea1p d\u1ec1",
    "174": "c\u00e1i l\u1ec1u",
    "175": "g\u0103ng tay",
    "176": "h\u00e0nh lang",
    "177": "l\u00e1",
    "178": "t\u00fai",
    "179": "h\u1ea3i \u00e2u",
    "180": "v\u1ee3t",
    "181": "b\u00e0n ph\u00edm",
    "182": "s\u00f4 c\u00f4 la",
    "183": "r\u01b0\u1ee3u",
    "184": "t\u00e1o",
    "185": "gian h\u00e0ng",
    "186": "xe \u0111i\u1ec7n ng\u1ea7m",
    "187": "m\u00e1y s\u1ea5y kh\u00f4",
    "188": "toa xe",
    "189": "trang thi\u1ebft b\u1ecb",
    "190": "c\u1ed7 m\u00e1y",
    "191": "n\u01b0\u1edbc",
    "192": "c\u00e2y k\u00e9o",
    "193": "ng\u0103n k\u00e9o",
    "194": "v\u1ea1ch k\u1ebb \u0111\u01b0\u1eddng",
    "195": "b\u00e1nh ng\u1ecdt",
    "196": "l\u1ed1i \u0111i",
    "197": "t\u00e0u",
    "198": "\u0111\u01b0\u1eddng \u0111i b\u1ed9",
    "199": "d\u0129a",
    "200": "con v\u1eb9t",
    "201": "l\u00e1 c\u1edd",
    "202": "kh\u0103n",
    "203": "chung c\u01b0",
    "204": "h\u1ed3",
    "205": "ca n\u00f4",
    "206": "gi\u00e1 \u0111\u1ee1",
    "207": "nh\u1eefng qu\u1ea3 cam",
    "208": "b\u1eefa tr\u01b0a",
    "209": "k\u00ednh \u0111eo",
    "210": "cupcake",
    "211": "\u0111\u01b0\u1eddng ray",
    "212": "b\u1ed9 \u0111\u1ed3",
    "213": "h\u00e0ng ho\u00e1",
    "214": "nh\u1eefng b\u1ee9c \u1ea3nh",
    "215": "c\u00e1i v\u00ed",
    "216": "c\u1eebu",
    "217": "ng\u01b0\u1eddi gi\u1eef",
    "218": "b\u1ee9c tranh",
    "219": "c\u1ea7u",
    "220": "nhi\u1ec1u c\u00e1i gh\u1ebf",
    "221": "b\u00f4ng c\u1ea3i xanh",
    "222": "b\u1eefa \u0103n t\u1ed1i",
    "223": "v\u1ebd tranh l\u00ean t\u01b0\u1eddng",
    "224": "thuy\u1ec1n bu\u1ed3m",
    "225": "\u0111i v\u0103ng",
    "226": "s\u00e2n kh\u1ea5u",
    "227": "n\u1ebfn",
    "228": "bu\u1ed3ng",
    "229": "c\u00e1i th\u00eca",
    "230": "c\u1ecf kh\u00f4",
    "231": "con kh\u1ec9",
    "232": "t\u01b0\u1ee3ng \u0111\u00e0i",
    "233": "t\u1ee7 \u0111\u00f4ng",
    "234": "hoa h\u1ed3ng",
    "235": "chim b\u1ed3 c\u00e2u",
    "236": "hay",
    "237": "g\u1ea7u m\u00fac",
    "238": "b\u00fai t\u00f3c",
    "239": "m\u00f3ng vu\u1ed1t",
    "240": "xe \u0111i\u1ec7n",
    "241": "\u0111\u0129a",
    "242": "m\u00e0n",
    "243": "\u00e1o kho\u00e1c",
    "244": "m\u1eb7t n\u1ea1",
    "245": "\u0111\u1ed3 u\u1ed1ng",
    "246": "b\u00f2 \u0111\u1ef1c",
    "247": "c\u00e1i n\u0129a",
    "248": "\u0111\u01b0\u1eddng \u1ed1ng",
    "249": "n\u01b0\u1edbc ti\u1ec3u",
    "250": "ly",
    "251": "\u0111\u00e8n \u0111\u1ec3 b\u00e0n",
    "252": "\u0111\u1ed3 n\u1ed9i th\u1ea5t",
    "253": "m\u00e1i ch\u00e8o",
    "254": "\u0111\u1ea7u m\u00e1y",
    "255": "\u0111\u1ea7m",
    "256": "m\u0169 l\u01b0\u1ee1i trai",
    "257": "truy\u1ec1n h\u00ecnh",
    "258": "ph\u00f4 mai",
    "259": "c\u00e0 ph\u00ea",
    "260": "b\u1ebfn t\u00e0u",
    "261": "con d\u00ea",
    "262": "c\u1eeda ra v\u00e0o",
    "263": "k\u00fd t\u00ean",
    "264": "thi\u1ebft b\u1ecb",
    "265": "b\u00ecnh hoa",
    "266": "bia",
    "267": "con d\u1ed1c",
    "268": "\u00e1o cho\u00e0ng",
    "269": "m\u00f3n tr\u00e1ng mi\u1ec7ng",
    "270": "c\u00e2y s\u00e0o",
    "271": "thu\u1ed1c l\u00e1",
    "272": "m\u1eb7t",
    "273": "k\u00ednh r\u00e2m",
    "274": "\u0111i\u00eau kh\u1eafc",
    "275": "nh\u00e0",
    "276": "rau qu\u1ea3",
    "277": "tr\u00e1i c\u00e2y",
    "278": "qu\u1ea3 cam",
    "279": "\u0111\u0129a n\u00e9m",
    "280": "ba lan",
    "281": "c\u00e2y g\u1eady",
    "282": "s\u1eefa",
    "283": "h\u1ed9p \u0111\u1ef1ng",
    "284": "khung",
    "285": "ngo\u00e0i tr\u1eddi",
    "286": "\u0111o\u1ea1n phim gi\u1edbi thi\u1ec7u",
    "287": "c\u1edd",
    "288": "th\u00f9ng",
    "289": "l\u00f2 s\u01b0\u1edfi",
    "290": "l\u00e1t c\u1eaft",
    "291": "b\u1eafp ch\u00e2n",
    "292": "c\u00fan y\u00eau",
    "293": "ng\u00e2n h\u00e0ng",
    "294": "rau x\u00e0 l\u00e1ch",
    "295": "xa l\u1ed9",
    "296": "g\u00e0",
    "297": "qu\u1ea7n short",
    "298": "v\u00f2i n\u01b0\u1edbc",
    "299": "m\u0169 b\u1ea3o hi\u1ec3m",
    "300": "c\u00f4ng c\u1ee5",
    "301": "qu\u1ea3 cam ",
    "302": "v\u00e1n tr\u01b0\u1ee3t tuy\u1ebft",
    "303": "g\u1ea1ch",
    "304": "ch\u00ecm xu\u1ed1ng",
    "305": "kh\u0103n t\u1eafm",
    "306": "l\u00e1t g\u1ea1ch",
    "307": "ng\u0103n",
    "308": "b\u1ea3ng hi\u1ec7u",
    "309": "l\u0103n tr\u00f2n",
    "310": "hotdog",
    "311": "c\u1ecf",
    "312": "b\u00ecnh",
    "313": "b\u00ean",
    "314": "t\u00e0u ho\u1ea3",
    "315": "b\u00e1nh xe",
    "316": "lon",
    "317": "nh\u00e0 t\u1eafm",
    "318": "\u0111\u01b0\u1eddng \u0111ua",
    "319": "m\u00e0u s\u1eafc",
    "320": "bao b\u00ec",
    "321": "th\u00e0nh ph\u1ea7n",
    "322": "chim \u01b0ng",
    "323": "\u0111i\u1ec3m t\u00e2m",
    "324": "d\u0129a ",
    "325": "b\u00e0n ch\u1ea3i \u0111\u00e1nh r\u0103ng",
    "326": "h\u00e0ng h\u00f3a",
    "327": "pug",
    "328": "h\u1ed9p s\u1ed1",
    "329": "c\u00e1",
    "330": "gi\u1ecf",
    "331": "gh\u1ebf s\u00f4 pha",
    "332": "qu\u1ea7n \u00e1o",
    "333": "tr\u01b0\u1eddng h\u1ee3p",
    "334": "b\u00f2",
    "335": "v\u00f4 tuy\u1ebfn",
    "336": "con thoi",
    "337": "theo d\u00f5i",
    "338": "\u00e1o ba l\u1ed7",
    "339": "d\u00f2ng s\u00f4ng",
    "340": "g\u00e0 t\u00e2y",
    "341": "d\u1ea5u hi\u1ec7u",
    "342": "m\u00e8o con",
    "343": "m\u1eaft",
    "344": "\u0111\u01b0a \u0111\u00f3n",
    "345": "con heo",
    "346": "ngo\u00e0i",
    "347": "\u0111\u1ed3ng ph\u1ee5c",
    "348": "m\u00e1y bay tr\u1ef1c th\u0103ng",
    "349": "\u0111\u1ea1i d\u01b0\u01a1ng",
    "350": "b\u1ee9c m\u00e0n",
    "351": "cam",
    "352": "b\u00e1nh hamburger"
  },
  "img_size": 224,
  "in_chans": 3,
  "label2id": null,
  "layernorm_embedding": false,
  "layernorm_eps": 1e-05,
  "max_rel_pos": 0,
  "max_source_positions": 1024,
  "model_type": "vivqa",
  "moe_eval_capacity_token_fraction": 0.25,
  "moe_expert_count": 0,
  "moe_freq": 0,
  "moe_gating_use_fp32": true,
  "moe_normalize_gate_prob_before_dropping": false,
  "moe_second_expert_policy": "random",
  "moe_top1_expert": false,
  "multiway": true,
  "no_output_layer": true,
  "no_scale_embedding": true,
  "normalize_output": true,
  "patch_size": 16,
  "rel_pos_buckets": 0,
  "share_encoder_input_output_embed": false,
  "subln": true,
  "torch_dtype": "float32",
  "transformers_version": "4.36.2",
  "use_xmoe": false,
  "vocab_size": -1,
  "xpos_rel_pos": false,
  "xpos_scale_base": 512
}