phonghoccode committed on
Commit
f7ac1f0
·
verified ·
1 Parent(s): 1a2e086

Upload CustomViltForVQA

Browse files
Files changed (2) hide show
  1. config.json +214 -214
  2. model.safetensors +1 -1
config.json CHANGED
@@ -8,227 +8,227 @@
8
  "hidden_dropout_prob": 0.0,
9
  "hidden_size": 768,
10
  "id2label": {
11
- "0": "potted plant",
12
- "1": "cell phone",
13
- "2": "stop sign",
14
- "3": "dog",
15
- "4": "laptop",
16
- "5": "frisbee",
17
- "6": "sports ball",
18
- "7": "cup",
19
- "8": "mouse",
20
- "9": "elephant",
21
- "10": "3",
22
- "11": "toothbrush",
23
- "12": "vase",
24
- "13": "airplane",
25
- "14": "right",
26
- "15": "Behind",
27
- "16": "Front",
28
- "17": "skateboard",
29
- "18": "knife",
30
- "19": "toilet",
31
- "20": "No",
32
- "21": "left",
33
- "22": "Brown",
34
- "23": "cow",
35
- "24": "bear",
36
- "25": "boat",
37
- "26": "bird",
38
  "27": "skis",
39
- "28": "baseball glove",
40
- "29": "oven",
41
- "30": "Grey",
42
- "31": "parking meter",
43
- "32": "horse",
44
- "33": "couch",
45
- "34": "orange",
46
- "35": "White",
47
- "36": "Purple",
48
- "37": "Left",
49
- "38": "surfboard",
50
- "39": "truck",
51
- "40": "tv",
52
- "41": "traffic light",
53
- "42": "pizza",
54
- "43": "banana",
55
- "44": "tie",
56
- "45": "train",
57
- "46": "remote",
58
- "47": "bottle",
59
- "48": "bus",
60
- "49": "Blue",
61
- "50": "person",
62
- "51": "tennis racket",
63
- "52": "broccoli",
64
- "53": "bed",
65
- "54": "4",
66
- "55": "keyboard",
67
- "56": "cat",
68
- "57": "sink",
69
- "58": "Yes",
70
- "59": "dining table",
71
- "60": "cake",
72
- "61": "Green",
73
- "62": "snowboard",
74
- "63": "Above",
75
- "64": "Right",
76
- "65": "above",
77
- "66": "giraffe",
78
- "67": "refrigerator",
79
- "68": "9",
80
- "69": "kite",
81
- "70": "book",
82
- "71": "suitcase",
83
- "72": "bicycle",
84
- "73": "motorcycle",
85
- "74": "1",
86
- "75": "2",
87
- "76": "carrot",
88
- "77": "6",
89
- "78": "Black",
90
- "79": "5",
91
- "80": "wine glass",
92
- "81": "8",
93
- "82": "7",
94
- "83": "scissors",
95
- "84": "sheep",
96
- "85": "sandwich",
97
- "86": "clock",
98
- "87": "handbag",
99
- "88": "hot dog",
100
- "89": "bowl",
101
- "90": "Pink",
102
- "91": "chair",
103
- "92": "fire hydrant",
104
- "93": "donut",
105
- "94": "fork",
106
- "95": "0",
107
- "96": "Red",
108
- "97": "teddy bear",
109
- "98": "apple",
110
- "99": "Yellow",
111
- "100": "bench",
112
- "101": "baseball bat",
113
- "102": "umbrella",
114
- "103": "zebra",
115
- "104": "spoon",
116
- "105": "Orange",
117
- "106": "Below",
118
- "107": "car"
119
  },
120
  "image_size": 384,
121
  "initializer_range": 0.02,
122
  "intermediate_size": 3072,
123
  "label2id": {
124
- "0": 95,
125
- "1": 74,
126
- "2": 75,
127
- "3": 10,
128
- "4": 54,
129
- "5": 79,
130
- "6": 77,
131
- "7": 82,
132
- "8": 81,
133
- "9": 68,
134
- "Above": 63,
135
- "Behind": 15,
136
- "Below": 106,
137
- "Black": 78,
138
- "Blue": 49,
139
- "Brown": 22,
140
- "Front": 16,
141
- "Green": 61,
142
- "Grey": 30,
143
- "Left": 37,
144
- "No": 20,
145
- "Orange": 105,
146
- "Pink": 90,
147
- "Purple": 36,
148
- "Red": 96,
149
- "Right": 64,
150
- "White": 35,
151
- "Yellow": 99,
152
- "Yes": 58,
153
- "above": 65,
154
- "airplane": 13,
155
- "apple": 98,
156
- "banana": 43,
157
- "baseball bat": 101,
158
- "baseball glove": 28,
159
- "bear": 24,
160
- "bed": 53,
161
- "bench": 100,
162
- "bicycle": 72,
163
- "bird": 26,
164
- "boat": 25,
165
- "book": 70,
166
- "bottle": 47,
167
- "bowl": 89,
168
- "broccoli": 52,
169
- "bus": 48,
170
- "cake": 60,
171
- "car": 107,
172
- "carrot": 76,
173
- "cat": 56,
174
- "cell phone": 1,
175
- "chair": 91,
176
- "clock": 86,
177
- "couch": 33,
178
- "cow": 23,
179
- "cup": 7,
180
- "dining table": 59,
181
- "dog": 3,
182
- "donut": 93,
183
- "elephant": 9,
184
- "fire hydrant": 92,
185
- "fork": 94,
186
- "frisbee": 5,
187
- "giraffe": 66,
188
- "handbag": 87,
189
- "horse": 32,
190
- "hot dog": 88,
191
- "keyboard": 55,
192
- "kite": 69,
193
- "knife": 18,
194
- "laptop": 4,
195
- "left": 21,
196
- "motorcycle": 73,
197
- "mouse": 8,
198
- "orange": 34,
199
- "oven": 29,
200
- "parking meter": 31,
201
- "person": 50,
202
- "pizza": 42,
203
- "potted plant": 0,
204
- "refrigerator": 67,
205
- "remote": 46,
206
- "right": 14,
207
- "sandwich": 85,
208
- "scissors": 83,
209
- "sheep": 84,
210
- "sink": 57,
211
- "skateboard": 17,
212
  "skis": 27,
213
- "snowboard": 62,
214
- "spoon": 104,
215
- "sports ball": 6,
216
- "stop sign": 2,
217
- "suitcase": 71,
218
- "surfboard": 38,
219
- "teddy bear": 97,
220
- "tennis racket": 51,
221
- "tie": 44,
222
- "toilet": 19,
223
- "toothbrush": 11,
224
- "traffic light": 41,
225
- "train": 45,
226
- "truck": 39,
227
- "tv": 40,
228
- "umbrella": 102,
229
- "vase": 12,
230
- "wine glass": 80,
231
- "zebra": 103
232
  },
233
  "layer_norm_eps": 1e-12,
234
  "max_image_length": -1,
 
8
  "hidden_dropout_prob": 0.0,
9
  "hidden_size": 768,
10
  "id2label": {
11
+ "0": "boat",
12
+ "1": "spoon",
13
+ "2": "bench",
14
+ "3": "banana",
15
+ "4": "cat",
16
+ "5": "oven",
17
+ "6": "bottle",
18
+ "7": "No",
19
+ "8": "Yes",
20
+ "9": "knife",
21
+ "10": "clock",
22
+ "11": "5",
23
+ "12": "mouse",
24
+ "13": "cup",
25
+ "14": "sheep",
26
+ "15": "dining table",
27
+ "16": "fork",
28
+ "17": "refrigerator",
29
+ "18": "zebra",
30
+ "19": "broccoli",
31
+ "20": "dog",
32
+ "21": "skateboard",
33
+ "22": "Black",
34
+ "23": "bed",
35
+ "24": "motorcycle",
36
+ "25": "pizza",
37
+ "26": "donut",
38
  "27": "skis",
39
+ "28": "chair",
40
+ "29": "tennis racket",
41
+ "30": "bird",
42
+ "31": "potted plant",
43
+ "32": "sports ball",
44
+ "33": "Brown",
45
+ "34": "laptop",
46
+ "35": "elephant",
47
+ "36": "horse",
48
+ "37": "Blue",
49
+ "38": "suitcase",
50
+ "39": "hot dog",
51
+ "40": "4",
52
+ "41": "Orange",
53
+ "42": "Purple",
54
+ "43": "handbag",
55
+ "44": "cow",
56
+ "45": "fire hydrant",
57
+ "46": "snowboard",
58
+ "47": "toothbrush",
59
+ "48": "Below",
60
+ "49": "parking meter",
61
+ "50": "Front",
62
+ "51": "Right",
63
+ "52": "cake",
64
+ "53": "tv",
65
+ "54": "9",
66
+ "55": "tie",
67
+ "56": "orange",
68
+ "57": "wine glass",
69
+ "58": "cell phone",
70
+ "59": "stop sign",
71
+ "60": "right",
72
+ "61": "Pink",
73
+ "62": "giraffe",
74
+ "63": "scissors",
75
+ "64": "1",
76
+ "65": "7",
77
+ "66": "keyboard",
78
+ "67": "Yellow",
79
+ "68": "3",
80
+ "69": "remote",
81
+ "70": "bear",
82
+ "71": "car",
83
+ "72": "truck",
84
+ "73": "surfboard",
85
+ "74": "traffic light",
86
+ "75": "left",
87
+ "76": "bus",
88
+ "77": "frisbee",
89
+ "78": "couch",
90
+ "79": "Red",
91
+ "80": "Left",
92
+ "81": "6",
93
+ "82": "toilet",
94
+ "83": "airplane",
95
+ "84": "Grey",
96
+ "85": "8",
97
+ "86": "above",
98
+ "87": "baseball glove",
99
+ "88": "vase",
100
+ "89": "kite",
101
+ "90": "bowl",
102
+ "91": "0",
103
+ "92": "2",
104
+ "93": "White",
105
+ "94": "Behind",
106
+ "95": "Above",
107
+ "96": "baseball bat",
108
+ "97": "Green",
109
+ "98": "person",
110
+ "99": "sandwich",
111
+ "100": "sink",
112
+ "101": "book",
113
+ "102": "train",
114
+ "103": "umbrella",
115
+ "104": "carrot",
116
+ "105": "bicycle",
117
+ "106": "apple",
118
+ "107": "teddy bear"
119
  },
120
  "image_size": 384,
121
  "initializer_range": 0.02,
122
  "intermediate_size": 3072,
123
  "label2id": {
124
+ "0": 91,
125
+ "1": 64,
126
+ "2": 92,
127
+ "3": 68,
128
+ "4": 40,
129
+ "5": 11,
130
+ "6": 81,
131
+ "7": 65,
132
+ "8": 85,
133
+ "9": 54,
134
+ "Above": 95,
135
+ "Behind": 94,
136
+ "Below": 48,
137
+ "Black": 22,
138
+ "Blue": 37,
139
+ "Brown": 33,
140
+ "Front": 50,
141
+ "Green": 97,
142
+ "Grey": 84,
143
+ "Left": 80,
144
+ "No": 7,
145
+ "Orange": 41,
146
+ "Pink": 61,
147
+ "Purple": 42,
148
+ "Red": 79,
149
+ "Right": 51,
150
+ "White": 93,
151
+ "Yellow": 67,
152
+ "Yes": 8,
153
+ "above": 86,
154
+ "airplane": 83,
155
+ "apple": 106,
156
+ "banana": 3,
157
+ "baseball bat": 96,
158
+ "baseball glove": 87,
159
+ "bear": 70,
160
+ "bed": 23,
161
+ "bench": 2,
162
+ "bicycle": 105,
163
+ "bird": 30,
164
+ "boat": 0,
165
+ "book": 101,
166
+ "bottle": 6,
167
+ "bowl": 90,
168
+ "broccoli": 19,
169
+ "bus": 76,
170
+ "cake": 52,
171
+ "car": 71,
172
+ "carrot": 104,
173
+ "cat": 4,
174
+ "cell phone": 58,
175
+ "chair": 28,
176
+ "clock": 10,
177
+ "couch": 78,
178
+ "cow": 44,
179
+ "cup": 13,
180
+ "dining table": 15,
181
+ "dog": 20,
182
+ "donut": 26,
183
+ "elephant": 35,
184
+ "fire hydrant": 45,
185
+ "fork": 16,
186
+ "frisbee": 77,
187
+ "giraffe": 62,
188
+ "handbag": 43,
189
+ "horse": 36,
190
+ "hot dog": 39,
191
+ "keyboard": 66,
192
+ "kite": 89,
193
+ "knife": 9,
194
+ "laptop": 34,
195
+ "left": 75,
196
+ "motorcycle": 24,
197
+ "mouse": 12,
198
+ "orange": 56,
199
+ "oven": 5,
200
+ "parking meter": 49,
201
+ "person": 98,
202
+ "pizza": 25,
203
+ "potted plant": 31,
204
+ "refrigerator": 17,
205
+ "remote": 69,
206
+ "right": 60,
207
+ "sandwich": 99,
208
+ "scissors": 63,
209
+ "sheep": 14,
210
+ "sink": 100,
211
+ "skateboard": 21,
212
  "skis": 27,
213
+ "snowboard": 46,
214
+ "spoon": 1,
215
+ "sports ball": 32,
216
+ "stop sign": 59,
217
+ "suitcase": 38,
218
+ "surfboard": 73,
219
+ "teddy bear": 107,
220
+ "tennis racket": 29,
221
+ "tie": 55,
222
+ "toilet": 82,
223
+ "toothbrush": 47,
224
+ "traffic light": 74,
225
+ "train": 102,
226
+ "truck": 72,
227
+ "tv": 53,
228
+ "umbrella": 103,
229
+ "vase": 88,
230
+ "wine glass": 57,
231
+ "zebra": 18
232
  },
233
  "layer_norm_eps": 1e-12,
234
  "max_image_length": -1,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85c2d34454baed6f5e6e338be3937ff748194cca397ecec7b65f353b824b8cb8
3
  size 446736704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d01dbeaa5245e629b38823a8d8c98a138bf4824da8709a844f2ae675a4301e2e
3
  size 446736704