Zero-Shot Image Classification
OpenCLIP
PyTorch
clip
baltachev committed
Commit
9454a6f
1 Parent(s): 84c9828

Make the HF model compatible with older transformers versions


Hello!
Yesterday we merged the model made compatible with the HF CLIP class. It works fine on the latest transformers version.

However, I found out that it can't be loaded on older versions of transformers (specifically, 4.20.1), so I re-converted the open_clip model with transformers==4.20.1, and it now works like a charm on versions >= 4.19.

I decided not to check older versions because they are quite outdated.
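
For anyone who wants to sanity-check this on their own setup, a minimal loading sketch is below. It assumes the standard transformers CLIP classes; the repository id is a placeholder (substitute this repo's actual hub name), not something taken from this commit.

import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

# Placeholder repo id -- replace with this repository's actual hub name.
repo_id = "your-org/your-converted-clip-model"

model = CLIPModel.from_pretrained(repo_id)        # should load on transformers >= 4.19 after this commit
processor = CLIPProcessor.from_pretrained(repo_id)

# Dummy image and prompts, just to exercise the full forward pass.
image = Image.new("RGB", (224, 224), color="white")
inputs = processor(text=["a cat", "a dog"], images=image, return_tensors="pt", padding=True)

with torch.no_grad():
    outputs = model(**inputs)

print(outputs.logits_per_image.softmax(dim=-1))   # zero-shot probabilities over the two prompts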

Files changed (2)
  1. config.json +18 -12
  2. pytorch_model.bin +2 -2
config.json CHANGED
@@ -1,5 +1,4 @@
 {
-  "_commit_hash": null,
   "architectures": [
     "CLIPModel"
   ],
@@ -13,13 +12,13 @@
     "architectures": null,
     "attention_dropout": 0.0,
     "bad_words_ids": null,
-    "begin_suppress_tokens": null,
     "bos_token_id": 0,
     "chunk_size_feed_forward": 0,
     "cross_attention_hidden_size": null,
     "decoder_start_token_id": null,
     "diversity_penalty": 0.0,
     "do_sample": false,
+    "dropout": 0.0,
     "early_stopping": false,
     "encoder_no_repeat_ngram_size": 0,
     "eos_token_id": 2,
@@ -60,17 +59,14 @@
     "pad_token_id": 1,
     "prefix": null,
     "problem_type": null,
-    "projection_dim": 512,
     "pruned_heads": {},
     "remove_invalid_values": false,
     "repetition_penalty": 1.0,
     "return_dict": true,
     "return_dict_in_generate": false,
     "sep_token_id": null,
-    "suppress_tokens": null,
     "task_specific_params": null,
     "temperature": 1.0,
-    "tf_legacy_loss": false,
     "tie_encoder_decoder": false,
     "tie_word_embeddings": true,
     "tokenizer_class": null,
@@ -78,11 +74,17 @@
     "top_p": 1.0,
     "torch_dtype": null,
     "torchscript": false,
-    "transformers_version": "4.29.1",
+    "transformers_version": "4.20.1",
     "typical_p": 1.0,
     "use_bfloat16": false,
     "vocab_size": 49408
   },
+  "text_config_dict": {
+    "hidden_act": "gelu",
+    "hidden_size": 768,
+    "intermediate_size": 3072,
+    "num_attention_heads": 12
+  },
   "torch_dtype": "float32",
   "transformers_version": null,
   "vision_config": {
@@ -91,13 +93,13 @@
     "architectures": null,
     "attention_dropout": 0.0,
     "bad_words_ids": null,
-    "begin_suppress_tokens": null,
     "bos_token_id": null,
     "chunk_size_feed_forward": 0,
     "cross_attention_hidden_size": null,
     "decoder_start_token_id": null,
     "diversity_penalty": 0.0,
     "do_sample": false,
+    "dropout": 0.0,
     "early_stopping": false,
     "encoder_no_repeat_ngram_size": 0,
     "eos_token_id": null,
@@ -130,7 +132,6 @@
     "num_attention_heads": 16,
     "num_beam_groups": 1,
     "num_beams": 1,
-    "num_channels": 3,
     "num_hidden_layers": 24,
     "num_return_sequences": 1,
     "output_attentions": false,
@@ -140,17 +141,14 @@
     "patch_size": 14,
     "prefix": null,
     "problem_type": null,
-    "projection_dim": 512,
     "pruned_heads": {},
     "remove_invalid_values": false,
     "repetition_penalty": 1.0,
     "return_dict": true,
     "return_dict_in_generate": false,
     "sep_token_id": null,
-    "suppress_tokens": null,
     "task_specific_params": null,
     "temperature": 1.0,
-    "tf_legacy_loss": false,
     "tie_encoder_decoder": false,
     "tie_word_embeddings": true,
     "tokenizer_class": null,
@@ -158,8 +156,16 @@
     "top_p": 1.0,
     "torch_dtype": null,
     "torchscript": false,
-    "transformers_version": "4.29.1",
+    "transformers_version": "4.20.1",
     "typical_p": 1.0,
     "use_bfloat16": false
+  },
+  "vision_config_dict": {
+    "hidden_act": "gelu",
+    "hidden_size": 1024,
+    "intermediate_size": 4096,
+    "num_attention_heads": 16,
+    "num_hidden_layers": 24,
+    "patch_size": 14
   }
 }
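
A note on the added text_config_dict / vision_config_dict blocks: older transformers releases (around 4.20.x, as I recall) build CLIPConfig from *_config_dict arguments rather than from the nested text_config / vision_config objects, so without these keys an old install would silently fall back to default sub-config sizes. The dropped keys (begin_suppress_tokens, suppress_tokens, tf_legacy_loss, and so on) are simply attributes that 4.20.1 does not know about and therefore does not serialize. The sketch below reconstructs an equivalent config by hand, purely to illustrate how those keys are consumed; the exact constructor behaviour on your transformers version is an assumption to verify, while the numeric values are taken directly from this diff.

from transformers import CLIPConfig

# Assumption: on transformers ~4.20.x, CLIPConfig accepts text_config_dict /
# vision_config_dict and uses them to populate the text and vision sub-configs.
config = CLIPConfig(
    text_config_dict={
        "hidden_act": "gelu",
        "hidden_size": 768,
        "intermediate_size": 3072,
        "num_attention_heads": 12,
    },
    vision_config_dict={
        "hidden_act": "gelu",
        "hidden_size": 1024,
        "intermediate_size": 4096,
        "num_attention_heads": 16,
        "num_hidden_layers": 24,
        "patch_size": 14,
    },
)

print(config.text_config.hidden_size)   # 768
print(config.vision_config.patch_size)  # 14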
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cbae707ef72c3d2748f3c9482e9ca8645d8bbd915d59a86e4f11107c9b341068
-size 1710660257
+oid sha256:ff37869878b61d3a1f2a82dbbd093927ea658f7477e1a347c0ba384591238904
+size 1710664269