Xenova HF staff committed on
Commit
c698ac4
·
verified ·
1 Parent(s): 2e8fdb5

Improved ONNX models (#2)

Browse files

- Upload folder using huggingface_hub (bc2ee718a771348a89a656ca30c1b462a1f9bd91)
- Update config.json (9909734e10b2001ee7de4a1ca33c9cfbe66ad30b)
- Upload folder using huggingface_hub (2a4eaa1ec0302fea4343009b2bb6eb1a6f36507c)
- Simplify usage (65b5c46665c177112f4fedfd38475d07a8fc1167)

README.md CHANGED
@@ -1,5 +1,9 @@
1
  ---
2
  pipeline_tag: text-generation
 
 
 
 
3
  ---
4
 
5
  ## Usage
@@ -41,8 +45,7 @@ past_key_values = {
41
  for kv in ('key', 'value')
42
  }
43
  input_ids = inputs['input_ids']
44
- attention_mask = inputs['attention_mask']
45
- position_ids = np.cumsum(inputs['attention_mask'], axis=-1)
46
 
47
  # 3. Generation loop
48
  max_new_tokens = 1024
@@ -50,14 +53,12 @@ generated_tokens = np.array([[]], dtype=np.int64)
50
  for i in range(max_new_tokens):
51
  logits, *present_key_values = decoder_session.run(None, dict(
52
  input_ids=input_ids,
53
- attention_mask=attention_mask,
54
  position_ids=position_ids,
55
  **past_key_values,
56
  ))
57
 
58
  ## Update values for next generation loop
59
  input_ids = logits[:, -1].argmax(-1, keepdims=True)
60
- attention_mask = np.ones_like(input_ids)
61
  position_ids = position_ids[:, -1:] + 1
62
  for j, key in enumerate(past_key_values):
63
  past_key_values[key] = present_key_values[j]
@@ -145,5 +146,4 @@ const messages = [
145
  // Generate a response
146
  const output = await generator(messages, { max_new_tokens: 512, do_sample: false });
147
  console.log(output[0].generated_text.at(-1).content);
148
- ```
149
-
 
1
  ---
2
  pipeline_tag: text-generation
3
+ base_model:
4
+ - google/gemma-3-1b-it
5
+ library_name: transformers.js
6
+ license: gemma
7
  ---
8
 
9
  ## Usage
 
45
  for kv in ('key', 'value')
46
  }
47
  input_ids = inputs['input_ids']
48
+ position_ids = np.tile(np.arange(1, input_ids.shape[-1] + 1), (batch_size, 1))
 
49
 
50
  # 3. Generation loop
51
  max_new_tokens = 1024
 
53
  for i in range(max_new_tokens):
54
  logits, *present_key_values = decoder_session.run(None, dict(
55
  input_ids=input_ids,
 
56
  position_ids=position_ids,
57
  **past_key_values,
58
  ))
59
 
60
  ## Update values for next generation loop
61
  input_ids = logits[:, -1].argmax(-1, keepdims=True)
 
62
  position_ids = position_ids[:, -1:] + 1
63
  for j, key in enumerate(past_key_values):
64
  past_key_values[key] = present_key_values[j]
 
146
  // Generate a response
147
  const output = await generator(messages, { max_new_tokens: 512, do_sample: false });
148
  console.log(output[0].generated_text.at(-1).content);
149
+ ```
 
config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "_attn_implementation_autoset": true,
3
- "_name_or_path": "gg-hf-g/gemma-3-1b-it",
4
  "architectures": [
5
  "Gemma3ForCausalLM"
6
  ],
@@ -9,7 +9,10 @@
9
  "attn_logit_softcapping": null,
10
  "bos_token_id": 2,
11
  "cache_implementation": "hybrid",
12
- "eos_token_id": 1,
 
 
 
13
  "final_logit_softcapping": null,
14
  "head_dim": 256,
15
  "hidden_activation": "gelu_pytorch_tanh",
 
1
  {
2
  "_attn_implementation_autoset": true,
3
+ "_name_or_path": "google/gemma-3-1b-it",
4
  "architectures": [
5
  "Gemma3ForCausalLM"
6
  ],
 
9
  "attn_logit_softcapping": null,
10
  "bos_token_id": 2,
11
  "cache_implementation": "hybrid",
12
+ "eos_token_id": [
13
+ 1,
14
+ 106
15
+ ],
16
  "final_logit_softcapping": null,
17
  "head_dim": 256,
18
  "hidden_activation": "gelu_pytorch_tanh",
generation_config.json CHANGED
@@ -2,7 +2,10 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 2,
4
  "cache_implementation": "hybrid",
5
- "eos_token_id": [1, 106],
 
 
 
6
  "pad_token_id": 0,
7
  "transformers_version": "4.50.0.dev0"
8
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 2,
4
  "cache_implementation": "hybrid",
5
+ "eos_token_id": [
6
+ 1,
7
+ 106
8
+ ],
9
  "pad_token_id": 0,
10
  "transformers_version": "4.50.0.dev0"
11
  }
onnx/model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19fe4f234ac17c6a6fbd3e322684bb3ee4165f440853844ec9411fa4730c4c00
3
- size 1011486
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcdd6cc5892fb4f3ea04b423657561b437239b37ab963b85bebae4532081a16f
3
+ size 1012725
onnx/model_bnb4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89fae7cf5013bb962435a439de89cc8197cf02f2a9fe0cf17ac49c9c47f7ad71
3
- size 1602006422
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f261e3a3b222e3110bd7d07f1a24d3bdcade389df85d163f11c82cfa1182b700
3
+ size 1602007661
onnx/model_fp16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0741cbf360a59a17fb9b3fa63cec131b63d8dd67b38981edf62526629e6e85e1
3
- size 2000773554
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7278853fe0e9babcdaad178c2d91aff09830b510a9ca88317634f0ef8194d19e
3
+ size 2000774696
onnx/model_int8.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cde6cb8cb849399b8987d7cd61a58f2ee9c9d975d25fc2a750a0750e1ee7eb2d
3
- size 1001480743
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d8ddeb9c637d43625df45933ad3a9e2337b8a027ab37a70dc230735ba285f5c
3
+ size 1001481982
onnx/model_q4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a655309bd32bd81724ee97ed6c90d46adbe2bf294db318221d922cd4d9f98b54
3
- size 1645615026
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd65478ade20f0bea7ed1e80455e3b3b6eb6f4242d611d63e7bcddc94cc0f108
3
+ size 1645616265
onnx/model_q4f16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:094885406e076227cda9f3d00c7b62e8fb5e746bfb9e279457aa272ea69bae88
3
- size 997768167
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a8cb5ab287f04050d29de31e47354f8868069c0dec8cab326376274a6a12508
3
+ size 997769309
onnx/model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cde6cb8cb849399b8987d7cd61a58f2ee9c9d975d25fc2a750a0750e1ee7eb2d
3
- size 1001480743
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d8ddeb9c637d43625df45933ad3a9e2337b8a027ab37a70dc230735ba285f5c
3
+ size 1001481982
onnx/model_uint8.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71c0ea2b023207970e7e516f0644433a6a5c72c47fecb515b0e5068b3e9d81f0
3
- size 1001480839
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:714ab717b4840136b736a3a288f853ac9386751f33c2edd8a753086530cf9dbc
3
+ size 1001482078
tokenizer_config.json CHANGED
@@ -51337,7 +51337,6 @@
51337
  "image_token": "<image_soft_token>",
51338
  "model_max_length": 1000000000000000019884624838656,
51339
  "pad_token": "<pad>",
51340
- "processor_class": "Gemma3Processor",
51341
  "sp_model_kwargs": null,
51342
  "spaces_between_special_tokens": false,
51343
  "tokenizer_class": "GemmaTokenizer",
 
51337
  "image_token": "<image_soft_token>",
51338
  "model_max_length": 1000000000000000019884624838656,
51339
  "pad_token": "<pad>",
 
51340
  "sp_model_kwargs": null,
51341
  "spaces_between_special_tokens": false,
51342
  "tokenizer_class": "GemmaTokenizer",