Improved ONNX models (#2)
- Upload folder using huggingface_hub (bc2ee718a771348a89a656ca30c1b462a1f9bd91)
- Update config.json (9909734e10b2001ee7de4a1ca33c9cfbe66ad30b)
- Upload folder using huggingface_hub (2a4eaa1ec0302fea4343009b2bb6eb1a6f36507c)
- Simplify usage (65b5c46665c177112f4fedfd38475d07a8fc1167)
- README.md +6 -6
- config.json +5 -2
- generation_config.json +4 -1
- onnx/model.onnx +2 -2
- onnx/model_bnb4.onnx +2 -2
- onnx/model_fp16.onnx +2 -2
- onnx/model_int8.onnx +2 -2
- onnx/model_q4.onnx +2 -2
- onnx/model_q4f16.onnx +2 -2
- onnx/model_quantized.onnx +2 -2
- onnx/model_uint8.onnx +2 -2
- tokenizer_config.json +0 -1
README.md
CHANGED
````diff
@@ -1,5 +1,9 @@
 ---
 pipeline_tag: text-generation
+base_model:
+- google/gemma-3-1b-it
+library_name: transformers.js
+license: gemma
 ---
 
 ## Usage
@@ -41,8 +45,7 @@ past_key_values = {
     for kv in ('key', 'value')
 }
 input_ids = inputs['input_ids']
-attention_mask = inputs['attention_mask']
-position_ids = np.cumsum(inputs['attention_mask'], axis=-1)
+position_ids = np.tile(np.arange(1, input_ids.shape[-1] + 1), (batch_size, 1))
 
 # 3. Generation loop
 max_new_tokens = 1024
@@ -50,14 +53,12 @@ generated_tokens = np.array([[]], dtype=np.int64)
 for i in range(max_new_tokens):
     logits, *present_key_values = decoder_session.run(None, dict(
         input_ids=input_ids,
-        attention_mask=attention_mask,
         position_ids=position_ids,
         **past_key_values,
     ))
 
     ## Update values for next generation loop
     input_ids = logits[:, -1].argmax(-1, keepdims=True)
-    attention_mask = np.ones_like(input_ids)
     position_ids = position_ids[:, -1:] + 1
     for j, key in enumerate(past_key_values):
         past_key_values[key] = present_key_values[j]
@@ -145,5 +146,4 @@ const messages = [
 // Generate a response
 const output = await generator(messages, { max_new_tokens: 512, do_sample: false });
 console.log(output[0].generated_text.at(-1).content);
-```
-
+```
````
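The `position_ids` change above works because the README example runs a single, unpadded sequence: when the attention mask is all ones, its cumulative sum is just 1..seq_len, so the mask never needs to be materialized or fed to the session. A minimal sketch of the equivalence (shapes and variable names are illustrative, not from the repo):

```python
import numpy as np

# With no padding the attention mask is all ones, so both formulations agree.
batch_size, seq_len = 1, 5  # illustrative shapes
attention_mask = np.ones((batch_size, seq_len), dtype=np.int64)

old_position_ids = np.cumsum(attention_mask, axis=-1)                   # before this commit
new_position_ids = np.tile(np.arange(1, seq_len + 1), (batch_size, 1))  # after this commit

assert (old_position_ids == new_position_ids).all()  # both are [[1 2 3 4 5]]
```

With left-padded batches the two would differ, which is why the original version derived positions from the mask; the simplified loop trades that generality for a shorter example.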
config.json
CHANGED
```diff
@@ -1,6 +1,6 @@
 {
   "_attn_implementation_autoset": true,
-  "_name_or_path": "
+  "_name_or_path": "google/gemma-3-1b-it",
   "architectures": [
     "Gemma3ForCausalLM"
   ],
@@ -9,7 +9,10 @@
   "attn_logit_softcapping": null,
   "bos_token_id": 2,
   "cache_implementation": "hybrid",
-  "eos_token_id":
+  "eos_token_id": [
+    1,
+    106
+  ],
   "final_logit_softcapping": null,
   "head_dim": 256,
   "hidden_activation": "gelu_pytorch_tanh",
```
generation_config.json
CHANGED
```diff
@@ -2,7 +2,10 @@
   "_from_model_config": true,
   "bos_token_id": 2,
   "cache_implementation": "hybrid",
-  "eos_token_id": [
+  "eos_token_id": [
+    1,
+    106
+  ],
   "pad_token_id": 0,
   "transformers_version": "4.50.0.dev0"
 }
```
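Both config files now advertise two stop tokens, `1` and `106`, rather than a single id. A hand-rolled loop like the one in the README should therefore test membership rather than equality when deciding to stop. A minimal sketch (the helper below is illustrative, not part of the repo; the ids come from the updated configs):

```python
import numpy as np

eos_token_ids = np.array([1, 106])  # values taken from the updated config files

def is_eos(next_token_id: np.ndarray) -> bool:
    # next_token_id has shape (1, 1), as produced by argmax(..., keepdims=True)
    return bool(np.isin(next_token_id, eos_token_ids).any())

assert is_eos(np.array([[106]]))
assert not is_eos(np.array([[42]]))
```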
onnx/model.onnx
CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:fcdd6cc5892fb4f3ea04b423657561b437239b37ab963b85bebae4532081a16f
+size 1012725
```
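This and the following `.onnx` entries are Git LFS pointer files: `oid` is the SHA-256 of the actual file contents and `size` is its byte count, so a download can be verified locally. A sketch using only the standard library (the local path is hypothetical; the expected values are `model.onnx`'s from this diff):

```python
import hashlib
import os

path = "onnx/model.onnx"  # hypothetical local path
expected_oid = "fcdd6cc5892fb4f3ea04b423657561b437239b37ab963b85bebae4532081a16f"
expected_size = 1012725

# Hash the file in 1 MiB chunks to avoid loading a multi-GB model into memory.
sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

assert os.path.getsize(path) == expected_size
assert sha256.hexdigest() == expected_oid
```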
onnx/model_bnb4.onnx
CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f261e3a3b222e3110bd7d07f1a24d3bdcade389df85d163f11c82cfa1182b700
+size 1602007661
```
onnx/model_fp16.onnx
CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:7278853fe0e9babcdaad178c2d91aff09830b510a9ca88317634f0ef8194d19e
+size 2000774696
```
onnx/model_int8.onnx
CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:6d8ddeb9c637d43625df45933ad3a9e2337b8a027ab37a70dc230735ba285f5c
+size 1001481982
```
onnx/model_q4.onnx
CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:cd65478ade20f0bea7ed1e80455e3b3b6eb6f4242d611d63e7bcddc94cc0f108
+size 1645616265
```
onnx/model_q4f16.onnx
CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:0a8cb5ab287f04050d29de31e47354f8868069c0dec8cab326376274a6a12508
+size 997769309
```
onnx/model_quantized.onnx
CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:6d8ddeb9c637d43625df45933ad3a9e2337b8a027ab37a70dc230735ba285f5c
+size 1001481982
```
onnx/model_uint8.onnx
CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:714ab717b4840136b736a3a288f853ac9386751f33c2edd8a753086530cf9dbc
+size 1001482078
```
tokenizer_config.json
CHANGED
```diff
@@ -51337,7 +51337,6 @@
   "image_token": "<image_soft_token>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
-  "processor_class": "Gemma3Processor",
   "sp_model_kwargs": null,
   "spaces_between_special_tokens": false,
   "tokenizer_class": "GemmaTokenizer",
```