Updated with new FIM tokens
- _hf_gguf.py +13 -0
- app.py +10 -6
_hf_gguf.py
CHANGED
@@ -128,6 +128,12 @@ standard_metadata = {
     "tokenizer.ggml.middle_token_id": (GGUFValueType.UINT32, 0),
     "tokenizer.ggml.eot_token_id": (GGUFValueType.UINT32, 0),
     "tokenizer.ggml.eom_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_pre_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_suf_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_mid_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_pad_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_rep_token_id": (GGUFValueType.UINT32, 0),
+    "tokenizer.ggml.fim_sep_token_id": (GGUFValueType.UINT32, 0),
     "quantize.imatrix.file": (GGUFValueType.STRING, ""),
     "quantize.imatrix.dataset": (GGUFValueType.STRING, ""),
     "quantize.imatrix.entries_count": (GGUFValueType.INT32, 0),
@@ -135,6 +141,13 @@ standard_metadata = {
 }
 
 
+deprecated_metadata = {
+    "tokenizer.ggml.prefix_token_id",
+    "tokenizer.ggml.suffix_token_id",
+    "tokenizer.ggml.middle_token_id",
+}
+
+
 gguf_scalar_size: dict[GGUFValueType, int] = {
     GGUFValueType.UINT8: 1,
     GGUFValueType.INT8: 1,
app.py
CHANGED
@@ -9,7 +9,7 @@ from typing import Annotated, Any, NamedTuple
 from urllib.parse import urlencode
 
 from _hf_explorer import FileExplorer
-from _hf_gguf import standard_metadata, TokenType, LlamaFileType, GGUFValueType, HuggingGGUFstream
+from _hf_gguf import standard_metadata, deprecated_metadata, TokenType, LlamaFileType, GGUFValueType, HuggingGGUFstream
 
 
 hfapi = HfApi()
@@ -361,7 +361,7 @@ Removing this metadata key from a model will cause `llama.cpp` to output a warning
     example_description: dict(
         value = """## Add missing/change incorrect tokens
 
-Sometimes converted models will be missing declarations of important tokens like EOT, Fill-in-Middle (…
+Sometimes converted models will be missing declarations of important tokens like EOT, Fill-in-Middle (fim_pre, fim_suf, fim_mid, fim_pad, fim_rep, fim_sep) for various reasons.
 Other times they may have the incorrect tokens set as BOS, EOS, etc. Either way, missing or incorrectly declared tokens means inference will not work as expected.
 
 Token declaration is made with the metadata key(s) named "tokenizer.ggml.`token name`\_token\_id" which contains the ID (index number) of the token in the token list (`tokenizer.ggml.tokens`).
@@ -369,14 +369,14 @@ Token declaration is made with the metadata key(s) named "tokenizer.ggml.`token
 A recurring issue is misconfigured EOS/EOT/EOM tokens; the need to set each of these, and what they should be, will vary between models, but the effect when they are incorrect is usually the same:
 infinite generation responses, i.e. inference does not know when to stop. Typically this would be because e.g. EOS has been set to <|endoftext|> instead of <|im\_end|> (again, model specific, just an example).
 
-Another issue, mainly for code models, is that Fill-in-Middle tokens have not been declared (note: not all models have or use such tokens), causing sub-par results for filling in blanks in code/text.
-There are 3 main metadata keys that need to be present for this: tokenizer.ggml.`…
-They are usually named fim\_`something` or just `PRE`, `SUF` and `MID`; take extra care with DeepSeek-based models, where …
+Another issue, mainly for code models, is that Fill-in-Middle tokens have not been declared and not auto-detected (note: not all models have or use such tokens), causing sub-par results for filling in blanks in code/text.
+There are 3 main metadata keys that need to be present for this: tokenizer.ggml.`fim_pre`\_token\_id, `fim_suf` and `fim_mid`, and 3 auxiliary ones: `fim_pad`, `fim_rep` and `fim_sep`, sometimes also EOT/EOM if it differs from EOS in this mode.
+They are usually named fim\_`something` or just `PRE`, `SUF` and `MID`; take extra care with DeepSeek-based models, where fim_pre is `begin`, fim_suf is `hole` and fim_mid is `end`.
 """,
         visible = True,
     ),
     example_keys: dict(
-        value = "tokenizer.ggml.…
+        value = "tokenizer.ggml.fim_pre_token_id",
         info = "Select or enter any metadata key ending with _token_id",
         visible = True,
     ),
@@ -857,6 +857,7 @@ Any framework based on `llama-cpp-python` will let you select which chat template to use
         meta_keys,
     ],
     outputs = [
+        meta_keys,
         meta_types,
         btn_delete,
     ],
@@ -875,6 +876,9 @@ Any framework based on `llama-cpp-python` will let you select which chat template to use
         typ = GGUFValueType.UINT32.name
 
     return {
+        meta_keys: gr.Dropdown(
+            info = "DEPRECATED" if key in deprecated_metadata else "Search by metadata key name",
+        ),
         meta_types: gr.Dropdown(
             value = typ,
             interactive = False if typ is not None else True,