CISCai committed on
Commit
5bf205a
1 Parent(s): 335ca60

Updated with new FIM tokens

Browse files
Files changed (2) hide show
  1. _hf_gguf.py +13 -0
  2. app.py +10 -6
_hf_gguf.py CHANGED
@@ -128,6 +128,12 @@ standard_metadata = {
128
  "tokenizer.ggml.middle_token_id": (GGUFValueType.UINT32, 0),
129
  "tokenizer.ggml.eot_token_id": (GGUFValueType.UINT32, 0),
130
  "tokenizer.ggml.eom_token_id": (GGUFValueType.UINT32, 0),
 
 
 
 
 
 
131
  "quantize.imatrix.file": (GGUFValueType.STRING, ""),
132
  "quantize.imatrix.dataset": (GGUFValueType.STRING, ""),
133
  "quantize.imatrix.entries_count": (GGUFValueType.INT32, 0),
@@ -135,6 +141,13 @@ standard_metadata = {
135
  }
136
 
137
 
 
 
 
 
 
 
 
138
  gguf_scalar_size: dict[GGUFValueType, int] = {
139
  GGUFValueType.UINT8: 1,
140
  GGUFValueType.INT8: 1,
 
128
  "tokenizer.ggml.middle_token_id": (GGUFValueType.UINT32, 0),
129
  "tokenizer.ggml.eot_token_id": (GGUFValueType.UINT32, 0),
130
  "tokenizer.ggml.eom_token_id": (GGUFValueType.UINT32, 0),
131
+ "tokenizer.ggml.fim_pre_token_id": (GGUFValueType.UINT32, 0),
132
+ "tokenizer.ggml.fim_suf_token_id": (GGUFValueType.UINT32, 0),
133
+ "tokenizer.ggml.fim_mid_token_id": (GGUFValueType.UINT32, 0),
134
+ "tokenizer.ggml.fim_pad_token_id": (GGUFValueType.UINT32, 0),
135
+ "tokenizer.ggml.fim_rep_token_id": (GGUFValueType.UINT32, 0),
136
+ "tokenizer.ggml.fim_sep_token_id": (GGUFValueType.UINT32, 0),
137
  "quantize.imatrix.file": (GGUFValueType.STRING, ""),
138
  "quantize.imatrix.dataset": (GGUFValueType.STRING, ""),
139
  "quantize.imatrix.entries_count": (GGUFValueType.INT32, 0),
 
141
  }
142
 
143
 
144
+ deprecated_metadata = {
145
+ "tokenizer.ggml.prefix_token_id",
146
+ "tokenizer.ggml.suffix_token_id",
147
+ "tokenizer.ggml.middle_token_id",
148
+ }
149
+
150
+
151
  gguf_scalar_size: dict[GGUFValueType, int] = {
152
  GGUFValueType.UINT8: 1,
153
  GGUFValueType.INT8: 1,
app.py CHANGED
@@ -9,7 +9,7 @@ from typing import Annotated, Any, NamedTuple
9
  from urllib.parse import urlencode
10
 
11
  from _hf_explorer import FileExplorer
12
- from _hf_gguf import standard_metadata, TokenType, LlamaFileType, GGUFValueType, HuggingGGUFstream
13
 
14
 
15
  hfapi = HfApi()
@@ -361,7 +361,7 @@ Removing this metadata key from a model will cause `llama.cpp` to output a warni
361
  example_description: dict(
362
  value = """## Add missing/change incorrect tokens
363
 
364
- Sometimes converted models will be missing declarations of important tokens like EOT, Fill-in-Middle (prefix, suffix, middle) for various reasons.
365
  Other times they may have the incorrect tokens set as BOS, EOS, etc. Either way, missing or incorrectly declared tokens means inference will not work as expected.
366
 
367
  Token declaration is made with the metadata key(s) named "tokenizer.ggml.`token name`\_token\_id" which contains the ID (index number) of the token in the token list (`tokenizer.ggml.tokens`).
@@ -369,14 +369,14 @@ Token declaration is made with the metadata key(s) named "tokenizer.ggml.`token
369
  A recurring issue is misconfigured EOS/EOT/EOM tokens, the need to set each of these and what they should be will vary between models, but the effect when these are incorrect is usually the same;
370
  infinite generation responses, i.e. inference does not know when to stop. Typically this would be because f.ex. EOS has been set to <|endoftext|> instead of <|im\_end|> (again, model specific, just an example).
371
 
372
- Another issue, mainly for code models, is that Fill-in-Middle tokens have not been declared (note; not all models have or use such tokens), causing sub-par results for filling in blanks in code/text.
373
- There are 3 main metadata keys that need to be present for this; tokenizer.ggml.`prefix`\_token\_id, `suffix` and `middle`, sometimes also EOT/EOM if it differs from EOS in this mode.
374
- They are usually named fim\_`something` or just `PRE`, `SUF` and `MID`, take extra care with DeepSeek-based models where prefix is (...fim...)`begin`, suffix is `hole` and middle is `end`.
375
  """,
376
  visible = True,
377
  ),
378
  example_keys: dict(
379
- value = "tokenizer.ggml.prefix_token_id",
380
  info = "Select or enter any metadata key ending with _token_id",
381
  visible = True,
382
  ),
@@ -857,6 +857,7 @@ Any framework based on `llama-cpp-python` will let you select which chat templat
857
  meta_keys,
858
  ],
859
  outputs = [
 
860
  meta_types,
861
  btn_delete,
862
  ],
@@ -875,6 +876,9 @@ Any framework based on `llama-cpp-python` will let you select which chat templat
875
  typ = GGUFValueType.UINT32.name
876
 
877
  return {
 
 
 
878
  meta_types: gr.Dropdown(
879
  value = typ,
880
  interactive = False if typ is not None else True,
 
9
  from urllib.parse import urlencode
10
 
11
  from _hf_explorer import FileExplorer
12
+ from _hf_gguf import standard_metadata, deprecated_metadata, TokenType, LlamaFileType, GGUFValueType, HuggingGGUFstream
13
 
14
 
15
  hfapi = HfApi()
 
361
  example_description: dict(
362
  value = """## Add missing/change incorrect tokens
363
 
364
+ Sometimes converted models will be missing declarations of important tokens like EOT, Fill-in-Middle (fim_pre, fim_suf, fim_mid, fim_pad, fim_rep, fim_sep) for various reasons.
365
  Other times they may have the incorrect tokens set as BOS, EOS, etc. Either way, missing or incorrectly declared tokens means inference will not work as expected.
366
 
367
  Token declaration is made with the metadata key(s) named "tokenizer.ggml.`token name`\_token\_id" which contains the ID (index number) of the token in the token list (`tokenizer.ggml.tokens`).
 
369
  A recurring issue is misconfigured EOS/EOT/EOM tokens, the need to set each of these and what they should be will vary between models, but the effect when these are incorrect is usually the same;
370
  infinite generation responses, i.e. inference does not know when to stop. Typically this would be because f.ex. EOS has been set to <|endoftext|> instead of <|im\_end|> (again, model specific, just an example).
371
 
372
+ Another issue, mainly for code models, is that Fill-in-Middle tokens have not been declared and not auto-detected (note; not all models have or use such tokens), causing sub-par results for filling in blanks in code/text.
373
+ There are 3 main metadata keys that need to be present for this; tokenizer.ggml.`fim_pre`\_token\_id, `fim_suf` and `fim_mid`, and 3 auxiliary ones; `fim_pad`, `fim_rep` and `fim_sep`, sometimes also EOT/EOM if it differs from EOS in this mode.
374
+ They are usually named fim\_`something` or just `PRE`, `SUF` and `MID`, take extra care with DeepSeek-based models where fim_pre is (...fim...)`begin`, fim_suf is `hole` and fim_mid is `end`.
375
  """,
376
  visible = True,
377
  ),
378
  example_keys: dict(
379
+ value = "tokenizer.ggml.fim_pre_token_id",
380
  info = "Select or enter any metadata key ending with _token_id",
381
  visible = True,
382
  ),
 
857
  meta_keys,
858
  ],
859
  outputs = [
860
+ meta_keys,
861
  meta_types,
862
  btn_delete,
863
  ],
 
876
  typ = GGUFValueType.UINT32.name
877
 
878
  return {
879
+ meta_keys: gr.Dropdown(
880
+ info = "DEPRECATED" if key in deprecated_metadata else "Search by metadata key name",
881
+ ),
882
  meta_types: gr.Dropdown(
883
  value = typ,
884
  interactive = False if typ is not None else True,