Spaces:
Running
Running
support chat_template.json (and bump dependencies)
Browse files
- README.md +1 -1
- app.py +31 -11
- requirements.txt +3 -3
README.md
CHANGED
|
@@ -4,7 +4,7 @@ emoji: 💬📝
|
|
| 4 |
colorFrom: purple
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 5.
|
| 8 |
python_version: 3.11
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
|
|
|
| 4 |
colorFrom: purple
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.38.0
|
| 8 |
python_version: 3.11
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
app.py
CHANGED
|
@@ -14,7 +14,7 @@ hfapi = HfApi()
|
|
| 14 |
|
| 15 |
class ModelFiles(StrEnum):
|
| 16 |
CHAT_TEMPLATE_JSON = "chat_template.json"
|
| 17 |
-
TOKENIZER_CHAT_TEMPLATE = "
|
| 18 |
TOKENIZER_CONFIG = "tokenizer_config.json"
|
| 19 |
TOKENIZER_INVERSE_TEMPLATE = "inverse_template.jinja"
|
| 20 |
|
|
@@ -325,21 +325,21 @@ example_values = [
|
|
| 325 |
"content": [
|
| 326 |
{
|
| 327 |
"type": "text",
|
| 328 |
-
"
|
| 329 |
},
|
| 330 |
{
|
| 331 |
"type": "image"
|
| 332 |
},
|
| 333 |
{
|
| 334 |
"type": "text",
|
| 335 |
-
"
|
| 336 |
},
|
| 337 |
{
|
| 338 |
"type": "audio"
|
| 339 |
},
|
| 340 |
{
|
| 341 |
"type": "text",
|
| 342 |
-
"
|
| 343 |
},
|
| 344 |
{
|
| 345 |
"type": "video"
|
|
@@ -379,7 +379,7 @@ class TokenizerConfig():
|
|
| 379 |
@chat_template.setter
|
| 380 |
def chat_template(self, value: str | list | None):
|
| 381 |
if not value:
|
| 382 |
-
self.chat_templates
|
| 383 |
elif isinstance(value, str):
|
| 384 |
self.chat_templates = {
|
| 385 |
"default": value,
|
|
@@ -711,6 +711,9 @@ You can freely edit and test GGUF chat template(s) (and are encouraged to do so)
|
|
| 711 |
org_template_tool_use = ""
|
| 712 |
org_template_rag = ""
|
| 713 |
|
|
|
|
|
|
|
|
|
|
| 714 |
for config_file_name in (ModelFiles.CHAT_TEMPLATE_JSON, ModelFiles.TOKENIZER_CONFIG):
|
| 715 |
config_file = info.get(config_file_name, {})
|
| 716 |
org_config = config_file.get("data")
|
|
@@ -719,7 +722,7 @@ You can freely edit and test GGUF chat template(s) (and are encouraged to do so)
|
|
| 719 |
if org_content and ("chat_template" in org_content or not org_template):
|
| 720 |
tokenizer_config = TokenizerConfig(org_content)
|
| 721 |
|
| 722 |
-
org_template = tokenizer_config.chat_templates.get("default") or ""
|
| 723 |
org_template_tool_use = tokenizer_config.chat_templates.get("tool_use") or ""
|
| 724 |
org_template_rag = tokenizer_config.chat_templates.get("rag") or ""
|
| 725 |
# org_template_inverse = tokenizer_config.inverse_template or ""
|
|
@@ -738,9 +741,6 @@ You can freely edit and test GGUF chat template(s) (and are encouraged to do so)
|
|
| 738 |
for token in unified_diff(new_config.splitlines(keepends = True), org_config.splitlines(keepends = True), fromfile = config_file_name, tofile = config_file_name)
|
| 739 |
]
|
| 740 |
|
| 741 |
-
tokenizer_chat_template = info.get(ModelFiles.TOKENIZER_CHAT_TEMPLATE, {})
|
| 742 |
-
org_template = tokenizer_chat_template.get("data", org_template)
|
| 743 |
-
|
| 744 |
tokenizer_inverse_template = info.get(ModelFiles.TOKENIZER_INVERSE_TEMPLATE, {})
|
| 745 |
org_template_inverse = tokenizer_inverse_template.get("data", org_template_inverse)
|
| 746 |
|
|
@@ -1171,6 +1171,24 @@ You can freely edit and test GGUF chat template(s) (and are encouraged to do so)
|
|
| 1171 |
revision = parent_commit or branch,
|
| 1172 |
token = oauth_token.token if oauth_token else False,
|
| 1173 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1174 |
except Exception as e:
|
| 1175 |
pass
|
| 1176 |
else:
|
|
@@ -1183,6 +1201,8 @@ You can freely edit and test GGUF chat template(s) (and are encouraged to do so)
|
|
| 1183 |
"content": template_content,
|
| 1184 |
}
|
| 1185 |
info["chat_template"] = template_content.get("chat_template")
|
|
|
|
|
|
|
| 1186 |
|
| 1187 |
pr_details = None
|
| 1188 |
if branch and branch.startswith("refs/pr/"):
|
|
@@ -1216,8 +1236,8 @@ You can freely edit and test GGUF chat template(s) (and are encouraged to do so)
|
|
| 1216 |
pr_submit: gr.Button(
|
| 1217 |
value = f"Commit to PR #{pr_details.num}" if pr_details else "Create Pull Request",
|
| 1218 |
),
|
| 1219 |
-
chat_template: gr.skip() if ModelFiles.CHAT_TEMPLATE_JSON not in info else gr.Code(
|
| 1220 |
-
value = TokenizerConfig(info[ModelFiles.CHAT_TEMPLATE_JSON]["content"]).chat_templates.get("default"),
|
| 1221 |
),
|
| 1222 |
# inverse_template: gr.skip() if ModelFiles.TOKENIZER_INVERSE_TEMPLATE not in info else gr.Code(
|
| 1223 |
# value = info[ModelFiles.TOKENIZER_INVERSE_TEMPLATE]["data"],
|
|
|
|
| 14 |
|
| 15 |
class ModelFiles(StrEnum):
|
| 16 |
CHAT_TEMPLATE_JSON = "chat_template.json"
|
| 17 |
+
TOKENIZER_CHAT_TEMPLATE = "chat_template.jinja"
|
| 18 |
TOKENIZER_CONFIG = "tokenizer_config.json"
|
| 19 |
TOKENIZER_INVERSE_TEMPLATE = "inverse_template.jinja"
|
| 20 |
|
|
|
|
| 325 |
"content": [
|
| 326 |
{
|
| 327 |
"type": "text",
|
| 328 |
+
"text": "Can this animal"
|
| 329 |
},
|
| 330 |
{
|
| 331 |
"type": "image"
|
| 332 |
},
|
| 333 |
{
|
| 334 |
"type": "text",
|
| 335 |
+
"text": "make this sound"
|
| 336 |
},
|
| 337 |
{
|
| 338 |
"type": "audio"
|
| 339 |
},
|
| 340 |
{
|
| 341 |
"type": "text",
|
| 342 |
+
"text": "while moving like this?"
|
| 343 |
},
|
| 344 |
{
|
| 345 |
"type": "video"
|
|
|
|
| 379 |
@chat_template.setter
|
| 380 |
def chat_template(self, value: str | list | None):
|
| 381 |
if not value:
|
| 382 |
+
self.chat_templates = {}
|
| 383 |
elif isinstance(value, str):
|
| 384 |
self.chat_templates = {
|
| 385 |
"default": value,
|
|
|
|
| 711 |
org_template_tool_use = ""
|
| 712 |
org_template_rag = ""
|
| 713 |
|
| 714 |
+
tokenizer_chat_template = info.get(ModelFiles.TOKENIZER_CHAT_TEMPLATE, {})
|
| 715 |
+
org_template = tokenizer_chat_template.get("data", org_template)
|
| 716 |
+
|
| 717 |
for config_file_name in (ModelFiles.CHAT_TEMPLATE_JSON, ModelFiles.TOKENIZER_CONFIG):
|
| 718 |
config_file = info.get(config_file_name, {})
|
| 719 |
org_config = config_file.get("data")
|
|
|
|
| 722 |
if org_content and ("chat_template" in org_content or not org_template):
|
| 723 |
tokenizer_config = TokenizerConfig(org_content)
|
| 724 |
|
| 725 |
+
org_template = org_template or tokenizer_config.chat_templates.get("default") or ""
|
| 726 |
org_template_tool_use = tokenizer_config.chat_templates.get("tool_use") or ""
|
| 727 |
org_template_rag = tokenizer_config.chat_templates.get("rag") or ""
|
| 728 |
# org_template_inverse = tokenizer_config.inverse_template or ""
|
|
|
|
| 741 |
for token in unified_diff(new_config.splitlines(keepends = True), org_config.splitlines(keepends = True), fromfile = config_file_name, tofile = config_file_name)
|
| 742 |
]
|
| 743 |
|
|
|
|
|
|
|
|
|
|
| 744 |
tokenizer_inverse_template = info.get(ModelFiles.TOKENIZER_INVERSE_TEMPLATE, {})
|
| 745 |
org_template_inverse = tokenizer_inverse_template.get("data", org_template_inverse)
|
| 746 |
|
|
|
|
| 1171 |
revision = parent_commit or branch,
|
| 1172 |
token = oauth_token.token if oauth_token else False,
|
| 1173 |
)
|
| 1174 |
+
|
| 1175 |
+
if ModelFiles.TOKENIZER_CHAT_TEMPLATE not in info and (hfapi.file_exists(
|
| 1176 |
+
repo,
|
| 1177 |
+
ModelFiles.TOKENIZER_CHAT_TEMPLATE,
|
| 1178 |
+
revision = branch,
|
| 1179 |
+
token = oauth_token.token if oauth_token else False,
|
| 1180 |
+
)):
|
| 1181 |
+
tokenizer_chat_template = hfapi.hf_hub_download(
|
| 1182 |
+
repo,
|
| 1183 |
+
ModelFiles.TOKENIZER_CHAT_TEMPLATE,
|
| 1184 |
+
revision = parent_commit or branch,
|
| 1185 |
+
token = oauth_token.token if oauth_token else False,
|
| 1186 |
+
)
|
| 1187 |
+
with open(tokenizer_chat_template, "r", encoding = "utf-8") as fp:
|
| 1188 |
+
template_data = fp.read()
|
| 1189 |
+
info[ModelFiles.TOKENIZER_CHAT_TEMPLATE] = {
|
| 1190 |
+
"data": template_data,
|
| 1191 |
+
}
|
| 1192 |
except Exception as e:
|
| 1193 |
pass
|
| 1194 |
else:
|
|
|
|
| 1201 |
"content": template_content,
|
| 1202 |
}
|
| 1203 |
info["chat_template"] = template_content.get("chat_template")
|
| 1204 |
+
elif ModelFiles.TOKENIZER_CHAT_TEMPLATE in info:
|
| 1205 |
+
info["chat_template"] = info[ModelFiles.TOKENIZER_CHAT_TEMPLATE].get("data")
|
| 1206 |
|
| 1207 |
pr_details = None
|
| 1208 |
if branch and branch.startswith("refs/pr/"):
|
|
|
|
| 1236 |
pr_submit: gr.Button(
|
| 1237 |
value = f"Commit to PR #{pr_details.num}" if pr_details else "Create Pull Request",
|
| 1238 |
),
|
| 1239 |
+
chat_template: gr.skip() if ModelFiles.CHAT_TEMPLATE_JSON not in info and ModelFiles.TOKENIZER_CHAT_TEMPLATE not in info else gr.Code(
|
| 1240 |
+
value = info.get(ModelFiles.TOKENIZER_CHAT_TEMPLATE, {}).get("data") or TokenizerConfig(info[ModelFiles.CHAT_TEMPLATE_JSON]["content"]).chat_templates.get("default"),
|
| 1241 |
),
|
| 1242 |
# inverse_template: gr.skip() if ModelFiles.TOKENIZER_INVERSE_TEMPLATE not in info else gr.Code(
|
| 1243 |
# value = info[ModelFiles.TOKENIZER_INVERSE_TEMPLATE]["data"],
|
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
|
| 2 |
-
huggingface_hub==0.
|
| 3 |
# gradio_huggingfacehub_search==0.0.8
|
| 4 |
-
transformers==4.
|
| 5 |
https://huggingface.co/spaces/CISCai/chat-template-editor/resolve/main/gradio_huggingfacehub_search-0.0.8-py3-none-any.whl
|
|
|
|
| 1 |
+
gradio[oauth]==5.38.0
|
| 2 |
+
huggingface_hub==0.33.4
|
| 3 |
# gradio_huggingfacehub_search==0.0.8
|
| 4 |
+
transformers==4.53.2
|
| 5 |
https://huggingface.co/spaces/CISCai/chat-template-editor/resolve/main/gradio_huggingfacehub_search-0.0.8-py3-none-any.whl
|