m-ric HF staff committed on
Commit
b23a62c
·
verified ·
1 Parent(s): ad9ba0d

Upload processor

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  language:
3
  - en
4
  library_name: transformers
@@ -7,8 +9,6 @@ pipeline_tag: image-text-to-text
7
  tags:
8
  - multimodal
9
  - aria
10
- base_model:
11
- - rhymes-ai/Aria-Base-64K
12
  ---
13
  <!-- <p align="center">
14
  <br>Aria</br>
 
1
  ---
2
+ base_model:
3
+ - rhymes-ai/Aria-Base-64K
4
  language:
5
  - en
6
  library_name: transformers
 
9
  tags:
10
  - multimodal
11
  - aria
 
 
12
  ---
13
  <!-- <p align="center">
14
  <br>Aria</br>
added_tokens.json CHANGED
@@ -1,3 +1,5 @@
1
  {
2
- "<pad>": 100352
 
 
3
  }
 
1
  {
2
+ "<pad>": 100352,
3
+ "<|im_end|>": 100354,
4
+ "<|im_start|>": 100353
5
  }
chat_template.json CHANGED
@@ -1,3 +1,3 @@
1
  {
2
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}{% elif message['content'] is iterable %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<fim_prefix><|img|><fim_suffix>{% endif %}{% endfor %}{% endif %}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
- }
 
1
  {
2
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}{% elif message['content'] is iterable %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<fim_prefix><|img|><fim_suffix>{% endif %}{% endfor %}{% endif %}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
+ }
special_tokens_map.json CHANGED
@@ -1,12 +1,10 @@
1
  {
 
 
2
  "image_token": "<|img|>",
3
- "pad_token": {
4
- "content": "<pad>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
  "unk_token": {
11
  "content": "<unk>",
12
  "lstrip": false,
 
1
  {
2
+ "image_prefix": "<fim_prefix>",
3
+ "image_suffix": "<fim_suffix>",
4
  "image_token": "<|img|>",
5
+ "pad_token": "<pad>",
6
+ "sequence_end": "<|im_end|>",
7
+ "sequence_start": "<|im_start|>",
 
 
 
 
8
  "unk_token": {
9
  "content": "<unk>",
10
  "lstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02702cce6c4de786d52a3dc624b39e86134c159b7490ea30630739c6f723e7f8
3
- size 11091481
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1268fb762970b277ed33dd794e5e5fc3d3d274c0bec8662f7a398799fa5fd862
3
+ size 11091855
tokenizer_config.json CHANGED
@@ -321,7 +321,7 @@
321
  "normalized": false,
322
  "rstrip": false,
323
  "single_word": false,
324
- "special": false
325
  },
326
  "40": {
327
  "content": "<fim_middle>",
@@ -337,7 +337,7 @@
337
  "normalized": false,
338
  "rstrip": false,
339
  "single_word": false,
340
- "special": false
341
  },
342
  "42": {
343
  "content": "<fim_pad>",
@@ -4938,6 +4938,22 @@
4938
  "rstrip": false,
4939
  "single_word": false,
4940
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4941
  }
4942
  },
4943
  "bos_token": null,
@@ -4945,14 +4961,22 @@
4945
  "clean_up_tokenization_spaces": false,
4946
  "eos_token": null,
4947
  "extra_special_tokens": {
 
 
4948
  "image_token": "<|img|>",
4949
- "pad_token": "<pad>"
 
 
4950
  },
 
 
4951
  "image_token": "<|img|>",
4952
  "legacy": true,
4953
  "model_max_length": 1000000000000000019884624838656,
4954
  "pad_token": "<pad>",
4955
  "processor_class": "AriaProcessor",
 
 
4956
  "sp_model_kwargs": {},
4957
  "spaces_between_special_tokens": false,
4958
  "tokenizer_class": "LlamaTokenizer",
 
321
  "normalized": false,
322
  "rstrip": false,
323
  "single_word": false,
324
+ "special": true
325
  },
326
  "40": {
327
  "content": "<fim_middle>",
 
337
  "normalized": false,
338
  "rstrip": false,
339
  "single_word": false,
340
+ "special": true
341
  },
342
  "42": {
343
  "content": "<fim_pad>",
 
4938
  "rstrip": false,
4939
  "single_word": false,
4940
  "special": true
4941
+ },
4942
+ "100353": {
4943
+ "content": "<|im_start|>",
4944
+ "lstrip": false,
4945
+ "normalized": false,
4946
+ "rstrip": false,
4947
+ "single_word": false,
4948
+ "special": true
4949
+ },
4950
+ "100354": {
4951
+ "content": "<|im_end|>",
4952
+ "lstrip": false,
4953
+ "normalized": false,
4954
+ "rstrip": false,
4955
+ "single_word": false,
4956
+ "special": true
4957
  }
4958
  },
4959
  "bos_token": null,
 
4961
  "clean_up_tokenization_spaces": false,
4962
  "eos_token": null,
4963
  "extra_special_tokens": {
4964
+ "image_prefix": "<fim_prefix>",
4965
+ "image_suffix": "<fim_suffix>",
4966
  "image_token": "<|img|>",
4967
+ "pad_token": "<pad>",
4968
+ "sequence_end": "<|im_end|>",
4969
+ "sequence_start": "<|im_start|>"
4970
  },
4971
+ "image_prefix": "<fim_prefix>",
4972
+ "image_suffix": "<fim_suffix>",
4973
  "image_token": "<|img|>",
4974
  "legacy": true,
4975
  "model_max_length": 1000000000000000019884624838656,
4976
  "pad_token": "<pad>",
4977
  "processor_class": "AriaProcessor",
4978
+ "sequence_end": "<|im_end|>",
4979
+ "sequence_start": "<|im_start|>",
4980
  "sp_model_kwargs": {},
4981
  "spaces_between_special_tokens": false,
4982
  "tokenizer_class": "LlamaTokenizer",