danielhanchen committed · verified
Commit 4b44b0c · Parent(s): 5c47304

Add files using upload-large-folder tool
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
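The new rule routes tokenizer.json through Git LFS alongside the existing binary patterns. As a quick sanity check, a hedged sketch using huggingface_hub (the repo id is a placeholder, and the exact attribute names on the returned sibling objects can differ slightly between huggingface_hub versions):

```python
# Sketch only: REPO_ID is a hypothetical placeholder for this repository's id on the Hub.
from huggingface_hub import HfApi

REPO_ID = "<namespace>/<this-repo>"

info = HfApi().model_info(REPO_ID, files_metadata=True)
for sibling in info.siblings:
    # sibling.lfs is populated only for files stored via Git LFS
    if sibling.lfs is not None:
        print(f"{sibling.rfilename}: LFS object, {sibling.lfs.size} bytes")
```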
chat_template.json ADDED
@@ -0,0 +1,3 @@
+ {
+ "chat_template": "{{ bos_token }}<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble\nYou are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. When analyzing images, carefully describe and interpret their content while avoiding any promotion of harm, misinformation, or bias.\n\nYou are Aya Vision, a vision-language model built by Cohere for AI. You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew. You are capable of interpreting images, including describing them, answering questions about their contents, extracting textual information, and analyzing visual context. Your responses must maintain the highest standards of quality, accuracy, and safety.\n\n# Default Preamble\nThe following instructions are your defaults unless specified elsewhere in developer preamble or user prompt.\n- Your name is Aya Vision.\n- You are a large language model built by Cohere for AI.\n- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions.\n- If the input is ambiguous, ask clarifying follow-up questions.\n- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks).\n- Use LaTeX to generate mathematical notation for complex equations.\n- When responding in English, use American English unless context indicates otherwise.\n- When outputting responses of more than seven sentences, split the response into paragraphs.\n- Prefer the active voice.\n- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. 
Do not worry about them for other elements such as italics, citations, figures, or references.\n- Use gender-neutral pronouns for unspecified persons.\n- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list.\n- Use the third person when asked to write a summary.\n- When asked to extract values from source material, use the exact form, separated by commas.\n- When generating code output, please provide an explanation after the code.\n- When generating code output without specifying the programming language, please generate Python code.\n- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.\n<|END_OF_TURN_TOKEN|>\n{%- for message in messages -%}\n <|START_OF_TURN_TOKEN|>{{ message.role | replace(\"user\", \"<|USER_TOKEN|>\") | replace(\"assistant\", \"<|CHATBOT_TOKEN|><|START_RESPONSE|>\") | replace(\"system\", \"<|SYSTEM_TOKEN|>\") }}\n {%- if message.content is defined -%}\n {%- if message.content is string -%}\n{{ message.content }}\n {%- else -%}\n {%- for item in message.content | selectattr('type', 'equalto', 'image') -%}\n<image>\n {%- endfor -%}\n {%- for item in message.content | selectattr('type', 'equalto', 'text') -%}\n{{ item.text }}\n {%- endfor -%}\n {%- endif -%}\n {%- elif message.message is defined -%}\n {%- if message.message is string -%}\n{{ message.message }}\n {%- else -%}\n {%- for item in message.message | selectattr('type', 'equalto', 'image') -%}\n<image>\n {%- endfor -%}\n {%- for item in message.message | selectattr('type', 'equalto', 'text') -%}\n{{ item.text }}\n {%- endfor -%}\n {%- endif -%}\n {%- endif -%}\n {%- if message.role == \"assistant\" -%}\n<|END_RESPONSE|>\n {%- endif -%}\n<|END_OF_TURN_TOKEN|>\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>\n{%- endif -%}\n"
+ }
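The template above renders the Aya Vision system preamble, the conversation turns, and `<image>` placeholders into Cohere-style turn tokens (`<|START_OF_TURN_TOKEN|>`, `<|USER_TOKEN|>`, `<|CHATBOT_TOKEN|>`, `<|START_RESPONSE|>`, and so on). A minimal sketch of exercising it through transformers' AutoProcessor; the repo id is a placeholder and rendering details may vary by transformers version:

```python
# Sketch only: REPO_ID is a hypothetical placeholder for this repository's id on the Hub.
from transformers import AutoProcessor

REPO_ID = "<namespace>/<this-repo>"
processor = AutoProcessor.from_pretrained(REPO_ID)

messages = [
    {"role": "user", "content": [
        {"type": "image", "url": "https://example.com/cat.png"},
        {"type": "text", "text": "What is in this image?"},
    ]}
]

# Render the prompt as plain text to inspect how the turn tokens are laid out.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
print(prompt)
```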
config.json ADDED
@@ -0,0 +1,136 @@
+ {
+ "adapter_layer_norm_eps": 1e-06,
+ "alignment_activation_fn": "swiglu",
+ "alignment_intermediate_size": 28672,
+ "architectures": [
+ "AyaVisionForConditionalGeneration"
+ ],
+ "bos_token_id": 5,
+ "downsample_factor": 2,
+ "eos_token_id": 255001,
+ "image_token_index": 255036,
+ "max_splits_per_img": 12,
+ "model_type": "aya_vision",
+ "pad_token_id": 0,
+ "projector_hidden_act": "gelu",
+ "quantization_config": {
+ "_load_in_4bit": true,
+ "_load_in_8bit": false,
+ "bnb_4bit_compute_dtype": "bfloat16",
+ "bnb_4bit_quant_storage": "uint8",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "llm_int8_enable_fp32_cpu_offload": false,
+ "llm_int8_has_fp16_weight": false,
+ "llm_int8_skip_modules": [
+ "lm_head",
+ "multi_modal_projector",
+ "merger",
+ "modality_projection",
+ "vision_tower.vision_model.encoder.layers.25.self_attn",
+ "vision_tower.vision_model.encoder.layers.23.self_attn",
+ "vision_tower.vision_model.encoder.layers.20.self_attn",
+ "vision_tower.vision_model.encoder.layers.22.self_attn",
+ "vision_tower.vision_model.encoder.layers.19.mlp",
+ "vision_tower.vision_model.encoder.layers.20.mlp",
+ "vision_tower.vision_model.encoder.layers.24.self_attn",
+ "vision_tower.vision_model.encoder.layers.19.self_attn",
+ "vision_tower.vision_model.encoder.layers.21.mlp",
+ "vision_tower.vision_model.encoder.layers.22.mlp",
+ "vision_tower.vision_model.encoder.layers.21.self_attn",
+ "vision_tower.vision_model.encoder.layers.23.mlp",
+ "vision_tower.vision_model.encoder.layers.18.self_attn",
+ "vision_tower.vision_model.encoder.layers.18.mlp",
+ "vision_tower.vision_model.encoder.layers.25.mlp",
+ "vision_tower.vision_model.encoder.layers.24.mlp",
+ "vision_tower.vision_model.encoder.layers.17.self_attn",
+ "vision_tower.vision_model.encoder.layers.26.self_attn",
+ "vision_tower.vision_model.encoder.layers.17.mlp",
+ "vision_tower.vision_model.encoder.layers.14.mlp",
+ "vision_tower.vision_model.encoder.layers.16.self_attn",
+ "vision_tower.vision_model.encoder.layers.16.mlp",
+ "vision_tower.vision_model.encoder.layers.26.mlp",
+ "vision_tower.vision_model.encoder.layers.15.mlp",
+ "multi_modal_projector",
+ "vision_tower.vision_model.encoder.layers.15.self_attn",
+ "vision_tower.vision_model.encoder.layers.14.self_attn",
+ "vision_tower.vision_model.encoder.layers.12.mlp",
+ "vision_tower.vision_model.encoder.layers.13.self_attn",
+ "vision_tower.vision_model.encoder.layers.13.mlp",
+ "vision_tower.vision_model.encoder.layers.11.mlp",
+ "vision_tower.vision_model.encoder.layers.12.self_attn",
+ "vision_tower.vision_model.encoder.layers.9.self_attn",
+ "vision_tower.vision_model.encoder.layers.11.self_attn",
+ "vision_tower.vision_model.encoder.layers.10.self_attn",
+ "vision_tower.vision_model.encoder.layers.10.mlp",
+ "vision_tower.vision_model.encoder.layers.6.self_attn",
+ "vision_tower.vision_model.encoder.layers.7.self_attn",
+ "vision_tower.vision_model.encoder.layers.8.self_attn",
+ "vision_tower.vision_model.encoder.layers.8.mlp",
+ "vision_tower.vision_model.encoder.layers.5.self_attn",
+ "vision_tower.vision_model.encoder.layers.3.self_attn",
+ "vision_tower.vision_model.encoder.layers.9.mlp",
+ "vision_tower.vision_model.encoder.layers.7.mlp",
+ "vision_tower.vision_model.encoder.layers.4.self_attn",
+ "vision_tower.vision_model.encoder.layers.6.mlp",
+ "vision_tower.vision_model.encoder.layers.5.mlp",
+ "vision_tower.vision_model.encoder.layers.0.self_attn",
+ "vision_tower.vision_model.encoder.layers.2.self_attn",
+ "vision_tower.vision_model.encoder.layers.4.mlp",
+ "vision_tower.vision_model.encoder.layers.3.mlp",
+ "vision_tower.vision_model.encoder.layers.2.mlp",
+ "vision_tower.vision_model.encoder.layers.1.mlp",
+ "vision_tower.vision_model.encoder.layers.1.self_attn",
+ "vision_tower.vision_model.encoder.layers.0.mlp"
+ ],
+ "llm_int8_threshold": 6.0,
+ "load_in_4bit": true,
+ "load_in_8bit": false,
+ "quant_method": "bitsandbytes"
+ },
+ "text_config": {
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "cache_implementation": "hybrid",
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "layer_norm_eps": 1e-05,
+ "logit_scale": 0.25,
+ "max_position_embeddings": 8192,
+ "model_type": "cohere2",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "rope_scaling": null,
+ "rope_theta": 50000,
+ "sliding_window": 4096,
+ "sliding_window_pattern": 4,
+ "torch_dtype": "bfloat16",
+ "use_cache": true,
+ "use_qk_norm": false,
+ "vocab_size": 256000
+ },
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.50.0.dev0",
+ "unsloth_fixed": true,
+ "vision_config": {
+ "attention_dropout": 0.0,
+ "hidden_act": "gelu_pytorch_tanh",
+ "hidden_size": 1152,
+ "image_size": 364,
+ "intermediate_size": 4304,
+ "layer_norm_eps": 1e-06,
+ "model_type": "siglip_vision_model",
+ "num_attention_heads": 16,
+ "num_channels": 3,
+ "num_hidden_layers": 27,
+ "patch_size": 14,
+ "torch_dtype": "bfloat16",
+ "vision_use_head": false
+ },
+ "vision_feature_layer": -1,
+ "vision_feature_select_strategy": "full"
+ }
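config.json embeds a bitsandbytes quantization_config (NF4, double quantization, bfloat16 compute), with lm_head, the multimodal projector, and the vision tower's attention/MLP blocks kept out of 4-bit via llm_int8_skip_modules, so the checkpoint loads pre-quantized without any extra flags. A hedged loading sketch; the repo id is a placeholder, and AutoModelForImageTextToText is assumed to resolve AyaVisionForConditionalGeneration on transformers >= 4.50 with bitsandbytes installed:

```python
# Sketch only: REPO_ID is a hypothetical placeholder; requires transformers >= 4.50 and bitsandbytes.
import torch
from transformers import AutoModelForImageTextToText, AutoProcessor

REPO_ID = "<namespace>/<this-repo>"

# The 4-bit bitsandbytes settings are read from quantization_config in config.json,
# so no explicit BitsAndBytesConfig is needed when loading this checkpoint.
model = AutoModelForImageTextToText.from_pretrained(
    REPO_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(REPO_ID)
print(model.config.quantization_config)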
generation_config.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "_from_model_config": true,
+ "bos_token_id": 5,
+ "cache_implementation": "hybrid",
+ "eos_token_id": 255001,
+ "pad_token_id": 0,
+ "transformers_version": "4.50.0.dev0"
+ }
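generation_config.json pins bos 5, eos 255001 (<|END_OF_TURN_TOKEN|>), pad 0, and the hybrid cache, so generate() stops at the end-of-turn token by default. A small hedged continuation of the earlier sketches, reusing the hypothetical `model`, `processor`, and `messages` objects (exact kwargs can vary by transformers version):

```python
# Sketch only: continues the hypothetical objects from the previous snippets.
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

# eos_token_id / pad_token_id come from generation_config.json (255001 / 0).
output = model.generate(**inputs, max_new_tokens=256, do_sample=False)
new_tokens = output[0, inputs["input_ids"].shape[1]:]
print(processor.tokenizer.decode(new_tokens, skip_special_tokens=True))
```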
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1d04b8fcd8ef1de828ad1d881c27331a5706c62f740a740fde3c593869490d21
+ size 6905985677
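The entry above is a Git LFS pointer, not the weights themselves; the real model.safetensors is roughly 6.9 GB and is identified by the SHA-256 in the pointer. A hedged sketch for verifying a downloaded copy against that pointer (placeholder repo id; hf_hub_download from huggingface_hub):

```python
# Sketch only: REPO_ID is a hypothetical placeholder for this repository's id on the Hub.
import hashlib
import os
from huggingface_hub import hf_hub_download

REPO_ID = "<namespace>/<this-repo>"
EXPECTED_SHA256 = "1d04b8fcd8ef1de828ad1d881c27331a5706c62f740a740fde3c593869490d21"
EXPECTED_SIZE = 6905985677

path = hf_hub_download(REPO_ID, "model.safetensors")

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch with the LFS pointer"
assert h.hexdigest() == EXPECTED_SHA256, "sha256 mismatch with the LFS pointer"
print("model.safetensors matches its LFS pointer")
```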
preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
+ {
+ "crop_to_patches": false,
+ "do_convert_rgb": true,
+ "do_normalize": true,
+ "do_rescale": true,
+ "do_resize": true,
+ "image_mean": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "image_processor_type": "GotOcr2ImageProcessor",
+ "image_std": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "max_patches": 12,
+ "min_patches": 1,
+ "processor_class": "AyaVisionProcessor",
+ "resample": 3,
+ "rescale_factor": 0.00392156862745098,
+ "size": {
+ "height": 364,
+ "width": 364
+ }
+ }
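The image preprocessing resizes to 364x364 (resample=3 is PIL bicubic), rescales by 1/255, and normalizes with mean and std of 0.5 per channel, which maps pixel values into [-1, 1]. A hedged sketch of the equivalent arithmetic, independent of the GotOcr2ImageProcessor implementation and ignoring the tiling/cropping step:

```python
# Sketch only: reproduces the resize/rescale/normalize arithmetic from preprocessor_config.json.
import numpy as np
from PIL import Image

def preprocess(img: Image.Image) -> np.ndarray:
    img = img.convert("RGB").resize((364, 364), Image.BICUBIC)    # do_resize, resample=3
    x = np.asarray(img).astype(np.float32) * 0.00392156862745098  # do_rescale: 1/255 -> [0, 1]
    x = (x - 0.5) / 0.5                                           # do_normalize: mean=std=0.5 -> [-1, 1]
    return x.transpose(2, 0, 1)                                   # HWC -> CHW

pixels = preprocess(Image.new("RGB", (800, 600), color=(128, 64, 255)))
print(pixels.shape, pixels.min(), pixels.max())
```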
processor_config.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "end_of_img_token": "<|END_OF_IMG|>",
+ "image_token": "<image>",
+ "img_line_break_token": "<|IMG_LINE_BREAK|>",
+ "img_patch_token": "<|IMG_PATCH|>",
+ "img_size": 364,
+ "patch_size": 28,
+ "processor_class": "AyaVisionProcessor",
+ "start_of_img_token": "<|START_OF_IMG|>",
+ "tile_global_token": "TILE_GLOBAL",
+ "tile_token": "TILE",
+ "vision_feature_select_strategy": "full"
+ }
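With img_size 364 and patch_size 28 (the SigLIP patch size of 14 combined with the downsample_factor of 2 from config.json), each 364x364 tile corresponds to a 13x13 grid, i.e. 169 <|IMG_PATCH|> positions, presumably framed by the <|START_OF_IMG|>/<|END_OF_IMG|> and TILE markers. The sketch below is back-of-the-envelope arithmetic from these config values, not a statement about the exact token sequence the processor emits:

```python
# Sketch only: rough per-image token budget derived from the config values above.
IMG_SIZE = 364        # processor_config.json: img_size
PATCH_SIZE = 28       # processor_config.json: patch_size (vision patch 14 * downsample_factor 2)
MAX_PATCHES = 12      # preprocessor_config.json: max_patches / config.json: max_splits_per_img

patches_per_tile = (IMG_SIZE // PATCH_SIZE) ** 2   # 13 * 13 = 169
print(f"<|IMG_PATCH|> tokens per tile: {patches_per_tile}")
# Assumption: a TILE_GLOBAL thumbnail is encoded in addition to the cropped tiles.
print(f"rough upper bound per image at max tiling: {patches_per_tile * (MAX_PATCHES + 1)}")
```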
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "bos_token": {
+ "content": "<BOS_TOKEN>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|END_OF_TURN_TOKEN|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<PAD>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
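A quick, hedged check that the tokenizer exposes the same bos/eos/pad strings and the ids expected from config.json (5 for bos, 255001 for end-of-turn, 0 for pad); the repo id is a placeholder:

```python
# Sketch only: REPO_ID is a hypothetical placeholder for this repository's id on the Hub.
from transformers import AutoTokenizer

REPO_ID = "<namespace>/<this-repo>"
tok = AutoTokenizer.from_pretrained(REPO_ID)

assert tok.bos_token == "<BOS_TOKEN>" and tok.convert_tokens_to_ids(tok.bos_token) == 5
assert tok.eos_token == "<|END_OF_TURN_TOKEN|>" and tok.convert_tokens_to_ids(tok.eos_token) == 255001
assert tok.pad_token == "<PAD>" and tok.convert_tokens_to_ids(tok.pad_token) == 0
```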
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d896f76efd3c9d3e0541ec0a0396a5669281c5667a1ac1d82001117625720ff4
+ size 20125695
tokenizer_config.json ADDED
@@ -0,0 +1,392 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "0": {"content": "<PAD>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+ "1": {"content": "<UNK>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+ "2": {"content": "<CLS>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+ "3": {"content": "<SEP>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+ "4": {"content": "<MASK_TOKEN>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+ "5": {"content": "<BOS_TOKEN>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+ "6": {"content": "<EOS_TOKEN>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+ "7": {"content": "<EOP_TOKEN>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+ "255000": {"content": "<|START_OF_TURN_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255001": {"content": "<|END_OF_TURN_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+ "255002": {"content": "<|YES_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255003": {"content": "<|NO_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255004": {"content": "<|GOOD_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255005": {"content": "<|BAD_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255006": {"content": "<|USER_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255007": {"content": "<|CHATBOT_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255008": {"content": "<|SYSTEM_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255009": {"content": "<|USER_0_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255010": {"content": "<|USER_1_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255011": {"content": "<|USER_2_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255012": {"content": "<|USER_3_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255013": {"content": "<|USER_4_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255014": {"content": "<|USER_5_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255015": {"content": "<|USER_6_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255016": {"content": "<|USER_7_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255017": {"content": "<|USER_8_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255018": {"content": "<|USER_9_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255019": {"content": "<|START_THINKING|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255020": {"content": "<|END_THINKING|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255021": {"content": "<|START_RESPONSE|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+ "255022": {"content": "<|END_RESPONSE|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+ "255023": {"content": "<|START_ACTION|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255024": {"content": "<|END_ACTION|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255025": {"content": "<|START_TOOL_RESULT|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255026": {"content": "<|END_TOOL_RESULT|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255027": {"content": "<|EXTRA_8_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255028": {"content": "<|NEW_FILE|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
+ "255029": {"content": "<|BEGINNING_OF_PREFIX_FIM_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255030": {"content": "<|BEGINNING_OF_MIDDLE_FIM_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255031": {"content": "<|BEGINNING_OF_SUFFIX_FIM_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255032": {"content": "<|END_OF_MIDDLE_FIM_TOKEN|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255033": {"content": "<|START_OF_IMG|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255034": {"content": "<|END_OF_IMG|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255035": {"content": "<|IMG_LINE_BREAK|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false},
+ "255036": {"content": "<|IMG_PATCH|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false}
+ },
+ "bos_token": "<BOS_TOKEN>",
+ "chat_template": [
+ {
+ "name": "default",
+ "template": "{{ bos_token }}<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble\nYou are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. When analyzing images, carefully describe and interpret their content while avoiding any promotion of harm, misinformation, or bias.\n\nYou are Aya Vision, a vision-language model built by Cohere for AI. You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew. You are capable of interpreting images, including describing them, answering questions about their contents, extracting textual information, and analyzing visual context. Your responses must maintain the highest standards of quality, accuracy, and safety.\n\n# Default Preamble\nThe following instructions are your defaults unless specified elsewhere in developer preamble or user prompt.\n- Your name is Aya Vision.\n- You are a large language model built by Cohere for AI.\n- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions.\n- If the input is ambiguous, ask clarifying follow-up questions.\n- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks).\n- Use LaTeX to generate mathematical notation for complex equations.\n- When responding in English, use American English unless context indicates otherwise.\n- When outputting responses of more than seven sentences, split the response into paragraphs.\n- Prefer the active voice.\n- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. 
Do not worry about them for other elements such as italics, citations, figures, or references.\n- Use gender-neutral pronouns for unspecified persons.\n- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list.\n- Use the third person when asked to write a summary.\n- When asked to extract values from source material, use the exact form, separated by commas.\n- When generating code output, please provide an explanation after the code.\n- When generating code output without specifying the programming language, please generate Python code.\n- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.\n<|END_OF_TURN_TOKEN|>\n{%- for message in messages -%}\n <|START_OF_TURN_TOKEN|>{{ message.role | replace(\"user\", \"<|USER_TOKEN|>\") | replace(\"assistant\", \"<|CHATBOT_TOKEN|><|START_RESPONSE|>\") | replace(\"system\", \"<|SYSTEM_TOKEN|>\") }}\n {%- if message.content is defined -%}\n {%- if message.content is string -%}\n{{ message.content }}\n {%- else -%}\n {%- for item in message.content | selectattr('type', 'equalto', 'image') -%}\n<image>\n {%- endfor -%}\n {%- for item in message.content | selectattr('type', 'equalto', 'text') -%}\n{{ item.text }}\n {%- endfor -%}\n {%- endif -%}\n {%- elif message.message is defined -%}\n {%- if message.message is string -%}\n{{ message.message }}\n {%- else -%}\n {%- for item in message.message | selectattr('type', 'equalto', 'image') -%}\n<image>\n {%- endfor -%}\n {%- for item in message.message | selectattr('type', 'equalto', 'text') -%}\n{{ item.text }}\n {%- endfor -%}\n {%- endif -%}\n {%- endif -%}\n {%- if message.role == \"assistant\" -%}\n<|END_RESPONSE|>\n {%- endif -%}\n<|END_OF_TURN_TOKEN|>\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>\n{%- endif -%}\n"
+ }
+ ],
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|END_OF_TURN_TOKEN|>",
+ "extra_special_tokens": {},
+ "legacy": true,
+ "max_length": null,
+ "merges_file": null,
+ "model_max_length": 16384,
+ "pad_to_multiple_of": null,
+ "pad_token": "<PAD>",
+ "pad_token_type_id": 0,
+ "padding_side": "left",
+ "processor_class": "AyaVisionProcessor",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "CohereTokenizer",
+ "unk_token": null,
+ "use_default_system_prompt": false,
+ "vocab_file": null
+ }
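Beyond the embedded default chat template, the notable settings are the 16384-token model_max_length, left padding (the usual choice for batched decoder-only generation), and the image special tokens ending at id 255036, which matches image_token_index in config.json. A hedged sketch for inspecting them, continuing the hypothetical `tok` object from the earlier snippet:

```python
# Sketch only: continues the hypothetical `tok` from the special_tokens_map check above.
print(tok.model_max_length)                        # 16384
print(tok.padding_side)                            # "left"
print(tok.convert_tokens_to_ids("<|IMG_PATCH|>"))  # 255036, matches image_token_index in config.json
print(tok.convert_tokens_to_ids("<|START_OF_IMG|>"), tok.convert_tokens_to_ids("<|END_OF_IMG|>"))
```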