# Model configuration for Mistral-Nemo-Instruct-2407 served through the
# llama-cpp engine. Sections are delimited by BEGIN/END marker comments:
# general GGUF metadata, per-request inference parameters, and model load
# parameters.

# BEGIN GENERAL GGUF METADATA
# Model ID, unique between models (author / quantization).
id: Mistral-Nemo-Instruct-2407
# Model ID used to construct requests; should be unique between models
# (author / quantization).
model: mistral-nemo
name: Mistral-Nemo-Instruct-2407  # metadata.general.name
version: 2  # metadata.version
# END GENERAL GGUF METADATA

# BEGIN INFERENCE PARAMETERS
# BEGIN REQUIRED
# Stop sequences; corresponds to tokenizer.ggml.eos_token_id.
# NOTE(review): this entry was empty (a bare `-`, i.e. null) in the previous
# revision. `</s>` is restored here as the Mistral end-of-sequence token;
# confirm against the GGUF tokenizer metadata.
stop:
  - "</s>"
# END REQUIRED

# BEGIN OPTIONAL
stream: true  # Default true?
top_p: 0.949999988  # Ranges: 0 to 1
temperature: 0.699999988  # Ranges: 0 to 1
frequency_penalty: 0  # Ranges: 0 to 1
presence_penalty: 0  # Ranges: 0 to 1
max_tokens: 4096  # Should default to the context length
seed: -1
dynatemp_range: 0
dynatemp_exponent: 1
top_k: 40
min_p: 0.0500000007
tfs_z: 1
typ_p: 1
repeat_last_n: 64
repeat_penalty: 1
mirostat: false
mirostat_tau: 5
mirostat_eta: 0.100000001
penalize_nl: false
ignore_eos: false
n_probs: 0
min_keep: 0
# END OPTIONAL
# END INFERENCE PARAMETERS

# BEGIN MODEL LOAD PARAMETERS
# BEGIN REQUIRED
engine: llama-cpp  # Engine used to run the model
# Double-quoted so that `\n` is read as a newline escape. {system_message}
# and {prompt} are placeholders filled in per request.
# NOTE(review): the first [INST] is never closed before the second one
# opens; confirm this matches the model's expected instruct format.
prompt_template: "[INST] {system_message}\n[INST] {prompt} [/INST]"
# END REQUIRED

# BEGIN OPTIONAL
ctx_len: 4096  # llama.context_length | 0 or undefined = loaded from model
ngl: 41  # Undefined = loaded from model
# END OPTIONAL
# END MODEL LOAD PARAMETERS