---
# LoRA-adapted multimodal LLaMA-3 8B captioning config.

model:
  _component_: models.lora_mmllama3_8b
  # LoRA applied to attention projections only (MLP/output adapters disabled below).
  lora_attn_modules:
    - q_proj
    - v_proj
  apply_lora_to_mlp: false
  apply_lora_to_output: false
  lora_rank: 32
  lora_alpha: 64  # alpha = 2 * rank
  perception_tokens: 2
  use_clip: false

tokenizer:
  _component_: models.a2a_tokenizer
  path: root/top/tokenizer.model

device: cuda
dtype: bf16

inference:
  # NOTE(review): single quotes keep '\n' as a literal backslash-n, not a newline —
  # confirm the consumer expands it; if real newlines are wanted, use double quotes.
  prompt_template: 'Video:\n {video}\n Caption the previous video.'
  max_new_tokens: 200
  temperature: 0.8
  top_k: 200