---
# LoRA-adapted multimodal LLaMA-3 8B captioning config.

model:
  _component_: models.lora_mmllama3_8b
  # LoRA applied to attention projections only (MLP/output adapters disabled below).
  lora_attn_modules:
    - q_proj
    - v_proj
  apply_lora_to_mlp: false
  apply_lora_to_output: false
  lora_rank: 32
  lora_alpha: 64  # alpha = 2 * rank
  perception_tokens: 2
  use_clip: false

tokenizer:
  _component_: models.a2a_tokenizer
  path: root/top/tokenizer.model

device: cuda
dtype: bf16

inference:
  # NOTE(review): single quotes keep '\n' as a literal backslash-n, not a newline —
  # confirm the consumer expands it; if real newlines are wanted, use double quotes.
  prompt_template: 'Video:\n {video}\n Caption the previous video.'
  max_new_tokens: 200
  temperature: 0.8
  top_k: 200