Pathumma-llm-audio-1.0.0 / configuration_pathumma_audio.py
import torch
from typing import Dict, Tuple, List

from transformers import PretrainedConfig


class PathummaAudioConfig(PretrainedConfig):
    model_type: str = "pathumma_audio"

    def __init__(
        self,
        llm_path: str = "Qwen/Qwen2-7B-Instruct",
        whisper_path: str = "openai/whisper-large-v3",
        beats_path: str = "",
        init_from_scratch: bool = True,
        lora: bool = True,
        lora_infer_mode: bool = True,
        lora_rank: int = 8,
        lora_alpha: int = 32,
        lora_dropout: float = 0.1,
        target_modules: List[str] = ["q_proj", "v_proj"],
        qformer_query_token: int = 1,
        qformer_hidden_layers: int = 2,
        second_per_window: float = 0.333333,
        second_stride: float = 0.333333,
        torch_dtype: torch.dtype = torch.bfloat16,
        **kwargs
    ):
        super().__init__(**kwargs)
        # Hub metadata: keep caller-supplied values if present, otherwise
        # default to this repo's custom config/model classes.
        self.architectures = kwargs.get("architectures", ["PathummaAudioModel"])
        self.auto_map = kwargs.get("auto_map", {
            "AutoConfig": "configuration_pathumma_audio.PathummaAudioConfig",
            "AutoModel": "modeling_pathumma_audio.PathummaAudioModel"
        })
        # Backbone checkpoints (LLM, Whisper speech encoder, BEATs audio encoder).
        self.llm_path = llm_path
        self.whisper_path = whisper_path
        self.beats_path = beats_path
        self.init_from_scratch = init_from_scratch
        # LoRA adapter settings applied to the LLM.
        self.lora = lora
        self.lora_infer_mode = lora_infer_mode
        self.lora_rank = lora_rank
        self.lora_alpha = lora_alpha
        self.lora_dropout = lora_dropout
        self.target_modules = target_modules
        # Q-Former and audio windowing parameters.
        self.qformer_query_token = qformer_query_token
        self.qformer_hidden_layers = qformer_hidden_layers
        self.second_per_window = second_per_window
        self.second_stride = second_stride
        self.torch_dtype = torch_dtype
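

if __name__ == "__main__":
    # Usage sketch (illustrative, not part of the original file): build the
    # config with a few overridden hyperparameters and persist it. The values
    # and the output directory name below are example assumptions, not
    # recommended settings.
    config = PathummaAudioConfig(lora_rank=16, lora_dropout=0.05)
    print(config.model_type)  # "pathumma_audio"
    print(config.auto_map)    # custom-class mapping set in __init__ above

    # PretrainedConfig serializes the torch.dtype to its string form
    # ("bfloat16") when writing config.json.
    config.save_pretrained("./pathumma_audio_config")

    # When this file ships inside a hub repo, the auto_map entries let users
    # load the config without importing this class directly, e.g.:
    #   AutoConfig.from_pretrained("<hub-repo-id>", trust_remote_code=True)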