import torch
from typing import List, Optional

from transformers import PretrainedConfig


class PathummaAudioConfig(PretrainedConfig):
    """Configuration for the PathummaAudio model.

    Stores the checkpoint paths of the pretrained components (a Qwen2 LLM,
    a Whisper speech encoder, and a BEATs audio encoder), the LoRA adapter
    settings for the LLM, and the Q-Former connector hyperparameters.
    """

    model_type: str = "pathumma_audio"

    def __init__(
        self,
        llm_path: str = "Qwen/Qwen2-7B-Instruct",
        whisper_path: str = "openai/whisper-large-v3",
        beats_path: str = "",
        init_from_scratch: bool = True,
        lora: bool = True,
        lora_infer_mode: bool = True,
        lora_rank: int = 8,
        lora_alpha: int = 32,
        lora_dropout: float = 0.1,
        target_modules: Optional[List[str]] = None,
        qformer_query_token: int = 1,
        qformer_hidden_layers: int = 2,
        second_per_window: float = 0.333333,
        second_stride: float = 0.333333,
        torch_dtype: torch.dtype = torch.bfloat16,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Metadata so that AutoConfig / AutoModel can locate the custom classes
        # when the model is loaded with trust_remote_code=True.
        self.architectures = kwargs.get("architectures", ["PathummaAudioModel"])
        self.auto_map = kwargs.get(
            "auto_map",
            {
                "AutoConfig": "configuration_pathumma_audio.PathummaAudioConfig",
                "AutoModel": "modeling_pathumma_audio.PathummaAudioModel",
            },
        )

        # Pretrained component checkpoints.
        self.llm_path = llm_path
        self.whisper_path = whisper_path
        self.beats_path = beats_path
        self.init_from_scratch = init_from_scratch

        # LoRA adapter settings for the LLM. The target-module default is
        # resolved here to avoid a mutable default argument.
        self.lora = lora
        self.lora_infer_mode = lora_infer_mode
        self.lora_rank = lora_rank
        self.lora_alpha = lora_alpha
        self.lora_dropout = lora_dropout
        self.target_modules = (
            target_modules if target_modules is not None else ["q_proj", "v_proj"]
        )

        # Q-Former connector settings (window length and stride in seconds).
        self.qformer_query_token = qformer_query_token
        self.qformer_hidden_layers = qformer_hidden_layers
        self.second_per_window = second_per_window
        self.second_stride = second_stride

        self.torch_dtype = torch_dtype
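

# -----------------------------------------------------------------------------
# Usage sketch (illustrative only): shows how the config above can be built and
# serialized via the standard PretrainedConfig machinery. The override values
# are arbitrary examples, not recommended settings.
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    # Default configuration, overriding a couple of LoRA hyperparameters.
    config = PathummaAudioConfig(lora_rank=16, lora_alpha=64)

    # PretrainedConfig handles JSON serialization; torch dtypes are written
    # as strings (e.g. "bfloat16") in the serialized form.
    print(config.model_type)            # "pathumma_audio"
    print(config.target_modules)        # ["q_proj", "v_proj"]
    print(config.to_json_string()[:200])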