Pathumma-llm-audio-1.0.0 / configuration_pathumma_audio.py
import torch
from typing import Dict, Tuple, List

from transformers import PretrainedConfig


class PathummaAudioConfig(PretrainedConfig):
    model_type: str = "pathumma_audio"

    def __init__(
        self,
        llm_path: str = "Qwen/Qwen2-7B-Instruct",
        whisper_path: str = "openai/whisper-large-v3",
        beats_path: str = "",
        init_from_scratch: bool = True,
        lora: bool = True,
        lora_infer_mode: bool = True,
        lora_rank: int = 8,
        lora_alpha: int = 32,
        lora_dropout: float = 0.1,
        target_modules: List[str] = ["q_proj", "v_proj"],
        qformer_query_token: int = 1,
        qformer_hidden_layers: int = 2,
        second_per_window: float = 0.333333,
        second_stride: float = 0.333333,
        torch_dtype: torch.dtype = torch.bfloat16,
        **kwargs
    ):
        super().__init__(**kwargs)
        # Hub metadata: keep caller-supplied values if present, otherwise
        # default to this repo's custom config/model classes.
        self.architectures = kwargs.get("architectures", ["PathummaAudioModel"])
        self.auto_map = kwargs.get("auto_map", {
            "AutoConfig": "configuration_pathumma_audio.PathummaAudioConfig",
            "AutoModel": "modeling_pathumma_audio.PathummaAudioModel"
        })
        # Backbone checkpoints (LLM, Whisper speech encoder, BEATs audio encoder).
        self.llm_path = llm_path
        self.whisper_path = whisper_path
        self.beats_path = beats_path
        self.init_from_scratch = init_from_scratch
        # LoRA adapter settings applied to the LLM.
        self.lora = lora
        self.lora_infer_mode = lora_infer_mode
        self.lora_rank = lora_rank
        self.lora_alpha = lora_alpha
        self.lora_dropout = lora_dropout
        self.target_modules = target_modules
        # Q-Former and audio windowing parameters.
        self.qformer_query_token = qformer_query_token
        self.qformer_hidden_layers = qformer_hidden_layers
        self.second_per_window = second_per_window
        self.second_stride = second_stride
        self.torch_dtype = torch_dtype
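

if __name__ == "__main__":
    # Usage sketch (illustrative, not part of the original file): build the
    # config with a few overridden hyperparameters and persist it. The values
    # and the output directory name below are example assumptions, not
    # recommended settings.
    config = PathummaAudioConfig(lora_rank=16, lora_dropout=0.05)
    print(config.model_type)  # "pathumma_audio"
    print(config.auto_map)    # custom-class mapping set in __init__ above

    # PretrainedConfig serializes the torch.dtype to its string form
    # ("bfloat16") when writing config.json.
    config.save_pretrained("./pathumma_audio_config")

    # When this file ships inside a hub repo, the auto_map entries let users
    # load the config without importing this class directly, e.g.:
    #   AutoConfig.from_pretrained("<hub-repo-id>", trust_remote_code=True)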