Commit: update device_map
Browse files — modeling_cocom.py (+7 −1)
modeling_cocom.py
CHANGED
@@ -61,7 +61,7 @@ class COCOMConfig(PretrainedConfig):
|
|
61 |
|
62 |
model_type = "COCOM"
|
63 |
def __init__(self,
|
64 |
-
decoder_model_name="
|
65 |
quantization = 'no',
|
66 |
generation_top_k = 1,
|
67 |
sep = False,
|
@@ -72,6 +72,7 @@ class COCOMConfig(PretrainedConfig):
|
|
72 |
training_form="both",
|
73 |
lora_r=16,
|
74 |
attn_implementation="eager",
|
|
|
75 |
**kwargs):
|
76 |
super().__init__(**kwargs)
|
77 |
|
@@ -86,6 +87,7 @@ class COCOMConfig(PretrainedConfig):
|
|
86 |
self.training_form = training_form # training form, could be compressor: training only compressor; both:
|
87 |
self.lora_r = lora_r # lora_r for lora training, we use 16 throughout the experiment.
|
88 |
self.attn_implementation = attn_implementation
|
|
|
89 |
|
90 |
class COCOM(PreTrainedModel):
|
91 |
config_class = COCOMConfig
|
@@ -100,6 +102,7 @@ class COCOM(PreTrainedModel):
|
|
100 |
torch_dtype=torch.float16,
|
101 |
attn_implementation=attn_impl,
|
102 |
low_cpu_mem_usage = True,
|
|
|
103 |
)
|
104 |
elif cfg.quantization == "int4":
|
105 |
quant_config = BitsAndBytesConfig(
|
@@ -116,6 +119,7 @@ class COCOM(PreTrainedModel):
|
|
116 |
resume_download=True,
|
117 |
low_cpu_mem_usage = True,
|
118 |
trust_remote_code=True,
|
|
|
119 |
)
|
120 |
elif cfg.quantization == "int8":
|
121 |
quant_config = BitsAndBytesConfig(
|
@@ -132,6 +136,7 @@ class COCOM(PreTrainedModel):
|
|
132 |
resume_download=True,
|
133 |
low_cpu_mem_usage = True,
|
134 |
trust_remote_code=True,
|
|
|
135 |
)
|
136 |
else:
|
137 |
raise NotImplementedError()
|
@@ -237,6 +242,7 @@ class COCOM(PreTrainedModel):
|
|
237 |
|
238 |
# Perform compression with gradient tracking
|
239 |
inputs_embeds = self.compress_and_replace_emb(enc_input_ids, enc_attention_mask, dec_input_ids)
|
|
|
240 |
# if training_form is compressor, then detach the inputs_embeds, to make gradient not count in decoder
|
241 |
if (self.training_form == "compressor") and (self.compr is None):
|
242 |
inputs_embeds = inputs_embeds.detach()
|
|
|
61 |
|
62 |
model_type = "COCOM"
|
63 |
def __init__(self,
|
64 |
+
decoder_model_name="meta-llama/Llama-2-7b-chat-hf",
|
65 |
quantization = 'no',
|
66 |
generation_top_k = 1,
|
67 |
sep = False,
|
|
|
72 |
training_form="both",
|
73 |
lora_r=16,
|
74 |
attn_implementation="eager",
|
75 |
+
device_map = "cuda",
|
76 |
**kwargs):
|
77 |
super().__init__(**kwargs)
|
78 |
|
|
|
87 |
self.training_form = training_form # training form, could be compressor: training only compressor; both:
|
88 |
self.lora_r = lora_r # lora_r for lora training, we use 16 throughout the experiment.
|
89 |
self.attn_implementation = attn_implementation
|
90 |
+
self.device_map = device_map
|
91 |
|
92 |
class COCOM(PreTrainedModel):
|
93 |
config_class = COCOMConfig
|
|
|
102 |
torch_dtype=torch.float16,
|
103 |
attn_implementation=attn_impl,
|
104 |
low_cpu_mem_usage = True,
|
105 |
+
device_map =cfg.device_map
|
106 |
)
|
107 |
elif cfg.quantization == "int4":
|
108 |
quant_config = BitsAndBytesConfig(
|
|
|
119 |
resume_download=True,
|
120 |
low_cpu_mem_usage = True,
|
121 |
trust_remote_code=True,
|
122 |
+
device_map =cfg.device_map
|
123 |
)
|
124 |
elif cfg.quantization == "int8":
|
125 |
quant_config = BitsAndBytesConfig(
|
|
|
136 |
resume_download=True,
|
137 |
low_cpu_mem_usage = True,
|
138 |
trust_remote_code=True,
|
139 |
+
device_map =cfg.device_map
|
140 |
)
|
141 |
else:
|
142 |
raise NotImplementedError()
|
|
|
242 |
|
243 |
# Perform compression with gradient tracking
|
244 |
inputs_embeds = self.compress_and_replace_emb(enc_input_ids, enc_attention_mask, dec_input_ids)
|
245 |
+
|
246 |
# if training_form is compressor, then detach the inputs_embeds, to make gradient not count in decoder
|
247 |
if (self.training_form == "compressor") and (self.compr is None):
|
248 |
inputs_embeds = inputs_embeds.detach()
|