metascroy committed
Commit e6eb920 · verified · 1 Parent(s): 0369ded

Update README.md

Files changed (1): README.md (+7 -11)
README.md CHANGED
@@ -33,18 +33,11 @@ from transformers import (
     AutoModelForCausalLM,
     AutoProcessor,
     AutoTokenizer,
-    TorchAoConfig,
-)
-from torchao.quantization.quant_api import (
-    IntxWeightOnlyConfig,
-    Int8DynamicActivationIntxWeightConfig,
-    AOPerModuleConfig
 )
-from torchao.quantization.granularity import PerGroup, PerAxis
 import torch
 
 model_id = "microsoft/Phi-4-mini-instruct"
-untied_model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, device_map="auto")
+untied_model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 print(untied_model)
@@ -54,7 +47,7 @@ if getattr(untied_model.config.get_text_config(decoder=True), "tie_word_embeddin
     setattr(untied_model.config.get_text_config(decoder=True), "tie_word_embeddings", False)
 
 untied_model._tied_weights_keys = []
-untied_model.lm_head.weight = torch.nn.Parameter(quantized_model.lm_head.weight.clone())
+untied_model.lm_head.weight = torch.nn.Parameter(untied_model.lm_head.weight.clone())
 
 print("tied weights:", find_tied_parameters(untied_model))
 
@@ -91,7 +84,6 @@ USER_ID = "YOUR_USER_ID"
 MODEL_NAME = model_id.split("/")[-1]
 untied_model_id = f"{USER_ID}/{MODEL_NAME}-untied-weights"
 
-
 embedding_config = IntxWeightOnlyConfig(
     weight_dtype=torch.int8,
     granularity=PerAxis(0),
@@ -101,7 +93,11 @@ linear_config = Int8DynamicActivationIntxWeightConfig(
     weight_granularity=PerGroup(32),
     weight_scale_dtype=torch.bfloat16,
 )
-quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, torch_dtype=torch.float32, device_map="auto")
+
+quant_config = AOPerModuleConfig({"_default": linear_config, "model.embed_tokens": embedding_config})
+quantization_config = TorchAoConfig(quant_type=quant_config, include_embedding=True, untie_embedding_weights=True, modules_to_not_convert=[])
+
+quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, torch_dtype=torch.float32, device_map="auto", quantization_config=quantization_config)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 # TODO: use AOPerModuleConfig once fix for tied weights is landed
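For context, here is the new side of this diff assembled into a single runnable sketch. A few details are assumptions rather than facts shown in the diff: the torchao import paths are taken from the lines this commit removes (their placement in the updated README is outside the visible hunks), `find_tied_parameters` is assumed to come from `accelerate.utils`, and the linear config's `weight_dtype` is set to `torch.int4` only as a placeholder, since the README's actual value lies outside the diff context.

```python
import torch
from accelerate.utils import find_tied_parameters  # assumed source of this helper
from transformers import AutoModelForCausalLM, AutoTokenizer, TorchAoConfig
from torchao.quantization.quant_api import (  # import paths from the removed lines
    IntxWeightOnlyConfig,
    Int8DynamicActivationIntxWeightConfig,
    AOPerModuleConfig,
)
from torchao.quantization.granularity import PerGroup, PerAxis

model_id = "microsoft/Phi-4-mini-instruct"
untied_model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Untie the input embedding from lm_head so the two can be quantized independently:
# cloning lm_head.weight gives it storage separate from embed_tokens.weight.
text_config = untied_model.config.get_text_config(decoder=True)
if getattr(text_config, "tie_word_embeddings", False):
    setattr(text_config, "tie_word_embeddings", False)
untied_model._tied_weights_keys = []
untied_model.lm_head.weight = torch.nn.Parameter(untied_model.lm_head.weight.clone())
print("tied weights:", find_tied_parameters(untied_model))  # should report none

# int8 weight-only quantization, per output axis, for the embedding table
embedding_config = IntxWeightOnlyConfig(
    weight_dtype=torch.int8,
    granularity=PerAxis(0),
)
# dynamic int8 activations with grouped low-bit weights for the linear layers;
# weight_dtype here is an assumption -- the README sets it outside this diff
linear_config = Int8DynamicActivationIntxWeightConfig(
    weight_dtype=torch.int4,
    weight_granularity=PerGroup(32),
    weight_scale_dtype=torch.bfloat16,
)

# Route configs per module: embed_tokens gets the embedding config,
# everything else falls through to the _default linear config.
quant_config = AOPerModuleConfig({"_default": linear_config, "model.embed_tokens": embedding_config})
quantization_config = TorchAoConfig(
    quant_type=quant_config,
    include_embedding=True,
    untie_embedding_weights=True,
    modules_to_not_convert=[],
)

# Assumes the untied model was already pushed to this repo id
# (the push step is not shown in this diff).
USER_ID = "YOUR_USER_ID"
untied_model_id = f"{USER_ID}/{model_id.split('/')[-1]}-untied-weights"
quantized_model = AutoModelForCausalLM.from_pretrained(
    untied_model_id,
    torch_dtype=torch.float32,
    device_map="auto",
    quantization_config=quantization_config,
)
```

The per-module mapping is the substance of the change: `AOPerModuleConfig` applies the weight-only config to `model.embed_tokens` and the dynamic-activation config to the remaining layers, which is why the embedding must first be untied from `lm_head` before quantization.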