jerryzh168 committed (verified)
Commit 48419ea · Parent(s): 9c26e80

Update README.md

Files changed (1):
  1. README.md +15 -0
README.md CHANGED
@@ -66,9 +66,22 @@ print("tied weights:", find_tied_parameters(untied_model))
 USER_ID = "YOUR_USER_ID"
 MODEL_NAME = model_id.split("/")[-1]
 save_to = f"{USER_ID}/{MODEL_NAME}-untied-weights"
+
 untied_model.push_to_hub(save_to)
 tokenizer.push_to_hub(save_to)
+
+# or save locally
+save_to_local_path = f"{MODEL_NAME}-untied-weights"
+untied_model.save_pretrained(save_to_local_path)
+tokenizer.save_pretrained(save_to_local_path)
+```
+
+Note: to `push_to_hub` you need to run
+```Shell
+pip install -U "huggingface_hub[cli]"
+huggingface-cli login
 ```
+and use a token with write access, from https://huggingface.co/settings/tokens
 
 ## Quantization
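The note added in this hunk covers the `huggingface-cli` flow; for scripted use, `huggingface_hub` also exposes a programmatic `login()`. A minimal sketch (not part of the commit), assuming the write-access token is exported in a hypothetical `HF_TOKEN` environment variable:

```python
# Minimal sketch: programmatic alternative to `huggingface-cli login`.
# Assumes HF_TOKEN holds a write-access token from
# https://huggingface.co/settings/tokens (variable name is an assumption).
import os

from huggingface_hub import login

# Authenticates this process so push_to_hub can write to the Hub.
login(token=os.environ["HF_TOKEN"])
```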
 
@@ -95,6 +108,7 @@ model_id = "microsoft/Phi-4-mini-instruct"
 USER_ID = "YOUR_USER_ID"
 MODEL_NAME = model_id.split("/")[-1]
 untied_model_id = f"{USER_ID}/{MODEL_NAME}-untied-weights"
+untied_model_local_path = f"{MODEL_NAME}-untied-weights"
 
 embedding_config = IntxWeightOnlyConfig(
     weight_dtype=torch.int8,
@@ -108,6 +122,7 @@ linear_config = Int8DynamicActivationIntxWeightConfig(
 quant_config = AOPerModuleConfig({"_default": linear_config, "model.embed_tokens": embedding_config})
 quantization_config = TorchAoConfig(quant_type=quant_config, include_embedding=True, untie_embedding_weights=True, modules_to_not_convert=[])
 
+# either use `untied_model_id` or `untied_model_local_path`
 quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, torch_dtype=torch.float32, device_map="auto", quantization_config=quantization_config)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
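Per the `# either use ...` comment added in this hunk, the quantization step can read the untied checkpoint either from the Hub repo or from the local directory written by `save_pretrained`. A sketch of the local-path variant (not part of the commit):

```python
# Sketch: same call as in the diff, but loading the untied checkpoint from
# the local directory written by save_pretrained instead of the Hub repo id.
quantized_model = AutoModelForCausalLM.from_pretrained(
    untied_model_local_path,  # e.g. "Phi-4-mini-instruct-untied-weights"
    torch_dtype=torch.float32,
    device_map="auto",
    quantization_config=quantization_config,
)
```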
 
 
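The first hunk's context line already checks the untied model with `find_tied_parameters`; the same check can be repeated on the reloaded quantized model. A sketch (not part of the commit), assuming `accelerate` is installed:

```python
# Sketch: re-run the README's tied-weight check on the quantized model;
# an empty result means embed_tokens/lm_head remain untied.
from accelerate.utils import find_tied_parameters

print("tied weights:", find_tied_parameters(quantized_model))
```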