Steven10429 committed on
Commit
05c88c1
·
1 Parent(s): 45da339
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -114,14 +114,14 @@ def download_and_merge_model(base_model_name, lora_model_name, output_dir, devic
114
  """
115
  os.makedirs("temp", exist_ok=True)
116
  log.info("Loading base model...")
117
- model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True, trust_remote_code=True, offload_folder="temp")
118
  log.info("Loading adapter tokenizer...")
119
- adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name, trust_remote_code=True, offload_folder="temp")
120
  log.info("Resizing token embeddings...")
121
  added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
122
  model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
123
  log.info("Loading LoRA adapter...")
124
- peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True, trust_remote_code=True, offload_folder="temp")
125
  log.info("Merging and unloading model...")
126
  model = peft_model.merge_and_unload()
127
  log.info("Saving model...")
@@ -337,7 +337,7 @@ def create_ui():
337
  )
338
  quant_method = gr.CheckboxGroup(
339
  choices=["Q2_K", "Q4_K", "IQ4_NL", "Q5_K_M", "Q6_K", "Q8_0"],
340
- value=["Q4_K", "Q8_0"],
341
  label="Quantization Method"
342
  )
343
  hf_token = gr.Textbox(
 
114
  """
115
  os.makedirs("temp", exist_ok=True)
116
  log.info("Loading base model...")
117
+ model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True, device_map="disk", trust_remote_code=True, offload_folder="temp")
118
  log.info("Loading adapter tokenizer...")
119
+ adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name, trust_remote_code=True, device_map="disk", offload_folder="temp")
120
  log.info("Resizing token embeddings...")
121
  added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
122
  model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
123
  log.info("Loading LoRA adapter...")
124
+ peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True, device_map="disk", trust_remote_code=True, offload_folder="temp")
125
  log.info("Merging and unloading model...")
126
  model = peft_model.merge_and_unload()
127
  log.info("Saving model...")
 
337
  )
338
  quant_method = gr.CheckboxGroup(
339
  choices=["Q2_K", "Q4_K", "IQ4_NL", "Q5_K_M", "Q6_K", "Q8_0"],
340
+ value=["Q2_K", "Q4_K", "IQ4_NL", "Q5_K_M", "Q6_K", "Q8_0"],
341
  label="Quantization Method"
342
  )
343
  hf_token = gr.Textbox(