Steven10429 committed on
Commit
1d6ffe4
·
1 Parent(s): ee65134
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -68,8 +68,7 @@ def check_system_resources(model_name):
68
  if total_memory_gb >= required_memory_gb:
69
  log.info("✅ Sufficient CPU memory available; using CPU.")
70
  return "cpu", total_memory_gb
71
- else:
72
- raise MemoryError(f"❌ Insufficient system memory (requires {required_memory_gb:.1f}GB, available {available_memory_gb:.1f}GB).")
73
 
74
  @timeit
75
  def setup_environment(model_name):
@@ -114,19 +113,20 @@ def download_and_merge_model(base_model_name, lora_model_name, output_dir, devic
114
  """
115
  os.makedirs("temp", exist_ok=True)
116
  log.info("Loading base model...")
117
- model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True, device_map="auto", trust_remote_code=True, offload_folder="temp")
118
  log.info("Loading adapter tokenizer...")
119
- adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name, trust_remote_code=True, device_map="auto", offload_folder="temp")
120
  log.info("Resizing token embeddings...")
121
  added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
122
  model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
123
  log.info("Loading LoRA adapter...")
124
- peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True, device_map="auto", trust_remote_code=True, offload_folder="temp")
125
  log.info("Merging and unloading model...")
126
  model = peft_model.merge_and_unload()
127
  log.info("Saving model...")
128
  model.save_pretrained(output_dir)
129
  adapter_tokenizer.save_pretrained(output_dir)
 
130
  return output_dir
131
 
132
  @timeit
 
68
  if total_memory_gb >= required_memory_gb:
69
  log.info("✅ Sufficient CPU memory available; using CPU.")
70
  return "cpu", total_memory_gb
71
+
 
72
 
73
  @timeit
74
  def setup_environment(model_name):
 
113
  """
114
  os.makedirs("temp", exist_ok=True)
115
  log.info("Loading base model...")
116
+ model = AutoModelForCausalLM.from_pretrained(base_model_name, low_cpu_mem_usage=True, device_map="auto")
117
  log.info("Loading adapter tokenizer...")
118
+ adapter_tokenizer = AutoTokenizer.from_pretrained(lora_model_name, trust_remote_code=True, device_map="auto")
119
  log.info("Resizing token embeddings...")
120
  added_tokens_decoder = adapter_tokenizer.added_tokens_decoder
121
  model.resize_token_embeddings(adapter_tokenizer.vocab_size + len(added_tokens_decoder))
122
  log.info("Loading LoRA adapter...")
123
+ peft_model = PeftModel.from_pretrained(model, lora_model_name, low_cpu_mem_usage=True, device_map="auto")
124
  log.info("Merging and unloading model...")
125
  model = peft_model.merge_and_unload()
126
  log.info("Saving model...")
127
  model.save_pretrained(output_dir)
128
  adapter_tokenizer.save_pretrained(output_dir)
129
+ del model, peft_model
130
  return output_dir
131
 
132
  @timeit