File size: 11,662 Bytes
260542b f1287e8 260542b f1287e8 260542b f1287e8 f52e9f0 260542b f1287e8 260542b 574d76d 260542b 976e95f f1287e8 260542b f1287e8 bb4bbf2 f52e9f0 260542b f1287e8 bb4bbf2 260542b 574d76d bb4bbf2 260542b f1287e8 260542b 6c1d015 260542b 6c1d015 260542b f1287e8 260542b f1287e8 260542b f52e9f0 260542b f1287e8 260542b f1287e8 12b2006 260542b 47601b7 bb4bbf2 f52e9f0 85f845d 0439ea7 47601b7 bb4bbf2 260542b bb4bbf2 260542b bb4bbf2 260542b f52e9f0 bb4bbf2 f52e9f0 bb4bbf2 260542b bb4bbf2 260542b bb4bbf2 260542b 12b2006 260542b 0439ea7 260542b 12b2006 718b3e9 12b2006 260542b 12b2006 260542b f1287e8 260542b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 |
import math
import os
import queue
import shutil
import subprocess
import threading
import time
from pathlib import Path

import gradio as gr
import psutil
import torch
from huggingface_hub import HfApi, create_repo, login
from peft import PeftConfig, PeftModel
from tqdm import tqdm
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
# Queue intended for streaming log messages to consumers (not yet read anywhere).
log_queue = queue.Queue()
# Accumulated log lines; rendered as the Gradio output textbox content.
current_logs = []


def log(msg):
    """Record *msg* in the shared log buffer, echo it to stdout, and
    return the full accumulated log as one newline-joined string."""
    current_logs.append(msg)
    print(msg)
    return "\n".join(current_logs)
def get_model_size_in_gb(model_name):
    """Estimate a Hub model's weight size in GiB from its safetensors metadata.

    On any failure (network error, model without safetensors info) the
    estimate falls back to 1, which effectively bypasses the memory check.
    """
    try:
        info = HfApi().model_info(model_name)
        return info.safetensors.total / (1024 ** 3)
    except Exception as e:
        log(f"无法估算模型大小: {str(e)}")
        return 1  # bypass memory check
def check_system_resources(model_name):
    """Inspect system RAM and GPU capacity and pick a conversion device.

    Returns:
        ("cuda", gpu_memory_gb) when GPU VRAM covers the estimated need,
        otherwise ("cpu", available_memory_gb) when RAM suffices.

    Raises:
        MemoryError: when neither GPU nor RAM can hold the model.
    """
    log("正在检查系统资源...")

    vm = psutil.virtual_memory()
    total_gb = vm.total / (1024 ** 3)
    free_gb = vm.available / (1024 ** 3)
    log(f"系统总内存: {total_gb:.1f}GB")
    log(f"可用内存: {free_gb:.1f}GB")

    # Rough footprint: weights plus extra working memory for the merge.
    needed_gb = get_model_size_in_gb(model_name) * 2.5
    log(f"估计模型需要内存: {needed_gb:.1f}GB")

    if torch.cuda.is_available():
        log(f"发现GPU: {torch.cuda.get_device_name(0)}")
        gpu_gb = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
        log(f"GPU显存: {gpu_gb:.1f}GB")
        if gpu_gb >= needed_gb:
            log("✅ GPU显存足够,将使用GPU进行转换")
            return "cuda", gpu_gb
        log(f"⚠️ GPU显存不足 (需要 {needed_gb:.1f}GB, 实际 {gpu_gb:.1f}GB)")
    else:
        log("❌ 未检测到可用的GPU")

    if free_gb >= needed_gb:
        log("✅ CPU内存足够,将使用CPU进行转换")
        return "cpu", free_gb
    raise MemoryError(f"❌ 系统内存不足 (需要 {needed_gb:.1f}GB, 可用 {free_gb:.1f}GB)")
def setup_environment(model_name):
    """Choose the device used for conversion.

    Resource probing via check_system_resources is currently disabled;
    conversion always runs on the CPU regardless of *model_name*.
    """
    return "cpu"
def create_hf_repo(repo_name, private=True):
    """Create a HuggingFace repository and return its URL.

    Raises:
        ValueError: if a repo with this name already exists.
        Exception: any Hub error is logged and re-raised.
    """
    try:
        api = HfApi()
        if api.repo_exists(repo_name):
            log(f"仓库已存在: {repo_name}")
            # BUG FIX: the original *returned* a ValueError instance instead of
            # raising it, so the caller carried on with an exception object in
            # place of the repo URL. Raise so process_model aborts properly.
            raise ValueError(f"仓库已存在: {repo_name}, 请使用其他名称或删除已存在的仓库")
        repo_url = create_repo(repo_name, private=private)
        log(f"创建仓库成功: {repo_url}")
        return repo_url
    except Exception as e:
        log(f"创建仓库失败: {str(e)}")
        raise
def download_and_merge_model(base_model_name, lora_model_name, output_dir, device):
    """Load the base model, align its embedding size with the tokenizer,
    merge the LoRA adapter into it, and save the merged model.

    Args:
        base_model_name: Hub id or local path of the base causal LM.
        lora_model_name: Hub id or local path of the LoRA adapter.
        output_dir: Directory to write the merged model and tokenizer into.
        device: Device string ("cpu" or "cuda") used in the device_map.

    Returns:
        output_dir, so the caller can chain into the upload step.
    """
    log(f"正在加载基础模型: {base_model_name}")
    try:
        # Load the base model in fp16 on the requested device.
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            torch_dtype=torch.float16,
            device_map={"": device}
        )
        old_vocab_size = base_model.get_input_embeddings().weight.shape[0]
        # BUG FIX: use log() instead of print() so this message reaches the
        # Gradio log box (print bypassed the shared current_logs buffer).
        log(f"原始词表大小: {old_vocab_size}")

        tokenizer = AutoTokenizer.from_pretrained(base_model_name)
        # NOTE(review): tokenizer.vocab_size excludes tokens added after
        # pretraining; if the adapter was trained with added special tokens,
        # len(tokenizer) may be the intended target size — confirm against
        # the adapter's training setup before changing.
        new_vocab_size = tokenizer.vocab_size
        log(f"调整词表大小: {old_vocab_size} -> {new_vocab_size}")

        # Snapshot the original weights before resizing.
        old_embeddings = base_model.get_input_embeddings().weight.data.clone()
        old_lm_head = base_model.lm_head.weight.data.clone()

        base_model.resize_token_embeddings(new_vocab_size)

        # Restore the overlapping rows of the original weights into the
        # resized tensors (only the first new_vocab_size rows are kept).
        with torch.no_grad():
            base_model.get_input_embeddings().weight.data[:new_vocab_size] = old_embeddings[:new_vocab_size]
            base_model.lm_head.weight.data[:new_vocab_size] = old_lm_head[:new_vocab_size]

        log(f"正在加载LoRA模型: {lora_model_name}")
        log("基础模型配置:" + str(base_model.config))

        # Log the adapter config before attaching it, for debuggability.
        adapter_config = PeftConfig.from_pretrained(lora_model_name)
        log("Adapter配置:" + str(adapter_config))

        model = PeftModel.from_pretrained(base_model, lora_model_name)
        log("正在合并LoRA权重")
        model = model.merge_and_unload()

        # Persist the merged model and its tokenizer.
        log(f"正在保存合并后的模型到: {output_dir}")
        Path(output_dir).mkdir(parents=True, exist_ok=True)
        model.save_pretrained(output_dir)
        tokenizer.save_pretrained(output_dir)
        return output_dir
    except Exception as e:
        log(f"错误: {str(e)}")
        log(f"错误类型: {type(e)}")
        import traceback
        log("详细错误信息:")
        log(traceback.format_exc())
        raise
def quantize_and_push_model(model_path, repo_id, bits=8):
    """Quantize the merged model to *bits* (8 or 4) with bitsandbytes and
    push the result to the given HuggingFace repo.

    Raises:
        ValueError: for unsupported bit widths.
        Exception: any load/save/upload error is logged and re-raised.
    """
    try:
        from transformers import AutoModelForCausalLM, BitsAndBytesConfig

        log(f"正在加载模型用于{bits}位量化...")

        # BUG FIX: the original loaded the model *unquantized*, then passed
        # quantization_config to save_pretrained (which does not quantize),
        # so the uploaded "quantized" model was full precision. Build the
        # config first and apply it at load time instead.
        if bits == 8:
            quantization_config = BitsAndBytesConfig(
                load_in_8bit=True,
                llm_int8_threshold=6.0
            )
        elif bits == 4:
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_quant_type="nf4"
            )
        else:
            raise ValueError(f"不支持的量化位数: {bits}")

        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            quantization_config=quantization_config,
            device_map="auto",  # bitsandbytes quantized loading needs a device map
        )
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        # NOTE(review): the BetterTransformer conversion was dropped — models
        # converted with BetterTransformer.transform cannot be saved via
        # save_pretrained, and the conversion is unrelated to quantization.

        # Save the quantized weights alongside the source model.
        quantized_model_path = f"{model_path}_q{bits}"
        model.save_pretrained(quantized_model_path)
        tokenizer.save_pretrained(quantized_model_path)

        # Upload the quantized snapshot to the Hub.
        log(f"正在将{bits}位量化模型推送到HuggingFace...")
        api = HfApi()
        api.upload_folder(
            folder_path=quantized_model_path,
            repo_id=repo_id,
            repo_type="model"
        )
        log(f"{bits}位量化模型上传完成")
    except Exception as e:
        log(f"量化或上传过程中出错: {str(e)}")
        raise
def process_model(base_model, lora_model, repo_name, hf_token, progress=gr.Progress()):
    """Main pipeline driven by the Gradio UI: merge base+LoRA, upload the
    merged model, then its 8-bit and 4-bit quantized variants.

    Args:
        base_model: Hub id of the base model.
        lora_model: Hub id of the LoRA adapter.
        repo_name: Target repo name, or "Auto" to derive one from the inputs.
        hf_token: HuggingFace access token.
        progress: Gradio progress reporter (injected by the framework).

    Returns:
        The accumulated log text (also shown on failure).
    """
    try:
        login(hf_token)
        # Some downstream libraries only pick up the token from the environment.
        os.environ["HF_TOKEN"] = hf_token
        api = HfApi(token=hf_token)
        username = api.whoami()["name"]
        if repo_name == "Auto":
            repo_name = username + "/" + base_model.split("/")[-1] + "_" + lora_model.split("/")[-1]

        # Start each run with a fresh log buffer.
        current_logs.clear()

        device = setup_environment(base_model)
        create_hf_repo(repo_name)
        output_dir = os.path.join(".", "output", repo_name)

        progress(0.1, desc="开始模型转换流程...")
        model_path = download_and_merge_model(base_model, lora_model, output_dir, device)

        # Push the merged full-precision model first.
        log(f"正在将模型推送到HuggingFace...")
        api.upload_folder(
            folder_path=model_path,
            repo_id=repo_name,
            repo_type="model"
        )

        progress(0.4, desc="开始8位量化...")
        quantize_and_push_model(model_path, repo_name, bits=8)
        progress(0.7, desc="开始4位量化...")
        quantize_and_push_model(model_path, repo_name, bits=4)

        final_message = f"全部完成!模型已上传至: https://huggingface.co/{repo_name}"
        log(final_message)
        progress(1.0, desc="处理完成")

        # BUG FIX: pop with a default so a missing key cannot raise KeyError.
        os.environ.pop("HF_TOKEN", None)
        log("HF_TOKEN已从环境变量中删除")
        # BUG FIX: model_path is a *directory* (save_pretrained output);
        # os.remove() raises IsADirectoryError on it — use shutil.rmtree.
        shutil.rmtree(model_path, ignore_errors=True)
        log(f"模型路径已删除: {model_path}")
        return "\n".join(current_logs)
    except Exception as e:
        error_message = f"处理过程中出错: {str(e)}"
        log(error_message)
        return "\n".join(current_logs)
def create_ui():
    """Build the Gradio Blocks UI for the merge/quantize/upload workflow.

    Returns:
        The gr.Blocks app; caller is responsible for queue() and launch().
    """
    with gr.Blocks(title="模型转换工具") as app:
        gr.Markdown("""
        # 🤗 模型转换与量化工具
        这个工具可以帮助你:
        1. 合并基础模型和LoRA适配器
        2. 创建4位和8位量化版本
        3. 自动上传到HuggingFace Hub
        """)
        with gr.Row():
            with gr.Column():
                # Left column: user inputs for the conversion pipeline.
                base_model = gr.Textbox(
                    label="基础模型路径",
                    placeholder="例如: Qwen/Qwen2.5-14B-Instruct",
                    value="Qwen/Qwen2.5-7B-Instruct"
                )
                lora_model = gr.Textbox(
                    label="LoRA模型路径",
                    placeholder="输入你的LoRA模型路径"
                )
                repo_name = gr.Textbox(
                    label="HuggingFace仓库名称",
                    placeholder="输入要创建的仓库名称",
                    value="Auto"  # "Auto" derives the repo name in process_model
                )
                hf_token = gr.Textbox(
                    label="HuggingFace Token",
                    placeholder="输入你的HuggingFace Token",
                    value=os.getenv("HF_TOKEN")  # prefill from env if available
                )
                convert_btn = gr.Button("开始转换", variant="primary")
            with gr.Column():
                # Right column: read-only log view fed by process_model's return.
                output = gr.TextArea(
                    label="处理日志",
                    placeholder="处理日志将在这里显示...",
                    interactive=False,
                    autoscroll=True,
                    lines=20
                )
        # Wire the button to the pipeline entry point.
        convert_btn.click(
            fn=process_model,
            inputs=[base_model, lora_model, repo_name, hf_token],
            outputs=output
        )
    return app
if __name__ == "__main__":
    # Build the UI, enable the request queue (long-running conversions need
    # it so progress updates stream to the browser), then start the server.
    ui = create_ui()
    ui.queue()
    ui.launch()