File size: 11,662 Bytes
260542b
 
 
 
 
 
 
 
 
 
f1287e8
260542b
 
 
f1287e8
260542b
 
 
f1287e8
f52e9f0
260542b
 
 
 
 
f1287e8
260542b
 
 
574d76d
 
 
 
260542b
 
 
976e95f
f1287e8
260542b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1287e8
bb4bbf2
f52e9f0
 
 
260542b
f1287e8
bb4bbf2
260542b
 
574d76d
 
 
 
 
bb4bbf2
260542b
 
 
 
 
f1287e8
260542b
 
 
 
 
 
 
 
 
 
 
6c1d015
 
260542b
 
 
6c1d015
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260542b
 
 
 
 
 
 
 
 
 
f1287e8
260542b
 
 
f1287e8
260542b
 
 
 
 
f52e9f0
260542b
 
 
 
 
 
 
 
 
f1287e8
260542b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1287e8
12b2006
260542b
 
47601b7
bb4bbf2
f52e9f0
85f845d
0439ea7
47601b7
bb4bbf2
260542b
 
 
 
bb4bbf2
260542b
 
bb4bbf2
260542b
 
 
 
 
 
 
 
f52e9f0
 
bb4bbf2
f52e9f0
 
 
 
 
 
bb4bbf2
 
 
 
 
 
 
 
 
 
260542b
bb4bbf2
 
 
260542b
bb4bbf2
 
 
260542b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12b2006
260542b
 
 
 
 
 
 
 
0439ea7
 
260542b
12b2006
 
718b3e9
 
12b2006
260542b
 
 
 
 
 
 
 
 
 
 
 
 
 
12b2006
260542b
 
 
 
f1287e8
 
260542b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
import math
import os
import queue
import shutil
import subprocess
import threading
import time
from pathlib import Path

import gradio as gr
import psutil
import torch
from huggingface_hub import HfApi, create_repo, login
from peft import PeftConfig, PeftModel
from tqdm import tqdm
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

# Queue and shared buffer used to stream log messages to the UI.
log_queue = queue.Queue()
current_logs = []


def log(msg):
    """Print *msg*, append it to the shared log buffer, and return the full log text."""
    print(msg)
    current_logs.append(msg)
    joined = "\n".join(current_logs)
    return joined

def get_model_size_in_gb(model_name):
    """Estimate the model's weight size in GB via the HuggingFace Hub API.

    Falls back to 1 GB when the size cannot be determined, which effectively
    bypasses the downstream memory check.
    """
    try:
        info = HfApi().model_info(model_name)
        # safetensors.total is the total weight size in bytes.
        return info.safetensors.total / (1024 ** 3)
    except Exception as e:
        log(f"无法估算模型大小: {str(e)}")
        return 1  # bypass memory check

def check_system_resources(model_name):
    """Check system RAM/GPU memory and decide which device to convert on.

    Returns a ``(device, memory_gb)`` tuple where device is "cuda" or "cpu".
    Raises MemoryError when neither the GPU nor system RAM is large enough.
    """
    log("正在检查系统资源...")

    # System RAM.
    vm = psutil.virtual_memory()
    total_gb = vm.total / (1024 ** 3)
    avail_gb = vm.available / (1024 ** 3)
    log(f"系统总内存: {total_gb:.1f}GB")
    log(f"可用内存: {avail_gb:.1f}GB")

    # Rough requirement: model weights plus 1.5x headroom for the merge.
    model_size_gb = get_model_size_in_gb(model_name)
    required_memory_gb = model_size_gb * 2.5
    log(f"估计模型需要内存: {required_memory_gb:.1f}GB")

    # Prefer the GPU when its VRAM fits the estimate.
    if torch.cuda.is_available():
        device_name = torch.cuda.get_device_name(0)
        gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
        log(f"发现GPU: {device_name}")
        log(f"GPU显存: {gpu_memory_gb:.1f}GB")
        if gpu_memory_gb >= required_memory_gb:
            log("✅ GPU显存足够,将使用GPU进行转换")
            return "cuda", gpu_memory_gb
        log(f"⚠️ GPU显存不足 (需要 {required_memory_gb:.1f}GB, 实际 {gpu_memory_gb:.1f}GB)")
    else:
        log("❌ 未检测到可用的GPU")

    # Fall back to CPU when RAM allows it.
    if avail_gb >= required_memory_gb:
        log("✅ CPU内存足够,将使用CPU进行转换")
        return "cpu", avail_gb
    raise MemoryError(f"❌ 系统内存不足 (需要 {required_memory_gb:.1f}GB, 可用 {avail_gb:.1f}GB)")

def setup_environment(model_name):
    """Pick the device used for conversion.

    NOTE(review): the resource check (``check_system_resources``) is
    intentionally bypassed and CPU is always used; re-enable it if GPU
    conversion is wanted again.
    """
    return "cpu"

def create_hf_repo(repo_name, private=True):
    """Create a HuggingFace repository and return its URL.

    Raises ValueError when the repo already exists (to avoid clobbering it),
    and re-raises any Hub error after logging it.
    """
    try:
        api = HfApi()
        # Fail fast instead of uploading into an existing repo.
        if api.repo_exists(repo_name):
            log(f"仓库已存在: {repo_name}")
            # Bug fix: this exception used to be *returned*, not raised, so the
            # caller received a ValueError instance as the "repo URL" and the
            # pipeline carried on. It must be raised.
            raise ValueError(f"仓库已存在: {repo_name}, 请使用其他名称或删除已存在的仓库")
        repo_url = create_repo(repo_name, private=private)
        log(f"创建仓库成功: {repo_url}")
        return repo_url
    except Exception as e:
        log(f"创建仓库失败: {str(e)}")
        raise

def download_and_merge_model(base_model_name, lora_model_name, output_dir, device):
    """Load the base model, align its vocab size with the tokenizer, merge the
    LoRA adapter into it, and save the merged model + tokenizer to *output_dir*.

    Returns *output_dir* on success; logs and re-raises on any failure.
    """
    log(f"正在加载基础模型: {base_model_name}")
    
    try:
        # Load the original base model in fp16 on the chosen device.
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            torch_dtype=torch.float16,
            device_map={"": device}
        )
        
        old_vocab_size = base_model.get_input_embeddings().weight.shape[0]
        print(f"原始词表大小: {old_vocab_size}")
        # Load the tokenizer that defines the target vocab size.
        tokenizer = AutoTokenizer.from_pretrained(base_model_name)
        
        # NOTE(review): tokenizer.vocab_size excludes added special tokens;
        # len(tokenizer) may be the intended target — confirm against the
        # adapter's training setup.
        new_vocab_size = tokenizer.vocab_size
        print(f"调整词表大小: {old_vocab_size} -> {new_vocab_size}")
        
        # Snapshot the original embedding / lm_head weights before resizing.
        old_embeddings = base_model.get_input_embeddings().weight.data.clone()
        old_lm_head = base_model.lm_head.weight.data.clone()
        
        # Resize the embedding matrices to the tokenizer's vocab size.
        base_model.resize_token_embeddings(new_vocab_size)
        
        # Copy the original rows back into the resized tensors.
        # NOTE(review): this assumes new_vocab_size <= old_vocab_size; when the
        # vocab grows, old_embeddings[:new_vocab_size] has fewer rows than the
        # destination slice and this assignment would fail — verify.
        with torch.no_grad():
            base_model.get_input_embeddings().weight.data[:new_vocab_size] = old_embeddings[:new_vocab_size]
            base_model.lm_head.weight.data[:new_vocab_size] = old_lm_head[:new_vocab_size]
        
        log(f"正在加载LoRA模型: {lora_model_name}")
        log("基础模型配置:" + str(base_model.config))
        
        # Load the adapter config (logged for debugging only).
        adapter_config = PeftConfig.from_pretrained(lora_model_name)
        log("Adapter配置:" + str(adapter_config))
        
        model = PeftModel.from_pretrained(base_model, lora_model_name)
        log("正在合并LoRA权重")
        model = model.merge_and_unload()

        # Ensure the output directory exists.
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        # Save the merged model and its tokenizer.
        log(f"正在保存合并后的模型到: {output_dir}")
        model.save_pretrained(output_dir)
        tokenizer.save_pretrained(output_dir)
        
        
        return output_dir
        
    except Exception as e:
        # Log full details (type + traceback) before propagating.
        log(f"错误: {str(e)}")
        log(f"错误类型: {type(e)}")
        import traceback
        log("详细错误信息:")
        log(traceback.format_exc())
        raise

def quantize_and_push_model(model_path, repo_id, bits=8):
    """Quantize the merged model with bitsandbytes and push it to HuggingFace.

    Args:
        model_path: local directory holding the merged full-precision model.
        repo_id: destination Hub repository.
        bits: 8 or 4; any other value raises ValueError.

    Logs and re-raises on any quantization/upload failure.
    """
    try:
        from transformers import AutoModelForCausalLM, BitsAndBytesConfig

        # Build the quantization config first so unsupported bit-widths fail
        # early, before the expensive model load.
        if bits == 8:
            quantization_config = BitsAndBytesConfig(
                load_in_8bit=True,
                llm_int8_threshold=6.0
            )
        elif bits == 4:
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_quant_type="nf4"
            )
        else:
            raise ValueError(f"不支持的量化位数: {bits}")

        log(f"正在加载模型用于{bits}位量化...")
        # Bug fix: quantization must happen at load time. The previous code
        # loaded the model at full precision and only passed
        # quantization_config to save_pretrained, which does not quantize any
        # weights. It also applied BetterTransformer.transform, whose output
        # cannot be saved with save_pretrained without reversing the transform.
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            quantization_config=quantization_config,
            device_map="auto"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_path)

        # Save the quantized model next to the original.
        quantized_model_path = f"{model_path}_q{bits}"
        model.save_pretrained(quantized_model_path)
        tokenizer.save_pretrained(quantized_model_path)

        # Push the quantized snapshot to the Hub.
        log(f"正在将{bits}位量化模型推送到HuggingFace...")
        api = HfApi()
        api.upload_folder(
            folder_path=quantized_model_path,
            repo_id=repo_id,
            repo_type="model"
        )
        log(f"{bits}位量化模型上传完成")

    except Exception as e:
        log(f"量化或上传过程中出错: {str(e)}")
        raise

def process_model(base_model, lora_model, repo_name, hf_token, progress=gr.Progress()):
    """Main pipeline driven by the Gradio UI: merge, upload, quantize, clean up.

    Returns the accumulated log text in all cases (also on failure, so the UI
    can show what went wrong).
    """
    try:
        # Login once and also export the token: some hub calls read it from
        # the environment rather than the session.
        login(hf_token)
        os.environ["HF_TOKEN"] = hf_token
        api = HfApi(token=hf_token)
        username = api.whoami()["name"]
        # "Auto" derives the repo name from the base and LoRA model names.
        if repo_name == "Auto":
            repo_name = username + "/" + base_model.split("/")[-1] + "_" + lora_model.split("/")[-1]
        
        # Start this run with a clean log buffer.
        current_logs.clear()
        
        # Pick the conversion device and create the destination repo.
        device = setup_environment(base_model)
        repo_url = create_hf_repo(repo_name)
        
        output_dir = os.path.join(".", "output", repo_name)
        
        progress(0.1, desc="开始模型转换流程...")
        # Merge the LoRA adapter into the base model.
        model_path = download_and_merge_model(base_model, lora_model, output_dir, device)
        
        # Upload the full-precision merged model.
        log(f"正在将模型推送到HuggingFace...")
        api.upload_folder(
            folder_path=model_path,
            repo_id=repo_name,
            repo_type="model"
        )
        
        progress(0.4, desc="开始8位量化...")
        quantize_and_push_model(model_path, repo_name, bits=8)
        
        progress(0.7, desc="开始4位量化...")
        quantize_and_push_model(model_path, repo_name, bits=4)
        
        final_message = f"全部完成!模型已上传至: https://huggingface.co/{repo_name}"
        log(final_message)
        progress(1.0, desc="处理完成")
        
        # Drop the token from the environment once we are done with the Hub.
        os.environ.pop("HF_TOKEN", None)
        log("HF_TOKEN已从环境变量中删除")
        
        # Bug fix: model_path is a directory; os.remove() raises OSError on
        # directories, so shutil.rmtree is required here.
        shutil.rmtree(model_path, ignore_errors=True)
        log(f"模型路径已删除: {model_path}")
        
        return "\n".join(current_logs)
    except Exception as e:
        error_message = f"处理过程中出错: {str(e)}"
        log(error_message)
        # Robustness fix: also remove the token when the run fails, so it
        # doesn't linger in the process environment.
        os.environ.pop("HF_TOKEN", None)
        return "\n".join(current_logs)

def create_ui():
    """Build and return the Gradio Blocks app for the merge-and-quantize tool."""
    with gr.Blocks(title="模型转换工具") as app:
        # Header / usage summary.
        gr.Markdown("""
        # 🤗 模型转换与量化工具
        
        这个工具可以帮助你:
        1. 合并基础模型和LoRA适配器
        2. 创建4位和8位量化版本
        3. 自动上传到HuggingFace Hub
        """)
        
        with gr.Row():
            # Left column: user inputs.
            with gr.Column():
                base_model_box = gr.Textbox(
                    label="基础模型路径",
                    placeholder="例如: Qwen/Qwen2.5-14B-Instruct",
                    value="Qwen/Qwen2.5-7B-Instruct"
                )
                lora_model_box = gr.Textbox(
                    label="LoRA模型路径",
                    placeholder="输入你的LoRA模型路径"
                )
                repo_name_box = gr.Textbox(
                    label="HuggingFace仓库名称",
                    placeholder="输入要创建的仓库名称",
                    value="Auto"
                )
                hf_token_box = gr.Textbox(
                    label="HuggingFace Token",
                    placeholder="输入你的HuggingFace Token",
                    value=os.getenv("HF_TOKEN")
                )
                convert_button = gr.Button("开始转换", variant="primary")
            
            # Right column: live log output.
            with gr.Column():
                log_area = gr.TextArea(
                    label="处理日志",
                    placeholder="处理日志将在这里显示...",
                    interactive=False,
                    autoscroll=True,
                    lines=20
                )
        
        # Wire the button to the processing pipeline.
        convert_button.click(
            fn=process_model,
            inputs=[base_model_box, lora_model_box, repo_name_box, hf_token_box],
            outputs=log_area
        )
    
    return app

if __name__ == "__main__":
    # Build the UI, enable request queueing, and serve it.
    demo = create_ui()
    demo.queue()
    demo.launch()