# Copyright (c) 2025 Ye Liu. Licensed under the BSD-3-Clause License.
import warnings
import nncore
import torch
import torch.nn as nn
from peft import PeftModel
from safetensors.torch import load_model
from transformers import AutoConfig, AutoModel, AutoProcessor, GenerationConfig, Qwen2VLForConditionalGeneration


def get_auto_device(device):
    # prefer CUDA, then Ascend NPU (via torch_npu), then CPU
    try:
        import torch_npu
        has_npu = torch_npu.npu.is_available()
    except ImportError:
        has_npu = False
    return 'cuda' if torch.cuda.is_available() else 'npu' if has_npu else 'cpu'
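
# e.g. get_auto_device('auto') -> 'cuda' on a CUDA host, 'npu' on an Ascend
# host with torch_npu installed, otherwise 'cpu'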


def build_model(model_path, config=None, is_trainable=False, merge_adapter=False, device='auto', dtype=torch.float16):
    # set do_resize to False to avoid duplicated resizing
    # https://github.com/huggingface/transformers/tree/main/src/transformers/models/qwen2_vl/image_processing_qwen2_vl.py
    processor = AutoProcessor.from_pretrained(model_path, do_resize=False)

    # eager attention has known and unknown bugs, so use SDPA instead
    # [4.46.2] broken causality in fp16: https://github.com/huggingface/transformers/issues/35151
    # [4.48.1] broken sliding window: https://github.com/huggingface/transformers/issues/35924
    attn_implementation = 'sdpa'

    config = config or AutoConfig.from_pretrained(model_path)

    # an adapter directory named after config.role and/or a partial state dict
    # may sit next to the checkpoint
    adapter_path = nncore.join(model_path, getattr(config, 'role', 'unknown'))
    partial_path = nncore.join(model_path, 'pytorch_model.safetensors')

    if nncore.is_dir(adapter_path) or nncore.is_file(partial_path):
        print(f'Loading base model from {config.base_model_path}...')
        model = AutoModel.from_pretrained(
            config.base_model_path,
            config=config,
            low_cpu_mem_usage=True,
            ignore_mismatched_sizes=True,
            attn_implementation=attn_implementation,
            torch_dtype=dtype)

        try:
            model.generation_config = GenerationConfig.from_pretrained(model_path)
        except OSError:
            warnings.warn('generation_config.json not found')

        # materialize any parameters still on the meta device
        meta_state_dict = {
            n: torch.empty_like(p, device='cpu')
            for n, p in model.named_parameters() if p.device == torch.device('meta')
        }
        model.load_state_dict(meta_state_dict, strict=False, assign=True)

        # re-allocate embed_tokens and lm_head if their loaded weights do not
        # match the sizes recorded on the modules
        size = (model.model.embed_tokens.num_embeddings, model.model.embed_tokens.embedding_dim)
        if model.model.embed_tokens.weight.size() != size:
            print(f'Resizing embed_tokens to {size}...')
            model.model.embed_tokens.weight = nn.Parameter(model.model.embed_tokens.weight.new_empty(size))

        size = (model.lm_head.out_features, model.lm_head.in_features)
        if model.lm_head.weight.size() != size:
            print(f'Resizing lm_head to {size}...')
            model.lm_head.weight = nn.Parameter(model.lm_head.weight.new_empty(size))

        if nncore.is_dir(adapter_path):
            print(f'Loading adapter from {adapter_path}...')
            # the transformers integration does not support merge_and_unload, use peft instead
            model = PeftModel.from_pretrained(
                model,
                adapter_path,
                adapter_name=config.role,
                is_trainable=is_trainable,
                low_cpu_mem_usage=True,
                torch_device=str(model.device))

        if nncore.is_file(partial_path):
            print(f'Loading state dict from {partial_path}...')
            _, unexpected = load_model(model, partial_path, strict=False, device=str(model.device))
            assert len(unexpected) == 0, f'unexpected parameters: {unexpected}'

        if merge_adapter and nncore.is_dir(adapter_path):
            print('Merging adapter and unloading...')
            model = model.merge_and_unload()
            model._hf_peft_config_loaded = False
    else:
        print(f'Loading full model from {model_path}...')
        if len(config.architectures) == 1 and config.model_type == 'qwen2_vl':
            model_cls = Qwen2VLForConditionalGeneration
        else:
            model_cls = AutoModel
        model = model_cls.from_pretrained(
            model_path,
            config=config,
            low_cpu_mem_usage=True,
            attn_implementation=attn_implementation,
            torch_dtype=dtype)

    if not is_trainable:
        device = get_auto_device(device) if device == 'auto' else device
        model = model.to(device).eval()

    return model, processor
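

# A minimal usage sketch (illustrative; the checkpoint path, prompt, and
# generation settings below are placeholders):
#
#   model, processor = build_model('path/to/checkpoint', device='auto')
#   inputs = processor(text=['Describe the video.'], return_tensors='pt').to(model.device)
#   output_ids = model.generate(**inputs, max_new_tokens=128)
#   print(processor.batch_decode(output_ids, skip_special_tokens=True)[0])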