gguf-editor / _hf_gguf.py
CISCai's picture
Ensure tensor alignment is preserved by adding dummy padding metadata if necessary
37e11f9 verified
raw
history blame
13.9 kB
import struct
from enum import IntEnum
from fsspec.spec import AbstractBufferedFile
from typing import Any, Iterator, NamedTuple
class TokenType(IntEnum):
NORMAL = 1
UNKNOWN = 2
CONTROL = 3
USER_DEFINED = 4
UNUSED = 5
BYTE = 6
class LlamaFileType(IntEnum):
ALL_F32 = 0
MOSTLY_F16 = 1
MOSTLY_Q4_0 = 2
MOSTLY_Q4_1 = 3
MOSTLY_Q4_1_SOME_F16 = 4
MOSTLY_Q4_2 = 5
MOSTLY_Q4_3 = 6
MOSTLY_Q8_0 = 7
MOSTLY_Q5_0 = 8
MOSTLY_Q5_1 = 9
MOSTLY_Q2_K = 10
MOSTLY_Q3_K_S = 11
MOSTLY_Q3_K_M = 12
MOSTLY_Q3_K_L = 13
MOSTLY_Q4_K_S = 14
MOSTLY_Q4_K_M = 15
MOSTLY_Q5_K_S = 16
MOSTLY_Q5_K_M = 17
MOSTLY_Q6_K = 18
MOSTLY_IQ2_XXS = 19
MOSTLY_IQ2_XS = 20
MOSTLY_Q2_K_S = 21
MOSTLY_IQ3_XS = 22
MOSTLY_IQ3_XXS = 23
MOSTLY_IQ1_S = 24
MOSTLY_IQ4_NL = 25
MOSTLY_IQ3_S = 26
MOSTLY_IQ3_M = 27
MOSTLY_IQ2_S = 28
MOSTLY_IQ2_M = 29
MOSTLY_IQ4_XS = 30
MOSTLY_IQ1_M = 31
MOSTLY_BF16 = 32
MOSTLY_Q4_0_4_4 = 33
MOSTLY_Q4_0_4_8 = 34
MOSTLY_Q4_0_8_8 = 35
MOSTLY_TQ1_0 = 36
MOSTLY_TQ2_0 = 37
class GGUFValueType(IntEnum):
UINT8 = 0
INT8 = 1
UINT16 = 2
INT16 = 3
UINT32 = 4
INT32 = 5
FLOAT32 = 6
BOOL = 7
STRING = 8
ARRAY = 9
UINT64 = 10
INT64 = 11
FLOAT64 = 12
standard_metadata = {
"general.type": (GGUFValueType.STRING, "model"),
"general.architecture": (GGUFValueType.STRING, "llama"),
"general.quantization_version": (GGUFValueType.UINT32, 2),
"general.alignment": (GGUFValueType.UINT32, 32),
"general.file_type": (GGUFValueType.UINT32, 0),
"general.name": (GGUFValueType.STRING, ""),
"general.author": (GGUFValueType.STRING, ""),
"general.version": (GGUFValueType.STRING, ""),
"general.organization": (GGUFValueType.STRING, ""),
"general.finetune": (GGUFValueType.STRING, ""),
"general.basename": (GGUFValueType.STRING, ""),
"general.description": (GGUFValueType.STRING, ""),
"general.quantized_by": (GGUFValueType.STRING, ""),
"general.size_label": (GGUFValueType.STRING, ""),
"general.license": (GGUFValueType.STRING, ""),
"general.license.name": (GGUFValueType.STRING, ""),
"general.license.link": (GGUFValueType.STRING, ""),
"general.url": (GGUFValueType.STRING, ""),
"general.doi": (GGUFValueType.STRING, ""),
"general.uuid": (GGUFValueType.STRING, ""),
"general.repo_url": (GGUFValueType.STRING, ""),
"general.source.url": (GGUFValueType.STRING, ""),
"general.source.doi": (GGUFValueType.STRING, ""),
"general.source.uuid": (GGUFValueType.STRING, ""),
"general.source.repo_url": (GGUFValueType.STRING, ""),
"general.tags": (GGUFValueType.STRING, []),
"general.languages": (GGUFValueType.STRING, []),
"general.datasets": (GGUFValueType.STRING, []),
"split.no": (GGUFValueType.UINT16, 0),
"split.count": (GGUFValueType.UINT16, 0),
"split.tensors.count": (GGUFValueType.UINT32, 0),
"tokenizer.ggml.model": (GGUFValueType.STRING, "gpt2"),
"tokenizer.ggml.pre": (GGUFValueType.STRING, "llama-bpe"),
"tokenizer.ggml.tokens": (GGUFValueType.STRING, []),
"tokenizer.ggml.token_type": (GGUFValueType.INT32, []),
"tokenizer.ggml.scores": (GGUFValueType.FLOAT32, []),
"tokenizer.ggml.merges": (GGUFValueType.STRING, []),
"tokenizer.ggml.bos_token_id": (GGUFValueType.UINT32, 0),
"tokenizer.ggml.eos_token_id": (GGUFValueType.UINT32, 0),
"tokenizer.ggml.unknown_token_id": (GGUFValueType.UINT32, 0),
"tokenizer.ggml.seperator_token_id": (GGUFValueType.UINT32, 0),
"tokenizer.ggml.padding_token_id": (GGUFValueType.UINT32, 0),
"tokenizer.ggml.cls_token_id": (GGUFValueType.UINT32, 0),
"tokenizer.ggml.mask_token_id": (GGUFValueType.UINT32, 0),
"tokenizer.ggml.add_bos_token": (GGUFValueType.BOOL, False),
"tokenizer.ggml.add_eos_token": (GGUFValueType.BOOL, False),
"tokenizer.ggml.add_space_prefix": (GGUFValueType.BOOL, False),
"tokenizer.ggml.remove_extra_whitespaces": (GGUFValueType.BOOL, False),
"tokenizer.chat_template": (GGUFValueType.STRING, ""),
"tokenizer.chat_template.rag": (GGUFValueType.STRING, ""),
"tokenizer.chat_template.tool_use": (GGUFValueType.STRING, ""),
"tokenizer.chat_templates": (GGUFValueType.STRING, []),
"tokenizer.ggml.prefix_token_id": (GGUFValueType.UINT32, 0),
"tokenizer.ggml.suffix_token_id": (GGUFValueType.UINT32, 0),
"tokenizer.ggml.middle_token_id": (GGUFValueType.UINT32, 0),
"tokenizer.ggml.eot_token_id": (GGUFValueType.UINT32, 0),
"tokenizer.ggml.eom_token_id": (GGUFValueType.UINT32, 0),
"quantize.imatrix.file": (GGUFValueType.STRING, ""),
"quantize.imatrix.dataset": (GGUFValueType.STRING, ""),
"quantize.imatrix.entries_count": (GGUFValueType.INT32, 0),
"quantize.imatrix.chunks_count": (GGUFValueType.INT32, 0),
}
gguf_scalar_size: dict[GGUFValueType, int] = {
GGUFValueType.UINT8: 1,
GGUFValueType.INT8: 1,
GGUFValueType.UINT16: 2,
GGUFValueType.INT16: 2,
GGUFValueType.UINT32: 4,
GGUFValueType.INT32: 4,
GGUFValueType.FLOAT32: 4,
GGUFValueType.BOOL: 1,
GGUFValueType.UINT64: 8,
GGUFValueType.INT64: 8,
GGUFValueType.FLOAT64: 8,
}
gguf_scalar_pack: dict[GGUFValueType, str] = {
GGUFValueType.UINT8: "B",
GGUFValueType.INT8: "b",
GGUFValueType.UINT16: "H",
GGUFValueType.INT16: "h",
GGUFValueType.UINT32: "I",
GGUFValueType.INT32: "i",
GGUFValueType.FLOAT32: "f",
GGUFValueType.BOOL: "?",
GGUFValueType.UINT64: "Q",
GGUFValueType.INT64: "q",
GGUFValueType.FLOAT64: "d",
}
class GGUFData(NamedTuple):
type: GGUFValueType | None
value: Any
data: bytes
class HuggingGGUFstream:
fp: AbstractBufferedFile
header: dict[str, GGUFData]
metadata: dict[str, GGUFData]
endian: str
metaend: int
filesize: int
def __init__(
self,
fp: AbstractBufferedFile,
) -> None:
self.fp = fp
self.header = {}
self.metadata = {}
self.endian = '<'
self.alignment = 32
self.metaend = 0
self.offset = 0
self.filesize = fp.details.get('size')
if (data := self.fp.read(4)) != b'GGUF':
raise TypeError('File is not a GGUF')
self.header['magic'] = GGUFData(
type = None,
value = None,
data = data,
)
data = self._read_field(GGUFValueType.UINT32)
if data.value != 3:
if data.value == 3 << 24:
data = GGUFData(
type = data.type,
value = 3,
data = data.data,
)
self.endian = '>'
else:
raise TypeError('Unsupported GGUF version')
self.header['version'] = data
data = self._read_field(GGUFValueType.UINT64)
self.header['tensors'] = data
data = self._read_field(GGUFValueType.UINT64)
self.header['metadata'] = data
def _unpack_field(
self,
buffer: bytes,
field_type: GGUFValueType,
repeat: int = 1,
) -> Any:
value = struct.unpack(f'{self.endian}{repeat}{gguf_scalar_pack.get(field_type)}', buffer)
return value[0] if repeat == 1 else value
def _pack_field(
self,
field_type: GGUFValueType,
*values,
) -> bytes:
return struct.pack(f'{self.endian}{len(values)}{gguf_scalar_pack.get(field_type)}', *values)
def _pack_value(
self,
val_type: GGUFValueType,
value: Any,
) -> bytes:
if isinstance(value, list):
data = self._pack_field(GGUFValueType.UINT32, val_type)
data += self._pack_field(GGUFValueType.UINT64, len(value))
if val_type == GGUFValueType.ARRAY:
raise TypeError('Array of arrays currently unsupported')
elif val_type == GGUFValueType.STRING:
if isinstance(value, list):
for v in value:
buf = str(v).encode('utf-8')
data += self._pack_field(GGUFValueType.UINT64, len(buf))
data += buf
else:
buf = str(value).encode('utf-8')
data = self._pack_field(GGUFValueType.UINT64, len(buf))
data += buf
elif val_type in gguf_scalar_pack:
if isinstance(value, list):
data += self._pack_field(val_type, *value)
else:
data = self._pack_field(val_type, value)
else:
raise TypeError('Unknown metadata type')
return data
def _read_field(
self,
field_type: GGUFValueType,
repeat: int = 1,
) -> GGUFData:
data = self.fp.read(gguf_scalar_size.get(field_type) * repeat)
value = self._unpack_field(data, field_type, repeat = repeat)
return GGUFData(
type = field_type,
value = value,
data = data,
)
def _read_value(
self,
val_type: GGUFValueType,
) -> GGUFData:
if val_type == GGUFValueType.ARRAY:
data = self._read_field(GGUFValueType.UINT32)
val_len = self._read_field(GGUFValueType.UINT64)
if data.value in gguf_scalar_pack:
val = self._read_field(data.value, repeat = val_len.value)
data = GGUFData(
type = val.type,
value = list(val.value),
data = data.data + val_len.data + val.data,
)
else:
v = []
d = [data.data, val_len.data]
for _ in range(val_len.value):
val = self._read_value(data.value)
d.append(val.data)
v.append(val.value)
data = GGUFData(
type = data.value,
value = v,
data = b''.join(d),
)
elif val_type == GGUFValueType.STRING:
data = self._read_field(GGUFValueType.UINT64)
val = self.fp.read(data.value)
data = GGUFData(
type = val_type,
value = val.decode('utf-8'),
data = data.data + val,
)
elif val_type in gguf_scalar_pack:
data = self._read_field(val_type)
else:
raise TypeError('Unknown metadata type')
return data
def _update_metacount(
self,
) -> None:
old_count = self.header['metadata']
new_count = len(self.metadata)
self.header['metadata'] = GGUFData(
type = old_count.type,
value = new_count,
data = self._pack_field(old_count.type, new_count),
)
def read_metadata(
self,
) -> Iterator[tuple[str, GGUFData]]:
if self.metadata:
for k, v in self.metadata.items():
yield k, v
else:
num_metadata = self.header['metadata'].value
for _ in range(num_metadata):
key = self._read_value(GGUFValueType.STRING)
val_type = self._read_field(GGUFValueType.UINT32)
val = self._read_value(val_type.value)
self.metadata[key.value] = val = GGUFData(
type = val.type,
value = val.value,
data = key.data + val_type.data + val.data,
)
yield key.value, val
if (alignment := self.metadata.get('general.alignment')) is not None:
self.alignment = alignment.value
self.metaend = self.fp.loc
self.offset = self.metaend % self.alignment
def adjust_padding(
self,
) -> None:
if self.header['tensors'].value == 0:
return
dummy_key = 'dummy.padding'
cur_metaend = 0
for data in self.header.values():
cur_metaend += len(data.data)
for k, v in self.metadata.items():
if k != dummy_key:
cur_metaend += len(v.data)
if (cur_metaend % self.alignment) != self.offset:
dummy_len = 8 + len(dummy_key) + 4 + 4 + 8
dummy_offset = (cur_metaend + dummy_len) % self.alignment
dummy_padding = (32 + (self.offset - dummy_offset)) % self.alignment
self.add_metadata(dummy_key, GGUFValueType.UINT8, [0] * dummy_padding)
else:
self.remove_metadata(dummy_key)
def add_metadata(
self,
key: str,
type: GGUFValueType,
value: Any,
) -> None:
data = self._pack_value(GGUFValueType.STRING, key)
data += self._pack_field(GGUFValueType.UINT32, GGUFValueType.ARRAY if isinstance(value, list) else type)
data += self._pack_value(type, value)
if (meta := self.metadata.get(key)):
self.filesize -= len(meta.data)
self.filesize += len(data)
self.metadata[key] = GGUFData(
type = type,
value = value,
data = data,
)
if not meta:
self._update_metacount()
def remove_metadata(
self,
key: str,
) -> None:
if (meta := self.metadata.get(key)):
del self.metadata[key]
self.filesize -= len(meta.data)
self._update_metacount()