from typing import Optional

from pydantic import BaseModel, Field


class GgufParser(BaseModel):
    """Root model for the JSON description of a parsed GGUF file."""

    metadata: "Metadata"
    architecture: "Architecture"
    tokenizer: "Tokenizer"
    estimate: "Estimate"


class Metadata(BaseModel):
    """General file-level metadata: size, quantization, and parameter count."""

    type_: str = Field(alias="type")
    architecture: str
    quantization_version: int = Field(alias="quantizationVersion")
    alignment: int
    name: str
    file_type: int = Field(alias="fileType")
    little_endian: bool = Field(alias="littleEndian")
    file_size: int = Field(alias="fileSize")
    size: int
    parameters: int
    bits_per_weight: float = Field(alias="bitsPerWeight")


class Architecture(BaseModel):
    """Model architecture hyperparameters read from the GGUF header."""

    type_: str = Field(alias="type")
    architecture: str
    maximum_context_length: int = Field(alias="maximumContextLength")
    embedding_length: int = Field(alias="embeddingLength")
    block_count: int = Field(alias="blockCount")
    feed_forward_length: int = Field(alias="feedForwardLength")
    attention_head_count: int = Field(alias="attentionHeadCount")
    attention_head_count_kv: int = Field(alias="attentionHeadCountKV")
    # RMSNorm epsilon, not a root-mean-square error; the attribute name
    # follows the alias.
    attention_layer_norm_rms_epsilon: float = Field(
        alias="attentionLayerNormRMSEpsilon"
    )
    attention_key_length: int = Field(alias="attentionKeyLength")
    attention_value_length: int = Field(alias="attentionValueLength")
    attention_causal: bool = Field(alias="attentionCausal")
    rope_dimension_count: int = Field(alias="ropeDimensionCount")
    rope_frequency_base: int = Field(alias="ropeFrequencyBase")
    vocabulary_length: int = Field(alias="vocabularyLength")
    embedding_gqa: int = Field(alias="embeddingGQA")
    embedding_key_gqa: int = Field(alias="embeddingKeyGQA")
    embedding_value_gqa: int = Field(alias="embeddingValueGQA")


class Tokenizer(BaseModel):
    """Tokenizer description, including token counts and special-token IDs."""

    model: str
    tokens_length: int = Field(alias="tokensLength")
    merges_length: int = Field(alias="mergesLength")
    added_token_length: int = Field(alias="addedTokenLength")
    bos_token_id: int = Field(alias="bosTokenID")
    eos_token_id: int = Field(alias="eosTokenID")
    eot_token_id: int = Field(alias="eotTokenID")
    eom_token_id: int = Field(alias="eomTokenID")
    unknown_token_id: int = Field(alias="unknownTokenID")
    separator_token_id: int = Field(alias="separatorTokenID")
    padding_token_id: int = Field(alias="paddingTokenID")
    tokens_size: int = Field(alias="tokensSize")
    merges_size: int = Field(alias="mergesSize")


class Ram(BaseModel):
    """Memory-usage figures for a single device (main RAM or one VRAM)."""

    handle_layers: int = Field(alias="handleLayers")
    handle_last_layer: int = Field(alias="handleLastLayer")
    handle_output_layer: bool = Field(alias="handleOutputLayer")
    remote: bool
    position: int
    uma: int
    nonuma: int


class Item(BaseModel):
    """One offloading configuration within the usage estimate."""

    offload_layers: int = Field(alias="offloadLayers")
    full_offloaded: bool = Field(alias="fullOffloaded")
    # This field may be absent from the source JSON, so the annotation must
    # be Optional to match its None default.
    maximum_tokens_per_second: Optional[float] = Field(
        default=None, alias="maximumTokensPerSecond"
    )
    ram: "Ram"
    vrams: list["Ram"]


class Estimate(BaseModel):
    """Resource-usage estimate for running the model."""

    items: list["Item"]
    type_: str = Field(alias="type")
    architecture: str
    context_size: int = Field(alias="contextSize")
    flash_attention: bool = Field(alias="flashAttention")
    no_mmap: bool = Field(alias="noMMap")
    embedding_only: bool = Field(alias="embeddingOnly")
    reranking: bool
    distributable: bool
    logical_batch_size: int = Field(alias="logicalBatchSize")
    physical_batch_size: int = Field(alias="physicalBatchSize")


# Resolve the string forward references ("Metadata", "Architecture", ...)
# now that all of the referenced models are defined (Pydantic v2 API).
GgufParser.model_rebuild()
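

# --- Usage sketch ----------------------------------------------------------
# A minimal example of loading a parser result into these models, assuming
# Pydantic v2. The file name "gguf-parser-output.json" is hypothetical and
# only for illustration; point it at whatever JSON document your GGUF
# parsing tool produced.
if __name__ == "__main__":
    from pathlib import Path

    raw = Path("gguf-parser-output.json").read_text()
    parsed = GgufParser.model_validate_json(raw)

    print(parsed.metadata.name, parsed.metadata.bits_per_weight)
    for item in parsed.estimate.items:
        # Each item is one offloading configuration with its RAM/VRAM needs.
        print(item.offload_layers, item.ram.nonuma, [v.nonuma for v in item.vrams])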