infly
/

INF-34B-Chat-GPTQ-8bit

Text Generation

8-bit precision

Model card Files Files and versions Community

INF-34B-Chat-GPTQ-8bit / modeling_inflm.py

stgzr's picture

update LICENCE, tokenizer and model weights

e18015c verified 12 months ago

history blame contribute delete

2.66 kB

	# coding=utf-8
	# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
	#
	# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
	# and OPT implementations in this library. It has been modified from its
	# original forms to accommodate minor architectural differences compared
	# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""PyTorch INFLM model."""

	import torch
	from torch import nn
	from transformers.models.llama.modeling_llama import (
	LlamaDecoderLayer,
	LlamaModel,
	LlamaForCausalLM
	)
	from .configuration_inflm import INFLMConfig

	# Name of the configuration class, kept as a plain string. It is not
	# referenced anywhere else in the code shown here; presumably it is kept for
	# Transformers docstring helpers -- confirm before removing.
	_CONFIG_FOR_DOC = "INFLMConfig"


	class INFLMDecoderLayer(LlamaDecoderLayer):
	    """Llama decoder layer whose two normalisation modules are ``nn.LayerNorm``.

	    After the parent constructor has run, ``input_layernorm`` and
	    ``post_attention_layernorm`` are (re)assigned to ``nn.LayerNorm`` modules
	    sized by ``config.hidden_size`` with ``eps=config.layer_norm_eps``.
	    Everything else is inherited unchanged from ``LlamaDecoderLayer``.
	    """

	    def __init__(self, config: INFLMConfig, layer_idx: int):
	        super().__init__(config, layer_idx)
	        width, eps = config.hidden_size, config.layer_norm_eps
	        # Both norms share the same shape and epsilon; assign them in the
	        # same order as before (pre-attention first, post-attention second).
	        for norm_name in ("input_layernorm", "post_attention_layernorm"):
	            setattr(self, norm_name, nn.LayerNorm(width, eps=eps))


	class INFLMModel(LlamaModel):
	    """``LlamaModel`` subclass built from ``INFLMDecoderLayer`` blocks.

	    The constructor first runs the parent constructor, then assigns its own
	    token embedding, decoder-layer stack and final ``nn.LayerNorm`` (replacing
	    anything the parent stored under the same attribute names), and finally
	    calls ``post_init()``.
	    """

	    config_class = INFLMConfig
	    _no_split_modules = ["INFLMDecoderLayer"]

	    def __init__(self, config: INFLMConfig):
	        super().__init__(config)

	        hidden_size = config.hidden_size
	        pad_id = config.pad_token_id
	        vocab = config.vocab_size
	        self.padding_idx = pad_id
	        self.vocab_size = vocab

	        # Construction order is kept as embedding -> layers -> final norm.
	        self.embed_tokens = nn.Embedding(vocab, hidden_size, padding_idx=pad_id)
	        decoder_stack = [
	            INFLMDecoderLayer(config, idx)
	            for idx in range(config.num_hidden_layers)
	        ]
	        self.layers = nn.ModuleList(decoder_stack)
	        self.norm = nn.LayerNorm(hidden_size, eps=config.layer_norm_eps)

	        self.gradient_checkpointing = False
	        # Run weight initialisation / final processing again now that the
	        # submodules above have been swapped in.
	        self.post_init()


	class INFLMForCausalLM(LlamaForCausalLM):
	    """``LlamaForCausalLM`` subclass whose backbone is ``INFLMModel``.

	    The constructor runs the parent constructor, then assigns ``self.model``
	    to an ``INFLMModel`` and ``self.lm_head`` to a bias-free ``nn.Linear``
	    mapping ``config.hidden_size`` to ``config.vocab_size``, and finally
	    calls ``post_init()``.
	    """

	    # Declare the configuration class explicitly, matching INFLMModel, so the
	    # class does not fall back to the config class inherited from
	    # LlamaForCausalLM when it is used directly rather than through the
	    # Auto* classes.
	    config_class = INFLMConfig
	    _tied_weights_keys = ["lm_head.weight"]

	    def __init__(self, config: INFLMConfig):
	        super().__init__(config)
	        # Replace the backbone and output head assigned by the parent
	        # constructor with the INFLM variants.
	        self.model = INFLMModel(config)
	        self.vocab_size = config.vocab_size
	        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

	        # Initialize weights and apply final processing
	        self.post_init()