Spaces:

ChemFM
/

molecular_conditional_generation

Running on Zero

App Files Files Community

molecular_conditional_generation / llama_customized_models.py

feiyang-cai

Update llama_customized_models.py

3d13639 verified 6 months ago

raw

history blame contribute delete

7.79 kB

	from transformers.models.llama.modeling_llama import LlamaForCausalLM, LlamaModel, LlamaPreTrainedModel
	from transformers.models.llama.configuration_llama import LlamaConfig
	import torch.nn as nn
	from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
	from transformers.modeling_outputs import (
	BaseModelOutputWithPast,
	CausalLMOutputWithPast,
	QuestionAnsweringModelOutput,
	SequenceClassifierOutputWithPast,
	)
	from transformers.cache_utils import Cache

	from transformers.modeling_outputs import (
	CausalLMOutputWithPast,
	)
	from transformers.utils import (
	add_start_docstrings_to_model_forward,
	logging,
	replace_return_docstrings,
	)
	from dataclasses import dataclass

	from transformers.utils import ModelOutput

	import torch
	from typing import List, Optional, Tuple, Union

	# Module-level logger created through the `logging` helper imported from
	# `transformers.utils`. It is not used anywhere in the visible code.
	logger = logging.get_logger(__name__)

	# Name of the configuration class, kept as a string. Not referenced in the
	# visible code; presumably intended for `replace_return_docstrings` -- TODO confirm.
	_CONFIG_FOR_DOC = "LlamaConfig"

	# Argument documentation text for a Llama `forward` method, stored as a raw
	# string. It is not referenced in the visible code; presumably meant to be
	# attached with `add_start_docstrings_to_model_forward` -- TODO confirm.
	# NOTE(review): the text does not describe the `properties` /
	# `properties_index` arguments that the custom `forward` in this file accepts.
	LLAMA_INPUTS_DOCSTRING = r"""
	Args:
	input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
	Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
	it.

	Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
	[`PreTrainedTokenizer.__call__`] for details.

	[What are input IDs?](../glossary#input-ids)
	attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, optional):
	Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

	- 1 for tokens that are not masked,
	- 0 for tokens that are masked.

	[What are attention masks?](../glossary#attention-mask)

	Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
	[`PreTrainedTokenizer.__call__`] for details.

	If `past_key_values` is used, optionally only the last `input_ids` have to be input (see
	`past_key_values`).

	If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`]
	and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
	information on the default strategy.

	- 1 indicates the head is not masked,
	- 0 indicates the head is masked.
	position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, optional):
	Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
	config.n_positions - 1]`.

	[What are position IDs?](../glossary#position-ids)
	past_key_values (`Cache` or `tuple(tuple(torch.FloatTensor))`, optional):
	Pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
	blocks) that can be used to speed up sequential decoding. This typically consists in the `past_key_values`
	returned by the model at a previous stage of decoding, when `use_cache=True` or `config.use_cache=True`.

	Two formats are allowed:
	- a [`~cache_utils.Cache`] instance;
	- Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of
	shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`). This is also known as the legacy
	cache format.

	The model will output the same cache format that is fed as input. If no `past_key_values` are passed, the
	legacy cache format will be returned.

	If `past_key_values` are used, the user can optionally input only the last `input_ids` (those that don't
	have their past key value states given to this model) of shape `(batch_size, 1)` instead of all `input_ids`
	of shape `(batch_size, sequence_length)`.
	inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, optional):
	Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
	is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
	model's internal embedding lookup matrix.
	use_cache (`bool`, optional):
	If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
	`past_key_values`).
	output_attentions (`bool`, optional):
	Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
	tensors for more detail.
	output_hidden_states (`bool`, optional):
	Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
	more detail.
	return_dict (`bool`, optional):
	Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
	cache_position (`torch.LongTensor` of shape `(sequence_length)`, optional):
	Indices depicting the position of the input sequence tokens in the sequence. Contrarily to `position_ids`,
	this tensor is not affected by padding. It is used to update the cache in the correct position and to infer
	the complete sequence length.
	"""

	class LlamaForCausalLMWithNumericalEmbedding(LlamaForCausalLM):
	    """Llama causal LM that overwrites chosen token embeddings with embedded numbers.

	    A single ``Linear(1, hidden_size)`` layer maps each scalar property value to
	    one vector of the model's hidden size. ``forward`` looks up the regular token
	    embeddings, replaces the rows at the caller-supplied positions with those
	    vectors, and then delegates to ``LlamaForCausalLM.forward`` through
	    ``inputs_embeds``.
	    """

	    def __init__(self, config: LlamaConfig):
	        super().__init__(config)
	        # Projects one scalar property value to one hidden-size embedding vector.
	        self.numerical_embedding = torch.nn.Linear(1, config.hidden_size, bias=True)

	    def forward(
	        self,
	        input_ids: torch.LongTensor = None,
	        properties: List = None,
	        properties_index: List = None,
	        attention_mask: Optional[torch.Tensor] = None,
	        position_ids: Optional[torch.LongTensor] = None,
	        past_key_values: Optional[List[torch.FloatTensor]] = None,
	        inputs_embeds: Optional[torch.FloatTensor] = None,
	        cache_position=None,
	        labels: Optional[torch.LongTensor] = None,
	        use_cache: Optional[bool] = None,
	        output_attentions: Optional[bool] = None,
	        output_hidden_states: Optional[bool] = None,
	        return_dict: Optional[bool] = None,
	    ) -> Union[Tuple, CausalLMOutputWithPast]:
	        """Embed ``input_ids``, inject property embeddings, and run the Llama LM.

	        Args:
	            input_ids: Token ids of shape ``(batch_size, sequence_length)``. Required.
	            properties: One sequence of scalar values per batch row.
	            properties_index: One sequence of token positions per batch row;
	                ``properties_index[i][k]`` is the position in row ``i`` whose
	                embedding is replaced by the embedding of ``properties[i][k]``.
	            inputs_embeds: Ignored. The embeddings are always rebuilt from
	                ``input_ids``.
	            cache_position: Forwarded to the parent ``forward`` only when it is
	                not ``None``.
	            attention_mask, position_ids, past_key_values, labels, use_cache,
	            output_attentions, output_hidden_states, return_dict: Passed through
	                unchanged to ``LlamaForCausalLM.forward``.

	        Returns:
	            Whatever ``LlamaForCausalLM.forward`` returns for the modified embeddings.

	        Raises:
	            ValueError: If ``input_ids`` is missing or not 2-D, if ``properties`` or
	                ``properties_index`` is missing, or if their lengths or shapes are
	                inconsistent with the batch.
	        """
	        # NOTE(review): positions index into the current `input_ids`; if a caller
	        # feeds only the newest token during cached decoding, the positions would
	        # no longer line up -- confirm how generation is driven.
	        if input_ids is None:
	            raise ValueError("input_ids must be provided; inputs_embeds alone is not supported.")
	        if input_ids.dim() != 2:
	            raise ValueError("input_ids should have shape (batch_size, sequence_length).")
	        if properties is None or properties_index is None:
	            raise ValueError("Both properties and properties_index must be provided.")

	        batch_size = input_ids.size(0)
	        if len(properties) != batch_size:
	            raise ValueError("The number of properties should be equal to the batch size.")
	        if len(properties_index) != batch_size:
	            raise ValueError("The number of properties_index should be equal to the batch size.")

	        embeddings = self.model.embed_tokens(input_ids)
	        proj_dtype = self.numerical_embedding.weight.dtype

	        for row, (props, props_index) in enumerate(zip(properties, properties_index)):
	            if len(props) != len(props_index):
	                raise ValueError("The number of properties should be equal to the number of properties_index.")
	            if len(props_index) == 0:
	                # Nothing to inject for this sample.
	                continue

	            values = torch.tensor(props, device=embeddings.device, dtype=torch.float32).unsqueeze(1)
	            # Match the projection's floating dtype so half/bfloat16 checkpoints
	            # work without autocast; non-floating (e.g. quantized) weights are
	            # left to handle the float32 input themselves.
	            if proj_dtype.is_floating_point:
	                values = values.to(dtype=proj_dtype)
	            num_embeds = self.numerical_embedding(values)

	            if embeddings[row, props_index, :].shape != num_embeds.shape:
	                raise ValueError("The shape of the embeddings and the numerical embeddings should be the same.")
	            # Indexed assignment requires source and destination dtypes to agree.
	            embeddings[row, props_index, :] = num_embeds.to(dtype=embeddings.dtype)

	        forward_kwargs = dict(
	            input_ids=None,
	            attention_mask=attention_mask,
	            position_ids=position_ids,
	            past_key_values=past_key_values,
	            inputs_embeds=embeddings,
	            labels=labels,
	            use_cache=use_cache,
	            output_attentions=output_attentions,
	            output_hidden_states=output_hidden_states,
	            return_dict=return_dict,
	        )
	        # Pass cache_position only when supplied, so parent implementations that
	        # do not accept this keyword keep working.
	        if cache_position is not None:
	            forward_kwargs["cache_position"] = cache_position

	        return super().forward(**forward_kwargs)