Upload folder using huggingface_hub

d1ceb73 verified 11 months ago

5.52 kB

	# coding=utf-8
	# Copyright 2023 The HuggingFace Inc. team.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""Processor class for Pop2Piano."""

	import os
	from typing import List, Optional, Union

	import numpy as np

	from ...feature_extraction_utils import BatchFeature
	from ...processing_utils import ProcessorMixin
	from ...tokenization_utils import BatchEncoding, PaddingStrategy, TruncationStrategy
	from ...utils import TensorType


	class Pop2PianoProcessor(ProcessorMixin):
	r"""
	Constructs an Pop2Piano processor which wraps a Pop2Piano Feature Extractor and Pop2Piano Tokenizer into a single
	processor.

	[`Pop2PianoProcessor`] offers all the functionalities of [`Pop2PianoFeatureExtractor`] and [`Pop2PianoTokenizer`].
	See the docstring of [`~Pop2PianoProcessor.__call__`] and [`~Pop2PianoProcessor.decode`] for more information.

	Args:
	feature_extractor (`Pop2PianoFeatureExtractor`):
	An instance of [`Pop2PianoFeatureExtractor`]. The feature extractor is a required input.
	tokenizer (`Pop2PianoTokenizer`):
	An instance of ['Pop2PianoTokenizer`]. The tokenizer is a required input.
	"""

	attributes = ["feature_extractor", "tokenizer"]
	feature_extractor_class = "Pop2PianoFeatureExtractor"
	tokenizer_class = "Pop2PianoTokenizer"

	def __init__(self, feature_extractor, tokenizer):
	super().__init__(feature_extractor, tokenizer)

	def __call__(
	self,
	audio: Union[np.ndarray, List[float], List[np.ndarray]] = None,
	sampling_rate: Union[int, List[int]] = None,
	steps_per_beat: int = 2,
	resample: Optional[bool] = True,
	notes: Union[List, TensorType] = None,
	padding: Union[bool, str, PaddingStrategy] = False,
	truncation: Union[bool, str, TruncationStrategy] = None,
	max_length: Optional[int] = None,
	pad_to_multiple_of: Optional[int] = None,
	verbose: bool = True,
	**kwargs,
	) -> Union[BatchFeature, BatchEncoding]:
	"""
	This method uses [`Pop2PianoFeatureExtractor.__call__`] method to prepare log-mel-spectrograms for the model,
	and [`Pop2PianoTokenizer.__call__`] to prepare token_ids from notes.

	Please refer to the docstring of the above two methods for more information.
	"""

	# Since Feature Extractor needs both audio and sampling_rate and tokenizer needs both token_ids and
	# feature_extractor_output, we must check for both.
	if (audio is None and sampling_rate is None) and (notes is None):
	raise ValueError(
	"You have to specify at least audios and sampling_rate in order to use feature extractor or "
	"notes to use the tokenizer part."
	)

	if audio is not None and sampling_rate is not None:
	inputs = self.feature_extractor(
	audio=audio,
	sampling_rate=sampling_rate,
	steps_per_beat=steps_per_beat,
	resample=resample,
	**kwargs,
	)
	if notes is not None:
	encoded_token_ids = self.tokenizer(
	notes=notes,
	padding=padding,
	truncation=truncation,
	max_length=max_length,
	pad_to_multiple_of=pad_to_multiple_of,
	verbose=verbose,
	**kwargs,
	)

	if notes is None:
	return inputs

	elif audio is None or sampling_rate is None:
	return encoded_token_ids

	else:
	inputs["token_ids"] = encoded_token_ids["token_ids"]
	return inputs

	def batch_decode(
	self,
	token_ids,
	feature_extractor_output: BatchFeature,
	return_midi: bool = True,
	) -> BatchEncoding:
	"""
	This method uses [`Pop2PianoTokenizer.batch_decode`] method to convert model generated token_ids to midi_notes.

	Please refer to the docstring of the above two methods for more information.
	"""

	return self.tokenizer.batch_decode(
	token_ids=token_ids, feature_extractor_output=feature_extractor_output, return_midi=return_midi
	)

	@property
	def model_input_names(self):
	tokenizer_input_names = self.tokenizer.model_input_names
	feature_extractor_input_names = self.feature_extractor.model_input_names
	return list(dict.fromkeys(tokenizer_input_names + feature_extractor_input_names))

	def save_pretrained(self, save_directory, **kwargs):
	if os.path.isfile(save_directory):
	raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file")
	os.makedirs(save_directory, exist_ok=True)
	return super().save_pretrained(save_directory, **kwargs)

	@classmethod
	def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
	args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
	return cls(*args)