MT3

Running

App Files Files Community

Hmjz100 committed on May 30, 2024

Commit

1db8186

verified ·

1 Parent(s): 37af952

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -35

app.py CHANGED Viewed

@@ -136,22 +136,21 @@ class InferenceModel(object):
     @property
 	def input_shapes(self):
 		return {
-					'encoder_input_tokens': (self.batch_size, self.inputs_length),
-					'decoder_input_tokens': (self.batch_size, self.outputs_length)
 		}
 	def _parse_gin(self, gin_files):
 		"""解析用于训练模型的 gin 文件。"""
 		print(f"[{current_time()}] 日志：解析 gin 文件")
 		gin_bindings = [
-				'from __gin__ import dynamic_registration',
-				'from mt3 import vocabularies',
-				'VOCAB_CONFIG=@vocabularies.VocabularyConfig()',
-				'vocabularies.VocabularyConfig.num_velocity_bins=%NUM_VELOCITY_BINS'
 		]
 		with gin.unlock_config():
-			gin.parse_config_files_and_bindings(
-					gin_files, gin_bindings, finalize_config=False)
 	def _load_model(self):
 		"""在解析训练 gin 配置后加载 T5X `Model`。"""
@@ -159,11 +158,11 @@ class InferenceModel(object):
 		model_config = gin.get_configurable(network.T5Config)()
 		module = network.Transformer(config=model_config)
 		return models.ContinuousInputsEncoderDecoderModel(
-				module=module,
-				input_vocabulary=self.output_features['inputs'].vocabulary,
-				output_vocabulary=self.output_features['targets'].vocabulary,
-				optimizer_def=t5x.adafactor.Adafactor(decay_rate=0.8, step_offset=0),
-				input_depth=spectrograms.input_depth(self.spectrogram_config))
 	def restore_from_checkpoint(self, checkpoint_path):
@@ -176,33 +175,31 @@ class InferenceModel(object):
 			partitioner=self.partitioner)
 		restore_checkpoint_cfg = t5x.utils.RestoreCheckpointConfig(
-				path=checkpoint_path, mode='specific', dtype='float32')
 		train_state_axes = train_state_initializer.train_state_axes
 		self._predict_fn = self._get_predict_fn(train_state_axes)
 		self._train_state = train_state_initializer.from_checkpoint_or_scratch(
-				[restore_checkpoint_cfg], init_rng=jax.random.PRNGKey(0))
 	@functools.lru_cache()
 	def _get_predict_fn(self, train_state_axes):
 		"""生成一个分区的预测函数用于解码。"""
 		print(f"[{current_time()}] 日志：生成用于解码的预测函数")
 		def partial_predict_fn(params, batch, decode_rng):
-			return self.model.predict_batch_with_aux(
-					params, batch, decoder_params={'decode_rng': None})
 		return self.partitioner.partition(
-				partial_predict_fn,
-				in_axis_resources=(
-						train_state_axes.params,
-						t5x.partitioning.PartitionSpec('data',), None),
-				out_axis_resources=t5x.partitioning.PartitionSpec('data',)
 		)
 	def predict_tokens(self, batch, seed=0):
 		"""从预处理的数据集批次中预测 tokens。"""
 		print(f"[{current_time()}] 运行：从预处理数据集中预测音符序列")
-		prediction, _ = self._predict_fn(
-self._train_state.params, batch, jax.random.PRNGKey(seed))
 		return self.vocabulary.decode_tf(prediction).numpy()
 	def __call__(self, audio):
@@ -255,16 +252,16 @@ self._train_state.params, batch, jax.random.PRNGKey(seed))
 	def preprocess(self, ds):
 		pp_chain = [
 				functools.partial(
-						t5.data.preprocessors.split_tokens_to_inputs_length,
-						sequence_length=self.sequence_length,
-						output_features=self.output_features,
-						feature_key='inputs',
-						additional_feature_keys=['input_times']),
 				# 在训练期间进行缓存。
 				preprocessors.add_dummy_targets,
 				functools.partial(
-						preprocessors.compute_spectrograms,
-						spectrogram_config=self.spectrogram_config)
 		]
 		for pp in pp_chain:
 			ds = pp(ds)
@@ -276,10 +273,10 @@ self._train_state.params, batch, jax.random.PRNGKey(seed))
 		# 向下取整到最接近的符号化时间步。
 		start_time -= start_time % (1 / self.codec.steps_per_second)
 		return {
-				'est_tokens': tokens,
-				'start_time': start_time,
-				# 内部 MT3 代码期望原始输入，这里不使用。
-				'raw_inputs': []
 		}
 	@staticmethod

     @property
 	def input_shapes(self):
 		return {
+			'encoder_input_tokens': (self.batch_size, self.inputs_length),
+			'decoder_input_tokens': (self.batch_size, self.outputs_length)
 		}
 	def _parse_gin(self, gin_files):
 		"""解析用于训练模型的 gin 文件。"""
 		print(f"[{current_time()}] 日志：解析 gin 文件")
 		gin_bindings = [
+            'from __gin__ import dynamic_registration',
+            'from mt3 import vocabularies',
+            'VOCAB_CONFIG=@vocabularies.VocabularyConfig()',
+            'vocabularies.VocabularyConfig.num_velocity_bins=%NUM_VELOCITY_BINS'
 		]
 		with gin.unlock_config():
+			gin.parse_config_files_and_bindings(gin_files, gin_bindings, finalize_config=False)
 	def _load_model(self):
 		"""在解析训练 gin 配置后加载 T5X `Model`。"""
 		model_config = gin.get_configurable(network.T5Config)()
 		module = network.Transformer(config=model_config)
 		return models.ContinuousInputsEncoderDecoderModel(
+            module=module,
+            input_vocabulary=self.output_features['inputs'].vocabulary,
+            output_vocabulary=self.output_features['targets'].vocabulary,
+            optimizer_def=t5x.adafactor.Adafactor(decay_rate=0.8, step_offset=0),
+            input_depth=spectrograms.input_depth(self.spectrogram_config))
 	def restore_from_checkpoint(self, checkpoint_path):
 			partitioner=self.partitioner)
 		restore_checkpoint_cfg = t5x.utils.RestoreCheckpointConfig(
+            path=checkpoint_path, mode='specific', dtype='float32')
 		train_state_axes = train_state_initializer.train_state_axes
 		self._predict_fn = self._get_predict_fn(train_state_axes)
 		self._train_state = train_state_initializer.from_checkpoint_or_scratch(
+            [restore_checkpoint_cfg], init_rng=jax.random.PRNGKey(0))
 	@functools.lru_cache()
 	def _get_predict_fn(self, train_state_axes):
 		"""生成一个分区的预测函数用于解码。"""
 		print(f"[{current_time()}] 日志：生成用于解码的预测函数")
 		def partial_predict_fn(params, batch, decode_rng):
+			return self.model.predict_batch_with_aux(params, batch, decoder_params={'decode_rng': None})
 		return self.partitioner.partition(
+            partial_predict_fn,
+            in_axis_resources=(
+                    train_state_axes.params,
+                    t5x.partitioning.PartitionSpec('data',), None),
+            out_axis_resources=t5x.partitioning.PartitionSpec('data',)
 		)
 	def predict_tokens(self, batch, seed=0):
 		"""从预处理的数据集批次中预测 tokens。"""
 		print(f"[{current_time()}] 运行：从预处理数据集中预测音符序列")
+		prediction, _ = self._predict_fn(self._train_state.params, batch, jax.random.PRNGKey(seed))
 		return self.vocabulary.decode_tf(prediction).numpy()
 	def __call__(self, audio):
 	def preprocess(self, ds):
 		pp_chain = [
 				functools.partial(
+                    t5.data.preprocessors.split_tokens_to_inputs_length,
+                    sequence_length=self.sequence_length,
+                    output_features=self.output_features,
+                    feature_key='inputs',
+                    additional_feature_keys=['input_times']),
 				# 在训练期间进行缓存。
 				preprocessors.add_dummy_targets,
 				functools.partial(
+                    preprocessors.compute_spectrograms,
+                    spectrogram_config=self.spectrogram_config)
 		]
 		for pp in pp_chain:
 			ds = pp(ds)
 		# 向下取整到最接近的符号化时间步。
 		start_time -= start_time % (1 / self.codec.steps_per_second)
 		return {
+            'est_tokens': tokens,
+            'start_time': start_time,
+            # 内部 MT3 代码期望原始输入，这里不使用。
+            'raw_inputs': []
 		}
 	@staticmethod