# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

r"""Convert checkpoints created by Estimator (tf1) to be Keras compatible."""

import numpy as np
import tensorflow.compat.v1 as tf  # TF 1.x

# Mapping between old <=> new names. The source pattern in original variable
# name will be replaced by destination pattern.
#
# Entries are applied in order and cumulatively (see `_bert_name_replacement`),
# so a later pattern is matched against the name as already rewritten by the
# earlier entries. Reordering entries can therefore change the result.
BERT_NAME_REPLACEMENTS = (
    ("bert", "bert_model"),
    ("embeddings/word_embeddings", "word_embeddings/embeddings"),
    ("embeddings/token_type_embeddings",
     "embedding_postprocessor/type_embeddings"),
    ("embeddings/position_embeddings",
     "embedding_postprocessor/position_embeddings"),
    ("embeddings/LayerNorm", "embedding_postprocessor/layer_norm"),
    ("attention/self", "self_attention"),
    ("attention/output/dense", "self_attention_output"),
    ("attention/output/LayerNorm", "self_attention_layer_norm"),
    ("intermediate/dense", "intermediate"),
    ("output/dense", "output"),
    ("output/LayerNorm", "output_layer_norm"),
    ("pooler/dense", "pooler_transform"),
)

BERT_V2_NAME_REPLACEMENTS = (
    ("bert/", ""),
    ("encoder", "transformer"),
    ("embeddings/word_embeddings", "word_embeddings/embeddings"),
    ("embeddings/token_type_embeddings", "type_embeddings/embeddings"),
    ("embeddings/position_embeddings", "position_embedding/embeddings"),
    ("embeddings/LayerNorm", "embeddings/layer_norm"),
    ("attention/self", "self_attention"),
    ("attention/output/dense", "self_attention/attention_output"),
    ("attention/output/LayerNorm", "self_attention_layer_norm"),
    ("intermediate/dense", "intermediate"),
    ("output/dense", "output"),
    ("output/LayerNorm", "output_layer_norm"),
    ("pooler/dense", "pooler_transform"),
    ("cls/predictions", "bert/cls/predictions"),
    ("cls/predictions/output_bias", "cls/predictions/output_bias/bias"),
    ("cls/seq_relationship/output_bias", "predictions/transform/logits/bias"),
    ("cls/seq_relationship/output_weights",
     "predictions/transform/logits/kernel"),
)

# Each entry is (match_str, permutation); the permutation is passed to
# `np.transpose` for variables whose ORIGINAL name contains match_str.
BERT_PERMUTATIONS = ()

BERT_V2_PERMUTATIONS = (("cls/seq_relationship/output_weights", (1, 0)),)


def _bert_name_replacement(var_name, name_replacements):
    """Gets the variable name replacement.

    Every (source, target) pair whose source occurs in the current name is
    applied, in the order given. Replacements are cumulative: each pair is
    matched against the name as rewritten by the preceding pairs, and every
    occurrence of the source substring is replaced.

    Args:
      var_name: The original variable name.
      name_replacements: An iterable of (match_str, replace_str) tuples.

    Returns:
      The rewritten variable name; equal to `var_name` if nothing matched.
    """
    for src_pattern, tgt_pattern in name_replacements:
        if src_pattern in var_name:
            old_var_name = var_name
            var_name = var_name.replace(src_pattern, tgt_pattern)
            tf.logging.info("Converted: %s --> %s", old_var_name, var_name)
    return var_name


def _has_exclude_patterns(name, exclude_patterns):
    """Checks if a string contains substrings that match patterns to exclude.

    Args:
      name: The variable name to test.
      exclude_patterns: An iterable of substrings; matching is plain
        substring containment, not regular expressions.

    Returns:
      True if any pattern occurs in `name`, otherwise False.
    """
    return any(pattern in name for pattern in exclude_patterns)


def _get_permutation(name, permutations):
    """Checks whether a variable requires transposition by pattern matching.

    Args:
      name: The variable name to test.
      permutations: An iterable of (match_str, permutation) tuples.

    Returns:
      The permutation of the first entry whose match_str occurs in `name`,
      or None if no entry matches.
    """
    for src_pattern, permutation in permutations:
        if src_pattern in name:
            tf.logging.info("Permuted: %s --> %s", name, permutation)
            return permutation
    return None


def _get_new_shape(name, shape, num_heads):
    """Checks whether a variable requires reshape by pattern matching.

    Args:
      name: The (already renamed) variable name to test.
      shape: The current shape of the variable's tensor.
      num_heads: The number of attention heads used to split dimensions.

    Returns:
      The target shape for attention variables, or None when `name` matches
      none of the attention patterns. For the attention output bias the
      incoming `shape` is returned unchanged.
    """
    if "self_attention/attention_output/kernel" in name:
        return (num_heads, shape[0] // num_heads, shape[1])
    if "self_attention/attention_output/bias" in name:
        return shape

    qkv_patterns = (
        "self_attention/query",
        "self_attention/value",
        "self_attention/key",
    )
    # The resulting shape does not depend on which of query/key/value matched,
    # so a single membership test is sufficient.
    if any(pattern in name for pattern in qkv_patterns):
        if "kernel" in name:
            return (shape[0], num_heads, shape[1] // num_heads)
        if "bias" in name:
            return (num_heads, shape[0] // num_heads)
    return None


def create_v2_checkpoint(model,
                         src_checkpoint,
                         output_path,
                         checkpoint_model_name="model"):
    """Converts a name-based matched TF V1 checkpoint to TF V2 checkpoint.

    Args:
      model: The model whose weights are loaded from `src_checkpoint`. If it
        has a `checkpoint_items` attribute, those items are saved alongside
        the model itself.
      src_checkpoint: Path to the name-based source checkpoint.
      output_path: Path prefix passed to `tf.train.Checkpoint.save`.
      checkpoint_model_name: Key under which `model` is stored in the saved
        checkpoint.
    """
    # Uses streaming-restore in eager mode to read V1 name-based checkpoints.
    model.load_weights(src_checkpoint).assert_existing_objects_matched()
    # Copy the mapping so that registering the model below does not insert
    # the model into its own `checkpoint_items` container.
    if hasattr(model, "checkpoint_items"):
        checkpoint_items = dict(model.checkpoint_items)
    else:
        checkpoint_items = {}

    checkpoint_items[checkpoint_model_name] = model
    checkpoint = tf.train.Checkpoint(**checkpoint_items)
    checkpoint.save(output_path)


def convert(checkpoint_from_path,
            checkpoint_to_path,
            num_heads,
            name_replacements,
            permutations,
            exclude_patterns=None):
    """Migrates the names of variables within a checkpoint.

    For every variable in the source checkpoint that is not excluded, the
    tensor is read, optionally reshaped (decided from the NEW variable name)
    and then optionally transposed (decided from the ORIGINAL variable name),
    and finally saved under its new name in the destination checkpoint.

    Args:
      checkpoint_from_path: Path to source checkpoint to be read in.
      checkpoint_to_path: Path to checkpoint to be written out.
      num_heads: The number of heads of the model. Reshaping is only attempted
        when this is greater than zero.
      name_replacements: A list of tuples of the form (match_str, replace_str)
        describing variable names to adjust.
      permutations: A list of tuples of the form (match_str, permutation)
        describing permutations to apply to given variables. Note that
        match_str should match the original variable name, not the replaced
        one.
      exclude_patterns: A list of string patterns to exclude variables from
        checkpoint conversion.

    Returns:
      None. The converted variables are written to `checkpoint_to_path`, and a
      summary of the old-name to new-name mapping is logged.
    """
    with tf.Graph().as_default():
        tf.logging.info("Reading checkpoint_from_path %s",
                        checkpoint_from_path)
        reader = tf.train.NewCheckpointReader(checkpoint_from_path)
        name_shape_map = reader.get_variable_to_shape_map()
        new_variable_map = {}
        conversion_map = {}
        for var_name in name_shape_map:
            if exclude_patterns and _has_exclude_patterns(
                    var_name, exclude_patterns):
                continue

            # Get the original tensor data.
            tensor = reader.get_tensor(var_name)

            # Look up the new variable name, if any.
            new_var_name = _bert_name_replacement(var_name, name_replacements)

            # See if we need to reshape the underlying tensor.
            new_shape = None
            if num_heads > 0:
                new_shape = _get_new_shape(new_var_name, tensor.shape,
                                           num_heads)
            if new_shape:
                tf.logging.info(
                    "Variable %s has a shape change from %s to %s", var_name,
                    tensor.shape, new_shape)
                tensor = np.reshape(tensor, new_shape)

            # See if we need to permute the underlying tensor.
            permutation = _get_permutation(var_name, permutations)
            if permutation:
                tensor = np.transpose(tensor, permutation)

            # Create a new variable with the possibly-reshaped or transposed
            # tensor. The graph variable keeps the original name; the name
            # written to the checkpoint is the key in `new_variable_map`.
            var = tf.Variable(tensor, name=var_name)

            # Save the variable into the new variable map.
            new_variable_map[new_var_name] = var

            # Keep a list of converted variables for sanity checking.
            if new_var_name != var_name:
                conversion_map[var_name] = new_var_name

        saver = tf.train.Saver(new_variable_map)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            tf.logging.info("Writing checkpoint_to_path %s",
                            checkpoint_to_path)
            saver.save(sess, checkpoint_to_path, write_meta_graph=False)

    tf.logging.info("Summary:")
    tf.logging.info(" Converted %d variable name(s).", len(new_variable_map))
    tf.logging.info(" Converted: %s", str(conversion_map))