Keras
legal
flowise / config.py
kevin110211's picture
Upload 51 files
5d58b52
import os.path as osp
import numpy as np
import random
import torch
from easydict import EasyDict as edict
import argparse
LAYER_MAPPING = {
0: 'od_layer_0',
1: 'od_layer_1',
2: 'od_layer_2',
}
class cfg():
def __init__(self):
self.this_dir = osp.dirname(__file__)
# change
self.data_root = osp.abspath(osp.join(self.this_dir, '..', '..', 'data', ''))
# TODO: add some static variable (The frequency of change is low)
def get_args(self):
parser = argparse.ArgumentParser()
# ------------ base ------------
parser.add_argument('--train_strategy', default=1, type=int)
parser.add_argument('--batch_size', default=64, type=int)
parser.add_argument('--batch_size_ke', default=14, type=int)
parser.add_argument('--batch_size_od', default=8, type=int)
parser.add_argument('--batch_size_ad', default=32, type=int)
parser.add_argument('--epoch', default=15, type=int)
parser.add_argument("--save_model", default=1, type=int, choices=[0, 1])
# 用transformer的 save_pretrain 方式保存
parser.add_argument("--save_pretrain", default=0, type=int, choices=[0, 1])
parser.add_argument("--from_pretrain", default=0, type=int, choices=[0, 1])
# torthlight
parser.add_argument("--no_tensorboard", default=False, action="store_true")
parser.add_argument("--exp_name", default="huawei_exp", type=str, help="Experiment name")
parser.add_argument("--dump_path", default="dump/", type=str, help="Experiment dump path")
parser.add_argument("--exp_id", default="ke256_raekt_ernie2_bs20_p3_c3_5e-6", type=str, help="Experiment ID")
# or 3407
parser.add_argument("--random_seed", default=42, type=int)
# 数据参数
parser.add_argument("--data_path", default="huawei", type=str, help="Experiment path")
parser.add_argument('--train_ratio', default=1, type=float, help='ratio for train/test')
parser.add_argument("--seq_data_name", default='Seq_data_base', type=str, help="seq_data 名字")
parser.add_argument("--kg_data_name", default='KG_data_base_rule', type=str, help="kg_data 名字")
parser.add_argument("--order_data_name", default='event_order_data', type=str, help="order_data 名字")
# TODO: add some dynamic variable
parser.add_argument("--model_name", default="MacBert", type=str, help="model name")
# ------------ 训练阶段 ------------
parser.add_argument("--scheduler", default="cos", type=str, choices=["linear", "cos"])
parser.add_argument("--optim", default="adamw", type=str)
parser.add_argument("--adam_epsilon", default=1e-8, type=float)
parser.add_argument('--workers', type=int, default=8)
parser.add_argument('--accumulation_steps', type=int, default=6)
parser.add_argument('--accumulation_steps_ke', type=int, default=6)
parser.add_argument('--accumulation_steps_ad', type=int, default=6)
parser.add_argument('--accumulation_steps_od', type=int, default=6)
parser.add_argument("--train_together", default=0, type=int)
# 3e-5
parser.add_argument('--lr', type=float, default=1e-5)
# 逐层学习率衰减
parser.add_argument("--LLRD", default=0, type=int, choices=[0, 1])
parser.add_argument('--weight_decay', type=float, default=0.01)
parser.add_argument('--clip', type=float, default=1., help='gradient clipping')
parser.add_argument('--scheduler_steps', type=int, default=None,
help='total number of step for the scheduler, if None then scheduler_total_step = total_step')
parser.add_argument('--eval_step', default=100, type=int, help='evaluate each n step')
# ------------ PLM ------------
parser.add_argument('--maxlength', type=int, default=200)
parser.add_argument('--mlm_probability', type=float, default=0.15)
parser.add_argument('--final_mlm_probability', type=float, default=0.4)
parser.add_argument('--mlm_probability_increase', type=str, default="curve", choices=["linear", "curve"])
parser.add_argument("--mask_stratege", default="rand", type=str, choices=["rand", "wwm", "domain"])
# 前n个epoch 用rand,后面用wwm. multi-stage knowledge masking strategy
parser.add_argument("--ernie_stratege", default=-1, type=int)
# 用mlm任务进行训练,默认使用chinese_ref且添加新的special word
parser.add_argument("--use_mlm_task", default=1, type=int, choices=[0, 1])
# 添加新的special word
parser.add_argument("--add_special_word", default=1, type=int, choices=[0, 1])
# freeze
parser.add_argument("--freeze_layer", default=0, type=int, choices=[0, 1, 2, 3, 4])
# 是否mask 特殊token
parser.add_argument("--special_token_mask", default=0, type=int, choices=[0, 1])
parser.add_argument("--emb_init", default=1, type=int, choices=[0, 1])
parser.add_argument("--cls_head_init", default=1, type=int, choices=[0, 1])
# 是否使用自适应权重
parser.add_argument("--use_awl", default=1, type=int, choices=[0, 1])
parser.add_argument("--mask_loss_scale", default=1.0, type=float)
# ------------ KGE ------------
parser.add_argument('--ke_norm', type=int, default=1)
parser.add_argument('--ke_dim', type=int, default=768)
parser.add_argument('--ke_margin', type=float, default=1.0)
parser.add_argument('--neg_num', type=int, default=10)
parser.add_argument('--adv_temp', type=float, default=1.0, help='The temperature of sampling in self-adversarial negative sampling.')
# 5e-4
parser.add_argument('--ke_lr', type=float, default=3e-5)
parser.add_argument('--only_ke_loss', type=int, default=0)
# ------------ 数值embedding相关 ------------
parser.add_argument('--use_NumEmb', type=int, default=1)
parser.add_argument("--contrastive_loss", default=1, type=int, choices=[0, 1])
parser.add_argument("--l_layers", default=2, type=int)
parser.add_argument('--use_kpi_loss', type=int, default=1)
# ------------ 测试阶段 ------------
parser.add_argument("--only_test", default=0, type=int, choices=[0, 1])
parser.add_argument("--mask_test", default=0, type=int, choices=[0, 1])
parser.add_argument("--embed_gen", default=0, type=int, choices=[0, 1])
parser.add_argument("--ke_test", default=0, type=int, choices=[0, 1])
# -1: 测全集
parser.add_argument("--ke_test_num", default=-1, type=int)
parser.add_argument("--path_gen", default="", type=str)
# ------------ 时序阶段 ------------
# 1:预训练
# 2:时序 finetune
# 3. 异常检测 finetune + 时序, 且是迭代的
# 是否加载od模型
parser.add_argument("--order_load", default=0, type=int)
parser.add_argument("--order_num", default=2, type=int)
parser.add_argument("--od_type", default='linear_cat', type=str, choices=['linear_cat', 'vertical_attention'])
parser.add_argument("--eps", default=0.2, type=float, help='label smoothing..')
parser.add_argument("--num_od_layer", default=0, type=int)
parser.add_argument("--plm_emb_type", default='cls', type=str, choices=['cls', 'last_avg'])
parser.add_argument("--order_test_name", default='', type=str)
parser.add_argument("--order_threshold", default=0.5, type=float)
# ------------ 并行训练 ------------
# 是否并行
parser.add_argument('--rank', type=int, default=0, help='rank to dist')
parser.add_argument('--dist', type=int, default=0, help='whether to dist')
# 不要改该参数,系统会自动分配
parser.add_argument('--device', default='cuda', help='device id (i.e. 0 or 0,1 or cpu)')
# 开启的进程数(注意不是线程),不用设置该参数,会根据nproc_per_node自动设置
parser.add_argument('--world-size', default=4, type=int,
help='number of distributed processes')
parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
parser.add_argument("--local_rank", default=-1, type=int)
self.cfg = parser.parse_args()
def update_train_configs(self):
# add some constraint for parameters
# e.g. cannot save and test at the same time
# 修正默认参数
# TODO: 测试逻辑有问题需要修改
if len(self.cfg.order_test_name) > 0:
self.cfg.save_model = 0
if len(self.cfg.order_test_name) == 0:
self.cfg.train_ratio = min(0.8, self.cfg.train_ratio)
# 自适应载入文件名
else:
print("od test ... ")
self.cfg.train_strategy == 5
self.cfg.plm_emb_type = 'last_avg' if 'last_avg' in self.cfg.model_name else 'cls'
for key in LAYER_MAPPING.keys():
if LAYER_MAPPING[key] in self.cfg.model_name:
self.cfg.num_od_layer = key
self.cfg.order_test_name = osp.join('downstream_task', f'{self.cfg.order_test_name}')
if self.cfg.mask_test or self.cfg.embed_gen or self.cfg.ke_test or len(self.cfg.order_test_name) > 0:
assert len(self.cfg.model_name) > 0
self.cfg.only_test = 1
if self.cfg.only_test == 1:
self.save_model = 0
self.save_pretrain = 0
# TODO: update some dynamic variable
self.cfg.data_root = self.data_root
self.cfg.data_path = osp.join(self.data_root, self.cfg.data_path)
self.cfg.plm_path = osp.join(self.data_root, 'transformer')
self.cfg.dump_path = osp.join(self.cfg.data_path, self.cfg.dump_path)
# bs 控制尽量在32
# 自适应权重的数量
self.cfg.awl_num = 1
# ------------ 数值embedding相关 ------------
self.cfg.hidden_size = 768
self.cfg.num_attention_heads = 8
self.cfg.hidden_dropout_prob = 0.1
self.cfg.num_kpi = 304
self.cfg.specail_emb_path = None
if self.cfg.emb_init:
self.cfg.specail_emb_path = osp.join(self.cfg.data_path, 'added_vocab_embedding.pt')
# ------------- 多任务学习相关 -------------
# 四个阶段
self.cfg.mask_epoch, self.cfg.ke_epoch, self.cfg.ad_epoch, self.cfg.od_epoch = None, None, None, None
# 触发多任务 学习
if self.cfg.train_strategy > 1:
self.cfg.mask_epoch = [0, 1, 1, 1, 0]
self.cfg.ke_epoch = [4, 3, 2, 2, 0]
if self.cfg.only_ke_loss:
self.cfg.mask_epoch = [0, 0, 0, 0, 0]
self.cfg.epoch = sum(self.cfg.mask_epoch) + sum(self.cfg.ke_epoch)
if self.cfg.train_strategy > 2:
self.cfg.ad_epoch = [0, 6, 3, 1, 0]
self.cfg.epoch += sum(self.cfg.ad_epoch)
if self.cfg.train_strategy > 3 and not self.cfg.only_ke_loss:
self.cfg.od_epoch = [0, 0, 9, 1, 0]
# self.cfg.mask_epoch[3] = 1
self.cfg.epoch += sum(self.cfg.od_epoch)
self.cfg.epoch_matrix = []
for epochs in [self.cfg.mask_epoch, self.cfg.ke_epoch, self.cfg.ad_epoch, self.cfg.od_epoch]:
if epochs is not None:
self.cfg.epoch_matrix.append(epochs)
if self.cfg.train_together:
# loss 直接相加,训练epoch就是mask的epoch
self.cfg.epoch = sum(self.cfg.mask_epoch)
self.cfg.batch_size = int((self.cfg.batch_size - 16) / self.cfg.train_strategy)
self.cfg.batch_size_ke = int(self.cfg.batch_size_ke / self.cfg.train_strategy) - 2
self.cfg.batch_size_ad = int(self.cfg.batch_size_ad / self.cfg.train_strategy) - 1
self.cfg.batch_size_od = int(self.cfg.batch_size_od / self.cfg.train_strategy) - 1
self.cfg.accumulation_steps = (self.cfg.accumulation_steps - 1) * self.cfg.train_strategy
self.cfg.neg_num = max(min(self.cfg.neg_num, self.cfg.batch_size_ke - 3), 1)
self.cfg.accumulation_steps_dict = {0: self.cfg.accumulation_steps, 1: self.cfg.accumulation_steps_ke, 2: self.cfg.accumulation_steps_ad, 3: self.cfg.accumulation_steps_od}
# 使用数值embedding也必须添加新词因为位置信息和tokenizer绑定
if self.cfg.use_mlm_task or self.cfg.use_NumEmb:
assert self.cfg.add_special_word == 1
if self.cfg.use_NumEmb:
self.cfg.awl_num += 1
return self.cfg