model =gpt2 total batch size=40 train num epochs=3 fp16 =True max seq length =40 eval_acc = 0.7935483870967742 eval_loss = 0.5006362595865803