import os
import tempfile

import fugashi        # MeCab bindings required by the Japanese BERT tokenizer
import unidic_lite    # dictionary package used by fugashi
from transformers import AutoTokenizer

# Japanese BERT tokenizer; needs fugashi and a MeCab dictionary installed
tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese")


def save_dataframe_to_file(dataframe, file_format="csv"):
    """Write the DataFrame to the system temp directory and return the path."""
    temp_dir = tempfile.gettempdir()  # get the system temp directory
    file_path = os.path.join(temp_dir, f"output.{file_format}")
    if file_format == "csv":
        dataframe.to_csv(file_path, index=False)
    elif file_format == "xlsx":
        dataframe.to_excel(file_path, index=False)  # requires openpyxl
    else:
        raise ValueError(f"Unsupported file format: {file_format}")
    return file_path


def tokenize_Df(examples):
    # Tokenize prompt/response sentence pairs, padding/truncating to a
    # fixed length of 60 tokens
    return tokenizer(
        list(examples["prompt"]),
        list(examples["response"]),
        return_tensors="pt",
        padding="max_length",
        max_length=60,
        truncation="longest_first",
    )
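

# --- Usage sketch (illustrative; not part of the original script) ---
# A minimal example of how tokenize_Df might be applied to a DataFrame via
# datasets.Dataset.map. The sample rows and the use of pandas/datasets here
# are assumptions; only the 'prompt'/'response' column names come from above.
if __name__ == "__main__":
    import pandas as pd
    from datasets import Dataset

    df = pd.DataFrame({
        "prompt": ["こんにちは", "お元気ですか"],
        "response": ["こんにちは!", "元気です"],
    })

    # batched=True hands whole columns to tokenize_Df, which matches its
    # list(...) casts; per-example mapping would split strings into characters
    tokenized = Dataset.from_pandas(df).map(tokenize_Df, batched=True)

    # Persist the raw DataFrame to the system temp directory as CSV
    print(save_dataframe_to_file(df, file_format="csv"))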