import os
import tempfile

# fugashi and unidic_lite are not referenced directly below, but importing
# them verifies they are installed: the Japanese BERT tokenizer depends on
# them for MeCab-based word segmentation.
import fugashi
import unidic_lite
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("cl-tohoku/bert-base-japanese")
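
# Quick sanity check (illustrative, assuming the model files download
# successfully): the tokenizer segments Japanese text with MeCab before
# applying WordPiece.
#
#     tokenizer.tokenize("日本語のテキストです")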


def save_dataframe_to_file(dataframe, file_format="csv"):
    """Write a DataFrame to a temp file as CSV or XLSX and return the path."""
    temp_dir = tempfile.gettempdir()
    file_path = os.path.join(temp_dir, f"output.{file_format}")
    if file_format == "csv":
        # utf-8-sig prepends a BOM so Excel detects the encoding of
        # Japanese text correctly when opening the CSV.
        dataframe.to_csv(file_path, index=False, encoding='utf-8-sig')
    elif file_format == "xlsx":
        # to_excel() takes no encoding argument (it was removed in pandas
        # 2.0); the openpyxl writer produces UTF-8 output by default.
        dataframe.to_excel(file_path, index=False)
    return file_path
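
# Minimal usage sketch (the sample rows are hypothetical; requires pandas,
# plus openpyxl for the "xlsx" branch):
#
#     import pandas as pd
#     df = pd.DataFrame({"prompt": ["質問です"], "response": ["回答です"]})
#     print(save_dataframe_to_file(df, file_format="xlsx"))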


def tokenize_Df(examples):
    # Tokenize prompt/response pairs jointly: pad every example to exactly
    # 60 tokens, and when a pair exceeds the limit, truncate the longer of
    # the two sequences first.
    return tokenizer(list(examples['prompt']), list(examples['response']),
                     return_tensors="pt",
                     padding='max_length',
                     max_length=60,
                     truncation='longest_first')
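
# Minimal usage sketch, assuming a Hugging Face datasets.Dataset with
# "prompt" and "response" columns (the sample rows are hypothetical).
# In batched mode, map() passes a dict of lists, which is the shape
# tokenize_Df expects:
#
#     from datasets import Dataset
#     ds = Dataset.from_dict({"prompt": ["質問です"], "response": ["回答です"]})
#     encoded = ds.map(tokenize_Df, batched=True)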