Spaces:

qinfeng722
/

llm-studio

Running

App Files Files Community

llm-studio / llm_studio /src /utils /export_utils.py

qinfeng722

Upload 322 files

5caedb4 verified 4 months ago

raw

history blame contribute delete

4.88 kB

	import json
	import logging
	import os
	import zipfile
	from typing import Optional

	import pandas as pd

	from llm_studio.src.utils.exceptions import LLMResourceException
	from llm_studio.src.utils.utils import add_file_to_zip


	def get_artifact_path_path(
	    experiment_name: str, experiment_path: str, artifact_type: str
	):
	    """Build the location of the zip archive for one experiment artifact

	    Args:
	        experiment_name: name of the experiment
	        experiment_path: path containing experiment related files
	        artifact_type: type of the artifact, used as the file name prefix

	    Returns:
	        ``experiment_path`` joined with ``<artifact_type>_<experiment_name>.zip``
	    """

	    archive_name = f"{artifact_type}_{experiment_name}.zip"
	    return os.path.join(experiment_path, archive_name)


	def get_prediction_dataframe(experiment_path: str):
	    """Read the experiment's validation predictions CSV into a dataframe"""
	    predictions_csv = f"{experiment_path}/validation_predictions.csv"
	    return pd.read_csv(predictions_csv)


	def get_predictions_path(experiment_name: str, experiment_path: str):
	    """Return the zip path of the experiment's "preds" artifact"""

	    return get_artifact_path_path(
	        experiment_name=experiment_name,
	        experiment_path=experiment_path,
	        artifact_type="preds",
	    )


	def get_logs_path(experiment_name: str, experiment_path: str):
	    """Return the zip path of the experiment's "logs" artifact"""

	    return get_artifact_path_path(
	        experiment_name=experiment_name,
	        experiment_path=experiment_path,
	        artifact_type="logs",
	    )


	def get_model_path(experiment_name: str, experiment_path: str):
	    """Return the zip path of the experiment's "model" artifact"""

	    return get_artifact_path_path(
	        experiment_name=experiment_name,
	        experiment_path=experiment_path,
	        artifact_type="model",
	    )


	def get_adapter_model_path(experiment_name: str, experiment_path: str):
	    """Return the zip path of the experiment's "adapter_model" artifact"""

	    return get_artifact_path_path(
	        experiment_name=experiment_name,
	        experiment_path=experiment_path,
	        artifact_type="adapter_model",
	    )


	def check_available_space(
	    output_folder: str, min_disk_space: Optional[float]
	) -> bool:
	    """Verify that the filesystem holding a folder has enough free space

	    Args:
	        output_folder: path on the filesystem to inspect
	        min_disk_space: required free space in bytes; a falsy value
	            (None or 0) disables the check

	    Returns:
	        True when the check is disabled or enough space is available

	    Raises:
	        LLMResourceException: if the available space is below min_disk_space
	    """

	    if not min_disk_space:
	        return True

	    # NOTE: os.statvfs is documented as Unix-only.
	    stats = os.statvfs(output_folder)
	    # fragment size * blocks available to unprivileged users
	    available_size = stats.f_frsize * stats.f_bavail

	    if available_size < min_disk_space:
	        error = (
	            f"Not enough disk space. Available space is {get_size_str(available_size)}."
	            f" Required space is {get_size_str(min_disk_space)}."
	        )
	        raise LLMResourceException(error)

	    # Report success explicitly: falling through used to return None, which
	    # is falsy and disagreed with the True returned when the check is skipped.
	    return True


	def save_prediction_outputs(
	    experiment_name: str,
	    experiment_path: str,
	):
	    """Save experiment prediction

	    Packs ``validation_raw_predictions.pkl`` and ``validation_predictions.csv``
	    from the experiment folder into the predictions zip archive.

	    Args:
	        experiment_name: name of the experiment
	        experiment_path: path containing experiment related files

	    Returns:
	        Path to the zip file with experiment predictions
	    """

	    zip_path = get_predictions_path(experiment_name, experiment_path)

	    # The context manager closes the archive even if adding a file fails,
	    # so the file handle is never leaked.
	    with zipfile.ZipFile(zip_path, "w") as zf:
	        add_file_to_zip(
	            zf=zf, path=f"{experiment_path}/validation_raw_predictions.pkl"
	        )
	        add_file_to_zip(zf=zf, path=f"{experiment_path}/validation_predictions.csv")

	    return zip_path


	def save_logs(experiment_name: str, experiment_path: str, logs: dict):
	    """Save experiment logs

	    Writes the chart data to ``charts_<experiment_name>.json`` inside the
	    experiment folder, then zips it together with ``cfg.yaml`` and, when it
	    exists, ``logs.log``.

	    Args:
	        experiment_name: name of the experiment
	        experiment_path: path containing experiment related files
	        logs: dictionary with experiment charts; only the "meta", "train"
	            and "validation" entries are exported

	    Returns:
	        Path to the zip file with experiment logs
	    """

	    cfg_path = os.path.join(experiment_path, "cfg.yaml")
	    charts_path = f"{experiment_path}/charts_{experiment_name}.json"
	    with open(charts_path, "w") as fp:
	        json.dump(
	            {k: v for k, v in logs.items() if k in ["meta", "train", "validation"]}, fp
	        )

	    zip_path = get_logs_path(experiment_name, experiment_path)

	    # The context manager closes the archive on every exit path, including
	    # an error while adding the charts or config file.
	    with zipfile.ZipFile(zip_path, "w") as zf:
	        zf.write(charts_path, os.path.basename(charts_path))
	        zf.write(cfg_path, f"cfg_{experiment_name}.yaml")

	        # The log file is optional: skip it with a warning when it is missing.
	        try:
	            zf.write(
	                f"{experiment_path}/logs.log",
	                f"logs_{experiment_name}.log",
	            )
	        except FileNotFoundError:
	            logging.warning("Log file is not available yet.")

	    return zip_path


	def get_size_str(
	    x, sig_figs=2, input_unit="B", output_unit="dynamic", show_unit=True
	) -> str:
	    """
	    Convert a size given in bytes (or a larger unit) to human readable format.

	    Args:
	        x: input value
	        sig_figs: precision passed to ``round()``, i.e. the number of
	            decimal places kept in the output
	        input_unit: input unit ("B", "KB", "MB", "GB", "TB"), default "B"
	        output_unit: output unit ("B", "KB", "MB", "GB", "TB", "dynamic")
	            default "dynamic". "dynamic" moves to larger units while the
	            value is at least 1024; it never moves to a smaller unit.
	            An explicit unit may be larger or smaller than input_unit.
	        show_unit: whether to show the unit in the output string

	    Returns:
	        str: Human readable string

	    Raises:
	        ValueError: if input_unit or output_unit is not a known unit
	    """

	    units = ["B", "KB", "MB", "GB", "TB"]
	    unit_idx = units.index(input_unit)

	    if output_unit == "dynamic":
	        while x >= 1024 and unit_idx < len(units) - 1:
	            x /= 1024
	            unit_idx += 1
	    else:
	        target_idx = units.index(output_unit)
	        while unit_idx < target_idx:
	            x /= 1024
	            unit_idx += 1
	        # Converting to a smaller unit than the input (e.g. MB -> KB) used
	        # to fail with an opaque ValueError; scale the value up instead.
	        while unit_idx > target_idx:
	            x *= 1024
	            unit_idx -= 1

	    ret_str = str(round(x, sig_figs))
	    if show_unit:
	        ret_str += f" {units[unit_idx]}"

	    return ret_str