File size: 4,497 Bytes
07423df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import json
import logging
import os
import zipfile
from typing import Optional

from llm_studio.src.utils.exceptions import LLMResourceException
from llm_studio.src.utils.utils import add_file_to_zip


def get_artifact_path_path(
    experiment_name: str, experiment_path: str, artifact_type: str
):
    """Build the location of the zip archive holding an experiment artifact

    The archive is named ``<artifact_type>_<experiment_name>.zip`` and is
    placed directly inside the experiment folder.

    Args:
        experiment_name: name of the experiment
        experiment_path: path containing experiment related files
        artifact_type: type of the artifact, used as the file name prefix

    Returns:
        Path to the zip file with experiment artifact
    """

    archive_name = f"{artifact_type}_{experiment_name}.zip"
    return os.path.join(experiment_path, archive_name)


def get_predictions_path(experiment_name: str, experiment_path: str):
    """Return the path of the zip archive with experiment predictions

    Args:
        experiment_name: name of the experiment
        experiment_path: path containing experiment related files

    Returns:
        Path to the "preds" artifact zip file of the experiment
    """

    return get_artifact_path_path(
        experiment_name=experiment_name,
        experiment_path=experiment_path,
        artifact_type="preds",
    )


def get_logs_path(experiment_name: str, experiment_path: str):
    """Return the path of the zip archive with experiment logs

    Args:
        experiment_name: name of the experiment
        experiment_path: path containing experiment related files

    Returns:
        Path to the "logs" artifact zip file of the experiment
    """

    return get_artifact_path_path(
        experiment_name=experiment_name,
        experiment_path=experiment_path,
        artifact_type="logs",
    )


def get_model_path(experiment_name: str, experiment_path: str):
    """Return the path of the zip archive with the experiment model

    Args:
        experiment_name: name of the experiment
        experiment_path: path containing experiment related files

    Returns:
        Path to the "model" artifact zip file of the experiment
    """

    return get_artifact_path_path(
        experiment_name=experiment_name,
        experiment_path=experiment_path,
        artifact_type="model",
    )


def check_available_space(
    output_folder: str, min_disk_space: Optional[float]
) -> bool:
    """Check that the filesystem of the output folder has enough free space

    Args:
        output_folder: existing path on the filesystem to inspect
        min_disk_space: required free space in bytes; a falsy value
            (None or 0) disables the check

    Returns:
        True if the check is disabled or enough space is available

    Raises:
        LLMResourceException: if fewer than `min_disk_space` bytes are
            available
    """

    if not min_disk_space:
        return True

    stats = os.statvfs(output_folder)
    # Fragment size times the number of free blocks reported as available
    # (f_bavail) gives the usable free space in bytes.
    available_size = stats.f_frsize * stats.f_bavail

    if available_size < min_disk_space:
        error = (
            f"Not enough disk space. Available space is {get_size_str(available_size)}."
            f" Required space is {get_size_str(min_disk_space)}."
        )
        raise LLMResourceException(error)

    # Report success explicitly so both non-raising paths return the same
    # value instead of True in one case and None in the other.
    return True


def save_prediction_outputs(
    experiment_name: str,
    experiment_path: str,
):
    """Save experiment prediction

    Packs `validation_raw_predictions.pkl` and `validation_predictions.csv`
    from the experiment folder into the predictions zip archive.

    Args:
        experiment_name: name of the experiment
        experiment_path: path containing experiment related files

    Returns:
        Path to the zip file with experiment predictions
    """

    zip_path = get_predictions_path(experiment_name, experiment_path)

    # The context manager closes the archive even if adding a file fails,
    # so the file handle is never leaked.
    with zipfile.ZipFile(zip_path, "w") as zf:
        add_file_to_zip(
            zf=zf, path=f"{experiment_path}/validation_raw_predictions.pkl"
        )
        add_file_to_zip(zf=zf, path=f"{experiment_path}/validation_predictions.csv")

    return zip_path


def save_logs(experiment_name: str, experiment_path: str, logs: dict):
    """Save experiment logs

    Dumps the "meta", "train" and "validation" entries of `logs` to
    `charts_<experiment_name>.json` in the experiment folder, then packs that
    file, `cfg.yaml` and (if present) `logs.log` into the logs zip archive.

    Args:
        experiment_name: name of the experiment
        experiment_path: path containing experiment related files
        logs: dictionary with experiment charts

    Returns:
        Path to the zip file with experiment logs

    Raises:
        FileNotFoundError: if `cfg.yaml` is missing from the experiment folder
    """

    cfg_path = os.path.join(experiment_path, "cfg.yaml")
    charts_path = f"{experiment_path}/charts_{experiment_name}.json"
    with open(charts_path, "w") as fp:
        json.dump(
            {k: v for k, v in logs.items() if k in ["meta", "train", "validation"]}, fp
        )

    zip_path = get_logs_path(experiment_name, experiment_path)

    # The context manager closes the archive even if one of the writes
    # fails (e.g. cfg.yaml is missing), so the file handle is never leaked.
    with zipfile.ZipFile(zip_path, "w") as zf:
        zf.write(charts_path, os.path.basename(charts_path))
        zf.write(cfg_path, f"cfg_{experiment_name}.yaml")

        # The log file is optional: it may not have been created yet.
        try:
            zf.write(
                f"{experiment_path}/logs.log",
                f"logs_{experiment_name}.log",
            )
        except FileNotFoundError:
            logging.warning("Log file is not available yet.")

    return zip_path


def get_size_str(
    x, sig_figs=2, input_unit="B", output_unit="dynamic", show_unit=True
) -> str:
    """
    Convert a size given in a binary unit such as bytes to human readable format.

    Units are scaled by a factor of 1024 per step.

    Args:
        x: input value
        sig_figs: number of decimal places the result is rounded to
            (passed to `round`; despite the name, these are not
            significant figures)
        input_unit: input unit ("B", "KB", "MB", "GB", "TB"), default "B"
        output_unit: output unit ("B", "KB", "MB", "GB", "TB", "dynamic")
            default "dynamic". With "dynamic" the value is scaled up from
            `input_unit` while it is at least 1024 and a larger unit exists.
            An explicit unit may be larger or smaller than `input_unit`.
        show_unit: whether to show the unit in the output string

    Returns:
        str: Human readable string

    Raises:
        ValueError: if `input_unit` or an explicit `output_unit` is not one
            of the supported units
    """

    units = ["B", "KB", "MB", "GB", "TB"]
    unit_idx = units.index(input_unit)

    if output_unit == "dynamic":
        # Only ever scale up: values below 1024 stay in the input unit.
        while x >= 1024 and unit_idx < len(units) - 1:
            x /= 1024
            unit_idx += 1
    else:
        target_idx = units.index(output_unit)
        # Convert towards a larger unit ...
        while unit_idx < target_idx:
            x /= 1024
            unit_idx += 1
        # ... or towards a smaller one (e.g. "MB" -> "KB").
        while unit_idx > target_idx:
            x *= 1024
            unit_idx -= 1

    ret_str = str(round(x, sig_figs))
    if show_unit:
        ret_str += f" {units[unit_idx]}"

    return ret_str