File size: 3,573 Bytes
62b53be
 
d754e91
c620e0b
62b53be
40a8f4e
62b53be
 
 
 
40a8f4e
62b53be
 
 
 
 
40a8f4e
c620e0b
40a8f4e
9bf05f8
40a8f4e
62b53be
 
 
 
 
 
 
 
d754e91
 
 
40a8f4e
d754e91
5fcf47b
 
c620e0b
 
 
40a8f4e
c620e0b
5fcf47b
 
c620e0b
 
35fba55
40a8f4e
5fcf47b
 
 
35fba55
 
 
40a8f4e
35fba55
 
 
 
 
 
517781a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c620e0b
40a8f4e
c620e0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import os
import shutil
import fnmatch
import json

from ..config import Config
from ..globals import Global


def init_data_dir():
    """Create the data directory and seed it with the bundled sample data.

    Ensures ``Config.data_dir`` exists, then copies the ``templates``,
    ``datasets`` and ``lora_models`` directories located two levels above
    this module's directory into it. A sub-directory that already exists in
    the data directory is left untouched.
    """
    os.makedirs(Config.data_dir, exist_ok=True)

    # The sample directories sit two levels above the directory of this file.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.abspath(os.path.join(module_dir, "..", ".."))

    for subdir in ("templates", "datasets", "lora_models"):
        copy_sample_data_if_not_exists(
            os.path.join(project_root, subdir),
            os.path.join(Config.data_dir, subdir),
        )


def copy_sample_data_if_not_exists(source, destination):
    """Copy the ``source`` tree to ``destination`` unless it already exists.

    When ``destination`` is already present nothing is copied and nothing is
    printed; otherwise a notice is printed and the whole tree is copied.
    """
    if not os.path.exists(destination):
        print(f"Copying sample data to \"{destination}\"")
        shutil.copytree(source, destination)


def get_available_template_names():
    """Return the sorted names of the templates in the data directory.

    Scans ``<Config.data_dir>/templates`` for ``*.json`` and ``*.py`` files.
    JSON templates are listed without their ``.json`` extension, while Python
    templates keep their ``.py`` extension.

    Returns:
        list: Template names in ascending order.

    Raises:
        OSError: If the templates directory cannot be listed.
    """
    json_suffix = ".json"
    templates_directory_path = os.path.join(Config.data_dir, "templates")

    names = []
    for filename in os.listdir(templates_directory_path):
        if fnmatch.fnmatch(filename, "*.json"):
            # Slice off exactly the extension. str.rstrip(".json") strips a
            # *set of characters*, so it turned "session.json" into "sessi"
            # and "person.json" into "per".
            names.append(filename[:-len(json_suffix)])
        elif fnmatch.fnmatch(filename, "*.py"):
            names.append(filename)
    return sorted(names)


def get_available_dataset_names():
    """Return the sorted file names of the datasets in the data directory.

    Only entries of ``<Config.data_dir>/datasets`` matching ``*.json`` or
    ``*.jsonl`` are listed; the extension is kept in the returned names.
    """
    entries = os.listdir(os.path.join(Config.data_dir, "datasets"))
    # A file name cannot match both patterns, so the two lists never overlap.
    json_files = fnmatch.filter(entries, "*.json")
    jsonl_files = fnmatch.filter(entries, "*.jsonl")
    return sorted(json_files + jsonl_files)


def get_available_lora_model_names():
    """Return the sorted names of the LoRA models in the data directory.

    Every sub-directory of ``<Config.data_dir>/lora_models`` counts as a
    model; plain files in that directory are ignored.
    """
    models_root = os.path.join(Config.data_dir, "lora_models")

    def is_model_dir(entry):
        return os.path.isdir(os.path.join(models_root, entry))

    return sorted(filter(is_model_dir, os.listdir(models_root)))


def get_path_of_available_lora_model(name):
    """Return the directory of the LoRA model ``name``, or ``None``.

    The model is looked up under ``<Config.data_dir>/lora_models``; ``None``
    is returned when no directory with that name exists there.
    """
    candidate = os.path.join(Config.data_dir, "lora_models", name)
    return candidate if os.path.isdir(candidate) else None


def get_info_of_available_lora_model(name):
    """Load the ``info.json`` of a locally available LoRA model.

    This is a best-effort lookup: every expected failure yields ``None``
    instead of an exception.

    Args:
        name: Directory name of the model inside
            ``<Config.data_dir>/lora_models``. It must be a single path
            component.

    Returns:
        The parsed content of the model's ``info.json``, or ``None`` when
        the name is not a plain directory name, the model directory does not
        exist, or ``info.json`` is missing, unreadable or not valid JSON.
    """
    # The name must stay inside the lora_models directory. Besides "/" this
    # also rejects the platform separators (e.g. "\\" on Windows) and the
    # special entries "." / "..", which would otherwise escape it.
    if not isinstance(name, str) or name in ("", ".", ".."):
        return None
    if any(sep and sep in name for sep in ("/", os.sep, os.altsep)):
        return None

    try:
        path_of_available_lora_model = get_path_of_available_lora_model(name)
        if not path_of_available_lora_model:
            return None

        info_path = os.path.join(path_of_available_lora_model, "info.json")
        # Decode as UTF-8 explicitly rather than with the locale's default
        # encoding, so the result does not depend on the platform.
        with open(info_path, "r", encoding="utf-8") as json_file:
            return json.load(json_file)

    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: invalid JSON
        # (json.JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
        return None

def get_dataset_content(name):
    """Load and parse a dataset file from ``<Config.data_dir>/datasets``.

    Args:
        name: File name of the dataset. It must match ``*.json`` (one JSON
            document) or ``*.jsonl`` (one JSON value per line).

    Returns:
        The parsed JSON document for a ``*.json`` file, or a list with the
        parsed value of every non-blank line for a ``*.jsonl`` file.

    Raises:
        ValueError: If the file does not exist, has an unsupported
            extension, or contains invalid JSON. For ``*.jsonl`` files the
            message names the offending line number.
    """
    file_name = os.path.join(Config.data_dir, "datasets", name)
    if not os.path.exists(file_name):
        raise ValueError(
            f"Can't read {file_name} from datasets. File does not exist.")

    # Validate the format before opening, so unsupported files are never
    # opened at all.
    is_json = fnmatch.fnmatch(name, "*.json")
    is_jsonl = not is_json and fnmatch.fnmatch(name, "*.jsonl")
    if not (is_json or is_jsonl):
        raise ValueError(
            f"Unknown file format: {file_name}. Expects '*.json' or '*.jsonl'")

    # Decode as UTF-8 explicitly rather than with the locale's default
    # encoding, so the result does not depend on the platform.
    with open(file_name, "r", encoding="utf-8") as file:
        if is_json:
            return json.load(file)

        data = []
        for line_number, line in enumerate(file, start=1):
            # Blank lines (e.g. a trailing empty line) carry no record and
            # are skipped; line numbers still refer to the physical file.
            if not line.strip():
                continue
            try:
                data.append(json.loads(line))
            except ValueError as e:
                raise ValueError(
                    f"Error parsing JSON on line {line_number}: {e}") from e
        return data