File size: 3,710 Bytes
62b53be
 
d754e91
c620e0b
62b53be
40a8f4e
62b53be
 
 
40a8f4e
62b53be
 
 
 
02c87a8
 
 
 
 
 
 
 
 
 
62b53be
 
 
 
 
 
 
 
d754e91
 
 
40a8f4e
d754e91
02c87a8
 
 
 
 
5fcf47b
c620e0b
 
 
40a8f4e
c620e0b
02c87a8
 
 
 
 
5fcf47b
c620e0b
 
35fba55
40a8f4e
5fcf47b
02c87a8
 
 
 
 
5fcf47b
35fba55
 
 
40a8f4e
35fba55
 
 
 
 
 
517781a
 
 
 
 
 
 
 
 
02c87a8
 
 
517781a
 
 
 
 
 
c620e0b
40a8f4e
c620e0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02c87a8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import os
import shutil
import fnmatch
import json

from ..config import Config


def init_data_dir():
    """Create the data directory and seed it with the bundled sample data.

    Ensures ``Config.data_dir`` exists, then, for each of the "templates",
    "datasets" and "lora_models" sub-directories, copies the matching folder
    from the project's "sample_data" directory unless the destination is
    already present.
    """
    os.makedirs(Config.data_dir, exist_ok=True)

    # "sample_data" is resolved two levels above the directory of this module.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.abspath(os.path.join(module_dir, "..", ".."))
    sample_data_root = os.path.join(project_root, "sample_data")

    for subdirectory in ("templates", "datasets", "lora_models"):
        copy_sample_data_if_not_exists(
            os.path.join(sample_data_root, subdirectory),
            os.path.join(Config.data_dir, subdirectory))


def copy_sample_data_if_not_exists(source, destination):
    """Copy the ``source`` tree to ``destination`` unless it already exists.

    Nothing is done when ``destination`` is already present on disk;
    otherwise a notice is printed and the whole tree is copied with
    ``shutil.copytree``.
    """
    if not os.path.exists(destination):
        print(f"Copying sample data to \"{destination}\"")
        shutil.copytree(source, destination)


def get_available_template_names():
    """Return the sorted names of the templates found in the data directory.

    Lists ``<Config.data_dir>/templates`` and keeps entries matching
    ``*.json`` or ``*.py``. JSON templates are reported without their
    ".json" extension; Python templates keep their ".py" extension.

    Returns:
        A sorted list of template names.
    """
    templates_directory_path = os.path.join(Config.data_dir, "templates")
    json_extension = ".json"

    names = []
    for filename in os.listdir(templates_directory_path):
        if fnmatch.fnmatch(filename, "*.json"):
            # Remove exactly the extension. str.rstrip(".json") would strip
            # any trailing run of the characters ".", "j", "s", "o", "n" and
            # therefore mangle names such as "person.json" into "per".
            names.append(filename[:-len(json_extension)])
        elif fnmatch.fnmatch(filename, "*.py"):
            # NOTE(review): ".py" names are returned with their extension,
            # presumably so callers can tell them apart from JSON templates
            # - confirm against the code that loads templates.
            names.append(filename)
    return sorted(names)


def get_available_dataset_names():
    """Return the sorted file names of the datasets in the data directory.

    Lists ``<Config.data_dir>/datasets`` and keeps the entries whose names
    match ``*.json`` or ``*.jsonl``; names are returned with their extension.
    """
    directory = os.path.join(Config.data_dir, "datasets")
    accepted_patterns = ("*.json", "*.jsonl")
    return sorted(
        entry for entry in os.listdir(directory)
        if any(fnmatch.fnmatch(entry, pattern)
               for pattern in accepted_patterns)
    )


def get_available_lora_model_names():
    """Return the sorted names of the LoRA model folders in the data directory.

    Every sub-directory of ``<Config.data_dir>/lora_models`` counts as a
    model; plain files in that folder are ignored.
    """
    models_root = os.path.join(Config.data_dir, "lora_models")
    names = []
    for entry in os.listdir(models_root):
        if os.path.isdir(os.path.join(models_root, entry)):
            names.append(entry)
    names.sort()
    return names


def get_path_of_available_lora_model(name):
    """Return the directory path of the LoRA model ``name``, or ``None``.

    The path is ``<Config.data_dir>/lora_models/<name>``; it is returned only
    when it refers to an existing directory.
    """
    candidate = os.path.join(Config.data_dir, "lora_models", name)
    return candidate if os.path.isdir(candidate) else None


def get_info_of_available_lora_model(name):
    """Return the parsed ``info.json`` of the LoRA model ``name``, or ``None``.

    ``None`` is returned when ``name`` contains a "/", when no model
    directory exists for it, or when anything goes wrong while locating,
    reading or parsing the file (every exception is swallowed on purpose).
    """
    try:
        model_dir = (
            None if "/" in name else get_path_of_available_lora_model(name)
        )
        if not model_dir:
            return None

        info_file_path = os.path.join(model_dir, "info.json")
        with open(info_file_path, "r") as info_file:
            return json.load(info_file)
    except Exception:
        # Best-effort lookup: any failure is reported as "no info".
        return None


def get_dataset_content(name):
    """Load and return the content of the dataset file ``name``.

    The file is read from ``<Config.data_dir>/datasets/<name>`` as UTF-8.
    ``*.json`` files are parsed as a single JSON document; ``*.jsonl`` files
    are parsed line by line into a list, skipping whitespace-only lines.

    Args:
        name: File name of the dataset, including its extension.

    Returns:
        The parsed JSON value for ``*.json`` files, or a list with one parsed
        value per non-blank line for ``*.jsonl`` files.

    Raises:
        ValueError: If the file does not exist, if a line of a ``*.jsonl``
            file cannot be parsed, or if the extension is neither ".json"
            nor ".jsonl".
    """
    file_name = os.path.join(Config.data_dir, "datasets", name)
    if not os.path.exists(file_name):
        raise ValueError(
            f"Can't read {file_name} from datasets. File does not exist.")

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(file_name, "r", encoding="utf-8") as file:
        if fnmatch.fnmatch(name, "*.json"):
            return json.load(file)

        elif fnmatch.fnmatch(name, "*.jsonl"):
            data = []
            for line_number, line in enumerate(file, start=1):
                # Tolerate blank lines (e.g. extra trailing newlines) instead
                # of failing the whole load; line numbers still count them.
                if not line.strip():
                    continue
                try:
                    data.append(json.loads(line))
                except Exception as e:
                    # Chain the cause so the original traceback is kept.
                    raise ValueError(
                        f"Error parsing JSON on line {line_number}: {e}"
                    ) from e
            return data
        else:
            raise ValueError(
                f"Unknown file format: {file_name}. Expects '*.json' or '*.jsonl'"
            )