Shteyman committed
Commit · 558c5b7
1 Parent(s): d7537dd
retry
Browse files
- cmd.txt +0 -1
- commit.txt +0 -5
- config.json +0 -29
- experiment_code/config/config1.yaml +0 -28
- experiment_code/config/config_redpajama.yaml +0 -27
- experiment_code/prepare_sharegpt.py +0 -44
- experiment_code/requirements.txt +0 -2
- experiment_code/run_clm.py +0 -754
- experiment_code/submit_job.sh +0 -91
- last-checkpoint/config.json +0 -29
- last-checkpoint/generation_config.json +0 -7
- last-checkpoint/model.safetensors +0 -3
- last-checkpoint/optimizer.pt +0 -3
- last-checkpoint/rng_state.pth +0 -3
- last-checkpoint/scheduler.pt +0 -3
- last-checkpoint/special_tokens_map.json +0 -24
- last-checkpoint/tokenizer.json +0 -0
- last-checkpoint/tokenizer.model +0 -3
- last-checkpoint/tokenizer_config.json +0 -45
- last-checkpoint/trainer_state.json +0 -125
- last-checkpoint/training_args.bin +0 -3
- log.txt +0 -0
- model.safetensors +0 -3
- pip_freeze.txt +0 -330
- special_tokens_map.json +0 -24
- tokenizer.json +0 -0
- tokenizer.model +0 -3
- tokenizer_config.json +0 -45
- training_args.bin +0 -3
cmd.txt DELETED
@@ -1 +0,0 @@
-/var/spool/slurmd/job117535/slurm_script 05-06_00-42
commit.txt DELETED
@@ -1,5 +0,0 @@
-commit c4fe47d125efdcc428a5dd46500d754dc07f4a94
-Author: Shteyman <[email protected]>
-Date:   Sun Jun 2 08:25:22 2024 -0700
-
-    clean version of run_clm.py
config.json DELETED
@@ -1,29 +0,0 @@
-{
-  "_name_or_path": "JackFram/llama-68m",
-  "architectures": [
-    "LlamaForCausalLM"
-  ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "bos_token_id": 0,
-  "eos_token_id": 2,
-  "hidden_act": "silu",
-  "hidden_size": 768,
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "max_position_embeddings": 2048,
-  "model_type": "llama",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 2,
-  "num_key_value_heads": 12,
-  "pad_token_id": 1,
-  "pretraining_tp": 1,
-  "rms_norm_eps": 1e-06,
-  "rope_scaling": null,
-  "rope_theta": 10000.0,
-  "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.41.0.dev0",
-  "use_cache": true,
-  "vocab_size": 32000
-}
experiment_code/config/config1.yaml DELETED
@@ -1,28 +0,0 @@
-config_name: "JackFram/llama-68m"
-tokenizer_name: "JackFram/llama-68m"
-validation_split_percentage: 2
-train_file: "/home/dshteyma/shareGPT_data/ShareGPT_V3_unfiltered_cleaned_split.json"
-dataset_name_hub: "anon8231489123/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json"
-dataset_name_local: "ShareGPT"
-# max_train_samples: 1000
-# max_eval_samples: 10
-do_train: True
-do_eval: True
-output_dir: "/home/dshteyma/target_draft_coupling_code/target_draft_training/training_outputs"
-overwrite_output_dir: True
-per_device_train_batch_size: 4
-gradient_accumulation_steps: 1
-report_to: "tensorboard"
-logging_dir: "/home/dshteyma/target_draft_coupling_code/target_draft_training/training_outputs"
-logging_steps: 500
-save_steps: 1000
-eval_strategy: "steps"
-eval_steps: 1000
-learning_rate: 0.0001
-gradient_accumulation_steps: 1
-weight_decay: 0.01
-warmup_ratio: 0.05
-push_to_hub: True
-hub_model_id: "DorinSht/ShareGPT_llama2_68M"
-hub_strategy: "checkpoint"
-
experiment_code/config/config_redpajama.yaml DELETED
@@ -1,27 +0,0 @@
-config_name: "JackFram/llama-68m"
-tokenizer_name: "JackFram/llama-68m"
-validation_split_percentage: 2
-train_file: "/home/dshteyma/target_draft_coupling_code/dataset_dict.json"
-dataset_name_local: "RedPajama"
-dataset_name: "togethercomputer/RedPajama-Data-1T-Sample"
-dataset_name_hub: "togethercomputer/RedPajama-Data-1T-Sample"
-# max_train_samples: 1000
-# max_eval_samples: 10
-do_train: True
-do_eval: True
-output_dir: "/home/dshteyma/target_draft_coupling_code/target_draft_training/training_outputs"
-overwrite_output_dir: True
-per_device_train_batch_size: 4
-gradient_accumulation_steps: 3
-report_to: "tensorboard"
-logging_dir: "/home/dshteyma/target_draft_coupling_code/target_draft_training/training_outputs"
-logging_steps: 10000
-save_steps: 10000
-eval_strategy: "steps"
-eval_steps: 10000
-learning_rate: 0.0001
-weight_decay: 0.01
-warmup_ratio: 0.05
-push_to_hub: False
-hub_model_id: "DorinSht/llama_68M_redpajama"
-hub_strategy: "all_checkpoints"
experiment_code/prepare_sharegpt.py DELETED
@@ -1,44 +0,0 @@
-"""
-This script is largely copied from the Vicuna repo: https://github.com/lm-sys/FastChat/blob/main/fastchat/data/split_long_conversation.py
-We fixed a bug in `split_one_sample`, which previously includes long conversations in the processed data. Now we skip these long conversations.
-"""
-import argparse
-from concurrent.futures import ProcessPoolExecutor
-import json
-import transformers
-from tqdm import tqdm
-
-def shareGPT_pipeline(tokenizer, raw_datasets, overwrite_cache):
-
-    def preprocess_conversation(convo):
-        key_mapping = {"role" : "from", "content" : "value"}
-        value_mapping = {"user" : "user", "human" : "user", "gpt" : "assistant", 'system': 'assitant', 'bing': 'assitant', 'chatgpt': 'assitant', 'bard': 'assitant'}
-        # mapping = {"human" : "user", "gpt" : "assitant"}
-        if value_mapping[convo[0][key_mapping['role']]] != 'user':
-            convo = convo[1:]
-        preproc_convos_user = [{"role": 'user', "content": convo_elem[key_mapping['content']]} for i, convo_elem in enumerate(convo) if (i % 2 == 0 and value_mapping[convo_elem[key_mapping['role']]] == 'user')]
-        preproc_convos_assistant = [{"role": 'assistant', "content": convo_elem[key_mapping['content']]} for i, convo_elem in enumerate(convo) if (i % 2 == 1 and value_mapping[convo_elem[key_mapping['role']]] == 'assistant')]
-        if len(preproc_convos_user) != len(preproc_convos_assistant):
-            return []
-        preproc_convos = [conv_elem for pair in zip(preproc_convos_user, preproc_convos_assistant) for conv_elem in pair]
-        return preproc_convos
-
-    def filter_incorrect_conversations(examples):
-        convos = examples["conversations"]
-        ids_to_remove = [True if preprocess_conversation(convo) == [] else False for convo in convos]
-        return { "ids_to_remove" : ids_to_remove, }
-
-    def formatting_prompts_func(examples):
-        convos = examples["conversations"]
-        # preproc_convos = [convo for convo in convos if (convo[0]['from'] == 'human' or convo[0]['from'] == 'user')]
-        preproc_convos = [preprocess_conversation(convo) for convo in convos]
-        # preproc_convos2 = [preproc_convo for preproc_convo in preproc_convos if preproc_convo[0]['role'] == 'user']
-        texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for i, convo in enumerate(preproc_convos)]
-        return { "text" : texts,}
-
-    filtered_datasets = raw_datasets.filter(lambda example: example['conversations'] != [], load_from_cache_file=not overwrite_cache,)
-    dataset = filtered_datasets.map(filter_incorrect_conversations, batched = True, load_from_cache_file=not overwrite_cache,)
-    filtered_datasets2 = dataset.filter(lambda example: example['ids_to_remove'] == False, load_from_cache_file=not overwrite_cache,)
-    raw_datasets_preprocessed = filtered_datasets2.map(formatting_prompts_func, batched = True, load_from_cache_file=not overwrite_cache,)
-
-    return raw_datasets_preprocessed
experiment_code/requirements.txt DELETED
@@ -1,2 +0,0 @@
-huggingface-hub==0.22.2
--e git+https://github.com/huggingface/transformers.git@bbaa8ceff696c479aecdb4575b2deb1349efd3aa#egg=transformers
experiment_code/run_clm.py DELETED
@@ -1,754 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-# Copyright 2020 The HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Fine-tuning the library models for causal language modeling (GPT, GPT-2, CTRL, ...) on a text file or a dataset.
-
-Here is the full list of checkpoints on the hub that can be fine-tuned by this script:
-https://huggingface.co/models?filter=text-generation
-"""
-# You can also adapt this script on your own causal language modeling task. Pointers for this are left as comments.
-import random
-import logging
-import math
-import os
-from datetime import datetime
-import sys
-import warnings
-from dataclasses import dataclass, field
-from itertools import chain
-from typing import Optional
-import datasets
-import evaluate
-import torch
-from datasets import load_dataset
-import argparse
-import transformers
-from prepare_sharegpt import shareGPT_pipeline
-from transformers import (
-    CONFIG_MAPPING,
-    MODEL_FOR_CAUSAL_LM_MAPPING,
-    AutoConfig,
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    HfArgumentParser,
-    Trainer,
-    TrainingArguments,
-    default_data_collator,
-    set_seed,
-)
-from transformers.testing_utils import CaptureLogger
-from transformers.trainer_utils import get_last_checkpoint
-from transformers.utils import check_min_version, send_example_telemetry
-from transformers.utils.versions import require_version
-from functools import partial
-
-from omegaconf import DictConfig, OmegaConf
-import hydra
-
-# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.41.0.dev0")
-
-require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
-
-logger = logging.getLogger(__name__)
-
-MODEL_CONFIG_CLASSES = list(MODEL_FOR_CAUSAL_LM_MAPPING.keys())
-MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
-
-random.seed(42)
-
-@dataclass
-class ModelArguments:
-    """
-    Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch.
-    """
-
-    model_name_or_path: Optional[str] = field(
-        default=None,
-        metadata={
-            "help": (
-                "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch."
-            )
-        },
-    )
-    model_type: Optional[str] = field(
-        default=None,
-        metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)},
-    )
-    padding_side: str = field(
-        default="right", metadata={"help": "The padding side in tokenizer"}
-    )
-    config_overrides: Optional[str] = field(
-        default=None,
-        metadata={
-            "help": (
-                "Override some existing default config settings when a model is trained from scratch. Example: "
-                "n_embd=10,resid_pdrop=0.2,scale_attn_weights=false,summary_type=cls_index"
-            )
-        },
-    )
-    config_name: Optional[str] = field(
-        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
-    )
-    tokenizer_name: Optional[str] = field(
-        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
-    )
-    cache_dir: Optional[str] = field(
-        default=None,
-        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
-    )
-    use_fast_tokenizer: bool = field(
-        default=True,
-        metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
-    )
-    model_revision: str = field(
-        default="main",
-        metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
-    )
-    token: str = field(
-        default=None,
-        metadata={
-            "help": (
-                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
-            )
-        },
-    )
-    use_auth_token: bool = field(
-        default=None,
-        metadata={
-            "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead."
-        },
-    )
-    trust_remote_code: bool = field(
-        default=True,
-        metadata={
-            "help": (
-                "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
-                "execute code present on the Hub on your local machine."
-            )
-        },
-    )
-    torch_dtype: Optional[str] = field(
-        default=None,
-        metadata={
-            "help": (
-                "Override the default `torch.dtype` and load the model under this dtype. If `auto` is passed, the "
-                "dtype will be automatically derived from the model's weights."
-            ),
-            "choices": ["auto", "bfloat16", "float16", "float32"],
-        },
-    )
-    low_cpu_mem_usage: bool = field(
-        default=False,
-        metadata={
-            "help": (
-                "It is an option to create the model as an empty shell, then only materialize its parameters when the pretrained weights are loaded. "
-                "set True will benefit LLM loading time and RAM consumption."
-            )
-        },
-    )
-
-    def __post_init__(self):
-        if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None):
-            raise ValueError(
-                "--config_overrides can't be used in combination with --config_name or --model_name_or_path"
-            )
-
-
-
-@dataclass
-class DataTrainingArguments:
-    """
-    Arguments pertaining to what data we are going to input our model for training and eval.
-    """
-    dataset_name: Optional[str] = field(
-        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
-    )
-    dataset_name_hub: Optional[str] = field(
-        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
-    )
-    dataset_name_local: Optional[str] = field(
-        default=None, metadata={"help": "The name of the dataset for identification."}
-    )
-    dataset_config_name: Optional[str] = field(
-        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
-    )
-    train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."})
-    validation_file: Optional[str] = field(
-        default=None,
-        metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."},
-    )
-    max_train_samples: Optional[int] = field(
-        default=None,
-        metadata={
-            "help": (
-                "For debugging purposes or quicker training, truncate the number of training examples to this "
-                "value if set."
-            )
-        },
-    )
-    max_eval_samples: Optional[int] = field(
-        default=None,
-        metadata={
-            "help": (
-                "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
-                "value if set."
-            )
-        },
-    )
-    streaming: bool = field(default=False, metadata={"help": "Enable streaming mode"})
-    block_size: Optional[int] = field(
-        default=None,
-        metadata={
-            "help": (
-                "Optional input sequence length after tokenization. "
-                "The training dataset will be truncated in block of this size for training. "
-                "Default to the model max input length for single sentence inputs (take into account special tokens)."
-            )
-        },
-    )
-    overwrite_cache: bool = field(
-        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
-    )
-    validation_split_percentage: Optional[int] = field(
-        default=5,
-        metadata={
-            "help": "The percentage of the train set used as validation set in case there's no validation split"
-        },
-    )
-    preprocessing_num_workers: Optional[int] = field(
-        default=None,
-        metadata={"help": "The number of processes to use for the preprocessing."},
-    )
-    keep_linebreaks: bool = field(
-        default=True, metadata={"help": "Whether to keep line breaks when using TXT files or not."}
-    )
-    lazy_preprocess: bool = False
-
-    def __post_init__(self):
-        if self.streaming:
-            require_version("datasets>=2.0.0", "The streaming feature requires `datasets>=2.0.0`")
-
-        if self.dataset_name is None and self.train_file is None and self.validation_file is None:
-            raise ValueError("Need either a dataset name or a training/validation file.")
-        else:
-            if self.train_file is not None:
-                extension = self.train_file.split(".")[-1]
-                assert extension in ["csv", "json", "txt"], "`train_file` should be a csv, a json or a txt file."
-            if self.validation_file is not None:
-                extension = self.validation_file.split(".")[-1]
-                assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, a json or a txt file."
-
-# @dataclass
-# class TrainingArguments(transformers.TrainingArguments):
-#     cache_dir: Optional[str] = field(default=None)
-#     optim: str = field(default="adamw_torch")
-#     model_max_length: int = field(
-#         default=2048,
-#         metadata={
-#             "help": "Maximum sequence length. Sequences will be right padded (and possibly truncated)."
-#         },
-#     )
-
-def create_output_directory(dir_root_path):
-    # Get the current date and time
-    current_time = datetime.now()
-    # Format the date and time as a string
-    # Example format: YYYYMMDD_HHMMSS
-    formatted_time = current_time.strftime("%Y%m%d_%H%M%S")
-    # Define the directory name with the formatted time
-    directory_full_path = os.path.join(dir_root_path, f"training_outputs_{formatted_time}")
-    # Create the directory
-    os.makedirs(directory_full_path)
-    print(f"Directory '{directory_full_path}' created successfully.")
-    return directory_full_path
-
-def main():
-    # See all possible arguments in src/transformers/training_args.py
-    # or by passing the --help flag to this script.
-    # We now keep distinct sets of args, for a cleaner separation of concerns.
-    parser = argparse.ArgumentParser(description="parser for arguments from .py script call")
-    parser.add_argument('--output_dir', type=str, help='Path for training_args.output_dir')
-    parser.add_argument('--logging_dir', type=str, help='Path for training_args.logging_dir')
-    parser.add_argument('--config_file', type=str, help='An additional required option.')
-    args = parser.parse_args()
-
-    parser_hf = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
-    if args.config_file is not None and args.output_dir is not None and args.output_dir is not None:
-        # If we pass only one argument to the script and it's the path to a json file,
-        # let's parse it to get our arguments.
-        model_args, data_args, training_args = parser_hf.parse_yaml_file(args.config_file)
-        training_args.output_dir = args.output_dir
-        training_args.logging_dir = args.logging_dir
-    else:
-        # use the preset config file defined in the slurm .sh script
-        # model_args, data_args, training_args = parser_hf.parse_yaml_file(os.getenv("DEFAULT_CONFIG_FILE"))
-        model_args, data_args, training_args = parser_hf.parse_yaml_file('./config/config1.yaml')
-
-
-    if model_args.use_auth_token is not None:
-        warnings.warn(
-            "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token` instead.",
-            FutureWarning,
-        )
-        if model_args.token is not None:
-            raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
-        model_args.token = model_args.use_auth_token
-
-    # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
-    # information sent is the one passed as arguments along with your Python/PyTorch versions.
-    send_example_telemetry("run_clm", model_args, data_args)
-
-    # Setup logging
-    logging.basicConfig(
-        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
-        datefmt="%m/%d/%Y %H:%M:%S",
-        handlers=[logging.StreamHandler(sys.stdout)],
-    )
-
-    if training_args.should_log:
-        # The default of training_args.log_level is passive, so we set log level at info here to have that default.
-        transformers.utils.logging.set_verbosity_info()
-
-    log_level = training_args.get_process_log_level()
-    logger.setLevel(log_level)
-    datasets.utils.logging.set_verbosity(log_level)
-    transformers.utils.logging.set_verbosity(log_level)
-    transformers.utils.logging.enable_default_handler()
-    transformers.utils.logging.enable_explicit_format()
-
-    # Log on each process the small summary:
-    logger.warning(
-        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
-        + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}"
-    )
-    logger.info(f"Training/evaluation parameters {training_args}")
-
-    # Detecting last checkpoint.
-    last_checkpoint = None
-    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
-        last_checkpoint = get_last_checkpoint(training_args.output_dir)
-        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
-            raise ValueError(
-                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
-                "Use --overwrite_output_dir to overcome."
-            )
-        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
-            logger.info(
-                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
-                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
-            )
-
-    # Set seed before initializing model.
-    set_seed(training_args.seed)
-
-    # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
-    # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
-    # (the dataset will be downloaded automatically from the datasets Hub).
-    #
-    # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called
-    # 'text' is found. You can easily tweak this behavior (see below).
-    #
-    # In distributed training, the load_dataset function guarantee that only one local process can concurrently
-    # download the dataset.
-    if data_args.dataset_name is not None:
-        # Downloading and loading a dataset from the hub.
-        raw_datasets = load_dataset(
-            data_args.dataset_name,
-            data_args.dataset_config_name,
-            cache_dir=model_args.cache_dir,
-            token=model_args.token,
-            streaming=data_args.streaming,
-        )
-        if "validation" not in raw_datasets.keys():
-            raw_datasets["validation"] = load_dataset(
-                data_args.dataset_name,
-                data_args.dataset_config_name,
-                split=f"train[:{data_args.validation_split_percentage}%]",
-                cache_dir=model_args.cache_dir,
-                token=model_args.token,
-                streaming=data_args.streaming,
-            )
-            raw_datasets["train"] = load_dataset(
-                data_args.dataset_name,
-                data_args.dataset_config_name,
-                split=f"train[{data_args.validation_split_percentage}%:]",
-                cache_dir=model_args.cache_dir,
-                token=model_args.token,
-                streaming=data_args.streaming,
-            )
-    else:
-        data_files = {}
-        dataset_args = {}
-        if data_args.train_file is not None:
-            data_files["train"] = data_args.train_file
-        if data_args.validation_file is not None:
-            data_files["validation"] = data_args.validation_file
-        extension = (
-            data_args.train_file.split(".")[-1]
-            if data_args.train_file is not None
-            else data_args.validation_file.split(".")[-1]
-        )
-        if extension == "txt":
-            extension = "text"
-            dataset_args["keep_linebreaks"] = data_args.keep_linebreaks
-        raw_datasets = load_dataset(
-            extension,
-            data_files=data_files,
-            cache_dir=model_args.cache_dir,
-            token=model_args.token,
-            **dataset_args,
-        )
-        # If no validation data is there, validation_split_percentage will be used to divide the dataset.
-        if "validation" not in raw_datasets.keys():
-            raw_datasets["validation"] = load_dataset(
-                extension,
-                data_files=data_files,
-                split=f"train[:{data_args.validation_split_percentage}%]",
-                cache_dir=model_args.cache_dir,
-                token=model_args.token,
-                **dataset_args,
-            )
-            raw_datasets["train"] = load_dataset(
-                extension,
-                data_files=data_files,
-                split=f"train[{data_args.validation_split_percentage}%:]",
-                cache_dir=model_args.cache_dir,
-                token=model_args.token,
-                **dataset_args,
-            )
-
-    # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
-    # https://huggingface.co/docs/datasets/loading_datasets.
-
-    # Load pretrained model and tokenizer
-    #
-    # Distributed training:
-    # The .from_pretrained methods guarantee that only one local process can concurrently
-    # download model & vocab.
-
-    config_kwargs = {
-        "cache_dir": model_args.cache_dir,
-        "revision": model_args.model_revision,
-        "token": model_args.token,
-        "trust_remote_code": model_args.trust_remote_code,
-    }
-    if model_args.config_name:
-        config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs)
-    elif model_args.model_name_or_path:
-        config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs)
-    else:
-        config = CONFIG_MAPPING[model_args.model_type]()
-        logger.warning("You are instantiating a new config instance from scratch.")
-        if model_args.config_overrides is not None:
-            logger.info(f"Overriding config: {model_args.config_overrides}")
-            config.update_from_string(model_args.config_overrides)
-            logger.info(f"New config: {config}")
-
-    tokenizer_kwargs = {
-        "cache_dir": model_args.cache_dir,
-        "use_fast": model_args.use_fast_tokenizer,
-        "revision": model_args.model_revision,
-        "token": model_args.token,
-        "padding": 'max_length',
-        "trust_remote_code": model_args.trust_remote_code,
-        "model_max_length": config.max_position_embeddings,
-        "return_tensors":'pt'
-    }
-    if model_args.tokenizer_name:
-        tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
-    elif model_args.model_name_or_path:
-        tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
-    else:
-        raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
-            "You can do it from another script, save it, and load it from here, using --tokenizer_name."
-        )
-    if tokenizer.pad_token != tokenizer.unk_token:
-        tokenizer.pad_token = tokenizer.unk_token
-
-    if model_args.model_name_or_path:
-        torch_dtype = (
-            model_args.torch_dtype
-            if model_args.torch_dtype in ["auto", None]
-            else getattr(torch, model_args.torch_dtype)
-        )
-        model = AutoModelForCausalLM.from_pretrained(
-            model_args.model_name_or_path,
-            from_tf=bool(".ckpt" in model_args.model_name_or_path),
-            config=config,
-            cache_dir=model_args.cache_dir,
-            revision=model_args.model_revision,
-            token=model_args.token,
-            trust_remote_code=model_args.trust_remote_code,
-            torch_dtype=torch_dtype,
-            low_cpu_mem_usage=model_args.low_cpu_mem_usage,
-        )
-    else:
-        model = AutoModelForCausalLM.from_config(config, trust_remote_code=model_args.trust_remote_code)
-        n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
-        logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")
-
-    # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
-    # on a small vocab and want a smaller embedding size, remove this test.
-    embedding_size = model.get_input_embeddings().weight.shape[0]
-    if len(tokenizer) > embedding_size:
-        model.resize_token_embeddings(len(tokenizer))
-
-    if "ShareGPT" == data_args.dataset_name_local:
-        raw_datasets_preprocessed = shareGPT_pipeline(tokenizer=tokenizer, raw_datasets=raw_datasets, overwrite_cache=data_args.overwrite_cache)
-    if "RedPajama" == data_args.dataset_name_local:
-        raw_datasets_preprocessed = raw_datasets
-
-    ### HEREE
-    # Preprocessing the datasets.
-    # First we tokenize all the texts.
-    if training_args.do_train:
-        column_names = list(raw_datasets_preprocessed["train"].features)
-    else:
-        column_names = list(raw_datasets_preprocessed["validation"].features)
-    text_column_name = "text"
-
-
-    # since this will be pickled to avoid _LazyModule error in Hasher force logger loading before tokenize_function
-    tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base")
-
-    def tokenize_function(examples):
-        with CaptureLogger(tok_logger) as cl:
-            # print(tokenizer(examples[text_column_name]))
-            # output = tokenizer(examples[text_column_name])
-            output = tokenizer(
-                examples[text_column_name],
-                return_tensors="pt",
-                padding="max_length",
-                max_length=tokenizer.model_max_length,
-                truncation=True,
-            )
-            # output = input_ids.clone()
-        # clm input could be much much longer than block_size
-        if "Token indices sequence length is longer than the" in cl.out:
-            tok_logger.warning(
-                "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits"
-                " before being passed to the model."
-            )
-        return output
-
-    with training_args.main_process_first(desc="dataset map tokenization"):
-        if not data_args.streaming:
-            tokenized_datasets = raw_datasets_preprocessed.map(
-                tokenize_function,
-                batched=True,
-                num_proc=data_args.preprocessing_num_workers,
-                remove_columns=column_names,
-                load_from_cache_file=not data_args.overwrite_cache,
-                desc="Running tokenizer on dataset",
-            )
-        else:
-            tokenized_datasets = raw_datasets_preprocessed.map(
-                tokenize_function,
-                batched=True,
-                remove_columns=column_names,
-                load_from_cache_file=not data_args.overwrite_cache,
-            )
-    if hasattr(config, "max_position_embeddings"):
-        max_pos_embeddings = config.max_position_embeddings
-    else:
-        # Define a default value if the attribute is missing in the config.
-        max_pos_embeddings = 1024
-
-    if data_args.block_size is None:
-        block_size = tokenizer.model_max_length
-        if block_size > max_pos_embeddings:
-            logger.warning(
-                f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
-                f"Using block_size={min(1024, max_pos_embeddings)} instead. You can change that default value by passing --block_size xxx."
-            )
-            if max_pos_embeddings > 0:
-                block_size = min(1024, max_pos_embeddings)
-            else:
-                block_size = 1024
-    else:
-        if data_args.block_size > tokenizer.model_max_length:
-            logger.warning(
-                f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
-                f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
-            )
-        block_size = min(data_args.block_size, tokenizer.model_max_length)
-
-    # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size.
-    def group_texts(examples):
-        # Concatenate all texts.
-        concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
-        total_length = len(concatenated_examples[list(examples.keys())[0]])
-        # We drop the small remainder, and if the total_length < block_size we exclude this batch and return an empty dict.
-        # We could add padding if the model supported it instead of this drop, you can customize this part to your needs.
-        total_length = (total_length // block_size) * block_size
-        # Split by chunks of max_len.
-        result = {
-            k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
-            for k, t in concatenated_examples.items()
-        }
-        result["labels"] = result["input_ids"].copy()
-        return result
-
-    # Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a remainder
-    # for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value might be slower
-    # to preprocess.
-    #
-    # To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
-    # https://huggingface.co/docs/datasets/process#map
-
-    with training_args.main_process_first(desc="grouping texts together"):
-        if not data_args.streaming:
-            lm_datasets = tokenized_datasets.map(
-                group_texts,
-                batched=True,
-                num_proc=data_args.preprocessing_num_workers,
-                load_from_cache_file=not data_args.overwrite_cache,
-                desc=f"Grouping texts in chunks of {block_size}",
-            )
-        else:
-            lm_datasets = tokenized_datasets.map(
-                group_texts,
-                batched=True,
-                load_from_cache_file=not data_args.overwrite_cache,
-            )
-
-    if training_args.do_train:
-        if "train" not in tokenized_datasets:
-            raise ValueError("--do_train requires a train dataset")
-        train_dataset = lm_datasets["train"]
-        if data_args.max_train_samples is not None:
-            max_train_samples = min(len(train_dataset), data_args.max_train_samples)
-            train_dataset = train_dataset.select(range(max_train_samples))
-
-    if training_args.do_eval:
-        if "validation" not in tokenized_datasets:
-            raise ValueError("--do_eval requires a validation dataset")
-        eval_dataset = lm_datasets["validation"]
-        if data_args.max_eval_samples is not None:
-            max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
-            eval_dataset = eval_dataset.select(range(max_eval_samples))
-
-        def preprocess_logits_for_metrics(logits, labels):
-            if isinstance(logits, tuple):
-                # Depending on the model and config, logits may contain extra tensors,
-                # like past_key_values, but logits always come first
-                logits = logits[0]
-            return logits.argmax(dim=-1)
-
-
-        def compute_metrics(eval_preds):
-            accuracy = evaluate.load("accuracy", cache_dir=model_args.cache_dir)
-            perplexity = evaluate.load("perplexity", module_type="metric")
-            preds, labels = eval_preds
-            # preds have the same shape as the labels, after the argmax(-1) has been calculated
-            # by preprocess_logits_for_metrics but we need to shift the labels
-            labels = labels[:, 1:].reshape(-1)
-            preds = preds[:, :-1].reshape(-1)
-            accuracy = accuracy.compute(predictions=preds, references=labels)
-            # perplexity = perplexity.compute(predictions=preds, model_id='llama')
-            return accuracy
-
-    # Initialize the optimizer
-    optimizer = torch.optim.AdamW(model.parameters(), lr=training_args.learning_rate, weight_decay=training_args.weight_decay)
-    # Calculate the number of training steps
-    train_steps = (len(train_dataset) // (training_args.per_device_train_batch_size * training_args._n_gpu)) * training_args.num_train_epochs
-
-    # Initialize the scheduler
-    linear_scheduler = transformers.get_linear_schedule_with_warmup(
-        optimizer,
-        num_warmup_steps=train_steps*training_args.warmup_ratio,
-        num_training_steps=train_steps
-    )
-
-    # Initialize our Trainer
-    trainer = Trainer(
-        model=model,
-        args=training_args,
-        train_dataset=train_dataset if training_args.do_train else None,
-        eval_dataset=eval_dataset if training_args.do_eval else None,
-        tokenizer=tokenizer,
-        optimizers=(optimizer, linear_scheduler),
-        # Data collator will default to DataCollatorWithPadding, so we change it.
-        data_collator=default_data_collator,
-        compute_metrics=compute_metrics if training_args.do_eval else None,
-        preprocess_logits_for_metrics=preprocess_logits_for_metrics
-        if training_args.do_eval else None,
-    )
-
-    # Training
-    if training_args.do_train:
-        checkpoint = None
-        if training_args.resume_from_checkpoint is not None:
-            checkpoint = training_args.resume_from_checkpoint
-        elif last_checkpoint is not None:
-            checkpoint = last_checkpoint
-        train_result = trainer.train(resume_from_checkpoint=checkpoint)
-        trainer.save_model()  # Saves the tokenizer too for easy upload
-
-        metrics = train_result.metrics
-
-        max_train_samples = (
-            data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)
-        )
-        metrics["train_samples"] = min(max_train_samples, len(train_dataset))
-
-        trainer.log_metrics("train", metrics)
-        trainer.save_metrics("train", metrics)
-        trainer.save_state()
-        try:
-            torch.save([vars(a) for a in [training_args, data_args, model_args]], os.path.join(training_args.output_dir, "args.bin"))
-        except:
-            logger.info("Failed to save arguments")
-
-    # Evaluation
-    if training_args.do_eval:
-        logger.info("*** Evaluate ***")
-
-        metrics = trainer.evaluate()
-
-        max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
-        metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))
-        try:
-            perplexity = math.exp(metrics["eval_loss"])
-        except OverflowError:
-            perplexity = float("inf")
-        metrics["perplexity"] = perplexity
-
-        trainer.log_metrics("eval", metrics)
-        trainer.save_metrics("eval", metrics)
-
-    kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-generation"}
-    if data_args.dataset_name is not None:
-        kwargs["dataset_tags"] = data_args.dataset_name
-        if data_args.dataset_config_name is not None:
-            kwargs["dataset_args"] = data_args.dataset_config_name
-            kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
-        else:
-            kwargs["dataset"] = data_args.dataset_name
-    elif data_args.dataset_name_hub is not None:
-        kwargs["dataset"] = data_args.dataset_name_hub
-
-    if training_args.push_to_hub:
-        trainer.push_to_hub(**kwargs)
-    else:
-        trainer.create_model_card(**kwargs)
-
-if __name__ == "__main__":
-    main()
experiment_code/submit_job.sh DELETED
@@ -1,91 +0,0 @@
-#!/bin/bash
-#SBATCH -p g24
-#SBATCH --job-name=myjob_shareGPT
-#SBATCH --qos=normal
-#SBATCH --nodes=1 # Number of nodes
-#SBATCH --ntasks=1 # Number of tasks (one for each script)
-#SBATCH --cpus-per-task=60
-#SBATCH --gres=gpu:6
-#SBATCH --array=1-1 # Array range
-# #SBATCH --output=./slurm_outputs/run_clm_job_%A_task_%a.out # Standard output
-#SBATCH --output=/dev/null # Discard standard output # Because we write to the log.txt file
-
-# # Get the current date and time
-current_time=$(date +"%d-%m_%H-%M")
-OUTPUT_DIR="./training_outputs_job_${SLURM_ARRAY_JOB_ID}_${SLURM_ARRAY_TASK_ID}_${current_time}"
-
-while test $# -gt 0; do
-    echo $1
-    case "$1" in
-        --output_dir)
-            shift
-            OUTPUT_DIR=$1
-            shift
-            ;;
-    esac
-done
-
-mkdir_is_exists() {
-    if [ -d "$1" ]; then
-        echo "Directory '$1' already exists."
-    else
-        mkdir -p "$1"
-        echo "Directory '$1' created."
-    fi
-}
-
-
-mkdir_is_exists $OUTPUT_DIR
-mkdir_is_exists $OUTPUT_DIR/experiment_code
-git log -n 1 > $OUTPUT_DIR/commit.txt
-pip freeze > $OUTPUT_DIR/pip_freeze.txt
-echo $0 $ARGS $current_time > $OUTPUT_DIR/cmd.txt
-cp -r ./run_clm.py $OUTPUT_DIR/experiment_code
-cp -r ./prepare_sharegpt.py $OUTPUT_DIR/experiment_code
-cp -r config $OUTPUT_DIR/experiment_code
-cp -r ./submit_job.sh $OUTPUT_DIR/experiment_code
-cp -r ./requirements.txt $OUTPUT_DIR/experiment_code
-
-# Define the Python scripts and their corresponding input files
-declare -A scripts_and_inputs=(
-    ["1"]="./config/config1.yaml"
-    # ["2"]="./config/config_redpajama.yaml"
-    # ["3"]="./config/config1.yaml"
-    # ["4"]="./config/config1.yaml"
-    # ["5"]="./config/config1.yaml"
-    # ["6"]="./config/config1.yaml"
-    # ["7"]="./config/config1.yaml"
-    # ["8"]="./config/config1.yaml"
-    # ["9"]="./config/config1.yaml"
-    # ["10"]="./config/config1.yaml"
-    # ["11"]="./config/config1.yaml"
-    # ["12"]="./config/config1.yaml"
-    # ["13"]="./config/config1.yaml"
-    # ["14"]="./config/config1.yaml"
-    # ["15"]="./config/config1.yaml"
-    # ["16"]="./config/config1.yaml"
-    # ["17"]="./config/config1.yaml"
-    # ["18"]="./config/config1.yaml"
-    # ["19"]="./config/config1.yaml"
-    # ["20"]="./config/config1.yaml"
-)
-
-# Launch each script with its corresponding input file as a separate task
-echo "Starting job array task: $SLURM_ARRAY_TASK_ID"
-
-INPUT_DIR="${scripts_and_inputs[$SLURM_ARRAY_TASK_ID]}"
-export DEFAULT_CONFIG_FILE="./config/config1.yaml"
-srun --exclusive python run_clm.py --output_dir $OUTPUT_DIR --logging_dir $OUTPUT_DIR --config_file $INPUT_DIR 2>&1 | tee $OUTPUT_DIR/log.txt
-
-
-# Wait for all background jobs to complete
-wait
-
-# Print a message indicating completion
-echo "All Python scripts have been executed."
-
-
-# mv ./slurm_outputs/run_clm_job_$SLURM_ARRAY_JOB_ID*$SLURM_ARRAY_TASK_ID* "$output_dir/"
-
-
-# python -m torch.distributed.launch ~/target_draft_coupling_code/target_draft_training/run_clm.py --multirun task=1,2
last-checkpoint/config.json DELETED
@@ -1,29 +0,0 @@
-{
-  "_name_or_path": "JackFram/llama-68m",
-  "architectures": [
-    "LlamaForCausalLM"
-  ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "bos_token_id": 0,
-  "eos_token_id": 2,
-  "hidden_act": "silu",
-  "hidden_size": 768,
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "max_position_embeddings": 2048,
-  "model_type": "llama",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 2,
-  "num_key_value_heads": 12,
-  "pad_token_id": 1,
-  "pretraining_tp": 1,
-  "rms_norm_eps": 1e-06,
-  "rope_scaling": null,
-  "rope_theta": 10000.0,
-  "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.41.0.dev0",
-  "use_cache": true,
-  "vocab_size": 32000
-}
last-checkpoint/generation_config.json DELETED
@@ -1,7 +0,0 @@
-{
-  "_from_model_config": true,
-  "bos_token_id": 0,
-  "eos_token_id": 2,
-  "pad_token_id": 1,
-  "transformers_version": "4.41.0.dev0"
-}
last-checkpoint/model.safetensors DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:baf7620b0c51ef17a030b63cfe26c514df5d88602a1b8140fb12c4968dfa6ff4
-size 272123144
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:908d9d7ed41d479f7f47a9fd0646de3f7800df94e052115c9815ea463d99e70d
-size 544259743
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c062f7f375beded48b5337f5a3f3a5cb38807fa3e85dbf3e294c0ab6b627bfc2
-size 14244
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:394be853393fcf0db07e5bdfe4c0d7e15ce8f5fac5bdbb2ad1b413385499af51
-size 1000
last-checkpoint/special_tokens_map.json DELETED
@@ -1,24 +0,0 @@
-{
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "<unk>",
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}
last-checkpoint/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff.
last-checkpoint/tokenizer.model DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
-size 499723
last-checkpoint/tokenizer_config.json DELETED
@@ -1,45 +0,0 @@
-{
-  "add_bos_token": true,
-  "add_eos_token": false,
-  "add_prefix_space": true,
-  "added_tokens_decoder": {
-    "0": {
-      "content": "<unk>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "1": {
-      "content": "<s>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
-      "content": "</s>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "bos_token": "<s>",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "</s>",
-  "legacy": true,
-  "model_max_length": 2048,
-  "pad_token": "<unk>",
-  "padding": "max_length",
-  "return_tensors": "pt",
-  "sp_model_kwargs": {},
-  "spaces_between_special_tokens": false,
-  "tokenizer_class": "LlamaTokenizer",
-  "unk_token": "<unk>",
-  "use_default_system_prompt": false,
-  "use_fast": true
-}
last-checkpoint/trainer_state.json
DELETED
@@ -1,125 +0,0 @@
-{
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 1.0576414595452142,
-  "eval_steps": 1000,
-  "global_step": 4000,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.13220518244315177,
-      "grad_norm": 0.8546391725540161,
-      "learning_rate": 8.816009873931059e-05,
-      "loss": 5.1118,
-      "step": 500
-    },
-    {
-      "epoch": 0.26441036488630354,
-      "grad_norm": 0.8593688607215881,
-      "learning_rate": 9.59831475011252e-05,
-      "loss": 3.406,
-      "step": 1000
-    },
-    {
-      "epoch": 0.26441036488630354,
-      "eval_accuracy": 0.5035306174465283,
-      "eval_loss": 3.23445987701416,
-      "eval_runtime": 73.8676,
-      "eval_samples_per_second": 24.909,
-      "eval_steps_per_second": 0.528,
-      "step": 1000
-    },
-    {
-      "epoch": 0.3966155473294553,
-      "grad_norm": 0.9617258906364441,
-      "learning_rate": 9.134314230431938e-05,
-      "loss": 3.0005,
-      "step": 1500
-    },
-    {
-      "epoch": 0.5288207297726071,
-      "grad_norm": 0.8953185677528381,
-      "learning_rate": 8.670313710751356e-05,
-      "loss": 2.8119,
-      "step": 2000
-    },
-    {
-      "epoch": 0.5288207297726071,
-      "eval_accuracy": 0.5365118094348038,
-      "eval_loss": 2.821384906768799,
-      "eval_runtime": 72.909,
-      "eval_samples_per_second": 25.237,
-      "eval_steps_per_second": 0.535,
-      "step": 2000
-    },
-    {
-      "epoch": 0.6610259122157589,
-      "grad_norm": 1.4154396057128906,
-      "learning_rate": 8.206313191070773e-05,
-      "loss": 2.686,
-      "step": 2500
-    },
-    {
-      "epoch": 0.7932310946589106,
-      "grad_norm": 1.821349024772644,
-      "learning_rate": 7.742312671390191e-05,
-      "loss": 2.607,
-      "step": 3000
-    },
-    {
-      "epoch": 0.7932310946589106,
-      "eval_accuracy": 0.5497897240925214,
-      "eval_loss": 2.657219886779785,
-      "eval_runtime": 73.4297,
-      "eval_samples_per_second": 25.058,
-      "eval_steps_per_second": 0.531,
-      "step": 3000
-    },
-    {
-      "epoch": 0.9254362771020624,
-      "grad_norm": 2.0297396183013916,
-      "learning_rate": 7.278312151709609e-05,
-      "loss": 2.5642,
-      "step": 3500
-    },
-    {
-      "epoch": 1.0576414595452142,
-      "grad_norm": 2.8318285942077637,
-      "learning_rate": 6.814311632029027e-05,
-      "loss": 2.4734,
-      "step": 4000
-    },
-    {
-      "epoch": 1.0576414595452142,
-      "eval_accuracy": 0.5582058048894458,
-      "eval_loss": 2.5735702514648438,
-      "eval_runtime": 73.4679,
-      "eval_samples_per_second": 25.045,
-      "eval_steps_per_second": 0.531,
-      "step": 4000
-    }
-  ],
-  "logging_steps": 500,
-  "max_steps": 11346,
-  "num_input_tokens_seen": 0,
-  "num_train_epochs": 3,
-  "save_steps": 1000,
-  "stateful_callbacks": {
-    "TrainerControl": {
-      "args": {
-        "should_epoch_stop": false,
-        "should_evaluate": false,
-        "should_log": false,
-        "should_save": true,
-        "should_training_stop": false
-      },
-      "attributes": {}
-    }
-  },
-  "total_flos": 5.124838835670221e+16,
-  "train_batch_size": 24,
-  "trial_name": null,
-  "trial_params": null
-}
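The state file records a run stopped at step 4000 of 11346 (about 1.06 of 3 planned epochs), with eval loss falling from 3.23 to 2.57 and eval accuracy rising from 0.504 to 0.558. A minimal sketch for pulling that curve back out of a saved copy (local path hypothetical):

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# log_history interleaves training entries (keyed by "loss") and evaluation
# entries (keyed by "eval_loss"); filter on the key to recover the eval curve.
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"step {entry['step']:>5}: "
              f"eval_loss={entry['eval_loss']:.3f} "
              f"eval_accuracy={entry['eval_accuracy']:.3f}")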
last-checkpoint/training_args.bin
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9ce5f4c1939d798f9579c06cb7c41ca4f80497b830ef82299a5b5b802ba651a2
-size 5176
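training_args.bin is a 5,176-byte torch pickle of the Trainer's TrainingArguments. A sketch of inspecting it, with the caveat that unpickling is sensitive to the installed transformers version (this assumes one compatible with the run):

import torch

# The file is an arbitrary pickled object, not a tensor checkpoint, so it
# must be loaded with weights_only=False and a matching transformers install.
args = torch.load("last-checkpoint/training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size)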
log.txt
DELETED
The diff for this file is too large to render.
See raw diff
model.safetensors
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:baf7620b0c51ef17a030b63cfe26c514df5d88602a1b8140fb12c4968dfa6ff4
-size 272123144
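At 272,123,144 bytes of float32 weights (roughly 68M parameters at 4 bytes each), model.safetensors holds the draft model itself. A minimal sketch of listing its tensors with the safetensors library, assuming the real file rather than the LFS pointer is on disk:

from safetensors import safe_open

# safe_open memory-maps the file; get_slice exposes shapes without
# materializing the tensors.
with safe_open("model.safetensors", framework="pt") as f:
    for name in f.keys():
        print(name, f.get_slice(name).get_shape())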
pip_freeze.txt
DELETED
@@ -1,330 +0,0 @@
-absl-py==2.1.0
-accelerate==0.26.1
-aiofiles==23.2.1
-aiohttp==3.8.6
-aiosignal==1.3.1
-altair==5.3.0
-annotated-types==0.6.0
-antlr4-python3-runtime==4.9.3
-anyio==4.0.0
-argon2-cffi==23.1.0
-argon2-cffi-bindings==21.2.0
-arrow==1.3.0
-asttokens==2.4.0
-astunparse==1.6.3
-async-lru==2.0.4
-async-timeout==4.0.3
-attrs==23.1.0
-auto-gptq==0.6.0
-Babel==2.13.0
-backcall @ file:///home/ktietz/src/ci/backcall_1611930011877/work
-beartype==0.17.2
-beautifulsoup4==4.12.2
-bitsandbytes==0.43.1
-bleach==6.1.0
-blis==0.7.11
-brotlipy==0.7.0
-cachetools==5.3.2
-catalogue==2.0.10
-certifi==2023.7.22
-cffi==1.16.0
-chardet==5.2.0
-charset-normalizer==3.3.0
-click==8.1.7
-cloudpathlib==0.16.0
-cloudpickle==3.0.0
-colorama @ file:///tmp/build/80754af9/colorama_1607707115595/work
-coloredlogs==15.0.1
-comm==0.1.4
-conda==4.12.0
-conda-content-trust @ file:///tmp/build/80754af9/conda-content-trust_1617045594566/work
-conda-package-handling @ file:///tmp/build/80754af9/conda-package-handling_1649105784853/work
-confection==0.1.4
-contextlib2==21.6.0
-contexttimer==0.3.3
-contourpy==1.1.1
-cryptography @ file:///tmp/build/80754af9/cryptography_1639414572950/work
-cycler==0.12.1
-cymem==2.0.8
-dataclasses-json==0.6.4
-DataProperty==1.0.1
-datasets==2.19.1
-debugpy==1.8.0
-decorator @ file:///opt/conda/conda-bld/decorator_1643638310831/work
-defusedxml==0.7.1
-dill==0.3.7
-dnspython==2.6.1
-docstring_parser==0.16
-dos2unix==1
-einops==0.8.0
-eval_type_backport==0.2.0
-evaluate==0.4.1
-exceptiongroup==1.1.3
-executing==2.0.0
-fastapi==0.111.0
-fastapi-cli==0.0.2
-fastchat==0.1.0
-fastjsonschema==2.18.1
-ffmpy==0.3.2
-filelock==3.12.4
-fire==0.5.0
-flash-attn==2.5.8
-flatbuffers==23.5.26
-fonttools==4.43.1
-fqdn==1.5.1
-frozenlist==1.4.0
-fschat==0.2.36
-fsspec==2023.6.0
-gast==0.5.4
-gekko==1.0.6
-globals==0.3.36
-google-auth==2.27.0
-google-auth-oauthlib==1.2.0
-google-pasta==0.2.0
-gradio==4.29.0
-gradio_client==0.16.1
-greenlet==3.0.3
-grpcio==1.60.1
-h11==0.14.0
-h5py==3.10.0
-httpcore==1.0.5
-httptools==0.6.1
-httpx==0.27.0
-huggingface-hub==0.22.2
-humanfriendly==10.0
-hydra-core==1.3.2
-hydra-joblib-launcher==1.2.0
-hydra-submitit-launcher==1.2.0
-idna==3.4
-importlib-metadata==6.8.0
-importlib-resources==6.1.0
-ipykernel==6.25.2
-ipython==8.18.1
-isoduration==20.11.0
-jedi==0.19.1
-Jinja2==3.1.2
-joblib==1.3.2
-json5==0.9.14
-jsonlines==4.0.0
-jsonpatch==1.33
-jsonpointer==2.4
-jsonschema==4.19.1
-jsonschema-specifications==2023.7.1
-jupyter-events==0.7.0
-jupyter-lsp==2.2.0
-jupyter_client==8.3.1
-jupyter_core==5.3.2
-jupyter_server==2.7.3
-jupyter_server_terminals==0.4.4
-jupyterlab==4.0.6
-jupyterlab-pygments==0.2.2
-jupyterlab_server==2.25.0
-keras==2.15.0
-kiwisolver==1.4.5
-langchain==0.1.8
-langchain-community==0.0.21
-langchain-core==0.1.25
-langcodes==3.3.0
-langdetect==1.0.9
-langsmith==0.1.5
-libclang==16.0.6
-lxml==5.1.0
-Markdown==3.5.2
-markdown-it-py==3.0.0
-markdown2==2.4.13
-MarkupSafe==2.1.5
-marshmallow==3.20.2
-matplotlib==3.8.0
-matplotlib-inline @ file:///opt/conda/conda-bld/matplotlib-inline_1662014470464/work
-mbstrdecoder==1.1.3
-mdurl==0.1.2
-mistune==3.0.2
-ml-collections==0.1.1
-ml-dtypes==0.2.0
-more-itertools==10.2.0
-mpmath==1.3.0
-multidict==6.0.4
-multiprocess==0.70.15
-murmurhash==1.0.10
-mypy-extensions==1.0.0
-nbclient==0.8.0
-nbconvert==7.9.2
-nbformat==5.9.2
-nest-asyncio==1.5.8
-networkx==3.1
-nh3==0.2.17
-ninja==1.11.1.1
-nltk==3.8.1
-notebook==7.0.4
-notebook_shim==0.2.3
-numexpr==2.9.0
-numpy==1.26.0
-nvidia-cublas-cu12==12.1.3.1
-nvidia-cuda-cupti-cu12==12.1.105
-nvidia-cuda-nvrtc-cu12==12.1.105
-nvidia-cuda-runtime-cu12==12.1.105
-nvidia-cudnn-cu12==8.9.2.26
-nvidia-cufft-cu12==11.0.2.54
-nvidia-curand-cu12==10.3.2.106
-nvidia-cusolver-cu12==11.4.5.107
-nvidia-cusparse-cu12==12.1.0.106
-nvidia-ml-py3==7.352.0
-nvidia-nccl-cu12==2.18.1
-nvidia-nvjitlink-cu12==12.2.140
-nvidia-nvtx-cu12==12.1.105
-oauthlib==3.2.2
-omegaconf==2.3.0
-opt-einsum==3.3.0
-optimum==1.16.2
-orjson==3.10.3
-overrides==7.4.0
-packaging==23.2
-pandas==2.1.1
-pandocfilters==1.5.0
-parso @ file:///opt/conda/conda-bld/parso_1641458642106/work
-pathvalidate==3.2.0
-patsy==0.5.3
-peft==0.8.2
-pexpect @ file:///tmp/build/80754af9/pexpect_1605563209008/work
-pickleshare @ file:///tmp/build/80754af9/pickleshare_1606932040724/work
-Pillow==10.0.1
-platformdirs==3.11.0
-plotly==5.17.0
-plotly-express==0.4.1
-portalocker==2.8.2
-preshed==3.0.9
-prometheus-client==0.17.1
-prompt-toolkit==3.0.43
-protobuf==3.20.3
-psutil==5.9.5
-ptyprocess @ file:///tmp/build/80754af9/ptyprocess_1609355006118/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
-pure-eval @ file:///opt/conda/conda-bld/pure_eval_1646925070566/work
-pyarrow==13.0.0
-pyarrow-hotfix==0.6
-pyasn1==0.5.1
-pyasn1-modules==0.3.0
-pybind11==2.11.1
-pycosat==0.6.3
-pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work
-pydantic==2.6.1
-pydantic_core==2.16.2
-pydub==0.25.1
-Pygments==2.16.1
-pyOpenSSL @ file:///opt/conda/conda-bld/pyopenssl_1643788558760/work
-pyparsing==3.1.1
-PySocks @ file:///tmp/build/80754af9/pysocks_1605305812635/work
-pytablewriter==1.2.0
-python-dateutil==2.8.2
-python-dotenv==1.0.1
-python-helper==0.3.74
-python-json-logger==2.0.7
-python-multipart==0.0.9
-pytz==2023.3.post1
-PyYAML==6.0.1
-pyzmq==25.1.1
-referencing==0.30.2
-regex==2023.10.3
-requests==2.31.0
-requests-oauthlib==1.3.1
-responses==0.18.0
-rfc3339-validator==0.1.4
-rfc3986-validator==0.1.1
-rich==13.7.1
-rotary-embedding-torch==0.5.3
-rouge==1.0.1
-rouge-score==0.1.2
-rpds-py==0.10.4
-rsa==4.9
-ruamel-yaml-conda @ file:///tmp/build/80754af9/ruamel_yaml_1616016711199/work
-ruff==0.4.3
-sacrebleu==2.4.0
-safetensors==0.4.3
-scikit-learn==1.4.1.post1
-scipy==1.11.3
-seaborn==0.13.0
-semantic-version==2.10.0
-Send2Trash==1.8.2
-sentencepiece==0.2.0
-shellingham==1.5.4
-shortuuid==1.0.13
-shtab==1.7.1
-six @ file:///tmp/build/80754af9/six_1644875935023/work
-smart-open==6.4.0
-sniffio==1.3.0
-soupsieve==2.5
-spacy==3.7.4
-spacy-legacy==3.0.12
-spacy-loggers==1.0.5
-speculative-decoding==0.1.2
-SQLAlchemy==2.0.27
-sqlitedict==2.1.0
-srsly==2.4.8
-stack-data==0.6.3
-starlette==0.37.2
-statsmodels==0.14.0
-submitit==1.5.1
-svgwrite==1.4.3
-sympy==1.12
-tabledata==1.3.3
-tabulate==0.9.0
-tcolorpy==0.1.4
-tenacity==8.2.3
-tensorboard==2.15.1
-tensorboard-data-server==0.7.2
-tensorflow==2.15.0.post1
-tensorflow-estimator==2.15.0
-tensorflow-io-gcs-filesystem==0.35.0
-tensorrt==8.6.1.post1
-tensorrt-bindings==8.6.1
-tensorrt-libs==8.6.1
-termcolor==2.4.0
-terminado==0.17.1
-thinc==8.2.3
-threadpoolctl==3.3.0
-tiktoken==0.6.0
-tinycss2==1.2.1
-tk==0.1.0
-tokenizers==0.19.1
-tomli==2.0.1
-tomlkit==0.12.0
-toolz==0.12.1
-torch==2.1.0
-torchaudio==2.1.0
-torchvision==0.16.0
-tornado==6.3.3
-tqdm==4.66.1
-tqdm-multiprocess==0.0.11
-traitlets==5.11.2
--e git+https://github.com/huggingface/transformers.git@bbaa8ceff696c479aecdb4575b2deb1349efd3aa#egg=transformers
-triton==2.1.0
-trl==0.8.6
-typepy==1.3.2
-typer==0.12.3
-types-python-dateutil==2.8.19.14
-typing-inspect==0.9.0
-typing_extensions==4.8.0
-tyro==0.8.3
-tzdata==2023.3
-ujson==5.9.0
-unsloth @ git+https://github.com/unslothai/unsloth.git@4211cc01409e3ced4f7abebaf68e244193b46e2c
-uri-template==1.3.0
-urllib3==2.0.6
-uvicorn==0.29.0
-uvloop==0.19.0
-wasabi==1.1.2
-watchfiles==0.21.0
-wavedrom==2.0.3.post3
-wcwidth==0.2.8
-weasel==0.3.4
-webcolors==1.13
-webencodings==0.5.1
-websocket-client==1.6.4
-websockets==11.0.3
-Werkzeug==3.0.1
-word2number==1.1
-wrapt==1.14.1
-xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.22.post7-cp39-cp39-manylinux2014_x86_64.whl
-xxhash==3.4.1
-yarl==1.9.2
-zipp==3.17.0
-zstandard==0.22.0
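pip_freeze.txt pinned the full 330-package environment (torch==2.1.0, datasets==2.19.1, and a transformers source install at commit bbaa8cef). A minimal sketch, assuming a saved copy of the file, for diffing a live environment against those pins:

from importlib.metadata import PackageNotFoundError, version

# Only "name==version" pins can be checked mechanically; the "-e" and "@"
# source installs (transformers, unsloth, xformers) are skipped.
with open("pip_freeze.txt") as f:
    pins = [line.strip() for line in f
            if "==" in line and "@" not in line and not line.startswith("-e")]

for pin in pins:
    name, _, frozen = pin.partition("==")
    try:
        installed = version(name)
    except PackageNotFoundError:
        installed = "missing"
    if installed != frozen:
        print(f"{name}: frozen {frozen}, installed {installed}")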
special_tokens_map.json
DELETED
@@ -1,24 +0,0 @@
-{
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "<unk>",
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}
tokenizer.json
DELETED
The diff for this file is too large to render.
See raw diff
tokenizer.model
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
-size 499723
tokenizer_config.json
DELETED
@@ -1,45 +0,0 @@
-{
-  "add_bos_token": true,
-  "add_eos_token": false,
-  "add_prefix_space": true,
-  "added_tokens_decoder": {
-    "0": {
-      "content": "<unk>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "1": {
-      "content": "<s>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
-      "content": "</s>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "bos_token": "<s>",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "</s>",
-  "legacy": true,
-  "model_max_length": 2048,
-  "pad_token": "<unk>",
-  "padding": "max_length",
-  "return_tensors": "pt",
-  "sp_model_kwargs": {},
-  "spaces_between_special_tokens": false,
-  "tokenizer_class": "LlamaTokenizer",
-  "unk_token": "<unk>",
-  "use_default_system_prompt": false,
-  "use_fast": true
-}
training_args.bin
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9ce5f4c1939d798f9579c06cb7c41ca4f80497b830ef82299a5b5b802ba651a2
-size 5176