File size: 8,249 Bytes
ed0dca2 a40632d bdf3e70 38d7f73 2f1a468 1c03d71 bdf3e70 30da7cc 9e53c43 b1cf10f 30da7cc b1cf10f a40632d aeb447f a52308c 6664e37 aeb447f 6664e37 aeb447f 6664e37 aeb447f a52308c aeb447f 2f1a468 aeb447f 6d57565 aeb447f 02fc014 aeb447f dfd7ad6 6d57565 5a49926 2cfc3c5 5a49926 2f1a468 d8dc2cd 2f1a468 30da7cc 2f1a468 42cd34a bdf3e70 2f1a468 ed0dca2 52589e7 17dfda2 c59143e 17dfda2 ab084df f830874 17dfda2 a52308c 36a6459 17dfda2 44d732c 1c03d71 44d732c dac9ef5 1c03d71 ab084df 02fc014 6664e37 17dfda2 6664e37 44d732c 8ac1f0f 6664e37 a52308c eab212d ab084df aeb447f b0f7f09 ab084df 6664e37 fdca17f ab084df 761e8ba 4b6fd72 ab084df f830874 bc14b30 02fc014 bc14b30 17dfda2 f58a3a5 717f53b bc14b30 611772e e751c8d c5cebfc 717f53b e751c8d bc14b30 17dfda2 ab084df 17dfda2 f830874 8c9400b 2f1a468 ab084df 2f1a468 8d7ed18 717f53b bc14b30 8d7ed18 17dfda2 2f1a468 17dfda2 52589e7 17dfda2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 |
##################################### Imports ######################################
# Generic imports
import gradio as gr
import json
# Specialized imports
#from utilities.modeling import modeling
from datasets import load_dataset
# Module imports
from utilities.setup import get_json_cfg
from utilities.templates import prompt_template
########################### Global objects and functions ###########################
conf = get_json_cfg()
class update_visibility:
def textbox_vis(radio):
value = radio
if value == "Hugging Face Hub Dataset":
return gr.Dropdown(visible=bool(1))
else:
return gr.Dropdown(visible=bool(0))
def textbox_button_vis(radio):
value = radio
if value == "Hugging Face Hub Dataset":
return gr.Button(visible=bool(1))
else:
return gr.Button(visible=bool(0))
def upload_vis(radio):
value = radio
if value == "Upload Your Own":
return gr.UploadButton(visible=bool(1)) #make it visible
else:
return gr.UploadButton(visible=bool(0))
class get_datasets:
def predefined_dataset(dataset_name):
dataset = load_dataset(dataset_name, split = "train") #dataset_name
return dataset, 'Successfully loaded dataset'
def uploaded_dataset(file):
global dataset # bad practice, I know... But just bear with me. Will later update to state dict.
dataset = []
if file is None:
return "File not found. Please upload the file again."
try:
with open(file,'r') as file:
for line in file:
dataset.append(json.loads(line.strip()))
print(dataset[0]['query'])
return "File retrieved"
except FileNotFoundError:
return "File not found. Please upload the file again."
def train(model_name,
inject_prompt,
dataset_predefined,
peft,
sft,
max_seq_length,
random_seed,
num_epochs,
max_steps,
data_field,
repository,
model_out_name):
"""The model call"""
# Get models
# trainer = modeling(model_name, max_seq_length, random_seed,
# peft, sft, dataset, data_field)
# trainer_stats = trainer.train()
# Return outputs of training.
return f"Hello!! Using model: {model_name} with template: {inject_prompt}"
def submit_weights(model, repository, model_out_name, token):
"""submits model to repository"""
repo = repository + '/' + model_out_name
model.push_to_hub(repo, token = token)
tokenizer.push_to_hub(repo, token = token)
return 0
##################################### App UI #######################################
def main():
with gr.Blocks() as demo:
##### Title Block #####
gr.Markdown("# Instruction Tuning with Unsloth")
##### Initial Model Inputs #####
gr.Markdown("### Model Inputs")
# Select Model
modelnames = conf['model']['choices']
model_name = gr.Dropdown(label="Supported Models",
choices=modelnames,
value=modelnames[0])
# Prompt template
inject_prompt = gr.Textbox(label="Prompt Template",
value=prompt_template())
# Dataset choice
dataset_choice = gr.Radio(label="Choose Dataset",
choices=["Hugging Face Hub Dataset", "Upload Your Own"],
value="Hugging Face Hub Dataset")
dataset_predefined = gr.Textbox(label="Hugging Face Hub Training Dataset",
value='yahma/alpaca-cleaned',
visible=True)
#dataset_uploaded_load = gr.File(label="Upload Dataset",
# file_types=[".csv",".jsonl", ".txt"],
# visible=False)
dataset_predefined_load = gr.Button("Upload Dataset (.csv, .jsonl, or .txt)")
dataset_uploaded_load = gr.UploadButton(label="Upload Dataset (.csv, .jsonl, or .txt)",
file_types=[".csv",".jsonl", ".txt"],
visible=False)
data_snippet = gr.Markdown()
dataset_choice.change(update_visibility.textbox_vis,
dataset_choice,
dataset_predefined)
dataset_choice.change(update_visibility.upload_vis,
dataset_choice,
dataset_uploaded_load)
dataset_choice.change(update_visibility.textbox_button_vis,
dataset_choice,
dataset_predefined_load)
# Dataset button
dataset_predefined_load.click(fn=get_datasets.predefined_dataset,
inputs=dataset_predefined,
outputs=data_snippet)
dataset_uploaded_load.click(fn=get_datasets.uploaded_dataset,
inputs=dataset_uploaded_load,
outputs=data_snippet)
##### Model Parameter Inputs #####
gr.Markdown("### Model Parameter Selection")
# Parameters
data_field = gr.Textbox(label="Dataset Training Field Name",
value=conf['model']['general']["dataset_text_field"])
max_seq_length = gr.Textbox(label="Maximum sequence length",
value=conf['model']['general']["max_seq_length"])
random_seed = gr.Textbox(label="Seed",
value=conf['model']['general']["seed"])
num_epochs = gr.Textbox(label="Training Epochs",
value=conf['model']['general']["num_train_epochs"])
max_steps = gr.Textbox(label="Maximum steps",
value=conf['model']['general']["max_steps"])
repository = gr.Textbox(label="Repository Name",
value=conf['model']['general']["repository"])
model_out_name = gr.Textbox(label="Model Output Name",
value=conf['model']['general']["model_name"])
# Hyperparameters (allow selection, but hide in accordion.)
with gr.Accordion("Advanced Tuning", open=False):
sftparams = conf['model']['general']
# accordion container content
dict_string = json.dumps(dict(conf['model']['peft']), indent=4)
peft = gr.Textbox(label="PEFT Parameters (json)", value=dict_string)
dict_string = json.dumps(dict(conf['model']['sft']), indent=4)
sft = gr.Textbox(label="SFT Parameters (json)", value=dict_string)
##### Execution #####
# Setup buttons
tune_btn = gr.Button("Start Fine Tuning")
gr.Markdown("### Model Progress")
# Text output (for now)
output = gr.Textbox(label="Output")
# Data retrieval
# Execute buttons
tune_btn.click(fn=train,
inputs=[model_name,
inject_prompt,
dataset_predefined,
peft,
sft,
max_seq_length,
random_seed,
num_epochs,
max_steps,
data_field,
repository,
model_out_name
],
outputs=output)
# stop button
# submit button
# Launch baby
demo.launch()
##################################### Launch #######################################
if __name__ == "__main__":
main() |