tloen/alpaca-lora-7b
.Load training parameters from selected model
button, then un-select it.',
+ allowHTML: true,
+ });
+
+ tippy('#finetune_continue_from_checkpoint', {
+ placement: 'right',
+ delay: [500, 0],
+ animation: 'scale-subtle',
+ content:
+ 'If a checkpoint is selected, training will resume from that specific checkpoint, bypassing any previously completed steps up to the checkpoint\'s moment. Load training parameters from selected model
button and select the same dataset for training.',
+ allowHTML: true,
+ });
+ }, 100);
+
+ // Show/hide start and stop button base on the state.
+ setTimeout(function () {
+ // Make the '#finetune_training_indicator > .wrap' element appear
+ // if (!document.querySelector('#finetune_training_indicator > .wrap')) {
+ // document.getElementById('finetune_confirm_stop_btn').click();
+ // }
+
+ setTimeout(function () {
+ let resetStopButtonTimer;
+ document
+ .getElementById('finetune_stop_btn')
+ .addEventListener('click', function () {
+ if (resetStopButtonTimer) clearTimeout(resetStopButtonTimer);
+ resetStopButtonTimer = setTimeout(function () {
+ document.getElementById('finetune_stop_btn').style.display = 'block';
+ document.getElementById('finetune_confirm_stop_btn').style.display =
+ 'none';
+ }, 5000);
+ document.getElementById('finetune_confirm_stop_btn').style['pointer-events'] =
+ 'none';
+ setTimeout(function () {
+ document.getElementById('finetune_confirm_stop_btn').style['pointer-events'] =
+ 'inherit';
+ }, 300);
+ document.getElementById('finetune_stop_btn').style.display = 'none';
+ document.getElementById('finetune_confirm_stop_btn').style.display =
+ 'block';
+ });
+ // const training_indicator_wrap_element = document.querySelector(
+ // '#finetune_training_indicator > .wrap'
+ // );
+ const training_indicator_element = document.querySelector(
+ '#finetune_training_indicator'
+ );
+ let isTraining = undefined;
+ function handle_training_indicator_change() {
+ // const wrapperHidden = Array.from(training_indicator_wrap_element.classList).includes('hide');
+ const hidden = Array.from(training_indicator_element.classList).includes('hidden');
+ const newIsTraining = !(/* wrapperHidden && */ hidden);
+ if (newIsTraining === isTraining) return;
+ isTraining = newIsTraining;
+ if (!isTraining) {
+ if (resetStopButtonTimer) clearTimeout(resetStopButtonTimer);
+ document.getElementById('finetune_start_btn').style.display = 'block';
+ document.getElementById('finetune_stop_btn').style.display = 'none';
+ document.getElementById('finetune_confirm_stop_btn').style.display =
+ 'none';
+ } else {
+ document.getElementById('finetune_start_btn').style.display = 'none';
+ document.getElementById('finetune_stop_btn').style.display = 'block';
+ document.getElementById('finetune_confirm_stop_btn').style.display =
+ 'none';
+ }
+ }
+ // new MutationObserver(function (mutationsList, observer) {
+ // handle_training_indicator_change();
+ // }).observe(training_indicator_wrap_element, {
+ // attributes: true,
+ // attributeFilter: ['class'],
+ // });
+ new MutationObserver(function (mutationsList, observer) {
+ handle_training_indicator_change();
+ }).observe(training_indicator_element, {
+ attributes: true,
+ attributeFilter: ['class'],
+ });
+ handle_training_indicator_change();
+ }, 500);
+ }, 0);
+
+ return [];
+}
diff --git a/llama_lora/ui/finetune/style.css b/llama_lora/ui/finetune/style.css
new file mode 100644
index 0000000000000000000000000000000000000000..b5d280bb2c0429fd5040984f36e25dd3a4c37582
--- /dev/null
+++ b/llama_lora/ui/finetune/style.css
@@ -0,0 +1,421 @@
/* Styles for the Finetune tab UI (Gradio component overrides). */

#finetune_dataset_text_load_sample_button {
  margin: -4px 12px 8px;
}

/* Floating refresh button pinned to the top-right corner of the pane. */
#finetune_reload_selections_button {
  position: absolute;
  top: 0;
  right: 0;
  margin: 16px;
  margin-bottom: auto;
  height: 42px !important;
  min-width: 42px !important;
  width: 42px !important;
  z-index: 1;
}

#finetune_dataset_from_data_dir {
  border: 0;
  box-shadow: none;
}

/* Inject a "Training Dataset:" label before the dataset-source tab bar. */
#finetune_ui_content > .tabs > .tab-nav::before {
  content: "Training Dataset:";
  display: flex;
  justify-content: center;
  align-items: center;
  padding-right: 12px;
  padding-left: 8px;
}

#finetune_template,
#finetune_template + * {
  border: 0;
  box-shadow: none;
}

#finetune_dataset_text_input_group .form {
  border: 0;
  box-shadow: none;
  padding: 0;
}

#finetune_dataset_text_input_textbox > .wrap:last-of-type {
  margin-top: -20px;
}

/* Compact styling for the plain-text separator options. */
#finetune_dataset_plain_text_separators_group * {
  font-size: 0.8rem;
}
#finetune_dataset_plain_text_separators_group textarea {
  height: auto !important;
}
#finetune_dataset_plain_text_separators_group > .form {
  gap: 0 !important;
}

#finetune_dataset_from_text_message p,
#finetune_dataset_from_text_message + * p {
  font-size: 80%;
}
#finetune_dataset_from_text_message,
#finetune_dataset_from_text_message *,
#finetune_dataset_from_text_message + *,
#finetune_dataset_from_text_message + * * {
  display: inline;
}

#finetune_dataset_from_data_dir_message,
#finetune_dataset_from_data_dir_message * {
  min-height: 0 !important;
}
#finetune_dataset_from_data_dir_message {
  margin: -20px 24px 0;
  font-size: 0.8rem;
}

/* Hide the default loading-indicator child and tuck the wrap/spinner in. */
#finetune_dataset_from_text_message > .wrap > *:first-child,
#finetune_dataset_from_data_dir_message > .wrap > *:first-child {
  display: none;
}
#finetune_dataset_from_data_dir_message > .wrap {
  top: -18px;
}
#finetune_dataset_from_text_message > .wrap svg,
#finetune_dataset_from_data_dir_message > .wrap svg {
  margin: -32px -16px;
}

#finetune_continue_from_model_box {
  /* padding: 0; */
}
#finetune_continue_from_model_box .block {
  border: 0;
  box-shadow: none;
  padding: 0;
}
#finetune_continue_from_model_box > * {
  /* gap: 0; */
}
#finetune_continue_from_model_box button {
  margin-top: 16px;
}
#finetune_continue_from_model {
  flex-grow: 2;
}

.finetune_dataset_error_message {
  color: var(--error-text-color) !important;
}

#finetune_dataset_preview_info_message {
  align-items: flex-end;
  flex-direction: row;
  display: flex;
  margin-bottom: -4px;
}

/* Preserve newlines in dataset preview cells. */
#finetune_dataset_preview td {
  white-space: pre-wrap;
}

/*
#finetune_dataset_preview {
  max-height: 100vh;
  overflow: auto;
  border: var(--block-border-width) solid var(--border-color-primary);
  border-radius: var(--radius-lg);
}
#finetune_dataset_preview .table-wrap {
  border: 0 !important;
}
*/

#finetune_max_seq_length {
  flex: 2;
}

/* Visually merge the LoRA target-modules boxes with the form above them. */
#finetune_lora_target_modules_box,
#finetune_lora_target_modules_box + #finetune_lora_modules_to_save_box {
  margin-top: calc((var(--layout-gap) + 8px) * -1);
  flex-grow: 0 !important;
}
#finetune_lora_target_modules_box > .form,
#finetune_lora_target_modules_box + #finetune_lora_modules_to_save_box > .form {
  padding-top: calc((var(--layout-gap) + 8px) / 3);
  border-top: 0;
  border-top-left-radius: 0;
  border-top-right-radius: 0;
  background: var(--block-background-fill);
  position: relative;
}
/* Hairline separator drawn where the merged boxes meet. */
#finetune_lora_target_modules_box > .form::before,
#finetune_lora_target_modules_box + #finetune_lora_modules_to_save_box > .form::before {
  content: "";
  display: block;
  position: absolute;
  top: calc((var(--layout-gap) + 8px) / 3);
  left: 0;
  right: 0;
  height: 1px;
  z-index: 1;
  background: var(--block-border-color);
}
#finetune_lora_target_modules_add_box,
#finetune_lora_modules_to_save_add_box {
  margin-top: -24px;
  padding-top: 8px;
  border-top-left-radius: 0;
  border-top-right-radius: 0;
  border-top: 0;
}
#finetune_lora_target_modules_add_box > * > .form,
#finetune_lora_modules_to_save_add_box > * > .form {
  border: 0;
  box-shadow: none;
}
#finetune_lora_target_modules_add,
#finetune_lora_modules_to_save_add {
  padding: 0;
}
#finetune_lora_target_modules_add input,
#finetune_lora_modules_to_save_add input {
  padding: 4px 8px;
}
#finetune_lora_target_modules_add_btn,
#finetune_lora_modules_to_save_add_btn {
  min-width: 60px;
}

#finetune_advance_lora_options_accordion > *:last-child:not(.label-wrap) > *:first-child {
  margin-top: 8px;
}
#finetune_advance_lora_options_accordion #finetune_lora_modules_to_save,
#finetune_advance_lora_options_accordion #finetune_lora_modules_to_save_add_box {
  padding: var(--spacing-lg);
  background: var(--panel-background-fill);
  border: 0;
}
#finetune_advance_lora_options_accordion #finetune_lora_modules_to_save_box > .form,
#finetune_advance_lora_options_accordion #finetune_lora_modules_to_save,
#finetune_advance_lora_options_accordion #finetune_lora_modules_to_save_add_box {
  border: 0;
}

/* Compact number inputs for save/logging step settings. */
#finetune_save_total_limit,
#finetune_save_steps,
#finetune_logging_steps {
  min-width: min(120px,100%) !important;
  padding-top: 4px;
}
#finetune_save_total_limit span,
#finetune_save_steps span,
#finetune_logging_steps span {
  font-size: 12px;
  margin-bottom: 5px;
}
#finetune_save_total_limit input,
#finetune_save_steps input,
#finetune_logging_steps input {
  padding: 4px 8px;
}

#finetune_advance_options_accordion > *:last-child:not(.label-wrap) > *:first-child {
  margin-top: 8px;
}
#finetune_advanced_options_checkboxes > * > * {
  min-width: auto;
}

#finetune_log_and_save_options_group_container {
  flex-grow: 0 !important;
}
#finetune_model_name_group {
  flex-grow: 0 !important;
}

#finetune_eval_data_group {
  flex-grow: 0 !important;
}

#finetune_additional_training_arguments_box > .form,
#finetune_additional_lora_config_box > .form {
  border: 0;
  background: transparent;
}
.form:has(> #finetune_additional_training_arguments_textbox_for_label_display),
.form:has(> #finetune_additional_lora_config_textbox_for_label_display) {
  box-shadow: none;
  border-radius: 0;
  margin-bottom: -8px;
}
/* These textboxes exist only to show a label; hide their textareas. */
#finetune_additional_training_arguments_textbox_for_label_display,
#finetune_additional_lora_config_textbox_for_label_display {
  padding: 0;
  margin-bottom: -8px;
  background: transparent;
}
#finetune_additional_training_arguments_textbox_for_label_display textarea,
#finetune_additional_lora_config_textbox_for_label_display textarea {
  display: none;
}

/* Make Gradio's loading overlay transparent and non-interactive here. */
#finetune_training_status > .wrap,
#finetune_loss_plot_container > .wrap,
#finetune_loss_plot > .wrap {
  border: 0;
  background: transparent;
  pointer-events: none;
  top: 0;
  bottom: 0;
  left: 0;
  right: 0;
}
#finetune_training_status > .wrap:not(.generating)::after {
  content: "Refresh the page if this takes too long.";
  position: absolute;
  top: 0;
  left: 0;
  right: 0;
  bottom: 0;
  padding-top: 64px;
  opacity: 0.5;
  text-align: center;
}
#finetune_training_status > .wrap .meta-text-center {
  transform: none !important;
}

/* Custom progress panel rendered by render_training_status(). */
#finetune_training_status .progress-block {
  min-height: 100px;
  display: flex;
  flex-direction: column;
  justify-content: center;
  align-items: center;
  background: var(--panel-background-fill);
  border-radius: var(--radius-lg);
  border: var(--block-border-width) solid var(--border-color-primary);
  padding: var(--block-padding);
}
#finetune_training_status .progress-block.is_training {
  min-height: 160px;
}
#finetune_training_status .progress-block .empty-text {
  text-transform: uppercase;
  font-weight: 700;
  font-size: 120%;
  opacity: 0.12;
}
#finetune_training_status .progress-block .meta-text {
  position: absolute;
  top: 0;
  right: 0;
  z-index: var(--layer-2);
  padding: var(--size-1) var(--size-2);
  font-size: var(--text-sm);
  font-family: var(--font-mono);
  text-align: right;
}
#finetune_training_status .progress-block .status {
  white-space: pre-wrap;
}
#finetune_training_status .progress-block .progress-level {
  flex-grow: 1;
  display: flex;
  flex-direction: column;
  justify-content: center;
  align-items: center;
  z-index: var(--layer-2);
  width: var(--size-full);
  padding: 8px 0;
  text-align: center;
}
#finetune_training_status .progress-block .progress-level-inner {
  margin: var(--size-2) auto;
  color: var(--body-text-color);
  font-size: var(--text-sm);
  font-family: var(--font-mono);
}
#finetune_training_status .progress-block .progress-bar-wrap {
  border: 1px solid var(--border-color-primary);
  background: var(--background-fill-primary);
  width: 55.5%;
  height: var(--size-4);
}
#finetune_training_status .progress-block .progress-bar {
  transform-origin: left;
  background-color: var(--loader-color);
  width: var(--size-full);
  height: var(--size-full);
  transition: all 150ms ease 0s;
}

#finetune_training_status .progress-block .params-info {
  font-size: var(--text-sm);
  font-weight: var(--weight-light);
  margin-top: 8px;
  margin-bottom: -4px !important;
  opacity: 0.4;
}
#finetune_training_status .progress-block .progress-level + .params-info {
  margin-top: -8px;
}

/* Completed-training output panel. */
#finetune_training_status .progress-block .output {
  display: flex;
  flex-direction: column;
  justify-content: center;
  align-items: center;
}
#finetune_training_status .progress-block .output .title {
  padding: var(--size-1) var(--size-3);
  font-weight: var(--weight-bold);
  font-size: var(--text-lg);
  line-height: var(--line-xs);
}
#finetune_training_status .progress-block .output .message {
  padding: var(--size-1) var(--size-3);
  color: var(--body-text-color) !important;
  font-family: var(--font-mono);
  white-space: pre-wrap;
}

/* Error panel. */
#finetune_training_status .progress-block .error {
  display: flex;
  flex-direction: column;
  justify-content: center;
  align-items: center;
}
#finetune_training_status .progress-block .error .title {
  padding: var(--size-1) var(--size-3);
  color: var(--color-red-500);
  font-weight: var(--weight-bold);
  font-size: var(--text-lg);
  line-height: var(--line-xs);
}
#finetune_training_status .progress-block .error .error-message {
  padding: var(--size-1) var(--size-3);
  color: var(--body-text-color) !important;
  font-family: var(--font-mono);
  white-space: pre-wrap;
}
#finetune_training_status .progress-block.is_error {
  /* background: var(--error-background-fill) !important; */
  border: 1px solid var(--error-border-color) !important;
}
#finetune_loss_plot {
  padding: var(--block-padding);
}
#finetune_loss_plot .altair {
  overflow: auto !important;
}
#finetune_loss_plot .altair > * {
  margin: auto !important;
}
#finetune_loss_plot .vega-embed summary {
  border: 0;
  box-shadow: none;
}

/* The indicator is never shown; scripts watch its class list to detect
   training state (see the MutationObserver in the finetune JS). */
#finetune_training_indicator { display: none; }
diff --git a/llama_lora/ui/finetune/training.py b/llama_lora/ui/finetune/training.py
new file mode 100644
index 0000000000000000000000000000000000000000..e73713a823fdaa1000032d83246adfa18c1be014
--- /dev/null
+++ b/llama_lora/ui/finetune/training.py
@@ -0,0 +1,525 @@
+import os
+import json
+import time
+import math
+import datetime
+import pytz
+import socket
+import threading
+import traceback
+import altair as alt
+import pandas as pd
+import gradio as gr
+
+from huggingface_hub import try_to_load_from_cache, snapshot_download
+from transformers import TrainingArguments
+
+from ...config import Config
+from ...globals import Global
+from ...models import clear_cache, unload_models
+from ...utils.prompter import Prompter
+from ...utils.sample_evenly import sample_evenly
+from ..trainer_callback import (
+ UiTrainerCallback, reset_training_status,
+ update_training_states, set_train_output
+)
+
+from .data_processing import get_data_from_input
+
+
def status_message_callback(message):
    """Receive a status message from the training backend.

    Returns a truthy value (True) when the user has requested that
    training stop, signalling the caller to abort; otherwise stores the
    message for the UI to display and returns None.
    """
    # An abort request takes priority over publishing the status text.
    should_abort = Global.should_stop_training
    if should_abort:
        return True

    Global.training_status_text = message
+
+
def params_info_callback(all_params, trainable_params):
    """Publish a human-readable parameter-count summary for the UI.

    :param all_params: total number of model parameters.
    :param trainable_params: number of parameters being trained.
    """
    # Guard against ZeroDivisionError if the backend reports 0 total params
    # (e.g. before the model is fully materialized).
    percentage = (100 * trainable_params / all_params) if all_params else 0.0
    Global.training_params_info_text = (
        f"Params: {trainable_params}/{all_params} "
        f"({percentage:.4f}% trainable)"
    )
+
+
def do_train(
    # Dataset
    template,
    load_dataset_from,
    dataset_from_data_dir,
    dataset_text,
    dataset_text_format,
    dataset_plain_text_input_variables_separator,
    dataset_plain_text_input_and_output_separator,
    dataset_plain_text_data_separator,
    # Training Options
    max_seq_length,
    evaluate_data_count,
    micro_batch_size,
    gradient_accumulation_steps,
    epochs,
    learning_rate,
    train_on_inputs,
    lora_r,
    lora_alpha,
    lora_dropout,
    lora_target_modules,
    lora_modules_to_save,
    load_in_8bit,
    fp16,
    bf16,
    gradient_checkpointing,
    save_steps,
    save_total_limit,
    logging_steps,
    additional_training_arguments,
    additional_lora_config,
    model_name,
    continue_from_model,
    continue_from_checkpoint,
    progress=gr.Progress(track_tqdm=False),
):
    """Kick off a LoRA fine-tuning run from the Gradio UI.

    Validates the resume-from-model/checkpoint selection and the output
    directory, assembles the training arguments, loads the dataset, then
    launches the actual training on a daemon background thread so the UI
    stays responsive. Returns immediately with the rendered training
    status plus loss plot (the UI polls those renderers for updates).
    Errors are not raised to the caller; they are recorded in
    Global.training_error_message for the status renderer to display.
    """
    # Re-entrancy guard: if a run is already active or starting, just
    # re-render the current state instead of starting another one.
    if Global.is_training or Global.is_train_starting:
        return render_training_status() + render_loss_plot()

    reset_training_status()
    Global.is_train_starting = True

    try:
        base_model_name = Global.base_model_name
        tokenizer_name = Global.tokenizer_name or Global.base_model_name

        # Resolve what to pass as resume_from_checkpoint:
        # - a specific checkpoint dir (must contain pytorch_model.bin), or
        # - a previously trained model dir (must contain adapter_model.bin),
        #   falling back to the Hugging Face cache if not found locally.
        resume_from_checkpoint_param = None
        if continue_from_model == "-" or continue_from_model == "None":
            continue_from_model = None
        if continue_from_checkpoint == "-" or continue_from_checkpoint == "None":
            continue_from_checkpoint = None
        if continue_from_model:
            resume_from_model_path = os.path.join(
                Config.data_dir, "lora_models", continue_from_model)
            resume_from_checkpoint_param = resume_from_model_path
            if continue_from_checkpoint:
                resume_from_checkpoint_param = os.path.join(
                    resume_from_checkpoint_param, continue_from_checkpoint)
                will_be_resume_from_checkpoint_file = os.path.join(
                    resume_from_checkpoint_param, "pytorch_model.bin")
                if not os.path.exists(will_be_resume_from_checkpoint_file):
                    raise ValueError(
                        f"Unable to resume from checkpoint {continue_from_model}/{continue_from_checkpoint}. Resuming is only possible from checkpoints stored locally in the data directory. Please ensure that the file '{will_be_resume_from_checkpoint_file}' exists.")
            else:
                will_be_resume_from_checkpoint_file = os.path.join(
                    resume_from_checkpoint_param, "adapter_model.bin")
                if not os.path.exists(will_be_resume_from_checkpoint_file):
                    # Try to get model in Hugging Face cache
                    resume_from_checkpoint_param = None
                    possible_hf_model_name = None
                    possible_model_info_file = os.path.join(
                        resume_from_model_path, "info.json")
                    # A name containing "/" is assumed to be a HF repo id;
                    # otherwise look up hf_model_name from the model's info.json.
                    if "/" in continue_from_model:
                        possible_hf_model_name = continue_from_model
                    elif os.path.exists(possible_model_info_file):
                        with open(possible_model_info_file, "r") as file:
                            model_info = json.load(file)
                            possible_hf_model_name = model_info.get(
                                "hf_model_name")
                    if possible_hf_model_name:
                        possible_hf_model_cached_path = try_to_load_from_cache(
                            possible_hf_model_name, 'adapter_model.bin')
                        # Download into the cache if it isn't there yet.
                        if not possible_hf_model_cached_path:
                            snapshot_download(possible_hf_model_name)
                            possible_hf_model_cached_path = try_to_load_from_cache(
                                possible_hf_model_name, 'adapter_model.bin')
                        if possible_hf_model_cached_path:
                            resume_from_checkpoint_param = os.path.dirname(
                                possible_hf_model_cached_path)

                    if not resume_from_checkpoint_param:
                        raise ValueError(
                            f"Unable to continue from model {continue_from_model}. Continuation is only possible from models stored locally in the data directory. Please ensure that the file '{will_be_resume_from_checkpoint_file}' exists.")

        # Refuse to overwrite an existing trained model (a dir containing
        # adapter_config.json) or a non-directory path.
        output_dir = os.path.join(Config.data_dir, "lora_models", model_name)
        if os.path.exists(output_dir):
            if (not os.path.isdir(output_dir)) or os.path.exists(os.path.join(output_dir, 'adapter_config.json')):
                raise ValueError(
                    f"The output directory already exists and is not empty. ({output_dir})")

        # Group/tag the run in Weights & Biases by template (and dataset).
        wandb_group = template
        wandb_tags = [f"template:{template}"]
        if load_dataset_from == "Data Dir" and dataset_from_data_dir:
            wandb_group += f"/{dataset_from_data_dir}"
            wandb_tags.append(f"dataset:{dataset_from_data_dir}")

        # Keyword arguments forwarded to Global.finetune_train_fn below.
        finetune_args = {
            'base_model': base_model_name,
            'tokenizer': tokenizer_name,
            'output_dir': output_dir,
            'micro_batch_size': micro_batch_size,
            'gradient_accumulation_steps': gradient_accumulation_steps,
            'num_train_epochs': epochs,
            'learning_rate': learning_rate,
            'cutoff_len': max_seq_length,
            'val_set_size': evaluate_data_count,
            'lora_r': lora_r,
            'lora_alpha': lora_alpha,
            'lora_dropout': lora_dropout,
            'lora_target_modules': lora_target_modules,
            'lora_modules_to_save': lora_modules_to_save,
            'train_on_inputs': train_on_inputs,
            'load_in_8bit': load_in_8bit,
            'fp16': fp16,
            'bf16': bf16,
            'gradient_checkpointing': gradient_checkpointing,
            'group_by_length': False,
            'resume_from_checkpoint': resume_from_checkpoint_param,
            'save_steps': save_steps,
            'save_total_limit': save_total_limit,
            'logging_steps': logging_steps,
            'additional_training_arguments': additional_training_arguments,
            'additional_lora_config': additional_lora_config,
            'wandb_api_key': Config.wandb_api_key,
            'wandb_project': Config.default_wandb_project if Config.enable_wandb else None,
            'wandb_group': wandb_group,
            'wandb_run_name': model_name,
            'wandb_tags': wandb_tags
        }

        prompter = Prompter(template)
        data = get_data_from_input(
            load_dataset_from=load_dataset_from,
            dataset_text=dataset_text,
            dataset_text_format=dataset_text_format,
            dataset_plain_text_input_variables_separator=dataset_plain_text_input_variables_separator,
            dataset_plain_text_input_and_output_separator=dataset_plain_text_input_and_output_separator,
            dataset_plain_text_data_separator=dataset_plain_text_data_separator,
            dataset_from_data_dir=dataset_from_data_dir,
            prompter=prompter
        )

        def training():
            # Runs on a background thread; communicates with the UI only
            # through Global state (no return value is observed).
            Global.is_training = True

            try:
                # Need RAM for training
                unload_models()
                Global.new_base_model_that_is_ready_to_be_used = None
                Global.name_of_new_base_model_that_is_ready_to_be_used = None
                clear_cache()

                train_data = prompter.get_train_data_from_dataset(data)

                if Config.ui_dev_mode:
                    # Dev mode: simulate a training run (fake decaying loss
                    # curve) so the UI can be exercised without a GPU.
                    Global.training_args = TrainingArguments(
                        logging_steps=logging_steps, output_dir=""
                    )

                    message = "Currently in UI dev mode, not doing the actual training."
                    message += f"\n\nArgs: {json.dumps(finetune_args, indent=2)}"
                    message += f"\n\nTrain data (first 5):\n{json.dumps(train_data[:5], indent=2)}"

                    print(message)

                    total_epochs = epochs
                    total_steps = len(train_data) * epochs
                    # Ensure the simulation runs long enough to be visible.
                    if total_steps < 1500:
                        total_steps = 1500
                    log_history = []
                    initial_loss = 2
                    loss_decay_rate = 0.8
                    for i in range(total_steps):
                        if (Global.should_stop_training):
                            break

                        current_step = i + 1
                        current_epoch = i / (total_steps / total_epochs)

                        # Emit a fake exponentially-decaying loss sample at
                        # every logging interval.
                        if (current_step % logging_steps == 0):
                            loss = initial_loss * \
                                math.exp(-loss_decay_rate * current_epoch)
                            log_history.append({
                                'loss': loss,
                                'learning_rate': 0.0001,
                                'epoch': current_epoch
                            })

                        update_training_states(
                            total_steps=total_steps,
                            current_step=current_step,
                            total_epochs=total_epochs,
                            current_epoch=current_epoch,
                            log_history=log_history
                        )
                        time.sleep(0.01)

                    result_message = set_train_output(message)
                    print(result_message)
                    time.sleep(3)
                    Global.is_training = False
                    return

                training_callbacks = [UiTrainerCallback]

                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)

                # Record run metadata alongside the trained model; also
                # passed to wandb as additional config.
                with open(os.path.join(output_dir, "info.json"), 'w') as info_json_file:
                    dataset_name = "N/A (from text input)"
                    if load_dataset_from == "Data Dir":
                        dataset_name = dataset_from_data_dir

                    info = {
                        'base_model': base_model_name,
                        'prompt_template': template,
                        'dataset_name': dataset_name,
                        'dataset_rows': len(train_data),
                        'trained_on_machine': socket.gethostname(),
                        'timestamp': time.time(),
                    }
                    if continue_from_model:
                        info['continued_from_model'] = continue_from_model
                        if continue_from_checkpoint:
                            info['continued_from_checkpoint'] = continue_from_checkpoint

                    if Global.version:
                        info['tuner_version'] = Global.version

                    json.dump(info, info_json_file, indent=2)

                train_output = Global.finetune_train_fn(
                    train_data=train_data,
                    callbacks=training_callbacks,
                    status_message_callback=status_message_callback,
                    params_info_callback=params_info_callback,
                    additional_wandb_config=info,
                    **finetune_args,
                )

                result_message = set_train_output(train_output)
                print(result_message + "\n" + str(train_output))

                clear_cache()

                Global.is_training = False

            except Exception as e:
                traceback.print_exc()
                Global.training_error_message = str(e)
            finally:
                # Always clear the flag, even on error, so the UI unlocks.
                Global.is_training = False

        # Daemon thread: won't block process shutdown.
        training_thread = threading.Thread(target=training)
        training_thread.daemon = True
        training_thread.start()

    except Exception as e:
        Global.is_training = False
        traceback.print_exc()
        Global.training_error_message = str(e)
    finally:
        Global.is_train_starting = False

    return render_training_status() + render_loss_plot()
+
+
+def render_training_status():
+ if not Global.is_training:
+ if Global.is_train_starting:
+ html_content = """
+ tloen/alpaca-lora-7b
).',
+ allowHTML: true,
+ });
+
+ tippy('#inference_prompt_template', {
+ placement: 'top-start',
+ delay: [500, 0],
+ animation: 'scale-subtle',
+ content:
+ 'Templates are loaded from the "templates" folder of your data directory. Be sure to select the template that matches your selected LoRA model to get the best results.',
+ });
+
+ tippy('#inference_reload_selections_button', {
+ placement: 'bottom-end',
+ delay: [500, 0],
+ animation: 'scale-subtle',
+ content: 'Press to reload LoRA Model and Prompt Template selections.',
+ });
+
+ document
+ .querySelector('#inference_preview_prompt_container .label-wrap')
+ .addEventListener('click', function () {
+ tippy('#inference_preview_prompt', {
+ placement: 'right',
+ delay: [500, 0],
+ animation: 'scale-subtle',
+ content: 'This is the prompt that will be sent to the language model.',
+ });
+
+ const update_btn = document.getElementById(
+ 'inference_update_prompt_preview_btn'
+ );
+ if (update_btn) update_btn.click();
+ });
+
+ function setTooltipForOptions() {
+ tippy('#inference_temperature', {
+ placement: 'right',
+ delay: [500, 0],
+ animation: 'scale-subtle',
+ content:
+ 'Controls randomness: Higher values (e.g., 1.0
) make the model generate more diverse and random outputs. As the temperature approaches zero, the model will become deterministic and repetitive.0
will enable sampling.',
+ allowHTML: true,
+ });
+
+ tippy('#inference_top_p', {
+ placement: 'right',
+ delay: [500, 0],
+ animation: 'scale-subtle',
+ content:
+ 'Controls diversity via nucleus sampling: only the tokens whose cumulative probability exceeds top_p
are considered. 0.5
means half of all likelihood-weighted options are considered.top_k
tokens with the highest probabilities. This method can lead to more focused and coherent outputs by reducing the impact of low probability tokens.