Spaces:

sebdg
/

unsloth

Paused

App Files Files Community

Sebastien De Greef committed on Jul 15, 2024

Commit

6baccb3

1 Parent(s): 4af8a78

adds "gradio" to the requirements.txt and handle buttons up to training

Browse files

Files changed (4) hide show

.gitignore +1 -0
app.py +278 -18
requirements.txt +4 -2
unsloth.png +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .venv/*

app.py CHANGED Viewed

@@ -1,32 +1,292 @@
 import gradio as gr
-def process_input(model_name, checkbox1, checkbox2, text1, text2):
-    result = f"Model: {model_name}\nCheckbox 1: {checkbox1}\nCheckbox 2: {checkbox2}\nText Field 1: {text1}\nText Field 2: {text2}"
-    return result
 # Dropdown options
-model_options = ["Model A", "Model B", "Model C"]
 # Create the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("## Gradio Interface Example")
-    # Dropdown for model_name
-    model_name = gr.Dropdown(choices=model_options, label="Select Model")
-    # Checkboxes
-    checkbox1 = gr.Checkbox(label="Checkbox 1")
-    checkbox2 = gr.Checkbox(label="Checkbox 2")
-    # Text fields
-    text1 = gr.Textbox(label="Text Field 1")
-    text2 = gr.Textbox(label="Text Field 2")
     # Output
-    output = gr.Textbox(label="Output")
-    # Button to submit and process the input
-    submit_btn = gr.Button("Submit")
-    submit_btn.click(process_input, inputs=[model_name, checkbox1, checkbox2, text1, text2], outputs=output)
 demo.launch()

 import gradio as gr
+from huggingface_hub import HfApi
+from unsloth import FastLanguageModel
+from trl import SFTTrainer
+from transformers import TrainingArguments, TrainerCallback
+from unsloth import is_bfloat16_supported
+import torch
+from datasets import load_dataset
+import logging
+from io import StringIO
+import time
+import asyncio
+# Create a string stream to capture log messages
+# (in-memory buffer; its accumulated text is read back with log_stream.getvalue()).
+log_stream = StringIO()
+# Configure logging to use the string stream
+# Root logger at DEBUG level: every record that reaches the root handler is appended to log_stream.
+logging.basicConfig(stream=log_stream, level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+# NOTE(review): this module has not emitted any record between basicConfig() and this
+# snapshot, so log_contents is expected to be empty and the print to output a blank line.
+log_contents = log_stream.getvalue()
+print(log_contents)
+# First record written by this module; it goes into log_stream, not into log_contents above.
+logger.debug('This is a debug message')
 # Dropdown options
+# Checkpoints offered in the "Select Base Model" dropdown.
+model_options = [
+    # New Mistral v3 2x faster!
+    "unsloth/mistral-7b-v0.3-bnb-4bit",
+    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
+    # Llama-3 15 trillion tokens model 2x faster!
+    "unsloth/llama-3-8b-bnb-4bit",
+    "unsloth/llama-3-8b-Instruct-bnb-4bit",
+    "unsloth/llama-3-70b-bnb-4bit",
+    # Phi-3 2x faster!
+    "unsloth/Phi-3-mini-4k-instruct",
+    "unsloth/Phi-3-medium-4k-instruct",
+    "unsloth/mistral-7b-bnb-4bit",
+    # Gemma 2x faster!
+    "unsloth/gemma-2-9b-bnb-4bit",
+    "unsloth/gemma-2-27b-bnb-4bit",
+]
+# Properties of CUDA device 0, read once when the module is imported.
+_BYTES_PER_GIB = 1024 ** 3
+gpu_stats = torch.cuda.get_device_properties(0)
+# Peak reserved memory at startup and total device memory, both in GiB rounded to 3 decimals.
+start_gpu_memory = round(torch.cuda.max_memory_reserved() / _BYTES_PER_GIB, 3)
+max_memory = round(gpu_stats.total_memory / _BYTES_PER_GIB, 3)
+# Module-level state shared between the handlers: load_model sets model, tokenizer
+# and max_seq_length; load_data sets dataset.
+model = None
+tokenizer = None
+dataset = None
+max_seq_length = 2048
+class PrinterCallback(TrainerCallback):
+    """Trainer callback that prints training logs and mirrors step progress to the UI.
+    Args:
+        progress: the gr.Progress tracker handed to the event handler, or None to
+            disable UI progress reporting.
+    """
+    # Last global step seen; class-level default, overwritten per instance in on_step_end.
+    step = 0
+    def __init__(self, progress):
+        self.progress = progress
+    def on_log(self, args, state, control, logs=None, **kwargs):
+        """Print the log payload (minus total_flos) on the local main process."""
+        if logs is None:
+            # logs defaults to None; there is nothing to pop from or print.
+            return
+        logs.pop("total_flos", None)
+        if state.is_local_process_zero:
+            print(logs)
+    def on_step_end(self, args, state, control, **kwargs):
+        """Record the finished step and push (step, total) to the progress tracker."""
+        if state.is_local_process_zero:
+            self.step = state.global_step
+            if self.progress is not None:
+                # A gr.Progress tracker is reported to by calling it; update(n) only
+                # advances an iterable wrapped by the tracker, and none is wrapped here.
+                self.progress((self.step, state.max_steps or None), desc="Training")
+            print("**Step ", state.global_step)
+def formatting_prompts_func(examples, prompt):
+    """Render a batch of instruction/input/output rows into training texts.
+    Args:
+        examples: batch mapping with "instruction", "input" and "output" columns.
+        prompt: template with three positional "{}" slots, filled in that column order.
+    Returns:
+        {"text": [...]} with one formatted string per row, each ending in the
+        tokenizer's EOS token.
+    """
+    eos = tokenizer.eos_token
+    rows = zip(examples["instruction"], examples["input"], examples["output"])
+    # The EOS token must close every sample, otherwise generation will go on forever.
+    return {"text": [prompt.format(*row) + eos for row in rows]}
+def load_model(initial_model_name, load_in_4bit, max_sequence_length):
+    """Load the selected base model and tokenizer into the module-level globals.
+    Args:
+        initial_model_name: model id chosen (or typed) in the base-model dropdown.
+        load_in_4bit: whether to load the 4-bit quantized weights.
+        max_sequence_length: maximum sequence length taken from the slider.
+    Returns:
+        A status message followed by five gr.update() objects, one per remaining
+        component listed in the load button's outputs.
+    Raises:
+        gr.Error: if no model name was provided.
+    """
+    global model, tokenizer, max_seq_length
+    if not initial_model_name:
+        # The dropdown is created without a default value, so an untouched form sends None.
+        raise gr.Error("Please select a base model before loading.")
+    # Slider values may arrive as floats; the sequence length must be an integer.
+    max_sequence_length = int(max_sequence_length)
+    model, tokenizer = FastLanguageModel.from_pretrained(
+        model_name = initial_model_name,
+        max_seq_length = max_sequence_length,
+        dtype = None, # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
+        load_in_4bit = load_in_4bit,
+        # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
+    )
+    # Only publish the new length once the load succeeded, so the global always
+    # describes the model that is actually loaded.
+    max_seq_length = max_sequence_length
+    print(log_stream.getvalue())
+    status = f"Model {initial_model_name} loaded, using {max_sequence_length} as max sequence length."
+    return (
+        status,
+        gr.update(visible=True, interactive=True),
+        gr.update(interactive=True),
+        gr.update(interactive=False),
+        gr.update(interactive=False),
+        gr.update(interactive=False),
+    )
+def load_data(dataset_name, data_template_style, data_template):
+    """Download the train split of a dataset and format it with the prompt template.
+    Args:
+        dataset_name: dataset id passed to load_dataset.
+        data_template_style: selected template style; currently not used.
+        data_template: prompt template with three positional "{}" slots.
+    Returns:
+        A status message followed by two gr.update() objects.
+        NOTE(review): the click wiring in this file lists only two output
+        components for this handler - confirm which component the third update is for.
+    Raises:
+        gr.Error: if no model/tokenizer has been loaded yet or no dataset name is given.
+    """
+    global dataset
+    if tokenizer is None:
+        # formatting_prompts_func appends tokenizer.eos_token to every sample.
+        raise gr.Error("Load a base model before loading the dataset.")
+    if not dataset_name:
+        raise gr.Error("Please provide a dataset name.")
+    raw_dataset = load_dataset(dataset_name, split = "train")
+    # Assign the global only after formatting succeeded, so it never holds a
+    # dataset without the "text" column.
+    dataset = raw_dataset.map(lambda examples: formatting_prompts_func(examples, data_template), batched=True)
+    return f"Data loaded {len(dataset)} records loaded.", gr.update(visible=True, interactive=True), gr.update(visible=True, interactive=True)
+async def train_model(model_name: str, lora_r: int, lora_alpha: int, lora_dropout: float, per_device_train_batch_size: int, warmup_steps: int, max_steps: int,
+        gradient_accumulation_steps: int, logging_steps: int, log_to_tensorboard: bool, optim, learning_rate, weight_decay, lr_scheduler_type, seed: int, output_dir, progress= gr.Progress()):
+    """Attach LoRA adapters to the loaded model and fine-tune it on the loaded dataset.
+    Args:
+        model_name: name of the fine-tuned model; only printed here.
+        lora_r, lora_alpha, lora_dropout: LoRA rank, scaling and dropout.
+        per_device_train_batch_size, warmup_steps, max_steps,
+        gradient_accumulation_steps, logging_steps: forwarded to TrainingArguments.
+        log_to_tensorboard: report to TensorBoard when true, to nothing otherwise.
+        optim, learning_rate, weight_decay, lr_scheduler_type: optimizer settings
+            forwarded to TrainingArguments.
+        seed: used for both the LoRA initialisation and the trainer.
+        output_dir: directory handed to TrainingArguments.
+        progress: Gradio progress tracker, passed on to PrinterCallback.
+    Returns:
+        A status message followed by three gr.update() objects.
+        NOTE(review): the click wiring in this file lists only two output
+        components for this handler - confirm the intended targets of the rest.
+    Raises:
+        gr.Error: if the model or the dataset has not been loaded yet.
+    """
+    global model, tokenizer
+    print(f"$$$ Training model {model_name} with {lora_r} R, {lora_alpha} alpha, {lora_dropout} dropout, {per_device_train_batch_size} per device train batch size, {warmup_steps} warmup steps, {max_steps} max steps, {gradient_accumulation_steps} gradient accumulation steps, {logging_steps} logging steps, {log_to_tensorboard} log to tensorboard, {optim} optimizer, {learning_rate} learning rate, {weight_decay} weight decay, {lr_scheduler_type} lr scheduler type, {seed} seed, {output_dir} output dir")
+    if model is None or tokenizer is None:
+        raise gr.Error("Load a base model before training.")
+    if dataset is None:
+        raise gr.Error("Load a dataset before training.")
+    # Number fields may deliver floats (e.g. 16.0); these settings must be integers.
+    lora_r = int(lora_r)
+    lora_alpha = int(lora_alpha)
+    per_device_train_batch_size = int(per_device_train_batch_size)
+    gradient_accumulation_steps = int(gradient_accumulation_steps)
+    warmup_steps = int(warmup_steps)
+    max_steps = int(max_steps)
+    iseed = int(seed)
+    # The optimizer dropdown offers "adamw", which is not a TrainingArguments
+    # optimizer name; map it to the PyTorch AdamW implementation.
+    optim = {"adamw": "adamw_torch"}.get(optim, optim)
+    # NOTE(review): every call wraps the current global model with adapters again;
+    # confirm how a second training run on the same model should behave.
+    model = FastLanguageModel.get_peft_model(
+        model,
+        r = lora_r,
+        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
+                        "gate_proj", "up_proj", "down_proj",],
+        lora_alpha = lora_alpha,
+        lora_dropout = lora_dropout,
+        bias = "none",
+        use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
+        random_state=iseed,
+        use_rslora = False,  # We support rank stabilized LoRA
+        loftq_config = None, # And LoftQ
+    )
+    trainer = SFTTrainer(
+        model = model,
+        tokenizer = tokenizer,
+        train_dataset = dataset,
+        dataset_text_field = "text",
+        max_seq_length = max_seq_length,
+        dataset_num_proc = 2,
+        packing = False, # Can make training 5x faster for short sequences.
+        callbacks = [PrinterCallback(progress)],
+        args = TrainingArguments(
+            per_device_train_batch_size = per_device_train_batch_size,
+            gradient_accumulation_steps = gradient_accumulation_steps,
+            warmup_steps = warmup_steps,
+            max_steps = max_steps, # honour the "Max Steps" field instead of a fixed 60
+            learning_rate = learning_rate,
+            fp16 = not is_bfloat16_supported(),
+            bf16 = is_bfloat16_supported(),
+            logging_steps = logging_steps,
+            optim = optim,
+            weight_decay = weight_decay,
+            lr_scheduler_type = lr_scheduler_type,
+            seed = iseed,
+            # The string "none" disables reporting; None falls back to the library default.
+            report_to="tensorboard" if log_to_tensorboard else "none",
+            output_dir = output_dir
+        ),
+    )
+    # trainer.train() blocks for the whole run; run it in a worker thread so this
+    # coroutine does not stall the event loop while training is in progress.
+    await asyncio.to_thread(trainer.train)
+    # This is returned only after training has completed, so say so.
+    return "Model training finished", gr.update(visible=True, interactive=False), gr.update(visible=True, interactive=True), gr.update(interactive=True)
+def save_model():
+    """Placeholder save handler: nothing is saved; it only returns a status and UI updates.
+    Returns:
+        The status text followed by three gr.update() objects.
+    """
+    status = "Model saved"
+    shown_enabled = gr.update(visible=True, interactive=True)
+    shown_disabled = gr.update(visible=True, interactive=False)
+    disabled = gr.update(interactive=False)
+    return status, shown_enabled, shown_disabled, disabled
 # Create the Gradio interface
 with gr.Blocks() as demo:
+    # Header: logo and the GPU detected at import time.
+    with gr.Column():
+        gr.Image("unsloth.png", width="300px", interactive=False, show_download_button=False, show_label=False)
+    with gr.Column():
+        gr.Markdown(f"**GPU Information:** {gpu_stats.name} ({max_memory} GB)")
+    # Tab 1: choose and load the base model.
+    with gr.Tab("Base Model Parameters"):
+        with gr.Row():
+            initial_model_name = gr.Dropdown(choices=model_options, label="Select Base Model", allow_custom_value=True)
+            load_in_4bit = gr.Checkbox(label="Load 4bit model", value=True)
+        gr.Markdown("### Target Model Parameters")
+        with gr.Row():
+            max_sequence_length = gr.Slider(minimum=128, value=512, step=64, maximum=128*1024, interactive=True, label="Max Sequence Length")
+        load_btn = gr.Button("Load")
+        output = gr.Textbox(label="Model Load Status", value="Model not loaded", interactive=False)
+        gr.Markdown("---")
+    # Tab 2: choose the dataset and the prompt template applied to each row.
+    with gr.Tab("Data Preparation"):
+        with gr.Row():
+            dataset_name = gr.Textbox(label="Dataset Name", value="yahma/alpaca-cleaned")
+            data_template_style = gr.Dropdown(label="Template", choices=["alpaca","custom"], value="alpaca",  allow_custom_value=True)
+        with gr.Row():
+            data_template =  gr.TextArea(label="Data Template", value="""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+### Instruction:
+{}
+### Input:
+{}
+### Response:
+{}""")
+        gr.Markdown("---")
+        output_load_data = gr.Textbox(label="Data Load Status", value="Data not loaded", interactive=False)
+        load_data_btn = gr.Button("Load Dataset", interactive=True)
+        # NOTE(review): load_data returns three values but only two outputs are listed here.
+        load_data_btn.click(load_data, inputs=[dataset_name, data_template_style, data_template], outputs=[output_load_data, load_data_btn])
+    # Tab 3: LoRA and trainer hyper-parameters, plus the Train button.
+    with gr.Tab("Fine-Tuning"):
+        gr.Markdown("""### Fine-Tuned Model Parameters""")
+        with gr.Row():
+            # The dropdown has no initial value, so this field starts out empty.
+            model_name = gr.Textbox(label="Model Name", value=initial_model_name.value, interactive=True)
+        gr.Markdown("""### Lora Parameters""")
+        with gr.Row():
+            lora_r = gr.Number(label="R", value=16, interactive=True)
+            lora_alpha = gr.Number(label="Lora Alpha", value=16, interactive=True)
+            lora_dropout = gr.Number(label="Lora Dropout", value=0.1, interactive=True)
+        gr.Markdown("---")
+        gr.Markdown("""### Training Parameters""")
+        with gr.Row():
+            with gr.Column():
+                with gr.Row():
+                    per_device_train_batch_size = gr.Number(label="Per Device Train Batch Size", value=2, interactive=True)
+                    warmup_steps = gr.Number(label="Warmup Steps", value=5, interactive=True)
+                    max_steps = gr.Number(label="Max Steps", value=60, interactive=True)
+                    gradient_accumulation_steps = gr.Number(label="Gradient Accumulation Steps", value=4, interactive=True)
+                with gr.Row():
+                    logging_steps = gr.Number(label="Logging Steps", value=1, interactive=True)
+                    log_to_tensorboard = gr.Checkbox(label="Log to Tensorboard", value=True, interactive=True)
+                with gr.Row():
+                    optim = gr.Dropdown(choices=["adamw_8bit", "adamw", "sgd"], label="Optimizer", value="adamw_8bit")
+                    learning_rate = gr.Number(label="Learning Rate", value=2e-4, interactive=True)
+                with gr.Row():
+                    weight_decay = gr.Number(label="Weight Decay", value=0.01, interactive=True)
+                    lr_scheduler_type = gr.Dropdown(choices=["linear", "cosine", "constant"], label="LR Scheduler Type", value="linear")
+        gr.Markdown("---")
+        with gr.Row():
+            seed = gr.Number(label="Seed", value=3407, interactive=True)
+            output_dir = gr.Textbox(label="Output Directory", value="outputs", interactive=True)
+        gr.Markdown("---")
+        train_output = gr.Textbox(label="Training Status", value="Model not trained", interactive=False)
+        train_btn = gr.Button("Train", visible=True)
+        # NOTE(review): train_model returns four values but only two outputs are listed here.
+        train_btn.click(train_model, inputs=[model_name, lora_r, lora_alpha, lora_dropout, per_device_train_batch_size, warmup_steps, max_steps, gradient_accumulation_steps, logging_steps, log_to_tensorboard, optim, learning_rate, weight_decay, lr_scheduler_type, seed, output_dir], outputs=[train_output, train_btn])
+    # Tab 4: export options. None of these widgets is wired to a handler yet.
+    with gr.Tab("Save & Push Options"):
+        with gr.Row():
+            gr.Markdown("### Merging Options")
+            with gr.Column():
+                merge_16bit = gr.Checkbox(label="Merge to 16bit", value=False, interactive=True)
+                merge_4bit = gr.Checkbox(label="Merge to 4bit", value=False, interactive=True)
+            just_lora = gr.Checkbox(label="Just LoRA Adapter", value=False, interactive=True)
+        gr.Markdown("---")
+        with gr.Row():
+            gr.Markdown("### GGUF Options")
+            with gr.Column():
+                # Each checkbox gets its own name so it can be referenced later
+                # without clobbering merge_16bit.
+                quantize_f16 = gr.Checkbox(label="Quantize to f16", value=False, interactive=True)
+                quantize_q8_0 = gr.Checkbox(label="Quantize to 8bit (Q8_0)", value=False, interactive=True)
+                quantize_q4_k_m = gr.Checkbox(label="Quantize to 4bit (q4_k_m)", value=False, interactive=True)
+            with gr.Column():
+                merge_custom = gr.Checkbox(label="Custom", value=False, interactive=True)
+                merge_custom_value = gr.Textbox(label="", value="Q5_K", interactive=True)
+        gr.Markdown("---")
+        with gr.Row():
+            gr.Markdown("### Hugging Face Hub Options")
+            push_to_hub = gr.Checkbox(label="Push to Hub", value=False, interactive=True)
+            with gr.Column():
+                hub_model_name = gr.Textbox(label="Hub Model Name", value="username/model_name", interactive=True)
+                hub_token = gr.Textbox(label="Hub Token", interactive=True, type="password")
+                hub_token_btn = gr.Button("HuggingFace Access Token")
+        gr.Markdown("---")
+        with gr.Row():
+            gr.Markdown("### Ollama options")
+            with gr.Column():
+                ollama_create_local = gr.Checkbox(label="Create in Ollama (local)", value=False, interactive=True)
+                ollama_push_to_hub = gr.Checkbox(label="Push to Ollama", value=False, interactive=True)
+            with gr.Column():
+                ollama_model_name = gr.Textbox(label="Ollama Model Name", value="user/model_name")
+                ollama_pub_key = gr.Button("Ollama Pub Key")
+        gr.Markdown("---")
+    # Tab 5: inference. The button is hidden and not wired to a handler yet.
+    with gr.Tab("Inference"):
+        with gr.Row():
+            inference_input = gr.Textbox(label="Input Text", lines=4, value="""\
+Continue the fibonnaci sequence.
+# instruction
+1, 1, 2, 3, 5, 8
+# input
+""", interactive=True)
+            inference_output = gr.Textbox(label="Output Text", lines=4, value="""\
+""", interactive=False)
+        inference_button = gr.Button("Inference", visible=False, interactive=False)
     # Output
+    # Button click events
+    # Wired here rather than next to load_btn because train_btn is created in a later tab.
+    load_btn.click(load_model, inputs=[initial_model_name, load_in_4bit, max_sequence_length], outputs=[output, load_btn, train_btn, initial_model_name, load_in_4bit, max_sequence_length])
 demo.launch()

requirements.txt CHANGED Viewed

@@ -1,6 +1,8 @@
 unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
-xformers
 trl<0.9.0
 peft
 accelerate
-bitsandbytes

 unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
+xformers<0.0.27
 trl<0.9.0
 peft
 accelerate
+bitsandbytes
+gradio
+tensorboard

unsloth.png ADDED Viewed