Spaces:

Gopal2002
/

dpo_training

Sleeping

App Files Files Community

Gopal2002 committed on Apr 17, 2024

Commit

4a047b7

verified ·

1 Parent(s): 3367f4b

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -81

app.py CHANGED Viewed

@@ -29,88 +29,90 @@ def greet(traindata_,output_repo):
     # new_model = "Gopal2002/zehpyr-gemma-dpo-finetune"
     new_model = output_repo
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    tokenizer.pad_token = tokenizer.eos_token
-    tokenizer.padding_side = "left"
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16,
-        load_in_4bit=True
-    )
-    model.config.use_cache = False
-    # Reference model
-    ref_model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16,
-        load_in_4bit=True
-    )
-    # specify how to quantize the model
-    quantization_config = BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_quant_type="nf4",
-                bnb_4bit_compute_dtype=torch.bfloat16,
-    )
-    device_map = {"": torch.cuda.current_device()} if torch.cuda.is_available() else None
-    # Step 1: load the base model (Mistral-7B in our case) in 4-bit
-    model_kwargs = dict(
-        # attn_implementation="flash_attention_2", # set this to True if your GPU supports it (Flash Attention drastically speeds up model computations)
-        torch_dtype="auto",
-        use_cache=False,  # set to False as we're going to use gradient checkpointing
-        device_map=device_map,
-        quantization_config=quantization_config,
-    )
-    model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
-# Training arguments
-    peft_config = LoraConfig(
-        r=16,
-        lora_alpha=16,
-        lora_dropout=0.05,
-        bias="none",
-        task_type="CAUSAL_LM",
-        target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
-    )
-    training_args = TrainingArguments(
-        per_device_train_batch_size=4,
-        gradient_accumulation_steps=4,
-        gradient_checkpointing=True,
-        learning_rate=5e-5,
-        lr_scheduler_type="cosine",
-        max_steps=200,
-        save_strategy="no",
-        logging_steps=1,
-        output_dir=new_model,
-        optim="paged_adamw_32bit",
-        warmup_steps=100,
-        bf16=True,
-        report_to="wandb",
-    )
-#load the dataset
-    dataset = load_dataset(traindata_, split='train')
-    # dataset = load_dataset('Gopal2002/zephyr-gemma-finetune-dpo', split='train')
-# Create DPO trainer
-    dpo_trainer = DPOTrainer(
-        model,
-        ref_model=None,
-        args=training_args,
-        train_dataset=dataset,
-        tokenizer=tokenizer,
-        peft_config=peft_config,
-        beta=0.1,
-        max_prompt_length=2048,
-        max_length=1536,
-    )
-    dpo_trainer.train()
-    return "Training Done"
 with gr.Blocks() as demo:

     # new_model = "Gopal2002/zehpyr-gemma-dpo-finetune"
     new_model = output_repo
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        tokenizer.pad_token = tokenizer.eos_token
+        tokenizer.padding_side = "left"
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16,
+            load_in_4bit=True
+        )
+        model.config.use_cache = False
+        # Reference model
+        ref_model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16,
+            load_in_4bit=True
+        )
+        # specify how to quantize the model
+        quantization_config = BitsAndBytesConfig(
+                    load_in_4bit=True,
+                    bnb_4bit_quant_type="nf4",
+                    bnb_4bit_compute_dtype=torch.bfloat16,
+        )
+        device_map = {"": torch.cuda.current_device()} if torch.cuda.is_available() else None
+        # Step 1: load the base model (Mistral-7B in our case) in 4-bit
+        model_kwargs = dict(
+            # attn_implementation="flash_attention_2", # set this to True if your GPU supports it (Flash Attention drastically speeds up model computations)
+            torch_dtype="auto",
+            use_cache=False,  # set to False as we're going to use gradient checkpointing
+            device_map=device_map,
+            quantization_config=quantization_config,
+        )
+        model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
+    # Training arguments
+        peft_config = LoraConfig(
+            r=16,
+            lora_alpha=16,
+            lora_dropout=0.05,
+            bias="none",
+            task_type="CAUSAL_LM",
+            target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
+        )
+        training_args = TrainingArguments(
+            per_device_train_batch_size=4,
+            gradient_accumulation_steps=4,
+            gradient_checkpointing=True,
+            learning_rate=5e-5,
+            lr_scheduler_type="cosine",
+            max_steps=200,
+            save_strategy="no",
+            logging_steps=1,
+            output_dir=new_model,
+            optim="paged_adamw_32bit",
+            warmup_steps=100,
+            bf16=True,
+            report_to="wandb",
+        )
+    #load the dataset
+        dataset = load_dataset(traindata_, split='train')
+        # dataset = load_dataset('Gopal2002/zephyr-gemma-finetune-dpo', split='train')
+    # Create DPO trainer
+        dpo_trainer = DPOTrainer(
+            model,
+            ref_model=None,
+            args=training_args,
+            train_dataset=dataset,
+            tokenizer=tokenizer,
+            peft_config=peft_config,
+            beta=0.1,
+            max_prompt_length=2048,
+            max_length=1536,
+        )
+        dpo_trainer.train()
+        return "Training Done"
+    except MyException as e:
+        return str(e)
 with gr.Blocks() as demo: