Gopal2002 committed on
Commit 4a047b7 · verified · 1 Parent(s): 3367f4b

Update app.py

Files changed (1)
  1. app.py +83 -81
app.py CHANGED
@@ -29,88 +29,90 @@ def greet(traindata_,output_repo):
     # new_model = "Gopal2002/zehpyr-gemma-dpo-finetune"
     new_model = output_repo
 
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    tokenizer.pad_token = tokenizer.eos_token
-    tokenizer.padding_side = "left"
-
-
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16,
-        load_in_4bit=True
-    )
-    model.config.use_cache = False
-
-    # Reference model
-    ref_model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16,
-        load_in_4bit=True
-    )
-
-    # specify how to quantize the model
-    quantization_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_compute_dtype=torch.bfloat16,
-    )
-    device_map = {"": torch.cuda.current_device()} if torch.cuda.is_available() else None
-
-    # Step 1: load the base model (Mistral-7B in our case) in 4-bit
-    model_kwargs = dict(
-        # attn_implementation="flash_attention_2", # set this to True if your GPU supports it (Flash Attention drastically speeds up model computations)
-        torch_dtype="auto",
-        use_cache=False, # set to False as we're going to use gradient checkpointing
-        device_map=device_map,
-        quantization_config=quantization_config,
-    )
-    model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
-
-    # Training arguments
-    peft_config = LoraConfig(
-        r=16,
-        lora_alpha=16,
-        lora_dropout=0.05,
-        bias="none",
-        task_type="CAUSAL_LM",
-        target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
-    )
-    training_args = TrainingArguments(
-        per_device_train_batch_size=4,
-        gradient_accumulation_steps=4,
-        gradient_checkpointing=True,
-        learning_rate=5e-5,
-        lr_scheduler_type="cosine",
-        max_steps=200,
-        save_strategy="no",
-        logging_steps=1,
-        output_dir=new_model,
-        optim="paged_adamw_32bit",
-        warmup_steps=100,
-        bf16=True,
-        report_to="wandb",
-    )
-
-    #load the dataset
-    dataset = load_dataset(traindata_, split='train')
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        tokenizer.pad_token = tokenizer.eos_token
+        tokenizer.padding_side = "left"
 
-    # dataset = load_dataset('Gopal2002/zephyr-gemma-finetune-dpo', split='train')
-
-    # Create DPO trainer
-    dpo_trainer = DPOTrainer(
-        model,
-        ref_model=None,
-        args=training_args,
-        train_dataset=dataset,
-        tokenizer=tokenizer,
-        peft_config=peft_config,
-        beta=0.1,
-        max_prompt_length=2048,
-        max_length=1536,
-    )
-    dpo_trainer.train()
-    return "Training Done"
+
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16,
+            load_in_4bit=True
+        )
+        model.config.use_cache = False
+
+        # Reference model
+        ref_model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16,
+            load_in_4bit=True
+        )
+
+        # specify how to quantize the model
+        quantization_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.bfloat16,
+        )
+        device_map = {"": torch.cuda.current_device()} if torch.cuda.is_available() else None
+
+        # Step 1: load the base model (Mistral-7B in our case) in 4-bit
+        model_kwargs = dict(
+            # attn_implementation="flash_attention_2", # set this to True if your GPU supports it (Flash Attention drastically speeds up model computations)
+            torch_dtype="auto",
+            use_cache=False, # set to False as we're going to use gradient checkpointing
+            device_map=device_map,
+            quantization_config=quantization_config,
+        )
+        model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
+
+        # Training arguments
+        peft_config = LoraConfig(
+            r=16,
+            lora_alpha=16,
+            lora_dropout=0.05,
+            bias="none",
+            task_type="CAUSAL_LM",
+            target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
+        )
+        training_args = TrainingArguments(
+            per_device_train_batch_size=4,
+            gradient_accumulation_steps=4,
+            gradient_checkpointing=True,
+            learning_rate=5e-5,
+            lr_scheduler_type="cosine",
+            max_steps=200,
+            save_strategy="no",
+            logging_steps=1,
+            output_dir=new_model,
+            optim="paged_adamw_32bit",
+            warmup_steps=100,
+            bf16=True,
+            report_to="wandb",
+        )
+
+        #load the dataset
+        dataset = load_dataset(traindata_, split='train')
+
+        # dataset = load_dataset('Gopal2002/zephyr-gemma-finetune-dpo', split='train')
+
+        # Create DPO trainer
+        dpo_trainer = DPOTrainer(
+            model,
+            ref_model=None,
+            args=training_args,
+            train_dataset=dataset,
+            tokenizer=tokenizer,
+            peft_config=peft_config,
+            beta=0.1,
+            max_prompt_length=2048,
+            max_length=1536,
+        )
+        dpo_trainer.train()
+        return "Training Done"
+    except Exception as e:
+        return str(e)
 
 
 with gr.Blocks() as demo:
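
The net effect of the commit is that the whole fine-tuning body of greet() now runs inside try/except, so a failure is returned to the Gradio UI as a plain string instead of raising inside the Space. A minimal sketch of that pattern wired into a Blocks app follows; the stub greet, the textbox labels, and the button text are illustrative assumptions, not the actual UI code that follows this hunk in app.py.

import gradio as gr

def greet(traindata_, output_repo):
    # Stub standing in for the real routine in app.py, which loads the 4-bit
    # model, builds the DPOTrainer, trains, and returns "Training Done".
    try:
        if not traindata_ or not output_repo:
            raise ValueError("both a dataset repo and an output repo are required")
        return "Training Done"
    except Exception as e:
        # As in the commit: report the error text to the UI instead of crashing.
        return str(e)

with gr.Blocks() as demo:
    traindata = gr.Textbox(label="Training dataset repo")   # assumed label
    output_repo = gr.Textbox(label="Output model repo")     # assumed label
    status = gr.Textbox(label="Status")
    run = gr.Button("Run DPO fine-tuning")                   # assumed label
    run.click(fn=greet, inputs=[traindata, output_repo], outputs=status)

if __name__ == "__main__":
    demo.launch()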