darpanaswal committed
Commit 61330e7 · verified · 1 parent: 64c7578

Update finetune.py

Files changed (1)
  1. finetune.py +9 -28
finetune.py CHANGED
@@ -13,7 +13,7 @@ from transformers import (AutoTokenizer, BitsAndBytesConfig, MBart50TokenizerFast,
                            MBartForConditionalGeneration, TrainingArguments,
                            DataCollatorForSeq2Seq, EarlyStoppingCallback)
 from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
-# Get the absolute path of the current script
+
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 
 MODELS = {
@@ -53,7 +53,6 @@ def experiments(model_name, finetune_type):
     """Runs an experiment with the given model and dataset."""
     print(f"Starting Experiment: on {model_name}")
 
-    # Construct dataset paths dynamically
     train = pd.read_csv(os.path.join(BASE_DIR, "datasets/train.csv"))
     train_fr = pd.read_csv(os.path.join(BASE_DIR, "datasets/train_fr.csv"))
     train_cross = pd.read_csv(os.path.join(BASE_DIR, "datasets/train_cross.csv"))
@@ -64,16 +63,6 @@ def experiments(model_name, finetune_type):
     test_fr = pd.read_csv(os.path.join(BASE_DIR, "datasets/test_fr.csv"))
     test_cross = pd.read_csv(os.path.join(BASE_DIR, "datasets/test_cross.csv"))
 
-    # print(len(train))
-    # print(len(train_fr))
-    # print(len(train_cross))
-    # print(len(val))
-    # print(len(val_fr))
-    # print(len(val_cross))
-    # print(len(test))
-    # print(len(test_fr))
-    # print(len(test_cross))
-
     model, tokenizer = download_model(model_name)
     print(f"Model {model_name} loaded successfully.")
 
@@ -94,11 +83,10 @@ def fine_tune(model_name, finetune_type, model, tokenizer, summarize_text, train
     print("Starting Fine-tuning...")
     if model_name == "mT5":
         max_input = 512
-        max_output = 60
     else:
         max_input = 1024
-        max_output = 60
 
+    max_output = 60
     train_dataset = train
     eval_dataset = val
     if finetune_type == "multilingual":
@@ -124,27 +112,21 @@ def fine_tune(model_name, finetune_type, model, tokenizer, summarize_text, train
         return model_inputs
 
     tokenized_train = train_dataset.map(preprocess_function, batched=True)
-
-    # Create a small evaluation dataset
-
     tokenized_eval = eval_dataset.map(preprocess_function, batched=True)
 
-    # Apply QLoRA only for mT5
+    # QLoRA config for mT5
     if model_name == "mT5":
-        # PEFT Configuration for Quantized Fine-tuning
         lora_config = LoraConfig(
-            r=8, # Rank of the LoRA update matrices
-            lora_alpha=32, # Scaling factor for the LoRA update matrices
-            lora_dropout=0.05, # Dropout probability for the LoRA update matrices
-            bias="none", # Whether to apply a bias to the LoRA update matrices
-            task_type=TaskType.SEQ_2_SEQ_LM # Task type for the model
+            r=8,
+            lora_alpha=32,
+            lora_dropout=0.05,
+            bias="none",
+            task_type=TaskType.SEQ_2_SEQ_LM
         )
 
-        # Prepare model for int8 training and apply LoRA
         model = prepare_model_for_kbit_training(model)
         model = get_peft_model(model, lora_config)
 
-    # Use DataCollatorForSeq2Seq for dynamic padding
     data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model) # Initialize the DataCollatorForSeq2Seq
 
     training_args = TrainingArguments(
@@ -156,7 +138,7 @@ def fine_tune(model_name, finetune_type, model, tokenizer, summarize_text, train
         per_device_eval_batch_size=4,
         num_train_epochs=3,
         weight_decay=0.01,
-        push_to_hub=True, # Automatically push at the end
+        push_to_hub=True,
         fp16=True,
         report_to="none",
     )
@@ -171,7 +153,6 @@ def fine_tune(model_name, finetune_type, model, tokenizer, summarize_text, train
 
     trainer.train()
 
-    # Save tokenizer and push manually
     tokenizer.save_pretrained(training_args.output_dir)
     tokenizer.push_to_hub(f"{model_name}-{finetune_type}-finetuned")
 
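For readers skimming the diff, the QLoRA path kept for mT5 is the usual PEFT recipe: load a quantized base model, call prepare_model_for_kbit_training, then wrap it with get_peft_model using the LoRA settings above. The sketch below is illustrative only, not part of finetune.py: the google/mt5-base checkpoint and the 4-bit BitsAndBytesConfig are assumptions standing in for whatever the script's own download_model() and MODELS mapping load, and it assumes a CUDA GPU with bitsandbytes installed.

# Minimal sketch of the QLoRA setup the commit keeps for mT5 (not the script itself).
# Assumed: google/mt5-base checkpoint and 4-bit quantization in place of download_model().
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_training

model_id = "google/mt5-base"  # assumed checkpoint; the script resolves "mT5" via its MODELS dict
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # quantize the base weights to 4-bit for QLoRA
    bnb_4bit_compute_dtype=torch.float16,  # compute in fp16, matching fp16=True in TrainingArguments
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_id, quantization_config=bnb_config, device_map="auto"
)

lora_config = LoraConfig(  # same hyperparameters as in the diff
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM,
)

model = prepare_model_for_kbit_training(model)  # cast norms to fp32, enable input grads
model = get_peft_model(model, lora_config)      # attach LoRA adapters to the quantized base
model.print_trainable_parameters()              # only the adapter weights remain trainable

After get_peft_model, only the small adapter matrices are trainable, which is what lets the 4-bit base model be fine-tuned with the fp16 TrainingArguments shown in the diff.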
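Continuing the sketch (reusing the model and tokenizer above), the training and Hub-push flow around the visible TrainingArguments looks roughly like this. The Trainer construction, output_dir, and any EarlyStoppingCallback wiring fall outside the hunks shown, so those pieces, the dummy dataset, and the per_device_train_batch_size are assumptions, not the script's actual values.

# Illustrative continuation, not finetune.py itself: dummy data plus the TrainingArguments
# visible in the diff. Anything not shown in the hunks is flagged as an assumption.
from datasets import Dataset
from transformers import DataCollatorForSeq2Seq, Trainer, TrainingArguments

def preprocess_function(batch):
    # Mirrors max_input=512 (mT5) and max_output=60 from the diff.
    model_inputs = tokenizer(batch["text"], max_length=512, truncation=True)
    labels = tokenizer(text_target=batch["summary"], max_length=60, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

raw = Dataset.from_dict({"text": ["an example document"] * 8,
                         "summary": ["a short summary"] * 8})  # dummy rows for the sketch
tokenized_train = raw.map(preprocess_function, batched=True, remove_columns=raw.column_names)
tokenized_eval = tokenized_train

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)  # dynamic padding per batch

training_args = TrainingArguments(
    output_dir="mT5-multilingual-finetuned",  # placeholder; the real name is outside the hunk
    per_device_train_batch_size=4,            # assumption; only the eval batch size is visible
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=True,                         # as in the diff: push the trained weights at the end
    fp16=True,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    data_collator=data_collator,
)
trainer.train()

# The commit keeps the explicit tokenizer push alongside push_to_hub=True, so the Hub
# repo receives the tokenizer files as well as the fine-tuned weights.
tokenizer.save_pretrained(training_args.output_dir)
tokenizer.push_to_hub("mT5-multilingual-finetuned")  # script uses f"{model_name}-{finetune_type}-finetuned"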