Kevin Fink committed on
Commit 6527df5 · 1 Parent(s): eb75c06
Files changed (1)
  1. app.py +6 -5
app.py CHANGED
@@ -83,7 +83,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
     print("Loading model from checkpoint...")
     model = AutoModelForSeq2SeqLM.from_pretrained(training_args.output_dir)
 
-    max_length = 128
+    max_length = 512
     #max_length = model.get_input_embeddings().weight.shape[0]
     try:
         tokenized_train_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
@@ -109,7 +109,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
             examples['text'],
             max_length=max_length, # Set to None for dynamic padding
             truncation=True,
-            padding=True,
+            padding='max_length',
+            return_tensors='pt',
         )
 
         # Setup the decoder input IDs (shifted right)
@@ -117,8 +118,9 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
             examples['target'],
             max_length=max_length, # Set to None for dynamic padding
             truncation=True,
-            padding=True,
-            text_target=examples['target'] # Use text_target for target text
+            padding='max_length',
+            text_target=examples['target'],
+            return_tensors='pt',
         )
 
         # Add labels to the model inputs
@@ -178,7 +180,6 @@ def run_train(dataset_name, hub_id, api_key, num_epochs, batch_size, lr, grad):
     config = AutoConfig.from_pretrained("google/t5-efficient-tiny")
     model = AutoModelForSeq2SeqLM.from_config(config)
     initialize_weights(model)
-    print(list(model.named_parameters()))
     lora_config = LoraConfig(
         r=16, # Rank of the low-rank adaptation
         lora_alpha=32, # Scaling factor
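
For reference, a minimal sketch of the preprocessing step this commit modifies, assuming the tokenizer calls above live inside a `tokenize_function` mapped over the dataset. The function name, the tokenizer checkpoint, and the `labels` wiring are assumptions; only the tokenizer arguments mirror the diff.

```python
from transformers import AutoTokenizer

# Hypothetical setup; the checkpoint name mirrors the config loaded later in the diff.
tokenizer = AutoTokenizer.from_pretrained("google/t5-efficient-tiny")

def tokenize_function(examples):
    max_length = 512
    # Tokenize the source text; padding='max_length' pads every example to 512 tokens.
    model_inputs = tokenizer(
        examples['text'],
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_tensors='pt',
    )
    # text_target= tokenizes the target sequences with the same settings.
    labels = tokenizer(
        text_target=examples['target'],
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_tensors='pt',
    )
    # Add labels to the model inputs, as the original code does after this call.
    model_inputs['labels'] = labels['input_ids']
    return model_inputs
```

With `padding='max_length'` every example is padded to the new `max_length` of 512 rather than to the longest sequence in the batch, which keeps tensor shapes fixed when `return_tensors='pt'` is requested.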
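
The final hunk only drops a debug `print`, but for context, an illustrative sketch of how a `LoraConfig` like the one shown in the surrounding lines is typically applied with `peft`; `task_type`, `target_modules`, `lora_dropout`, and the `get_peft_model` wrapping are assumptions, not part of the diff.

```python
from peft import LoraConfig, TaskType, get_peft_model

lora_config = LoraConfig(
    r=16,                             # Rank of the low-rank adaptation (as in the diff)
    lora_alpha=32,                    # Scaling factor (as in the diff)
    task_type=TaskType.SEQ_2_SEQ_LM,  # Assumed: seq2seq task for the T5-style model
    target_modules=["q", "v"],        # Assumed: attention projections typical for T5
    lora_dropout=0.05,                # Assumed dropout value
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()    # Shows how few parameters LoRA actually trains
```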