Kevin Fink committed
Commit 64a72dd · Parent: ee975a5
Files changed (1)
  1. app.py +10 -1
app.py CHANGED
@@ -6,7 +6,7 @@ from datasets import load_dataset, concatenate_datasets, load_from_disk
 import traceback
 from sklearn.metrics import accuracy_score
 import numpy as np
-
+import torch
 import os
 from huggingface_hub import login
 from peft import get_peft_model, LoraConfig
@@ -147,8 +147,17 @@ def predict(text):
 
 @spaces.GPU(duration=120)
 def run_train(dataset_name, hub_id, api_key, num_epochs, batch_size, lr, grad):
+    def initialize_weights(model):
+        for name, param in model.named_parameters():
+            if 'encoder.block.0.layer.0.DenseReluDense.wi.weight' in name:  # Example layer
+                torch.nn.init.xavier_uniform_(param.data)  # Xavier initialization
+            elif 'encoder.block.0.layer.0.DenseReluDense.wo.weight' in name:  # Another example layer
+                torch.nn.init.kaiming_normal_(param.data)  # Kaiming initialization
+
     config = AutoConfig.from_pretrained("google/t5-efficient-tiny")
     model = AutoModelForSeq2SeqLM.from_config(config)
+    print(model.named_parameters())
+    initialize_weights(model)
     lora_config = LoraConfig(
         r=16,  # Rank of the low-rank adaptation
         lora_alpha=32,  # Scaling factor
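
For reference, below is a minimal standalone sketch of the weight-initialization pattern this commit adds, runnable outside the Space and assuming only that torch and transformers are installed. Two caveats worth flagging: named_parameters() returns a generator, so the committed print(model.named_parameters()) shows the generator object rather than the parameter names, and in the T5 module layout the feed-forward sublayer sits at layer.1 (layer.0 is self-attention), so the diff's layer.0...DenseReluDense patterns appear never to match; the sketch targets layer.1 instead.

import torch
from transformers import AutoConfig, AutoModelForSeq2SeqLM

def initialize_weights(model):
    # Re-initialize the two feed-forward matrices of the first encoder
    # block; every other parameter keeps its default initialization.
    # Note: in T5 the feed-forward sublayer is layer.1 (layer.0 is
    # self-attention), unlike the layer.0 paths used in the commit.
    for name, param in model.named_parameters():
        if 'encoder.block.0.layer.1.DenseReluDense.wi.weight' in name:
            torch.nn.init.xavier_uniform_(param.data)   # Xavier (Glorot) uniform
        elif 'encoder.block.0.layer.1.DenseReluDense.wo.weight' in name:
            torch.nn.init.kaiming_normal_(param.data)   # Kaiming (He) normal

config = AutoConfig.from_pretrained("google/t5-efficient-tiny")
model = AutoModelForSeq2SeqLM.from_config(config)  # random weights; only the config is fetched
initialize_weights(model)

# named_parameters() is a generator; materialize it to inspect the names.
print([name for name, _ in model.named_parameters()][:5])

Because from_config builds the model with fresh random weights rather than loading the pretrained checkpoint, the custom initialization here only swaps the default init scheme for the two targeted matrices before LoRA adapters are attached.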