Kevin Fink committed
Commit cab69d9 · 1 Parent(s): 4dd1004
Files changed (2):
  1. app.py +17 -3
  2. requirements.txt +2 -0
app.py CHANGED
@@ -4,6 +4,9 @@ from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelFor
from transformers import DataCollatorForSeq2Seq
from datasets import load_dataset, concatenate_datasets, load_from_disk
import traceback
+ from sklearn.metrics import accuracy_score
+ import numpy as np
+
import os
from huggingface_hub import login
from peft import get_peft_model, LoraConfig
@@ -12,7 +15,15 @@ os.environ['HF_HOME'] = '/data/.huggingface'

@spaces.GPU(duration=120)
def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch_size, lr, grad):
-     try:
+     try:
+         def compute_metrics(eval_pred):
+             logits, labels = eval_pred
+             predictions = np.argmax(logits, axis=1)
+             accuracy = accuracy_score(labels, predictions)
+             return {
+                 'eval_accuracy': accuracy,
+                 'eval_loss': eval_pred.loss, # If you want to include loss as well
+             }
        login(api_key.strip())
        lora_config = LoraConfig(
            r=16, # Rank of the low-rank adaptation
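Note: the compute_metrics hook added in this hunk unpacks eval_pred as a plain (logits, labels) tuple, takes the argmax over axis 1, and reads eval_pred.loss, an attribute the EvalPrediction object may not provide. For a seq2seq model the logits are usually shaped (batch, seq_len, vocab_size), so a more conventional version of this hook would look roughly like the sketch below; the -100 mask is an assumption about how padded label positions were encoded, not something shown in this diff.

    import numpy as np
    from sklearn.metrics import accuracy_score

    def compute_metrics(eval_pred):
        # EvalPrediction exposes .predictions and .label_ids
        logits, labels = eval_pred.predictions, eval_pred.label_ids
        predictions = np.argmax(logits, axis=-1)   # argmax over the vocab axis
        mask = labels != -100                      # assumed ignore-index for padding
        accuracy = accuracy_score(labels[mask], predictions[mask])
        # Trainer adds the "eval_" prefix to these keys during evaluation
        return {'accuracy': accuracy}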
@@ -23,7 +34,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch

        # Load the model and tokenizer
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name.strip(), num_labels=2)
-         model = get_peft_model(model, lora_config)
+         #model = get_peft_model(model, lora_config)


        # Set training arguments
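Note: with get_peft_model commented out, the LoraConfig built earlier in the function is never applied, so this commit fine-tunes the full model rather than LoRA adapters. If the adapter path is re-enabled later, the usual PEFT wiring for a seq2seq model looks roughly like the sketch below; target_modules=["q", "v"] assumes a T5-style architecture and the other values are illustrative, not taken from app.py.

    from peft import LoraConfig, TaskType, get_peft_model

    lora_config = LoraConfig(
        r=16,                        # rank of the low-rank update matrices
        lora_alpha=32,               # scaling factor applied to the update
        lora_dropout=0.05,
        target_modules=["q", "v"],   # assumption: T5-style attention projections
        task_type=TaskType.SEQ_2_SEQ_LM,
    )
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()  # sanity check: only a small fraction should be trainable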
@@ -58,6 +69,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
        max_length = 128
        try:
            tokenized_train_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
+             print(tokenized_train_dataset[0])
            tokenized_test_dataset = load_from_disk(f'/data/{hub_id.strip()}_test_dataset')

            # Create Trainer
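Note: this branch expects the tokenized splits to already exist on the Space's persistent /data volume from an earlier run; the added print is a quick check of what a cached example looks like. A self-contained sketch of that cache-or-tokenize pattern is below; raw_dataset and tokenize_fn stand in for the dataset loading and tokenization that happen elsewhere in app.py.

    from datasets import load_from_disk

    train_path = f'/data/{hub_id.strip()}_train_dataset'
    try:
        tokenized_train_dataset = load_from_disk(train_path)   # reuse the cached split
    except FileNotFoundError:
        tokenized_train_dataset = raw_dataset['train'].map(tokenize_fn, batched=True)
        tokenized_train_dataset.save_to_disk(train_path)        # cache it for the next run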
@@ -66,6 +78,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
                args=training_args,
                train_dataset=tokenized_train_dataset,
                eval_dataset=tokenized_test_dataset,
+                 compute_metrics=compute_metrics,
                #callbacks=[LoggingCallback()],
            )
        except:
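Note: both Trainer constructions in this commit now receive compute_metrics, so evaluation reports accuracy in addition to loss. A rough sketch of the full wiring, assuming the already-imported DataCollatorForSeq2Seq handles per-batch padding (the diff does not show whether a collator is actually passed):

    from transformers import Trainer, DataCollatorForSeq2Seq

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train_dataset,
        eval_dataset=tokenized_test_dataset,
        data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),  # pads inputs and labels
        compute_metrics=compute_metrics,
    )
    trainer.train()
    metrics = trainer.evaluate()   # returns e.g. {'eval_loss': ..., 'eval_accuracy': ...}

The bare except: that follows falls through to the tokenize-from-scratch branch; catching FileNotFoundError from load_from_disk instead would avoid masking unrelated errors.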
@@ -107,6 +120,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
                args=training_args,
                train_dataset=tokenized_datasets['train'],
                eval_dataset=tokenized_datasets['test'],
+                 compute_metrics=compute_metrics,
                #callbacks=[LoggingCallback()],
            )

@@ -140,7 +154,7 @@ try:
        gr.Textbox(label="HF hub to push to after training"),
        gr.Textbox(label="HF API token"),
        gr.Slider(minimum=1, maximum=10, value=3, label="Number of Epochs", step=1),
-         gr.Slider(minimum=1, maximum=500, value=1, label="Batch Size", step=1),
+         gr.Slider(minimum=1, maximum=2000, value=1, label="Batch Size", step=1),
        gr.Slider(minimum=1, maximum=1000, value=1, label="Learning Rate (e-5)", step=1),
        gr.Slider(minimum=1, maximum=100, value=1, label="Gradient accumulation", step=1),
    ],
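Note: the batch-size slider ceiling rises from 500 to 2000 in this hunk. The slider values reach fine_tune_model as plain numbers, so the mapping into TrainingArguments is presumably along the lines of the sketch below; the argument names and the lr * 1e-5 scaling are assumptions inferred from the "Learning Rate (e-5)" label, not something this diff shows.

    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=int(num_epochs),
        per_device_train_batch_size=int(batch_size),   # slider value, now up to 2000
        gradient_accumulation_steps=int(grad),          # effective batch = batch_size * grad
        learning_rate=lr * 1e-5,                        # slider 1..1000 -> 1e-5 .. 1e-2
        push_to_hub=True,
        hub_model_id=hub_id.strip(),
    )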
 
requirements.txt CHANGED
@@ -3,3 +3,5 @@ transformers
datasets
peft
huggingface_hub
+ scikit-learn
+ numpy