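# Gradio app for fine-tuning a Hugging Face seq2seq model on a dataset from the Hub.
# The user supplies a model name, dataset name, destination Hub repo, API token, and
# basic hyperparameters; training runs on GPU via the @spaces.GPU decorator and the
# trained model is pushed back to the Hub.
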
import spaces
import gradio as gr
from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSeq2SeqLM, TrainerCallback, DataCollatorForSeq2Seq
from datasets import load_dataset
import traceback
from huggingface_hub import login
from peft import get_peft_model, LoraConfig
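
# Optional callback that logs the learning rate at each step and an error rate at the
# end of each epoch. It is defined here but not registered with the Trainer below
# (see the commented-out callbacks=[...] argument).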

class LoggingCallback(TrainerCallback):
    def on_step_end(self, args, state, control, **kwargs):
        # Log the current learning rate (the Trainer passes the optimizer via kwargs)
        optimizer = kwargs.get("optimizer")
        if optimizer is not None:
            current_lr = optimizer.param_groups[0]['lr']
            print(f"Current Learning Rate: {current_lr}")

    def on_epoch_end(self, args, state, control, **kwargs):
        # Log an error rate derived from the best metric seen so far
        # (only meaningful if metric_for_best_model is an accuracy-style metric)
        if state.best_metric is not None:
            error_rate = 1 - state.best_metric
            print(f"Current Error Rate: {error_rate:.4f}")

@spaces.GPU
def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch_size, lr, grad):
    try:        
        login(api_key.strip())
        lora_config = LoraConfig(
            r=16,  # Rank of the low-rank adaptation
            lora_alpha=32,  # Scaling factor
            lora_dropout=0.1,  # Dropout for LoRA layers
            bias="none"  # Bias handling
        )        
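        # NOTE: lora_config is only applied if the get_peft_model(...) call below is uncommented.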
        # Load the dataset
        dataset = load_dataset(dataset_name.strip())
    
        # Load the model and tokenizer
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name.strip())
        #model = get_peft_model(model, lora_config)
        tokenizer = AutoTokenizer.from_pretrained(model_name.strip())
    
        # Tokenize the dataset (expects 'text' and 'target' columns)
        def tokenize_function(examples):
            max_length = 32
            # Tokenize the inputs and, via text_target, the expected outputs;
            # the target token IDs are returned under the "labels" key
            model_inputs = tokenizer(
                examples['text'],
                text_target=examples['target'],
                max_length=max_length,
                truncation=True,
            )
            return model_inputs
    
        tokenized_datasets = dataset.map(tokenize_function, batched=True)
    
        # Set training arguments
        training_args = TrainingArguments(
            output_dir='./results',
            eval_strategy="epoch",
            save_strategy='epoch',
            learning_rate=lr * 1e-5,  # slider value is expressed in units of 1e-5
            per_device_train_batch_size=int(batch_size),
            per_device_eval_batch_size=1,
            num_train_epochs=int(num_epochs),
            weight_decay=0.01,
            #gradient_accumulation_steps=grad*0.1,
            load_best_model_at_end=True,
            metric_for_best_model="loss",  # no compute_metrics is defined, so track eval loss
            greater_is_better=False,
            logging_dir='./logs',
            logging_steps=10,
            #push_to_hub=True,
            hub_model_id=hub_id.strip(),
            fp16=True,
            #lr_scheduler_type='cosine',
        )
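        # With load_best_model_at_end=True, the checkpoint with the lowest eval loss
        # is restored after training, so that is what gets pushed to the Hub.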
        
        # Create Trainer
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized_datasets['train'],
            eval_dataset=tokenized_datasets['test'],
            # Pad inputs and labels dynamically per batch; without a seq2seq collator
            # the variable-length examples cannot be batched into tensors
            data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
            tokenizer=tokenizer,
            #callbacks=[LoggingCallback()],
        )
    
        # Fine-tune the model
        trainer.train()
        trainer.push_to_hub(commit_message="Training complete!")
    except Exception as e:
        return f"An error occurred: {str(e)}, TB: {traceback.format_exc()}"
    return 'DONE!'
'''
# Define Gradio interface
def predict(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = model(**inputs)
    predictions = outputs.logits.argmax(dim=-1)
    return "Positive" if predictions.item() == 1 else "Negative"
'''
# Create Gradio interface
try:
    
    iface = gr.Interface(
        fn=fine_tune_model,
        inputs=[
            gr.Textbox(label="Model Name (e.g., 'google/t5-efficient-tiny-nh8')"),
            gr.Textbox(label="Dataset Name (e.g., 'imdb')"),
            gr.Textbox(label="HF hub to push to after training"),
            gr.Textbox(label="HF API token"),
            gr.Slider(minimum=1, maximum=10, value=3, label="Number of Epochs", step=1),
            gr.Slider(minimum=1, maximum=16, value=4, label="Batch Size", step=1),
            gr.Slider(minimum=1, maximum=1000, value=50, label="Learning Rate (e-5)", step=1),
            gr.Slider(minimum=1, maximum=100, value=1, label="Gradient accumulation (e-1)", step=1), 
        ],
        outputs="text",
        title="Fine-Tune Hugging Face Model",
        description="This interface allows you to fine-tune a Hugging Face model on a specified dataset."
    )
    # Launch the interface
    iface.launch()    
except Exception as e:
    print(f"An error occurred: {str(e)}, TB: {traceback.format_exc()}")