File size: 8,531 Bytes
63d7972
9438973
63d7972
 
 
d501a8a
63d7972
 
d501a8a
 
63d7972
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d501a8a
63d7972
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d501a8a
63d7972
d501a8a
63d7972
 
 
 
 
 
 
 
 
 
 
 
 
d501a8a
63d7972
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#0.1 Install Dependencies
#!pip install unsloth torch transformers datasets trl huggingface_hub

#0.2 Import Dependencies
from unsloth import FastLanguageModel
import torch
import os
from transformers import TextStreamer
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported 

# 1. Configuration
max_seq_length = 2048
dtype = None
load_in_4bit = True

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

instruction = """This assistant is trained to code executive ranks and roles along the following categories with 1 or 0.

Ranks:
- VP: 1 if Vice President (VP), 0 otherwise
- SVP: 1 if Senior Vice President (SVP), 0 otherwise
- EVP: 1 if Executive Vice President (EVP), 0 otherwise
- SEVP: 1 if Senior Executive Vice President (SEVP), 0 otherwise
- Director: 1 if Director, 0 otherwise
- Senior Director: 1 if Senior Director, 0 otherwise
- MD: 1 if Managing Director (MD), 0 otherwise
- SMD: 1 if Senior Managing Director (SMD), 0 otherwise
- SE: 1 if Senior Executive, 0 otherwise
- VC: 1 if Vice Chair (VC), 0 otherwise
- SVC: 1 if Senior Vice Chair (SVC), 0 otherwise
- President: 1 if President of the parent company, 0 when President of subsidiary or division but not parent company.

Roles:
- Board: 1 when role suggests person is a member of the board of directors, 0 otherwise
- CEO: 1 when Chief Executive Officer of parent company, 0 when Chief Executive Officer of a subsidiary but not parent company.
- CXO: 1 when C-Suite title, i.e., Chief X Officer, where X can be any type of designation, 0 otherwise. Chief Executive Officer of the parent company. Not Chief AND Officer, e.g., only officer of a function.
- Primary: 1 when responsible for primary activity of value chain, i.e., Supply Chain, Manufacturing, Operations, Marketing & Sales, Customer Service and alike, 0 when not a primary value chain activity.
- Support: 1 when responsible for a support activity of the value chain, i.e., Procurement, IT, HR, Management, Strategy, HR, Finance, Legal, R&D, Investor Relations, Technology, General Counsel and alike, 0 when not support activity of the value.
- BU: 1 when involved with an entity/distinct unit responsible for Product, Customer, or Geographical domain/unit; or role is about a subsidiary, 0 when responsibility is not for a specific product/customer/geography area but, for example, for the entire parent company."""
input = "In 2015 the company 'cms' had an executive with the name david mengebier, whose official role title was: 'senior vice president, cms energy and consumers energy'."
huggingface_model_name = "daresearch/Llama-3.1"


# 2. Before Training
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = os.getenv("HF_TOKEN")
)

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        instruction, # instruction
        input, # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 1000)

# 3. Load data

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass
#dataset = load_dataset("daresearch/orgdatabase-training0-data", split = "train")
#dataset = dataset.map(formatting_prompts_func, batched = True,)


# Load train and validation datasets
train_dataset = load_dataset("csv", data_files="train.csv", split="train")
valid_dataset = load_dataset("csv", data_files="valid.csv", split="train")

# Apply formatting to both datasets
train_dataset = train_dataset.map(formatting_prompts_func, batched=True)
valid_dataset = valid_dataset.map(formatting_prompts_func, batched=True)


# 4. Training
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,  # Supports any, but = 0 is optimized
    bias="none",  # Supports any, but = "none" is optimized
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,  # We support rank stabilized LoRA
    loftq_config=None,  # And LoftQ
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,  # Updated to use train_dataset
    eval_dataset=valid_dataset,  # Added eval_dataset for validation
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,  # Can make training 5x faster for short sequences.
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=100,
        learning_rate=2e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        evaluation_strategy="steps",  # Enables evaluation during training
        eval_steps=10,  # Frequency of evaluation
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)

# Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

trainer_stats = trainer.train()

# Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime'] / 60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

# Optionally evaluate after training if desired
eval_stats = trainer.evaluate(eval_dataset=valid_dataset)
print(f"Validation Loss: {eval_stats['eval_loss']}")
if "eval_accuracy" in eval_stats:
    print(f"Validation Accuracy: {eval_stats['eval_accuracy']}")


# 5. After Training
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        instruction, # instruction
        input, # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 1000)


# 6. Saving
model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")
model.push_to_hub(huggingface_model_name, token = os.getenv("HF_TOKEN"))
tokenizer.push_to_hub(huggingface_model_name, token = os.getenv("HF_TOKEN"))

# Merge to 16bit
if True: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",)
if True: model.push_to_hub_merged(huggingface_model_name, tokenizer, save_method = "merged_16bit", token = os.getenv("HF_TOKEN"))

# # Merge to 4bit
#if True: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",)
#if True: model.push_to_hub_merged(huggingface_model_name, tokenizer, save_method = "merged_4bit", token = os.getenv("HF_TOKEN"))