Cylanoid committed (verified)
Commit 406313e · Parent(s): 5997cdc

Update train_llama4.py

Files changed (1):
  1. train_llama4.py  +55 -85

train_llama4.py CHANGED
@@ -1,8 +1,7 @@
  # train_llama4.py
- # Script to fine-tune Llama 4 Maverick for healthcare fraud detection (text-only with CPU offloading)

- from transformers import AutoTokenizer, Llama4ForConditionalGeneration, Trainer, TrainingArguments
- from transformers import BitsAndBytesConfig
  import datasets
  import torch
  from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
@@ -10,11 +9,8 @@ from accelerate import Accelerator
  import huggingface_hub
  import os

- # Version and CUDA check
- print(f"PyTorch version: {torch.__version__}")
- print(f"CUDA version: {torch.version.cuda}")
- print(f"Is CUDA available: {torch.cuda.is_available()}")
- print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

  # Authenticate with Hugging Face
  LLama = os.getenv("LLama")
@@ -22,111 +18,85 @@ if not LLama:
      raise ValueError("LLama token not found. Set it in Hugging Face Space secrets as 'LLama'.")
  huggingface_hub.login(token=LLama)

- # Load Llama 4 model and tokenizer
  MODEL_ID = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

- # Add padding token if it doesn't exist
  if tokenizer.pad_token is None:
      tokenizer.add_special_tokens({'pad_token': '[PAD]'})

- # Custom device map to offload some layers to CPU
  device_map = {
      "model.embed_tokens": 0,
-     "model.layers.0-15": 0,  # Keep first 16 layers on GPU
-     "model.layers.16-31": "cpu",  # Offload remaining layers to CPU
      "model.norm": 0,
      "lm_head": 0
  }

- # Quantization config for A100 80 GB VRAM
- quantization_config = BitsAndBytesConfig(load_in_8bit=True)

  model = Llama4ForConditionalGeneration.from_pretrained(
      MODEL_ID,
      torch_dtype=torch.bfloat16,
      device_map=device_map,
-     quantization_config=quantization_config,
      llm_int8_enable_fp32_cpu_offload=True,
      attn_implementation="flex_attention"
  )

- # Prepare for LoRA
- model = prepare_model_for_kbit_training(model)
- peft_config = LoraConfig(
      r=16,
      lora_alpha=32,
      lora_dropout=0.05,
      bias="none",
-     task_type="CAUSAL_LM",
-     target_modules=["q_proj", "k_proj", "v_proj", "o_proj"]
  )
- model = get_peft_model(model, peft_config)
- model.print_trainable_parameters()

- # Load dataset
- dataset = datasets.load_dataset("json", data_files="Bingaman_training_data.json", field="training_pairs")
- print("First example from dataset:", dataset["train"][0])
-
- # Tokenization
- def tokenize_data(example):
-     formatted_text = f"<s>[INST] {example['input']} [/INST] {example['output']}</s>"
-     inputs = tokenizer(formatted_text, padding="max_length", truncation=True, max_length=4096, return_tensors="pt")
-     input_ids = inputs["input_ids"].squeeze(0).tolist()
-     attention_mask = inputs["attention_mask"].squeeze(0).tolist()
-     labels = input_ids.copy()
-     return {
-         "input_ids": input_ids,
-         "labels": labels,
-         "attention_mask": attention_mask
-     }
-
- tokenized_dataset = dataset["train"].map(tokenize_data, batched=False, remove_columns=dataset["train"].column_names)
- print("First tokenized example:", {k: (type(v), len(v)) for k, v in tokenized_dataset[0].items()})
-
- # Data collator
- def custom_data_collator(features):
-     input_ids = [torch.tensor(f["input_ids"]) for f in features]
-     attention_mask = [torch.tensor(f["attention_mask"]) for f in features]
-     labels = [torch.tensor(f["labels"]) for f in features]
-     return {
-         "input_ids": torch.stack(input_ids),
-         "attention_mask": torch.stack(attention_mask),
-         "labels": torch.stack(labels)
-     }
-
- # Training setup
- accelerator = Accelerator()
- training_args = TrainingArguments(
-     output_dir="./fine_tuned_llama4_healthcare",
-     per_device_train_batch_size=2,
-     gradient_accumulation_steps=8,
-     eval_strategy="steps",
-     eval_steps=10,
-     save_strategy="steps",
-     save_steps=20,
-     save_total_limit=3,
-     num_train_epochs=5,
-     learning_rate=2e-5,
-     weight_decay=0.01,
-     logging_dir="./logs",
-     logging_steps=5,
-     bf16=True,
-     gradient_checkpointing=True,
-     optim="adamw_torch",
-     warmup_steps=50
- )

- trainer = Trainer(
-     model=model,
-     args=training_args,
-     train_dataset=tokenized_dataset,
-     eval_dataset=tokenized_dataset.select(range(min(5, len(tokenized_dataset)))),
-     data_collator=custom_data_collator
  )

- # Start training
  trainer.train()
- model.save_pretrained("./fine_tuned_llama4_healthcare")
- tokenizer.save_pretrained("./fine_tuned_llama4_healthcare")
- print("Training complete. Model and tokenizer saved to ./fine_tuned_llama4_healthcare")
 
  # train_llama4.py
+ # Script to fine-tune Llama 4 Maverick for healthcare fraud detection

+ from transformers import AutoTokenizer, Llama4ForConditionalGeneration, Trainer, TrainingArguments
  import datasets
  import torch
  from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
  import huggingface_hub
  import os

+ # Debug: Confirm file version
+ print("Running train_llama4.py with CPU offloading (version: 2025-04-21 v2)")

  # Authenticate with Hugging Face
  LLama = os.getenv("LLama")
      raise ValueError("LLama token not found. Set it in Hugging Face Space secrets as 'LLama'.")
  huggingface_hub.login(token=LLama)

+ # Model setup
  MODEL_ID = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

  if tokenizer.pad_token is None:
      tokenizer.add_special_tokens({'pad_token': '[PAD]'})

+ # Custom device map for CPU offloading
  device_map = {
      "model.embed_tokens": 0,
+     "model.layers.0-15": 0,
+     "model.layers.16-31": "cpu",
      "model.norm": 0,
      "lm_head": 0
  }
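Review note: as far as I know, Accelerate device maps key on concrete module names and do not expand range keys such as "model.layers.0-15", so layers written that way end up unassigned at dispatch time. A minimal per-layer sketch, assuming the decoder stack is exposed as model.layers with 32 layers (the 0-15 / 16-31 split used above is carried over as an assumption):

# Hypothetical per-layer device map; the "model.layers" prefix and the 32-layer count
# are assumptions taken from the split in the committed code, not verified against the checkpoint.
NUM_LAYERS = 32
device_map = {"model.embed_tokens": 0, "model.norm": 0, "lm_head": 0}
device_map.update({f"model.layers.{i}": 0 for i in range(16)})                   # first 16 layers on GPU 0
device_map.update({f"model.layers.{i}": "cpu" for i in range(16, NUM_LAYERS)})   # remaining layers offloaded to CPU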

+ # Debug: Confirm offloading settings
+ print("Loading model with CPU offloading: llm_int8_enable_fp32_cpu_offload=True, device_map=", device_map)

+ # Load model with 8-bit quantization and CPU offloading
  model = Llama4ForConditionalGeneration.from_pretrained(
      MODEL_ID,
      torch_dtype=torch.bfloat16,
      device_map=device_map,
+     quantization_config={"load_in_8bit": True},
      llm_int8_enable_fp32_cpu_offload=True,
      attn_implementation="flex_attention"
  )
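Review note: in the bitsandbytes integration I am aware of, llm_int8_enable_fp32_cpu_offload is a field of BitsAndBytesConfig rather than a from_pretrained() keyword, and the 8-bit flag is normally wrapped in that config object (as the previous version of this script did for load_in_8bit). A sketch under that assumption:

from transformers import BitsAndBytesConfig

# Assumed variant: express both the 8-bit load and the fp32 CPU offload through the config object.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,  # lets modules mapped to "cpu" stay in fp32
)
model = Llama4ForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map=device_map,
    quantization_config=bnb_config,
    attn_implementation="flex_attention",
)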

+ # Resize token embeddings
+ model.resize_token_embeddings(len(tokenizer))
+
+ # Initialize Accelerator
+ accelerator = Accelerator()
+ model = accelerator.prepare(model)
+
+ # Load dataset
+ dataset = datasets.load_dataset('json', data_files="Bingaman_training_data.json")['train']
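Review note: the Trainer below receives this dataset as raw JSON records, but it needs input_ids and labels to compute a loss. A minimal sketch reusing the previous version's prompt format; the "input"/"output" field names come from that version and may need adjusting to the actual data:

def tokenize_data(example):
    # Prompt format carried over from the earlier version of this script
    text = f"<s>[INST] {example['input']} [/INST] {example['output']}</s>"
    tokens = tokenizer(text, truncation=True, max_length=4096, padding="max_length")
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

tokenized_dataset = dataset.map(tokenize_data, remove_columns=dataset.column_names)
# the Trainer would then take train_dataset=tokenized_dataset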
+
+ # LoRA configuration
+ lora_config = LoraConfig(
      r=16,
      lora_alpha=32,
+     target_modules=["q_proj", "v_proj"],
      lora_dropout=0.05,
      bias="none",
+     task_type="CAUSAL_LM"
  )

+ # Prepare model for fine-tuning
+ model = prepare_model_for_kbit_training(model)
+ model = get_peft_model(model, lora_config)
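The previous version printed the trainable-parameter count right after attaching the adapters; the same PEFT helper is a cheap sanity check that LoRA wrapped the intended q_proj/v_proj modules:

# Reports trainable vs. total parameters for the wrapped PeftModel
model.print_trainable_parameters()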
+
+ # Training arguments
+ training_args = {
+     "output_dir": "./results",
+     "num_train_epochs": 1,
+     "per_device_train_batch_size": 2,
+     "gradient_accumulation_steps": 8,
+     "optim": "adamw_torch",
+     "save_steps": 500,
+     "logging_steps": 100,
+     "learning_rate": 2e-4,
+     "fp16": True,
+     "max_grad_norm": 0.3,
+     "warmup_ratio": 0.03,
+     "lr_scheduler_type": "cosine"
+ }
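Review note: the base weights load in torch.bfloat16 and the previous version trained with bf16=True; requesting fp16 mixed precision here is a likely source of dtype friction on the A100 that the earlier version's comments targeted, where bf16 is natively supported. A small variant under that assumption:

# Assumed adjustment: train in bf16 to match the bfloat16 base weights (as the earlier version did)
training_args["fp16"] = False
training_args["bf16"] = True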
 
+ # Initialize trainer
+ trainer = accelerator.prepare(
+     Trainer(
+         model=model,
+         args=TrainingArguments(**training_args),
+         train_dataset=dataset,
+     )
  )

+ # Train
  trainer.train()
+ model.save_pretrained("./fine_tuned_model")
+ print("Training completed!")
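Usage sketch for the saved artifact: save_pretrained on a PEFT-wrapped model writes only the LoRA adapter weights by default, so inference reloads the base model and attaches the adapter from ./fine_tuned_model (directory name taken from the script). The pad-token and embedding-resize steps mirror the training setup above:

from transformers import AutoTokenizer, Llama4ForConditionalGeneration
from peft import PeftModel
import torch

MODEL_ID = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})  # same pad token as training

base = Llama4ForConditionalGeneration.from_pretrained(
    MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto"
)
base.resize_token_embeddings(len(tokenizer))                   # match the resized embeddings
model = PeftModel.from_pretrained(base, "./fine_tuned_model")  # attach the saved LoRA adapter
model.eval()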