dad1909 committed on
Commit
2812054
·
verified ·
1 Parent(s): f260b79

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -17,15 +17,14 @@ current_num = os.getenv("NUM")
17
  print(f"stage ${current_num}")
18
 
19
  api = HfApi(token=hf_token)
20
- models = f"dad1909/cybersentinal-2.0-{current_num}"
21
-
22
- # model_base = "dad1909/cybersentinal-2.0"
23
 
24
  print("Starting model and tokenizer loading...")
25
 
26
  # Load the model and tokenizer
27
  model, tokenizer = FastLanguageModel.from_pretrained(
28
- model_name=models,
29
  max_seq_length=max_seq_length,
30
  dtype=dtype,
31
  load_in_4bit=load_in_4bit,
@@ -113,14 +112,14 @@ trainer = SFTTrainer(
113
  dataset_num_proc=2,
114
  packing=False,
115
  args=TrainingArguments(
116
- per_device_train_batch_size=5,
117
- gradient_accumulation_steps=5,
118
  learning_rate=2e-4,
119
  fp16=not is_bfloat16_supported(),
120
  bf16=is_bfloat16_supported(),
121
  warmup_steps=5,
122
  logging_steps=10,
123
- max_steps=200,
124
  optim="adamw_8bit",
125
  weight_decay=0.01,
126
  lr_scheduler_type="linear",
@@ -139,13 +138,15 @@ num += 1
139
 
140
  uploads_models = f"cybersentinal-2.0-{str(num)}"
141
 
 
 
142
  print("Saving the trained model...")
143
  model.save_pretrained_merged("model", tokenizer, save_method="merged_16bit")
144
  print("Model saved successfully.")
145
 
146
  print("Pushing the model to the hub...")
147
  model.push_to_hub_merged(
148
- uploads_models,
149
  tokenizer,
150
  save_method="merged_16bit",
151
  token=hf_token
 
17
  print(f"stage ${current_num}")
18
 
19
  api = HfApi(token=hf_token)
20
+ # models = f"dad1909/cybersentinal-2.0-{current_num}"
21
+ model_base = "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit"
 
22
 
23
  print("Starting model and tokenizer loading...")
24
 
25
  # Load the model and tokenizer
26
  model, tokenizer = FastLanguageModel.from_pretrained(
27
+ model_name=model_base,
28
  max_seq_length=max_seq_length,
29
  dtype=dtype,
30
  load_in_4bit=load_in_4bit,
 
112
  dataset_num_proc=2,
113
  packing=False,
114
  args=TrainingArguments(
115
+ per_device_train_batch_size=2,
116
+ gradient_accumulation_steps=2,
117
  learning_rate=2e-4,
118
  fp16=not is_bfloat16_supported(),
119
  bf16=is_bfloat16_supported(),
120
  warmup_steps=5,
121
  logging_steps=10,
122
+ max_steps=100,
123
  optim="adamw_8bit",
124
  weight_decay=0.01,
125
  lr_scheduler_type="linear",
 
138
 
139
  uploads_models = f"cybersentinal-2.0-{str(num)}"
140
 
141
+ up = "sentinal-3.1-70B"
142
+
143
  print("Saving the trained model...")
144
  model.save_pretrained_merged("model", tokenizer, save_method="merged_16bit")
145
  print("Model saved successfully.")
146
 
147
  print("Pushing the model to the hub...")
148
  model.push_to_hub_merged(
149
+ up,
150
  tokenizer,
151
  save_method="merged_16bit",
152
  token=hf_token