Update app.py
Browse files
app.py
CHANGED
@@ -17,15 +17,14 @@ current_num = os.getenv("NUM")
|
|
17 |
print(f"stage ${current_num}")
|
18 |
|
19 |
api = HfApi(token=hf_token)
|
20 |
-
models = f"dad1909/cybersentinal-2.0-{current_num}"
|
21 |
-
|
22 |
-
# model_base = "dad1909/cybersentinal-2.0"
|
23 |
|
24 |
print("Starting model and tokenizer loading...")
|
25 |
|
26 |
# Load the model and tokenizer
|
27 |
model, tokenizer = FastLanguageModel.from_pretrained(
|
28 |
-
model_name=
|
29 |
max_seq_length=max_seq_length,
|
30 |
dtype=dtype,
|
31 |
load_in_4bit=load_in_4bit,
|
@@ -113,14 +112,14 @@ trainer = SFTTrainer(
|
|
113 |
dataset_num_proc=2,
|
114 |
packing=False,
|
115 |
args=TrainingArguments(
|
116 |
-
per_device_train_batch_size=
|
117 |
-
gradient_accumulation_steps=
|
118 |
learning_rate=2e-4,
|
119 |
fp16=not is_bfloat16_supported(),
|
120 |
bf16=is_bfloat16_supported(),
|
121 |
warmup_steps=5,
|
122 |
logging_steps=10,
|
123 |
-
max_steps=
|
124 |
optim="adamw_8bit",
|
125 |
weight_decay=0.01,
|
126 |
lr_scheduler_type="linear",
|
@@ -139,13 +138,15 @@ num += 1
|
|
139 |
|
140 |
uploads_models = f"cybersentinal-2.0-{str(num)}"
|
141 |
|
|
|
|
|
142 |
print("Saving the trained model...")
|
143 |
model.save_pretrained_merged("model", tokenizer, save_method="merged_16bit")
|
144 |
print("Model saved successfully.")
|
145 |
|
146 |
print("Pushing the model to the hub...")
|
147 |
model.push_to_hub_merged(
|
148 |
-
|
149 |
tokenizer,
|
150 |
save_method="merged_16bit",
|
151 |
token=hf_token
|
|
|
17 |
print(f"stage ${current_num}")
|
18 |
|
19 |
api = HfApi(token=hf_token)
|
20 |
+
# models = f"dad1909/cybersentinal-2.0-{current_num}"
|
21 |
+
model_base = "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit"
|
|
|
22 |
|
23 |
print("Starting model and tokenizer loading...")
|
24 |
|
25 |
# Load the model and tokenizer
|
26 |
model, tokenizer = FastLanguageModel.from_pretrained(
|
27 |
+
model_name=model_base,
|
28 |
max_seq_length=max_seq_length,
|
29 |
dtype=dtype,
|
30 |
load_in_4bit=load_in_4bit,
|
|
|
112 |
dataset_num_proc=2,
|
113 |
packing=False,
|
114 |
args=TrainingArguments(
|
115 |
+
per_device_train_batch_size=2,
|
116 |
+
gradient_accumulation_steps=2,
|
117 |
learning_rate=2e-4,
|
118 |
fp16=not is_bfloat16_supported(),
|
119 |
bf16=is_bfloat16_supported(),
|
120 |
warmup_steps=5,
|
121 |
logging_steps=10,
|
122 |
+
max_steps=100,
|
123 |
optim="adamw_8bit",
|
124 |
weight_decay=0.01,
|
125 |
lr_scheduler_type="linear",
|
|
|
138 |
|
139 |
uploads_models = f"cybersentinal-2.0-{str(num)}"
|
140 |
|
141 |
+
up = "sentinal-3.1-70B"
|
142 |
+
|
143 |
print("Saving the trained model...")
|
144 |
model.save_pretrained_merged("model", tokenizer, save_method="merged_16bit")
|
145 |
print("Model saved successfully.")
|
146 |
|
147 |
print("Pushing the model to the hub...")
|
148 |
model.push_to_hub_merged(
|
149 |
+
up,
|
150 |
tokenizer,
|
151 |
save_method="merged_16bit",
|
152 |
token=hf_token
|