Details to replicate the training run. The current version struggles with closing </think> tags.

from datasets import load_dataset, concatenate_datasets
from unsloth.chat_templates import get_chat_template
import os
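
# Load the base model and tokenizer with Unsloth first. A minimal sketch:
# the checkpoint name, max_seq_length, and load_in_4bit below are assumptions,
# so substitute the base model this LoRA is actually trained from.
from unsloth import FastLanguageModel

max_seq_length = 4096  # assumption: context length used during training
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",  # assumption
    max_seq_length=max_seq_length,
    load_in_4bit=True,  # assumption: QLoRA-style 4-bit loading
)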

# Dataset identifiers to mix into the training set (add more as needed)
datasets_list = [
    "Nitral-AI/Toxicity_ShareGPT",
]

# Directory to save the temporary dataset
output_dir = "temp_training_dataset"

# Chat template setup
tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",  # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
    mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},  # ShareGPT style
    map_eos_token=False,
)

# Function to format conversations using the chat template
def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [
        tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)
        for convo in convos
    ]
    return {"text": texts}

# Function to load, format, and sample datasets
def load_format_and_sample(datasets_list, formatting_function, sample_size=6853):
    sampled_datasets = []
    for dataset_id in datasets_list:
        # Load the dataset
        dataset = load_dataset(dataset_id, split="train")
        # Apply formatting
        formatted_dataset = dataset.map(formatting_function, batched=True)
        # Shuffle and sample
        sampled_dataset = formatted_dataset.shuffle(seed=42).select(range(min(len(formatted_dataset), sample_size)))
        sampled_datasets.append(sampled_dataset)
    return sampled_datasets

# Load, format, and sample datasets
sampled_datasets = load_format_and_sample(datasets_list, formatting_prompts_func, sample_size=6853)

# Combine sampled datasets into one temporary set
temporary_training_set = concatenate_datasets(sampled_datasets)

# Save the dataset locally
os.makedirs(output_dir, exist_ok=True)
temporary_training_set.save_to_disk(output_dir)
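
# To reload the saved copy in a later session (illustrative sketch):
# from datasets import load_from_disk
# dataset = load_from_disk(output_dir)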

# Redefine the temporary training set as 'dataset' for further use
dataset = temporary_training_set

# Print info about the combined set
print(f"Temporary training dataset saved to '{output_dir}'")
print(dataset)

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
import wandb

# Attach the LoRA adapters before building the trainer so that only the
# adapter weights are updated during training.
model = FastLanguageModel.get_peft_model(
    model,
    r = 64, # Choose any number > 0; suggested: 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 64, # Alpha should equal the rank (r)
    lora_dropout = 0, # Any value is supported, but 0 is optimized
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = True,
    loftq_config = None,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=TrainingArguments(
        per_device_train_batch_size=8,
        gradient_accumulation_steps=2,  # effective batch size: 8 x 2 = 16 per device
        warmup_steps=15,
        num_train_epochs=2,
        learning_rate=1e-6,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        report_to="wandb",
        max_grad_norm=1,
    ),
)

wandb.init(project="test", entity="nitral")

trainer_stats = trainer.train()
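
# After training, persist the LoRA adapter (a sketch; the output path is an
# assumption). save_pretrained on a PEFT model writes only the adapter
# weights, which is what this repository hosts.
model.save_pretrained("lora_adapter")
tokenizer.save_pretrained("lora_adapter")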