# Spaces: helamouri / medichat_assignment (Running)
# File size: 2,256 Bytes
# eca6215

from src.training import train_model
from src.model import load_model
from src.dataset import formatting_prompts_func
from datasets import Dataset
import pytest
import torch


@pytest.mark.gpu
@pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
def test_gpu_feature():
    """Sanity check that a CUDA device is visible to torch.

    The ``skipif`` marker skips this test on machines without CUDA, so the
    assertion only runs where it is expected to hold.
    """
    cuda_ready = torch.cuda.is_available()
    assert cuda_ready

# NOTE: no pytest marks here on purpose. Marks applied to a fixture function
# have no effect (pytest warns about them and newer releases reject them);
# GPU gating belongs on the tests that request this fixture.
@pytest.fixture
def mock_dataset():
    """Provide a tiny two-row mock dataset for training tests.

    The raw columns (``instruction``, ``input``, ``output``) are passed through
    ``formatting_prompts_func`` with a simple prompt template and an ``<EOS>``
    end-of-sequence token, and the result is wrapped in a ``datasets.Dataset``.

    Returns:
        Dataset: built from whatever mapping ``formatting_prompts_func``
        returns (presumably containing a ``"text"`` column -- verify against
        ``src.dataset``).
    """
    data = {
        "instruction": ["Test instruction 1", "Test instruction 2"],
        "input": ["Test input 1", "Test input 2"],
        "output": ["Test output 1", "Test output 2"],
    }
    formatted_data = formatting_prompts_func(
        data,
        template="Instruction: {}\nInput: {}\nOutput: {}",
        eos_token="<EOS>",
    )
    return Dataset.from_dict(formatted_data)

@pytest.mark.gpu
@pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
def test_train_model(mock_dataset):
    """Smoke-test ``train_model`` by running one training step on the mock dataset.

    Loads the base model, trains for a single step, and checks that the
    returned statistics object exposes the attributes this test relies on.
    """
    # The positional arguments are forwarded unchanged; their meaning is
    # defined by ``src.model.load_model``.
    model, tokenizer = load_model("unsloth/Meta-Llama-3.1-8B", 16, None, True, {'': 0})

    # A single step keeps the run as short as possible.
    stats = train_model(
        model=model,
        tokenizer=tokenizer,
        train_dataset=mock_dataset,
        dataset_text_field="text",
        max_seq_length=16,
        dataset_num_proc=1,
        packing=False,
        training_args={"max_steps": 1, "output_dir": "outputs"},
    )

    # Training must hand back a statistics object...
    assert stats is not None

    # ...that carries the step counter and the loss attribute.
    for field in ("global_step", "train_loss"):
        assert hasattr(stats, field)

    # Only the configured output path string is inspected here; the
    # filesystem itself is not checked.
    assert "outputs" in stats.args.output_dir