---
base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
datasets:
  - generator
library_name: peft
license: apache-2.0
tags:
  - trl
  - sft
  - generated_from_trainer
  - african-languages
benchmark_visualization: assets/Benchmarks_(1).pdf
model-index:
  - name: llama-8b-south-africa
    results:
      - task:
          type: text-generation
          name: African Language Evaluation
        dataset:
          name: afrimgsm_direct_xho
          type: text-classification
          split: test
        metrics:
          - name: Accuracy
            type: accuracy
            value: 0.02
      - task:
          type: text-generation
          name: African Language Evaluation
        dataset:
          name: afrimgsm_direct_zul
          type: text-classification
          split: test
        metrics:
          - name: Accuracy
            type: accuracy
            value: 0.045
      - task:
          type: text-generation
          name: African Language Evaluation
        dataset:
          name: afrimmlu_direct_xho
          type: text-classification
          split: test
        metrics:
          - name: Accuracy
            type: accuracy
            value: 0.29
      - task:
          type: text-generation
          name: African Language Evaluation
        dataset:
          name: afrimmlu_direct_zul
          type: text-classification
          split: test
        metrics:
          - name: Accuracy
            type: accuracy
            value: 0.29
      - task:
          type: text-generation
          name: African Language Evaluation
        dataset:
          name: afrixnli_en_direct_xho
          type: text-classification
          split: test
        metrics:
          - name: Accuracy
            type: accuracy
            value: 0.44
      - task:
          type: text-generation
          name: African Language Evaluation
        dataset:
          name: afrixnli_en_direct_zul
          type: text-classification
          split: test
        metrics:
          - name: Accuracy
            type: accuracy
            value: 0.43
model_description: >
  This model is a fine-tuned version of
  [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct)
  on the generator dataset.

  [Alpaca Cleaned](https://huggingface.co/datasets/yahma/alpaca-cleaned)
  translated into Xhosa, Zulu, Tswana, Northern Sotho and Afrikaans using
  machine translation.


  The model could only be evaluated in Xhosa and Zulu due to Iroko language
  availability. Its aim is to show cross-lingual transfer can be achieved at a
  low cost. Translation cost roughly $370 per language and training cost roughly
  $15 using an Akash Compute Network GPU.
training_details:
  loss: 1.0571
  hyperparameters:
    learning_rate: 0.0002
    train_batch_size: 4
    eval_batch_size: 8
    seed: 42
    distributed_type: multi-GPU
    gradient_accumulation_steps: 2
    total_train_batch_size: 8
    optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
    lr_scheduler_type: cosine
    lr_scheduler_warmup_ratio: 0.1
    num_epochs: 1
training_results:
  final_loss: 1.0959
  epochs: 0.9999
  steps: 5596
  validation_loss: 1.0571
framework_versions:
  peft: 0.12.0
  transformers: 4.44.2
  pytorch: 2.4.1+cu121
  datasets: 3.0.0
  tokenizers: 0.19.1