africa-intelligence
/

llama-8b-south-africa

@@ -1,109 +1,115 @@
 base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
 library_name: peft
-license: llama3.1
 model-index:
   - name: llama-8b-south-africa
-model_description:
-  name: llama-8b-south-africa
-  description: |
-    This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
-    [Alapa Cleaned](https://huggingface.co/datasets/yahma/alpaca-cleaned) translated into Xhose, Zulu, Tswana, Northern Sotho and Afrikaans using machine translation.
-  details: |
-    The model could only be evaluated in Xhosa and Zulu due to Iroko language availability. Its aim is to show cross-lingual transfer can be achieved at a low cost. Translation cost roughly $370 per language and training cost roughly $15 using an Akash Compute Network GPU.
-  intended_use: This model is intended to be used for research.
-evaluation_results:
-  - task:
-      type: text-generation
-      name: African Language Evaluation
-    dataset:
-      name: afrimgsm_direct_xho
-      type: text-classification
-      split: test
-    metrics:
-      - name: Accuracy
-        type: accuracy
-        value: 0.02
-      - name: Dataset
-        type: dataset
-        value: MGS-Xho Direct
-  - task:
-      type: text-generation
-      name: African Language Evaluation
-    dataset:
-      name: afrimmlu_direct_xho
-      type: text-classification
-      split: test
-    metrics:
-      - name: Accuracy
-        type: accuracy
-        value: 0.29
-      - name: Dataset
-        type: dataset
-        value: MMLU-Xho Direct
-  - task:
-      type: text-generation
-      name: African Language Evaluation
-    dataset:
-      name: afrixnli_en_direct_xho
-      type: text-classification
-      split: test
-    metrics:
-      - name: Accuracy
-        type: accuracy
-        value: 0.44
-      - name: Dataset
-        type: dataset
-        value: XNLI-Xho Direct
-  - task:
-      type: text-generation
-      name: African Language Evaluation
-    dataset:
-      name: afrimgsm_direct_zul
-      type: text-classification
-      split: test
-    metrics:
-      - name: Accuracy
-        type: accuracy
-        value: 0.045
-      - name: Dataset
-        type: dataset
-        value: MGS-Zul Direct
-  - task:
-      type: text-generation
-      name: African Language Evaluation
-    dataset:
-      name: afrimmlu_direct_zul
-      type: text-classification
-      split: test
-    metrics:
-      - name: Accuracy
-        type: accuracy
-        value: 0.29
-      - name: Dataset
-        type: dataset
-        value: MMLU-Zul Direct
-  - task:
-      type: text-generation
-      name: African Language Evaluation
-    dataset:
-      name: afrixnli_en_direct_zul
-      type: text-classification
-      split: test
-    metrics:
-      - name: Accuracy
-        type: accuracy
-        value: 0.43
-      - name: Dataset
-        type: dataset
-        value: XNLI-Zul Direct
-terms_of_use: This model is governed by a Apache 2.0 License.

+---
 base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
+datasets:
+  - generator
 library_name: peft
+license: apache-2.0
+tags:
+  - trl
+  - sft
+  - generated_from_trainer
+  - african-languages
 model-index:
   - name: llama-8b-south-africa
+    results:
+      - task:
+          type: text-generation
+          name: African Language Evaluation
+        dataset:
+          name: afrimgsm_direct_xho
+          type: text-classification
+          split: test
+        metrics:
+          - name: Accuracy
+            type: accuracy
+            value: 0.02
+      - task:
+          type: text-generation
+          name: African Language Evaluation
+        dataset:
+          name: afrimgsm_direct_zul
+          type: text-classification
+          split: test
+        metrics:
+          - name: Accuracy
+            type: accuracy
+            value: 0.045
+      - task:
+          type: text-generation
+          name: African Language Evaluation
+        dataset:
+          name: afrimmlu_direct_xho
+          type: text-classification
+          split: test
+        metrics:
+          - name: Accuracy
+            type: accuracy
+            value: 0.29
+      - task:
+          type: text-generation
+          name: African Language Evaluation
+        dataset:
+          name: afrimmlu_direct_zul
+          type: text-classification
+          split: test
+        metrics:
+          - name: Accuracy
+            type: accuracy
+            value: 0.29
+      - task:
+          type: text-generation
+          name: African Language Evaluation
+        dataset:
+          name: afrixnli_en_direct_xho
+          type: text-classification
+          split: test
+        metrics:
+          - name: Accuracy
+            type: accuracy
+            value: 0.44
+      - task:
+          type: text-generation
+          name: African Language Evaluation
+        dataset:
+          name: afrixnli_en_direct_zul
+          type: text-classification
+          split: test
+        metrics:
+          - name: Accuracy
+            type: accuracy
+            value: 0.43
+model_description: |
+  This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
+  [Alpaca Cleaned](https://huggingface.co/datasets/yahma/alpaca-cleaned) translated into Xhosa, Zulu, Tswana, Northern Sotho and Afrikaans using machine translation.
+  The model could only be evaluated in Xhosa and Zulu due to Iroko language availability. Its aim is to show cross-lingual transfer can be achieved at a low cost. Translation cost roughly $370 per language and training cost roughly $15 using an Akash Compute Network GPU.
+training_details:
+  loss: 1.0571
+  hyperparameters:
+    learning_rate: 0.0002
+    train_batch_size: 4
+    eval_batch_size: 8
+    seed: 42
+    distributed_type: multi-GPU
+    gradient_accumulation_steps: 2
+    total_train_batch_size: 8
+    optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+    lr_scheduler_type: cosine
+    lr_scheduler_warmup_ratio: 0.1
+    num_epochs: 1
+training_results:
+  final_loss: 1.0959
+  epochs: 0.9999
+  steps: 5596
+  validation_loss: 1.0571
+framework_versions:
+  peft: 0.12.0
+  transformers: 4.44.2
+  pytorch: 2.4.1+cu121
+  datasets: 3.0.0
+  tokenizers: 0.19.1
+---