Adding Evaluation Results

This is an automated PR created with https://huggingface.co/spaces/Weyaxi/open-llm-leaderboard-results-pr

The purpose of this PR is to add evaluation results from the Open LLM Leaderboard to your model card.

If you encounter any issues, please report them to https://huggingface.co/spaces/Weyaxi/open-llm-leaderboard-results-pr/discussions

Files changed (1)

README.md +125 -59

README.md CHANGED

@@ -1,22 +1,70 @@
 ---
-license: apache-2.0
 language:
-  - en
-pipeline_tag: text-generation
 base_model: EleutherAI/pythia-31m
 datasets:
-  - totally-not-an-llm/EverythingLM-data-V3
-  - databricks/databricks-dolly-15k
-  - THUDM/webglm-qa
-  - starfishmedical/webGPT_x_dolly
-  - Amod/mental_health_counseling_conversations
-  - sablo/oasst2_curated
-  - cognitivecomputations/wizard_vicuna_70k_unfiltered
-  - mlabonne/chatml_dpo_pairs
 model-index:
 - name: Pythia-31M-Chat-v1
   results:
-  - task:
       type: text-generation
       name: Text Generation
     dataset:
@@ -27,13 +75,13 @@ model-index:
       args:
         num_few_shot: 25
     metrics:
-       - type: acc_norm
-         name: normalized accuracy
-         value: 22.7
     source:
-      name: Open LLM Leaderboard
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Pythia-31M-Chat-v1
-  - task:
       type: text-generation
       name: Text Generation
     dataset:
@@ -43,13 +91,13 @@ model-index:
       args:
         num_few_shot: 10
     metrics:
-       - type: acc_norm
-         name: normalized accuracy
-         value: 25.6
     source:
-      name: Open LLM Leaderboard
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Pythia-31M-Chat-v1
-  - task:
       type: text-generation
       name: Text Generation
     dataset:
@@ -60,13 +108,13 @@ model-index:
       args:
         num_few_shot: 5
     metrics:
-       - type: acc
-         name: accuracy
-         value: 23.24
     source:
-      name: Open LLM Leaderboard
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Pythia-31M-Chat-v1
-  - task:
       type: text-generation
       name: Text Generation
     dataset:
@@ -77,41 +125,45 @@ model-index:
       args:
         num_few_shot: 5
     metrics:
-       - type: acc
-         name: accuracy
-         value: 47.99
     source:
       name: Open LLM Leaderboard
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Pythia-31M-Chat-v1
-widget:
-  - text: |-
-      <|im_start|>system
-      You are a career counselor. The user will provide you with an individual looking for guidance in their professional life, and your task is to assist them in determining what careers they are most suited for based on their skills, interests, and experience. You should also conduct research into the various options available, explain the job market trends in different industries, and advice on which qualifications would be beneficial for pursuing particular fields.<|im_end|>
-      <|im_start|>user
-      Heya!<|im_end|>
-      <|im_start|>assistant
-      Hi! How may I help you?<|im_end|>
-      <|im_start|>user
-      I am interested in developing a career in software engineering. What would you recommend me to do?<|im_end|>
-      <|im_start|>assistant
-  - text: |-
-      <|im_start|>system
-      You are a helpful assistant who answers user's questions with details and curiosity.<|im_end|>
-      <|im_start|>user
-      What are some potential applications for quantum computing?<|im_end|>
-      <|im_start|>assistant
-  - text: |-
-      <|im_start|>system
-      You are a highly knowledgeable assistant. Help the user as much as you can.<|im_end|>
-      <|im_start|>user
-      What are some steps I can take to become a healthier person?<|im_end|>
-      <|im_start|>assistant
-inference:
-  parameters:
-    max_new_tokens: 250
-    penalty_alpha: 0.5
-    top_k: 2
-    repetition_penalty: 1.0016
 ---
 # A Pythia Chat Model of 31M Parameters
@@ -227,3 +279,17 @@ DPOTrainer(
     ],
 )
 ```

 ---
 language:
+- en
+license: apache-2.0
 base_model: EleutherAI/pythia-31m
 datasets:
+- totally-not-an-llm/EverythingLM-data-V3
+- databricks/databricks-dolly-15k
+- THUDM/webglm-qa
+- starfishmedical/webGPT_x_dolly
+- Amod/mental_health_counseling_conversations
+- sablo/oasst2_curated
+- cognitivecomputations/wizard_vicuna_70k_unfiltered
+- mlabonne/chatml_dpo_pairs
+pipeline_tag: text-generation
+widget:
+- text: '<|im_start|>system
+    You are a career counselor. The user will provide you with an individual looking
+    for guidance in their professional life, and your task is to assist them in determining
+    what careers they are most suited for based on their skills, interests, and experience.
+    You should also conduct research into the various options available, explain the
+    job market trends in different industries, and advice on which qualifications
+    would be beneficial for pursuing particular fields.<|im_end|>
+    <|im_start|>user
+    Heya!<|im_end|>
+    <|im_start|>assistant
+    Hi! How may I help you?<|im_end|>
+    <|im_start|>user
+    I am interested in developing a career in software engineering. What would you
+    recommend me to do?<|im_end|>
+    <|im_start|>assistant'
+- text: '<|im_start|>system
+    You are a helpful assistant who answers user''s questions with details and curiosity.<|im_end|>
+    <|im_start|>user
+    What are some potential applications for quantum computing?<|im_end|>
+    <|im_start|>assistant'
+- text: '<|im_start|>system
+    You are a highly knowledgeable assistant. Help the user as much as you can.<|im_end|>
+    <|im_start|>user
+    What are some steps I can take to become a healthier person?<|im_end|>
+    <|im_start|>assistant'
+inference:
+  parameters:
+    max_new_tokens: 250
+    penalty_alpha: 0.5
+    top_k: 2
+    repetition_penalty: 1.0016
 model-index:
 - name: Pythia-31M-Chat-v1
   results:
+  - task:
       type: text-generation
       name: Text Generation
     dataset:
       args:
         num_few_shot: 25
     metrics:
+    - type: acc_norm
+      value: 22.7
+      name: normalized accuracy
     source:
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Pythia-31M-Chat-v1
+      name: Open LLM Leaderboard
+  - task:
       type: text-generation
       name: Text Generation
     dataset:
       args:
         num_few_shot: 10
     metrics:
+    - type: acc_norm
+      value: 25.6
+      name: normalized accuracy
     source:
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Pythia-31M-Chat-v1
+      name: Open LLM Leaderboard
+  - task:
       type: text-generation
       name: Text Generation
     dataset:
       args:
         num_few_shot: 5
     metrics:
+    - type: acc
+      value: 23.24
+      name: accuracy
     source:
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Pythia-31M-Chat-v1
+      name: Open LLM Leaderboard
+  - task:
       type: text-generation
       name: Text Generation
     dataset:
       args:
         num_few_shot: 5
     metrics:
+    - type: acc
+      value: 47.99
+      name: accuracy
     source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Pythia-31M-Chat-v1
       name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: TruthfulQA (0-shot)
+      type: truthful_qa
+      config: multiple_choice
+      split: validation
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: mc2
+      value: 0.0
+    source:
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Pythia-31M-Chat-v1
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: GSM8k (5-shot)
+      type: gsm8k
+      config: main
+      split: test
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 0.0
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Pythia-31M-Chat-v1
+      name: Open LLM Leaderboard
 ---
 # A Pythia Chat Model of 31M Parameters
     ],
 )
 ```
+# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
+Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_Felladrin__Pythia-31M-Chat-v1)
+|             Metric              |Value|
+|---------------------------------|----:|
+|Avg.                             |19.92|
+|AI2 Reasoning Challenge (25-Shot)|22.70|
+|HellaSwag (10-Shot)              |25.60|
+|MMLU (5-Shot)                    |23.24|
+|TruthfulQA (0-shot)              | 0.00|
+|Winogrande (5-shot)              |47.99|
+|GSM8k (5-shot)                   | 0.00|