leaderboard-pr-bot's picture
Adding Evaluation Results
dd6a880 verified
|
raw
history blame
4.64 kB
metadata
# Hugging Face model card front matter (auto-extended by leaderboard-pr-bot).
language:
  - en
license: apache-2.0
library_name: transformers
# Parent checkpoint this merge derives from.
base_model:
  - Qwen/Qwen2.5-7B
pipeline_tag: text-generation
# Open LLM Leaderboard v2 evaluation results, one entry per benchmark.
# Values/names below are emitted verbatim by the leaderboard bot — do not
# hand-edit them, or they may drift from the leaderboard's records.
model-index:
  - name: jeffmeloy_Qwen2.5-7B-minperplexity-1
    results:
      # IFEval: instruction-following accuracy, zero-shot.
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: IFEval (0-Shot)
          type: HuggingFaceH4/ifeval
          args:
            num_few_shot: 0
        metrics:
          - type: inst_level_strict_acc and prompt_level_strict_acc
            value: 37.57
            name: strict accuracy
        source:
          url: >-
            https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1
          name: Open LLM Leaderboard
      # BBH (Big-Bench Hard): reasoning suite, 3-shot.
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: BBH (3-Shot)
          type: BBH
          args:
            num_few_shot: 3
        metrics:
          - type: acc_norm
            value: 37.82
            name: normalized accuracy
        source:
          url: >-
            https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1
          name: Open LLM Leaderboard
      # MATH Level 5: competition math, exact-match scoring, 4-shot.
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: MATH Lvl 5 (4-Shot)
          type: hendrycks/competition_math
          args:
            num_few_shot: 4
        metrics:
          - type: exact_match
            value: 26.81
            name: exact match
        source:
          url: >-
            https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1
          name: Open LLM Leaderboard
      # GPQA: graduate-level science QA, zero-shot.
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: GPQA (0-shot)
          type: Idavidrein/gpqa
          args:
            num_few_shot: 0
        metrics:
          - type: acc_norm
            value: 10.96
            name: acc_norm
        source:
          url: >-
            https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1
          name: Open LLM Leaderboard
      # MuSR: multistep soft reasoning, zero-shot.
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: MuSR (0-shot)
          type: TAUR-Lab/MuSR
          args:
            num_few_shot: 0
        metrics:
          - type: acc_norm
            value: 11.93
            name: acc_norm
        source:
          url: >-
            https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1
          name: Open LLM Leaderboard
      # MMLU-Pro: harder multiple-choice knowledge benchmark, 5-shot.
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: MMLU-PRO (5-shot)
          type: TIGER-Lab/MMLU-Pro
          config: main
          split: test
          args:
            num_few_shot: 5
        metrics:
          - type: acc
            value: 37.42
            name: accuracy
        source:
          url: >-
            https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1
          name: Open LLM Leaderboard

Model Description

This model was created by comparing, for each layer, the perplexity achieved by the corresponding layer in each of the following Qwen2.5-7B models, and selecting the lowest-perplexity layer for the merged result.

  • "edgerunner-ai/EdgeRunner-Command-Nested"
  • "EVA-UNIT-01/EVA-Qwen2.5-7B-v0.1"
  • "fblgit/cybertron-v4-qw7B-MGS"
  • "FourOhFour/Vapor_v2_7B"
  • "Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2"
  • "happzy2633/qwen2.5-7b-ins-v3"
  • "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2"
  • "HumanLLMs/Humanish-Qwen2.5-7B-Instruct"
  • "katanemo/Arch-Function-7B"
  • "Orion-zhen/Meissa-Qwen2.5-7B-Instruct"
  • "Orion-zhen/Qwen2.5-7B-Gutenberg-KTO"
  • "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored"
  • "newsbang/Homer-7B-v0.1"
  • "nhyha/merge_Qwen2.5-7B-Instruct_20241023_0314"
  • "Qwen/Qwen2.5-7B"
  • "Qwen/Qwen2.5-7B-Instruct"
  • "scilo/qwen_ft_no_temp"
  • "sethuiyer/Qwen2.5-7B-Anvita"
  • "Siheng99/Qwen2.5-7B-Instruct-SEALONG"
  • "thomas-yanxin/XinYuan-Qwen2.5-7B-0917"
  • "newsbang/Homer-v0.5-Qwen2.5-7B"
  • "fblgit/cybertron-v4-qw7B-UNAMGS"
  • "win10/Verdandi-Qwen2.5-7B"
  • "AmberYifan/Qwen2.5-7B-dpo-2k"
  • "nextvalueup/Qwen2.5-7B-Instruct_v3"
  • "jbjeong91/Qwen2.5_7B_IST_StoryGen_vanilla"
  • "AmberYifan/Qwen2.5-7B-dpo-2k-hhrlhf"
  • "jbjeong91/Qwen2.5_7B_IST_StoryGen_test2"
  • "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix"
  • "bunnycore/Qandora-2.5-7B"

Open LLM Leaderboard Evaluation Results

Detailed results can be found on the Open LLM Leaderboard: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1

| Metric              | Value |
|---------------------|------:|
| Avg.                | 27.09 |
| IFEval (0-Shot)     | 37.57 |
| BBH (3-Shot)        | 37.82 |
| MATH Lvl 5 (4-Shot) | 26.81 |
| GPQA (0-shot)       | 10.96 |
| MuSR (0-shot)       | 11.93 |
| MMLU-PRO (5-shot)   | 37.42 |