---
license: apache-2.0
inference: false
---
# Given-MPT-7B
This is a merge of the following MPT-7B models:
- **g**orilla-llm/gorilla-mpt-7b-hf-v0
- **i**bm/mpt-7b-instruct2
- Teh**V**enom/MPT-7b-WizardLM_Uncensored-Storywriter-Merge
- **e**mozilla/mpt-7b-storysummarizer
- **n**omic-ai/gpt4all-mpt
## Model License
Apache 2.0
## Purpose
This model is intended for experiments with model merging and with routing to expert layers.
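The merged checkpoint loads like any other MPT-7B model. Below is a minimal loading sketch, not the authors' own code, assuming the Hugging Face `transformers` library and the same settings used for the eval below (`bfloat16`, `trust_remote_code=True`).

```python
# Minimal loading sketch (assumption: standard transformers AutoModel API,
# same settings as the eval below: bfloat16 + trust_remote_code).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Multi-Domain-Expert-Layers/given-mpt-7b"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,  # MPT checkpoints ship custom modeling code
)

inputs = tokenizer("The five models merged here are", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```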
## Test eval on only 10% of the eval set
Run settings: `hf-causal (pretrained=Multi-Domain-Expert-Layers/given-mpt-7b,dtype=bfloat16,trust_remote_code=True)`, limit: 0.1, provide_description: False, num_fewshot: 0, batch_size: None. A reproduction sketch follows the table.
| Task |Version| Metric | Value | |Stderr|
|-------------------------------------------------|------:|-----------|------:|---|-----:|
|arc_challenge | 0|acc | 0.4274|± |0.0459|
| | |acc_norm | 0.3846|± |0.0452|
|arc_easy | 0|acc | 0.7863|± |0.0381|
| | |acc_norm | 0.7350|± |0.0410|
|hellaswag | 0|acc | 0.5556|± |0.0461|
| | |acc_norm | 0.8120|± |0.0363|
|hendrycksTest-college_chemistry | 0|acc | 0.3600|± |0.0482|
| | |acc_norm | 0.3700|± |0.0485|
|hendrycksTest-college_computer_science | 0|acc | 0.3400|± |0.0476|
| | |acc_norm | 0.3600|± |0.0482|
|hendrycksTest-college_mathematics | 0|acc | 0.2500|± |0.0435|
| | |acc_norm | 0.2900|± |0.0456|
|hendrycksTest-college_medicine | 0|acc | 0.3675|± |0.0448|
| | |acc_norm | 0.3162|± |0.0432|
|hendrycksTest-college_physics | 0|acc | 0.2451|± |0.0428|
| | |acc_norm | 0.2941|± |0.0453|
|hendrycksTest-computer_security | 0|acc | 0.4800|± |0.0502|
| | |acc_norm | 0.4400|± |0.0499|
|hendrycksTest-conceptual_physics | 0|acc | 0.2051|± |0.0375|
| | |acc_norm | 0.1709|± |0.0350|
|hendrycksTest-econometrics | 0|acc | 0.2982|± |0.0430|
| | |acc_norm | 0.2368|± |0.0400|
|hendrycksTest-electrical_engineering | 0|acc | 0.3248|± |0.0435|
| | |acc_norm | 0.3590|± |0.0445|
|hendrycksTest-elementary_mathematics | 0|acc | 0.3333|± |0.0438|
| | |acc_norm | 0.3162|± |0.0432|
|hendrycksTest-formal_logic | 0|acc | 0.3077|± |0.0429|
| | |acc_norm | 0.3248|± |0.0435|
|hendrycksTest-global_facts | 0|acc | 0.3000|± |0.0461|
| | |acc_norm | 0.2700|± |0.0446|
|hendrycksTest-high_school_biology | 0|acc | 0.3675|± |0.0448|
| | |acc_norm | 0.3077|± |0.0429|
|hendrycksTest-high_school_chemistry | 0|acc | 0.2564|± |0.0405|
| | |acc_norm | 0.2906|± |0.0422|
|hendrycksTest-high_school_computer_science | 0|acc | 0.4100|± |0.0494|
| | |acc_norm | 0.4400|± |0.0499|
|hendrycksTest-high_school_european_history | 0|acc | 0.4359|± |0.0460|
| | |acc_norm | 0.3590|± |0.0445|
|hendrycksTest-high_school_geography | 0|acc | 0.3248|± |0.0435|
| | |acc_norm | 0.3675|± |0.0448|
|hendrycksTest-high_school_government_and_politics| 0|acc | 0.3932|± |0.0454|
| | |acc_norm | 0.3932|± |0.0454|
|hendrycksTest-high_school_macroeconomics | 0|acc | 0.3333|± |0.0438|
| | |acc_norm | 0.3248|± |0.0435|
|hendrycksTest-high_school_mathematics | 0|acc | 0.2051|± |0.0375|
| | |acc_norm | 0.2564|± |0.0405|
|hendrycksTest-high_school_microeconomics | 0|acc | 0.3504|± |0.0443|
| | |acc_norm | 0.4188|± |0.0458|
|hendrycksTest-high_school_physics | 0|acc | 0.2650|± |0.0410|
| | |acc_norm | 0.2906|± |0.0422|
|hendrycksTest-high_school_psychology | 0|acc | 0.3761|± |0.0450|
| | |acc_norm | 0.3419|± |0.0440|
|hendrycksTest-high_school_statistics | 0|acc | 0.3077|± |0.0429|
| | |acc_norm | 0.3504|± |0.0443|
|hendrycksTest-high_school_us_history | 0|acc | 0.3333|± |0.0438|
| | |acc_norm | 0.3333|± |0.0438|
|hendrycksTest-high_school_world_history | 0|acc | 0.3333|± |0.0438|
| | |acc_norm | 0.3419|± |0.0440|
|hendrycksTest-human_aging | 0|acc | 0.3761|± |0.0450|
| | |acc_norm | 0.3162|± |0.0432|
|hendrycksTest-human_sexuality | 0|acc | 0.4274|± |0.0459|
| | |acc_norm | 0.3761|± |0.0450|
|hendrycksTest-international_law | 0|acc | 0.4188|± |0.0458|
| | |acc_norm | 0.4957|± |0.0464|
|hendrycksTest-jurisprudence | 0|acc | 0.3148|± |0.0449|
| | |acc_norm | 0.4815|± |0.0483|
|hendrycksTest-logical_fallacies | 0|acc | 0.3504|± |0.0443|
| | |acc_norm | 0.3675|± |0.0448|
|hendrycksTest-machine_learning | 0|acc | 0.3214|± |0.0443|
| | |acc_norm | 0.2946|± |0.0433|
|hendrycksTest-management | 0|acc | 0.3786|± |0.0480|
| | |acc_norm | 0.3495|± |0.0472|
|hendrycksTest-marketing | 0|acc | 0.5043|± |0.0464|
| | |acc_norm | 0.4188|± |0.0458|
|hendrycksTest-medical_genetics | 0|acc | 0.3200|± |0.0469|
| | |acc_norm | 0.4100|± |0.0494|
|hendrycksTest-miscellaneous | 0|acc | 0.5299|± |0.0463|
| | |acc_norm | 0.4872|± |0.0464|
|hendrycksTest-moral_disputes | 0|acc | 0.3248|± |0.0435|
| | |acc_norm | 0.3162|± |0.0432|
|hendrycksTest-moral_scenarios | 0|acc | 0.3248|± |0.0435|
| | |acc_norm | 0.2479|± |0.0401|
|hendrycksTest-nutrition | 0|acc | 0.3675|± |0.0448|
| | |acc_norm | 0.3932|± |0.0454|
|hendrycksTest-philosophy | 0|acc | 0.2991|± |0.0425|
| | |acc_norm | 0.3504|± |0.0443|
|hendrycksTest-prehistory | 0|acc | 0.2821|± |0.0418|
| | |acc_norm | 0.3248|± |0.0435|
|hendrycksTest-professional_accounting | 0|acc | 0.2137|± |0.0381|
| | |acc_norm | 0.2222|± |0.0386|
|hendrycksTest-professional_law | 0|acc | 0.3077|± |0.0429|
| | |acc_norm | 0.2735|± |0.0414|
|hendrycksTest-professional_medicine | 0|acc | 0.2991|± |0.0425|
| | |acc_norm | 0.2650|± |0.0410|
|hendrycksTest-professional_psychology | 0|acc | 0.3248|± |0.0435|
| | |acc_norm | 0.3419|± |0.0440|
|hendrycksTest-public_relations | 0|acc | 0.3909|± |0.0467|
| | |acc_norm | 0.3545|± |0.0458|
|hendrycksTest-security_studies | 0|acc | 0.3419|± |0.0440|
| | |acc_norm | 0.2906|± |0.0422|
|hendrycksTest-sociology | 0|acc | 0.3761|± |0.0450|
| | |acc_norm | 0.3162|± |0.0432|
|hendrycksTest-us_foreign_policy | 0|acc | 0.5000|± |0.0503|
| | |acc_norm | 0.4100|± |0.0494|
|hendrycksTest-virology | 0|acc | 0.3932|± |0.0454|
| | |acc_norm | 0.3248|± |0.0435|
|hendrycksTest-world_religions | 0|acc | 0.5299|± |0.0463|
| | |acc_norm | 0.5128|± |0.0464|
|truthfulqa_gen | 1|bleurt_max |-0.8551|± |0.0501|
| | |bleurt_acc | 0.3590|± |0.0445|
| | |bleurt_diff|-0.1292|± |0.0483|
| | |bleu_max |19.3738|± |1.8461|
| | |bleu_acc | 0.3932|± |0.0454|
| | |bleu_diff |-4.3883|± |2.1748|
| | |rouge1_max |41.8428|± |2.6156|
| | |rouge1_acc | 0.3162|± |0.0432|
| | |rouge1_diff|-8.8583|± |2.7745|
| | |rouge2_max |26.3956|± |2.8311|
| | |rouge2_acc | 0.2137|± |0.0381|
| | |rouge2_diff|-9.5287|± |3.3258|
| | |rougeL_max |39.5215|± |2.5620|
| | |rougeL_acc | 0.3162|± |0.0432|
| | |rougeL_diff|-8.5753|± |2.8259|
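Since only 10% of each eval set was used (`limit: 0.1`), the numbers above carry wide standard errors and should be read as rough estimates. The sketch below shows how a comparable run could be reproduced; it assumes the EleutherAI lm-evaluation-harness in a v0.3.x-era release (the one that uses the `hf-causal` model type and `hendrycksTest-*` task names), and argument names may differ in other harness versions.

```python
# Reproduction sketch (assumption: EleutherAI lm-evaluation-harness v0.3.x;
# argument names may differ in other harness versions).
import json
from lm_eval import evaluator

results = evaluator.simple_evaluate(
    model="hf-causal",
    model_args=(
        "pretrained=Multi-Domain-Expert-Layers/given-mpt-7b,"
        "dtype=bfloat16,trust_remote_code=True"
    ),
    # Subset shown for brevity; add the hendrycksTest-* subjects from the table
    # to match the full run.
    tasks=["arc_challenge", "arc_easy", "hellaswag", "truthfulqa_gen"],
    num_fewshot=0,
    limit=0.1,  # evaluate on 10% of each eval set, as above
)

print(json.dumps(results["results"], indent=2))
```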