ManyICLBench_Leaderboard / src /Global Context Understanding_full_200.csv
zkjzou's picture
update benchmark
239e0f7
Task,1000,2000,4000,8000,16000,32000,64000,128000,avg,avg.L
glm-4-9b-chat,40.50698427968745,40.282514030593525,42.03514758697741,42.77812732118466,40.698943674486365,40.46429917509213,38.84619005124577,39.12544210472614,40.59220602799918,39.47864377702135
Mistral-Nemo-Instruct,38.25174117482217,39.06953517749783,39.28141110303884,38.98529668370538,33.061083213923375,32.830517013377566,30.45934094500344,27.110396670750852,34.88116524776493,30.133418209710623
Mistral-Large-Instruct-AWQ,61.46838941271965,61.09587095162697,61.22583603295307,60.86833794699957,60.861933554427466,58.84010374503828,50.01197314627164,16.691663977868544,53.88301359598815,41.84791362305949
Llama-3.1-8B-Instruct,37.30836868392976,38.84125527848912,41.2505736280164,40.794258067687366,39.83444694842053,39.773843356283,39.11869701930997,34.41209536535099,38.916692293435894,37.76821191364799
Llama-3.1-70B-Instruct-AWQ,53.32457767581259,54.83670752732655,55.75854084271325,55.86647085146588,56.41586374288468,56.344722041867136,54.42144605517714,18.728575329808265,50.712113008381934,43.164914475617515
Qwen2-7B-Instruct,39.521038462895056,41.95617309618548,45.16961923996148,45.388604614144,45.49983279321517,37.29314690113598,36.97410652200931,33.99279484585449,40.72441455942512,36.08668275633326
Qwen2-72B-Instruct-AWQ,50.70543177597992,51.90185989326735,53.231917024118985,53.42772383454989,53.60456776354168,50.86853335158976,50.62023898985967,52.0547180736628,52.05187383832126,51.181163471704075
Phi-3-Mini-Instruct,33.53774232920733,32.97066354630036,29.80053724275289,29.74568289344541,30.124298230078637,28.776180888367108,28.06044761555888,25.761550190503566,29.847137867026774,27.532726231476516
Phi-3-Medium-Instruct,41.587771292424094,40.912198239966216,34.854733073016845,35.634932868512294,36.90794452772758,36.83888019383616,36.37856781039721,28.30918854780435,36.42802706921059,33.842212184012574
Phi-3-Small-Instruct,41.61109509590759,41.61237571572385,41.606181105263296,35.57902498126983,37.16841325462178,37.72671633709681,36.90648845260399,35.328537592998906,38.442354066935756,36.653914127566566
Jamba-1.5-Mini,31.96393502346349,33.0804151894422,32.97355597731929,32.70266682086676,31.65788220876118,28.82458742147524,27.14127682848188,25.874333552669743,30.527331627809975,27.280065934208952
Gemini-1.5-Pro,57.87,63.39,64.15,66.78,68.02,67.78,66.14,66.42,65.07,66.78