ChineseSafe-Benchmark / data /subclass_per.csv
Jerry0723's picture
feat: update models and subclass
0885a6e
raw
history blame
6.15 kB
Model,Size,Discrimination_Accuracy,Discrimination_Precision,Discrimination_Recall,Variant_Accuracy,Variant_Precision,Variant_Recall,Psychology_Accuracy,Psychology_Precision,Psychology_Recall,Politics_Accuracy,Politics_Precision,Politics_Recall,Eroticism_Accuracy,Eroticism_Precision,Eroticism_Recall,Vulgarity_Accuracy,Vulgarity_Precision,Vulgarity_Recall,Property_Accuracy,Property_Precision,Property_Recall,Injury_Accuracy,Injury_Precision,Injury_Recall,Criminality_Accuracy,Criminality_Precision,Criminality_Recall,Ethics_Accuracy,Ethics_Precision,Ethics_Recall
DeepSeek-LLM-67B-Chat,>65B,0.6948,0.9451,0.3989,0.6447,0.9375,0.3259,0.5122,0.5824,0.033,0.7673,0.9695,0.5903,0.6865,0.9516,0.4092,0.899,0.9725,0.8159,0.66,0.9341,0.326,0.5479,0.8184,0.1017,0.8777,0.9706,0.7709,0.5142,0.6736,0.0456
Qwen1.5-72B-Chat,>65B,0.6479,0.581,0.9985,0.6609,0.6019,0.9938,0.6472,0.5837,0.9906,0.5928,0.5895,0.8276,0.6544,0.5996,0.9796,0.6488,0.5823,0.9987,0.6448,0.5792,0.9932,0.6255,0.5712,0.9493,0.6433,0.5763,0.9951,0.6485,0.5872,0.9874
Qwen2.5-72B,>65B,0.4889,0.4886,0.9993,0.5119,0.5114,1.0,0.465,0.4783,0.9413,0.5446,0.5442,1.0,0.5132,0.5128,1.0,0.4904,0.4899,1.0,0.4834,0.4855,0.9892,0.4779,0.4841,0.9716,0.4852,0.4847,1.0,0.4958,0.4966,0.9947
Qwen2-72B,>65B,0.5005,0.4943,1.0,0.5227,0.517,1.0,0.5051,0.499,1.0,0.5511,0.5481,0.9935,0.5238,0.5182,0.9994,0.5017,0.4955,1.0,0.5,0.4938,1.0,0.5027,0.4967,0.9993,0.4967,0.4903,1.0,0.5096,0.5035,1.0
Opt-66B,>65B,0.645,0.5831,0.9572,0.3981,0.417,0.4471,0.6667,0.5971,0.9953,0.6232,0.6095,0.8551,0.4854,0.4984,0.6176,0.652,0.5874,0.9698,0.6511,0.5859,0.9706,0.6604,0.5926,0.9853,0.6556,0.586,0.9846,0.655,0.5943,0.9665
Llama3-ChatQA-1.5-70B,>65B,0.3666,0.2082,0.1069,0.339,0.169,0.0752,0.3147,0.0148,0.0059,0.2947,0.075,0.0261,0.7758,0.7167,0.9293,0.5528,0.5482,0.4877,0.3396,0.111,0.0507,0.3207,0.0374,0.0156,0.4392,0.3806,0.2524,0.3214,0.0614,0.0253
Yi-1.5-34B-Chat,~30B,0.7139,0.8341,0.5176,0.7722,0.8735,0.6482,0.475,0.2581,0.0357,0.7162,0.8717,0.5603,0.6206,0.7912,0.353,0.8816,0.8938,0.8601,0.6412,0.7813,0.3672,0.497,0.4306,0.0769,0.8472,0.8832,0.7889,0.4818,0.3646,0.0576
Qwen2.5-32B,~30B,0.5101,0.4992,0.998,0.5328,0.5224,1.0,0.5114,0.5022,0.9914,0.5642,0.5551,1.0,0.5341,0.5237,1.0,0.5124,0.5009,1.0,0.5047,0.496,0.9876,0.5024,0.4964,0.9768,0.5073,0.4957,1.0,0.513,0.5054,0.986
Opt-30B,~30B,0.5831,0.5754,0.5565,0.3952,0.338,0.1915,0.6784,0.6507,0.7506,0.5798,0.6281,0.5559,0.357,0.2405,0.1185,0.406,0.3224,0.1945,0.6203,0.6061,0.633,0.6188,0.6076,0.6293,0.6031,0.5886,0.5976,0.6244,0.6184,0.6415
Baichuan2-13B-Chat,10B~20B,0.7346,0.6715,0.8932,0.7703,0.7043,0.9491,0.6303,0.6129,0.6785,0.7435,0.7152,0.8777,0.779,0.7088,0.9649,0.7677,0.6883,0.9601,0.6763,0.6388,0.7738,0.6359,0.6149,0.6904,0.7096,0.6554,0.8436,0.7306,0.6762,0.8788
Qwen1.5-14B,10B~20B,0.625,0.5683,0.964,0.6549,0.5977,0.9932,0.5983,0.5571,0.9038,0.6561,0.6193,0.9535,0.6592,0.6005,0.9994,0.6382,0.5759,0.9897,0.5579,0.53,0.8275,0.5009,0.4938,0.7077,0.6256,0.566,0.9705,0.6063,0.5643,0.914
Ziya2-13B-Chat,10B~20B,0.6322,0.6632,0.502,0.381,0.0822,0.0212,0.4263,0.2557,0.086,0.4352,0.4474,0.1651,0.612,0.6721,0.4744,0.812,0.7741,0.8691,0.4904,0.4516,0.2102,0.5309,0.5403,0.2964,0.7186,0.7235,0.6777,0.4811,0.4512,0.2021
InternLM2-Chat-20B,10B~20B,0.5184,0.5912,0.0441,0.4754,0.0222,0.0006,0.4929,0.0222,0.0006,0.4744,0.7043,0.0573,0.605,0.904,0.256,0.5265,0.6774,0.0625,0.5689,0.8292,0.146,0.5046,0.4073,0.0202,0.7142,0.9352,0.44,0.498,0.4041,0.0196
Opt-13B,10B~20B,0.5011,0.0392,0.0015,0.4792,0.0695,0.0018,0.4958,0,0,0.4492,0.237,0.0055,0.4897,0.5438,0.0249,0.4996,0.0333,0.0006,0.5037,0.1931,0.0055,0.5454,0.8065,0.0965,0.5155,0.499,0.0228,0.5016,0.4815,0.0203
Gemma-1.1-7B,5B~10B,0.6885,0.6193,0.9389,0.7201,0.6502,0.9795,0.6709,0.6133,0.8985,0.7171,0.6709,0.9421,0.5993,0.5861,0.7426,0.7164,0.634,0.9953,0.6316,0.5872,0.8235,0.5207,0.5098,0.595,0.6874,0.616,0.9415,0.6164,0.5853,0.7856
Qwen1.5-7B-Chat,5B~10B,0.6415,0.5933,0.8439,0.7295,0.6542,0.9987,0.5495,0.5352,0.6535,0.7415,0.6808,0.9875,0.7286,0.6545,0.9955,0.7167,0.6339,0.9966,0.6122,0.5749,0.784,0.4866,0.4788,0.5265,0.6887,0.6165,0.9449,0.4276,0.4219,0.4072
Yi-1.5-9B-Chat,5B~10B,0.7089,0.8612,0.4825,0.5418,0.7129,0.1741,0.4846,0.2932,0.0308,0.5376,0.7743,0.2115,0.6185,0.8236,0.3254,0.818,0.9011,0.7057,0.5819,0.7416,0.2207,0.4893,0.3279,0.0365,0.7959,0.8937,0.6572,0.477,0.2414,0.0233
DeepSeek-LLM-7B-Chat,5B~10B,0.5078,0.4247,0.0246,0.5288,0.7841,0.1076,0.4923,0.0435,0.0019,0.5924,0.9137,0.2765,0.6125,0.9021,0.2737,0.6802,0.9215,0.3786,0.542,0.7419,0.0938,0.503,0.3766,0.0194,0.7217,0.9323,0.4588,0.4987,0.4142,0.0238
GPT-J-6B,5B~10B,0.4991,0.3302,0.0256,0.4649,0,0,0.4979,0.3838,0.0323,0.4836,0.6888,0.0916,0.9195,0.9496,0.8899,0.4914,0.1924,0.0123,0.5186,0.5541,0.0647,0.5155,0.5553,0.0641,0.5632,0.7398,0.1505,0.5932,0.8229,0.2323
Baichuan2-7B,5B~10B,0.4946,0.3641,0.0834,0.475,0.4213,0.0801,0.4753,0.2928,0.0516,0.442,0.4168,0.0681,0.8239,0.8619,0.7567,0.4889,0.3678,0.0737,0.4868,0.339,0.0659,0.478,0.2945,0.0538,0.6055,0.6911,0.3029,0.4752,0.3189,0.0577
GLM-4-9B-Chat,5B~10B,0.4974,0.4928,0.9986,0.5202,0.5158,0.9994,0.4984,0.4957,0.9914,0.5521,0.5483,0.9989,0.5179,0.5152,0.9918,0.4992,0.4944,1,0.4923,0.4899,0.9892,0.478,0.484,0.9541,0.4924,0.4881,0.9958,0.5039,0.5006,0.9928
InternLM2-Chat-7B,5B~10B,0.4988,0,0,0.4767,0,0,0.4943,0,0,0.4453,0.0513,0.0011,0.5829,0.8965,0.21,0.4977,0,0,0.4997,0.0278,0.0007,0.4964,0,0,0.5026,0,0,0.4901,0.0278,0.0006
Opt-6.7B,5B~10B,0.5189,0.5038,0.9645,0.3756,0.4266,0.6456,0.5227,0.5083,0.9638,0.549,0.5504,0.9314,0.2606,0.3276,0.4205,0.4833,0.4847,0.8892,0.5274,0.508,0.9831,0.5244,0.508,0.971,0.5105,0.4973,0.9551,0.5322,0.5159,0.9757
Mistral-7B,5B~10B,0.4091,0.3399,0.2241,0.3013,0.0672,0.0286,0.3093,0.0548,0.0246,0.3554,0.3176,0.1618,0.4671,0.473,0.3538,0.62,0.6022,0.655,0.432,0.3832,0.2701,0.3362,0.1517,0.0771,0.6338,0.6081,0.6844,0.3814,0.2943,0.1744
Llama3-ChatQA-1.5-8B,5B~10B,0.387,0.2816,0.1665,0.3232,0.1355,0.0603,0.3054,0.011,0.0045,0.292,0.0948,0.0354,0.7946,0.7193,0.9821,0.5375,0.5306,0.4746,0.3702,0.2367,0.1312,0.318,0.0621,0.0276,0.4823,0.4562,0.3594,0.3398,0.1632,0.0793