Commit 457add3 (1 parent: 14c7b8b), committed by snats

added more evals; 90_most_difficult not complete

breaking_0.7_trained/70_most_difficult/eval_results.jsonl ADDED
@@ -0,0 +1,40 @@
+ {"key": "vtab/caltech101", "dataset": "Caltech-101", "metrics": {"acc1": 0.14708299096138044, "acc5": 0.4599835661462613, "mean_per_class_recall": 0.16736892473469622, "main_metric": 0.16736892473469622}}
+ {"key": "cifar10", "dataset": "CIFAR-10", "metrics": {"acc1": 0.3917, "acc5": 0.9104, "mean_per_class_recall": 0.39170000000000005, "main_metric": 0.3917}}
+ {"key": "vtab/cifar100", "dataset": "CIFAR-100", "metrics": {"acc1": 0.1261, "acc5": 0.3624, "mean_per_class_recall": 0.1261, "main_metric": 0.1261}}
+ {"key": "vtab/clevr_count_all", "dataset": "CLEVR Counts", "metrics": {"acc1": 0.10553333333333334, "acc5": 0.6261333333333333, "mean_per_class_recall": 0.10443198394406869, "main_metric": 0.10553333333333334}}
+ {"key": "vtab/clevr_closest_object_distance", "dataset": "CLEVR Distance", "metrics": {"acc1": 0.22853333333333334, "acc5": 0.9186666666666666, "mean_per_class_recall": 0.17242436550113396, "main_metric": 0.22853333333333334}}
+ {"key": "country211", "dataset": "Country211", "metrics": {"acc1": 0.012369668246445498, "acc5": 0.05099526066350711, "mean_per_class_recall": 0.0123696682464455, "main_metric": 0.012369668246445498}}
+ {"key": "vtab/dtd", "dataset": "Describable Textures", "metrics": {"acc1": 0.05531914893617021, "acc5": 0.19840425531914893, "mean_per_class_recall": 0.0553191489361702, "main_metric": 0.05531914893617021}}
+ {"key": "vtab/eurosat", "dataset": "EuroSAT", "metrics": {"acc1": 0.14148148148148149, "acc5": 0.5188888888888888, "mean_per_class_recall": 0.13234410119399823, "main_metric": 0.14148148148148149}}
+ {"key": "fgvc_aircraft", "dataset": "FGVC Aircraft", "metrics": {"acc1": 0.011401140114011402, "acc5": 0.0483048304830483, "mean_per_class_recall": 0.011283422459893049, "main_metric": 0.011283422459893049}}
+ {"key": "food101", "dataset": "Food-101", "metrics": {"acc1": 0.048712871287128715, "acc5": 0.18308910891089109, "mean_per_class_recall": 0.048712871287128715, "main_metric": 0.048712871287128715}}
+ {"key": "gtsrb", "dataset": "GTSRB", "metrics": {"acc1": 0.06571654790182106, "acc5": 0.24346793349168647, "mean_per_class_recall": 0.06590677443179971, "main_metric": 0.06571654790182106}}
+ {"key": "imagenet1k", "dataset": "ImageNet 1k", "metrics": {"acc1": 0.0291, "acc5": 0.09332, "mean_per_class_recall": 0.0291, "main_metric": 0.0291}}
+ {"key": "imagenet_sketch", "dataset": "ImageNet Sketch", "metrics": {"acc1": 0.010866788500461789, "acc5": 0.0334256912102812, "mean_per_class_recall": 0.010893333333333335, "main_metric": 0.010866788500461789}}
+ {"key": "imagenetv2", "dataset": "ImageNet v2", "metrics": {"acc1": 0.0278, "acc5": 0.0877, "mean_per_class_recall": 0.027800000000000002, "main_metric": 0.0278}}
+ {"key": "imagenet-a", "dataset": "ImageNet-A", "metrics": {"acc1": 0.014666666666666666, "acc5": 0.0692, "mean_per_class_recall": 0.018525359045171875, "main_metric": 0.014666666666666666}}
+ {"key": "imagenet-o", "dataset": "ImageNet-O", "metrics": {"acc1": 0.097, "acc5": 0.258, "mean_per_class_recall": 0.09125548157552801, "main_metric": 0.097}}
+ {"key": "imagenet-r", "dataset": "ImageNet-R", "metrics": {"acc1": 0.04243333333333334, "acc5": 0.1386, "mean_per_class_recall": 0.038938293613434184, "main_metric": 0.04243333333333334}}
+ {"key": "vtab/kitti_closest_vehicle_distance", "dataset": "KITTI Vehicle Distance", "metrics": {"acc1": 0.3389592123769339, "acc5": null, "mean_per_class_recall": 0.22925791778975743, "main_metric": 0.3389592123769339}}
+ {"key": "mnist", "dataset": "MNIST", "metrics": {"acc1": 0.0824, "acc5": 0.4959, "mean_per_class_recall": 0.0816666168705002, "main_metric": 0.0824}}
+ {"key": "objectnet", "dataset": "ObjectNet", "metrics": {"acc1": 0.036717992893291695, "acc5": 0.13002045870571766, "mean_per_class_recall": 0.036658826688822, "main_metric": 0.036717992893291695}}
+ {"key": "vtab/flowers", "dataset": "Oxford Flowers-102", "metrics": {"acc1": 0.02618311920637502, "acc5": 0.09725158562367865, "mean_per_class_recall": 0.035347394094886124, "main_metric": 0.035347394094886124}}
+ {"key": "vtab/pets", "dataset": "Oxford-IIIT Pet", "metrics": {"acc1": 0.047151812482965384, "acc5": 0.19078768056691198, "mean_per_class_recall": 0.047386552775839724, "main_metric": 0.047386552775839724}}
+ {"key": "voc2007", "dataset": "Pascal VOC 2007", "metrics": {"acc1": 0.3092280982905983, "acc5": 0.7008547008547008, "mean_per_class_recall": 0.3120109487298618, "main_metric": 0.3092280982905983}}
+ {"key": "vtab/pcam", "dataset": "PatchCamelyon", "metrics": {"acc1": 0.505218505859375, "acc5": null, "mean_per_class_recall": 0.5054295743482156, "main_metric": 0.505218505859375}}
+ {"key": "renderedsst2", "dataset": "Rendered SST2", "metrics": {"acc1": 0.5013728720483251, "acc5": null, "mean_per_class_recall": 0.5005898616177407, "main_metric": 0.5013728720483251}}
+ {"key": "vtab/resisc45", "dataset": "RESISC45", "metrics": {"acc1": 0.06952380952380953, "acc5": 0.2519047619047619, "mean_per_class_recall": 0.07067140202149967, "main_metric": 0.06952380952380953}}
+ {"key": "cars", "dataset": "Stanford Cars", "metrics": {"acc1": 0.016042780748663103, "acc5": 0.066782738465365, "mean_per_class_recall": 0.01605966973233054, "main_metric": 0.016042780748663103}}
+ {"key": "stl10", "dataset": "STL-10", "metrics": {"acc1": 0.4145, "acc5": 0.937875, "mean_per_class_recall": 0.4145, "main_metric": 0.4145}}
+ {"key": "sun397", "dataset": "SUN397", "metrics": {"acc1": 0.07350534233223606, "acc5": 0.21050260220313735, "mean_per_class_recall": 0.054760979452141936, "main_metric": 0.07350534233223606}}
+ {"key": "vtab/svhn", "dataset": "SVHN", "metrics": {"acc1": 0.07648279041180087, "acc5": 0.48186846957590657, "mean_per_class_recall": 0.10321902243537387, "main_metric": 0.07648279041180087}}
+ {"key": "retrieval/flickr_1k_test_image_text_retrieval", "dataset": "Flickr", "metrics": {"image_retrieval_recall@1": 0.02199999988079071, "text_retrieval_recall@1": 0.035999998450279236, "image_retrieval_recall@5": 0.07320000231266022, "text_retrieval_recall@5": 0.10700000077486038, "image_retrieval_recall@10": 0.12160000205039978, "text_retrieval_recall@10": 0.15199999511241913, "mean_recall@1": 0.028999999165534973, "main_metric": 0.028999999165534973}}
+ {"key": "retrieval/mscoco_2014_5k_test_image_text_retrieval", "dataset": "MSCOCO", "metrics": {"image_retrieval_recall@1": 0.010635745711624622, "text_retrieval_recall@1": 0.019999999552965164, "image_retrieval_recall@5": 0.03726509213447571, "text_retrieval_recall@5": 0.05739999935030937, "image_retrieval_recall@10": 0.06133546680212021, "text_retrieval_recall@10": 0.08860000222921371, "mean_recall@1": 0.015317872632294893, "main_metric": 0.015317872632294893}}
+ {"key": "misc/winogavil", "dataset": "WinoGAViL", "metrics": {"avg_jaccard_score": 0.4008125843657699, "jaccard_score_5": 0.4654545454545455, "jaccard_score_6": 0.39455243580513566, "jaccard_score_10": 0.32910798122065726, "jaccard_score_12": 0.29313529149977746, "jaccard_score_5-6": 0.42910052910052915, "jaccard_score_10-12": 0.3110795137727222, "main_metric": 0.3110795137727222}}
+ {"key": "wilds/iwildcam", "dataset": "iWildCam", "metrics": {"acc1": 0.007805379635904747, "acc5": 0.05335234044542077, "mean_per_class_recall": 0.0028397099896885136, "acc_avg": 0.007805379573255777, "recall-macro_all": 0.0028397099896885136, "F1-macro_all": 0.001915841396898543, "main_metric": 0.001915841396898543}}
+ {"key": "wilds/camelyon17", "dataset": "Camelyon17", "metrics": {"acc1": 0.5009523361629082, "acc5": null, "mean_per_class_recall": 0.5009523361629082, "acc_avg": 0.5009523630142212, "acc_slide:0": NaN, "count_slide:0": 0.0, "acc_slide:1": NaN, "count_slide:1": 0.0, "acc_slide:2": NaN, "count_slide:2": 0.0, "acc_slide:3": NaN, "count_slide:3": 0.0, "acc_slide:4": NaN, "count_slide:4": 0.0, "acc_slide:5": NaN, "count_slide:5": 0.0, "acc_slide:6": NaN, "count_slide:6": 0.0, "acc_slide:7": NaN, "count_slide:7": 0.0, "acc_slide:8": NaN, "count_slide:8": 0.0, "acc_slide:9": NaN, "count_slide:9": 0.0, "acc_slide:10": NaN, "count_slide:10": 0.0, "acc_slide:11": NaN, "count_slide:11": 0.0, "acc_slide:12": NaN, "count_slide:12": 0.0, "acc_slide:13": NaN, "count_slide:13": 0.0, "acc_slide:14": NaN, "count_slide:14": 0.0, "acc_slide:15": NaN, "count_slide:15": 0.0, "acc_slide:16": NaN, "count_slide:16": 0.0, "acc_slide:17": NaN, "count_slide:17": 0.0, "acc_slide:18": NaN, "count_slide:18": 0.0, "acc_slide:19": NaN, "count_slide:19": 0.0, "acc_slide:20": 0.011286089196801186, "count_slide:20": 3810.0, "acc_slide:21": 0.003789929673075676, "count_slide:21": 3694.0, "acc_slide:22": 0.5873786211013794, "count_slide:22": 7210.0, "acc_slide:23": 0.5225037932395935, "count_slide:23": 5288.0, "acc_slide:24": 0.023812605068087578, "count_slide:24": 7727.0, "acc_slide:25": 0.2411167472600937, "count_slide:25": 4334.0, "acc_slide:26": 0.13027523458003998, "count_slide:26": 3815.0, "acc_slide:27": 0.0256804209202528, "count_slide:27": 4556.0, "acc_slide:28": 0.8472927808761597, "count_slide:28": 31878.0, "acc_slide:29": 0.5258201360702515, "count_slide:29": 12742.0, "acc_wg": 0.003789929673075676, "main_metric": 0.5009523361629082}}
+ {"key": "wilds/fmow", "dataset": "FMoW", "metrics": {"acc1": 0.018454857969965623, "acc5": 0.0948977745612448, "mean_per_class_recall": 0.026186679686878968, "acc_avg": 0.018454857170581818, "acc_year:0": NaN, "count_year:0": 0.0, "acc_year:1": NaN, "count_year:1": 0.0, "acc_year:2": NaN, "count_year:2": 0.0, "acc_year:3": NaN, "count_year:3": 0.0, "acc_year:4": NaN, "count_year:4": 0.0, "acc_year:5": NaN, "count_year:5": 0.0, "acc_year:6": NaN, "count_year:6": 0.0, "acc_year:7": NaN, "count_year:7": 0.0, "acc_year:8": NaN, "count_year:8": 0.0, "acc_year:9": NaN, "count_year:9": 0.0, "acc_year:10": NaN, "count_year:10": 0.0, "acc_year:11": NaN, "count_year:11": 0.0, "acc_year:12": NaN, "count_year:12": 0.0, "acc_year:13": NaN, "count_year:13": 0.0, "acc_year:14": 0.01829688623547554, "count_year:14": 15959.0, "acc_year:15": 0.01886485517024994, "count_year:15": 6149.0, "acc_worst_year": 0.01829688623547554, "acc_region:0": 0.016119282692670822, "count_region:0": 4963.0, "acc_region:1": 0.025435302406549454, "count_region:1": 5858.0, "acc_region:2": 0.006170459091663361, "count_region:2": 2593.0, "acc_region:3": 0.01931704953312874, "count_region:3": 8024.0, "acc_region:4": 0.012012012302875519, "count_region:4": 666.0, "acc_region:5": 0.0, "count_region:5": 4.0, "acc_worst_region": 0.0, "main_metric": 0.0}}
+ {"key": "fairness/dollar_street", "dataset": "Dollar Street", "metrics": {"acc1": 0.10333999429060806, "acc5": 0.30059948615472454, "mean_per_class_recall": 0.0991123982370067, "acc_top5_avg": 0.30059948563575745, "acc_top5_income_ds:0": 0.22780373692512512, "count_income_ds:0": 856.0, "acc_top5_income_ds:1": 0.27375566959381104, "count_income_ds:1": 884.0, "acc_top5_income_ds:2": 0.3318535089492798, "count_income_ds:2": 901.0, "acc_top5_income_ds:3": 0.36774942278862, "count_income_ds:3": 862.0, "acc_top5_wg": 0.22780373692512512, "main_metric": 0.22780373692512512}}
+ {"key": "fairness/geode", "dataset": "GeoDE", "metrics": {"acc1": 0.2573670723894939, "acc5": 0.5895259449071109, "mean_per_class_recall": 0.2559042042468144, "acc_avg": 0.2573670744895935, "acc_region:0": 0.23298539221286774, "count_region:0": 2395.0, "acc_region:1": 0.25621891021728516, "count_region:1": 2010.0, "acc_region:2": 0.265757292509079, "count_region:2": 2126.0, "acc_region:3": 0.2455058991909027, "count_region:3": 1947.0, "acc_region:4": 0.27262377738952637, "count_region:4": 1757.0, "acc_region:5": 0.27474477887153625, "count_region:5": 2253.0, "acc_wg": 0.23298539221286774, "main_metric": 0.23298539221286774}}
+ {"key": "fairness/fairface", "dataset": "FairFace", "metrics": {"acc_race_avg": 0.7098776698112488, "acc_race_race_binary:0": 0.26618704199790955, "count_race_binary:0": 2085.0, "acc_race_race_binary:1": 0.8141842484474182, "count_race_binary:1": 8869.0, "acc_race_wg": 0.26618704199790955, "acc_gender_avg": 0.5018258094787598, "acc_gender_race_binary:0": 0.47338128089904785, "acc_gender_race_binary:1": 0.5085127949714661, "acc_gender_wg": 0.47338128089904785, "acc_age_avg": 0.10991418361663818, "acc_age_race_binary:0": 0.1304556429386139, "acc_age_race_binary:1": 0.10508512705564499, "acc_age_wg": 0.10508512705564499, "acc_gender_x_avg": 0.5018258094787598, "acc_gender_x_race:0_gender:0": 0.24655820429325104, "count_race:0_gender:0": 799.0, "acc_gender_x_race:0_gender:1": 0.844121515750885, "count_race:0_gender:1": 757.0, "acc_gender_x_race:1_gender:0": 0.05614973232150078, "count_race:1_gender:0": 1122.0, "acc_gender_x_race:1_gender:1": 0.9595015645027161, "count_race:1_gender:1": 963.0, "acc_gender_x_race:2_gender:0": 0.1593625545501709, "count_race:2_gender:0": 753.0, "acc_gender_x_race:2_gender:1": 0.9082568883895874, "count_race:2_gender:1": 763.0, "acc_gender_x_race:3_gender:0": 0.08070617914199829, "count_race:3_gender:0": 793.0, "acc_gender_x_race:3_gender:1": 0.9710843563079834, "count_race:3_gender:1": 830.0, "acc_gender_x_race:4_gender:0": 0.09963099658489227, "count_race:4_gender:0": 813.0, "acc_gender_x_race:4_gender:1": 0.9595959782600403, "count_race:4_gender:1": 396.0, "acc_gender_x_race:5_gender:0": 0.09795918315649033, "count_race:5_gender:0": 735.0, "acc_gender_x_race:5_gender:1": 0.9529411792755127, "count_race:5_gender:1": 680.0, "acc_gender_x_race:6_gender:0": 0.09009008854627609, "count_race:6_gender:0": 777.0, "acc_gender_x_race:6_gender:1": 0.9573091864585876, "count_race:6_gender:1": 773.0, "acc_gender_x_wg": 0.05614973232150078, "toxicity_crime_avg": 0.008946503512561321, "toxicity_crime_race:0": 0.01413881778717041, "count_race:0": 1556.0, "toxicity_crime_race:1": 0.007194244768470526, "count_race:1": 2085.0, "toxicity_crime_race:2": 0.008575198240578175, "count_race:2": 1516.0, "toxicity_crime_race:3": 0.006161429453641176, "count_race:3": 1623.0, "toxicity_crime_race:4": 0.005789909046143293, "count_race:4": 1209.0, "toxicity_crime_race:5": 0.010600706562399864, "count_race:5": 1415.0, "toxicity_crime_race:6": 0.01032258104532957, "count_race:6": 1550.0, "toxicity_crime_wg": 0.005789909046143293, "toxicity_nonhuman_avg": 0.3934635818004608, "toxicity_nonhuman_race:0": 0.6208226084709167, "toxicity_nonhuman_race:1": 0.2973621189594269, "toxicity_nonhuman_race:2": 0.49340370297431946, "toxicity_nonhuman_race:3": 0.332101047039032, "toxicity_nonhuman_race:4": 0.34574028849601746, "toxicity_nonhuman_race:5": 0.3851590156555176, "toxicity_nonhuman_race:6": 0.3058064579963684, "toxicity_nonhuman_wg": 0.2973621189594269, "main_metric": null}}
+ {"key": "fairness/utkface", "dataset": "UTKFace", "metrics": {"acc_race_avg": 0.5809813141822815, "acc_race_race_binary:0": 0.45037713646888733, "count_race_binary:0": 10076.0, "acc_race_race_binary:1": 0.6775519251823425, "count_race_binary:1": 13627.0, "acc_race_wg": 0.45037713646888733, "acc_gender_avg": 0.488039493560791, "acc_gender_race_binary:0": 0.4622866213321686, "acc_gender_race_binary:1": 0.5070815086364746, "acc_gender_wg": 0.4622866213321686, "acc_age_avg": 0.13960258662700653, "acc_age_race_binary:0": 0.12356093525886536, "acc_age_race_binary:1": 0.15146400034427643, "acc_age_wg": 0.12356093525886536, "acc_gender_x_avg": 0.488039493560791, "acc_gender_x_race:0_gender:0": 0.06945642828941345, "count_race:0_gender:0": 2318.0, "acc_gender_x_race:0_gender:1": 0.9773550629615784, "count_race:0_gender:1": 2208.0, "acc_gender_x_race:1_gender:0": 0.018078889697790146, "count_race:1_gender:0": 5476.0, "acc_gender_x_race:1_gender:1": 0.9910869598388672, "count_race:1_gender:1": 4600.0, "acc_gender_x_race:2_gender:0": 0.030517470091581345, "count_race:2_gender:0": 2261.0, "acc_gender_x_race:2_gender:1": 0.9935822486877441, "count_race:2_gender:1": 1714.0, "acc_gender_x_race:3_gender:0": 0.016507936641573906, "count_race:3_gender:0": 1575.0, "acc_gender_x_race:3_gender:1": 0.9951586723327637, "count_race:3_gender:1": 1859.0, "acc_gender_x_race:4_gender:0": 0.021052632480859756, "count_race:4_gender:0": 760.0, "acc_gender_x_race:4_gender:1": 0.9946351647377014, "count_race:4_gender:1": 932.0, "acc_gender_x_wg": 0.016507936641573906, "toxicity_crime_avg": 0.001729738898575306, "toxicity_crime_race:0": 0.0039770216681063175, "count_race:0": 4526.0, "toxicity_crime_race:1": 0.001587931765243411, "count_race:1": 10076.0, "toxicity_crime_race:2": 0.0007547169807367027, "count_race:2": 3975.0, "toxicity_crime_race:3": 0.00029120559338480234, "count_race:3": 3434.0, "toxicity_crime_race:4": 0.0017730495892465115, "count_race:4": 1692.0, "toxicity_crime_wg": 0.00029120559338480234, "toxicity_nonhuman_avg": 0.1965995877981186, "toxicity_nonhuman_race:0": 0.37825894355773926, "toxicity_nonhuman_race:1": 0.12862247228622437, "toxicity_nonhuman_race:2": 0.24477986991405487, "toxicity_nonhuman_race:3": 0.132789745926857, "toxicity_nonhuman_race:4": 0.1317966878414154, "toxicity_nonhuman_wg": 0.12862247228622437, "main_metric": null}}
breaking_0.9_trained/90_most_difficult/eval_results.jsonl ADDED
@@ -0,0 +1,4 @@
+ {"key": "vtab/caltech101", "dataset": "Caltech-101", "metrics": {"acc1": 0.19490550534100246, "acc5": 0.457025472473295, "mean_per_class_recall": 0.15000745669935314, "main_metric": 0.15000745669935314}}
+ {"key": "cifar10", "dataset": "CIFAR-10", "metrics": {"acc1": 0.3535, "acc5": 0.9122, "mean_per_class_recall": 0.3535, "main_metric": 0.3535}}
+ {"key": "vtab/cifar100", "dataset": "CIFAR-100", "metrics": {"acc1": 0.1192, "acc5": 0.347, "mean_per_class_recall": 0.11919999999999999, "main_metric": 0.1192}}
+ {"key": "vtab/clevr_count_all", "dataset": "CLEVR Counts", "metrics": {"acc1": 0.11886666666666666, "acc5": 0.6400666666666667, "mean_per_class_recall": 0.1176296638675192, "main_metric": 0.11886666666666666}}