diff --git "a/ood_results.json" "b/ood_results.json" new file mode 100644--- /dev/null +++ "b/ood_results.json" @@ -0,0 +1,3051 @@ +[{ + "model_name": "mpt-30b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6557377049180327, + "f1": 0.609451219512195, + "precision": 0.7961165048543689, + "recall": 0.6557377049180328, + "auc": 0.5048374092985757 + }, + { + "model_name": "mpt-30b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5819672131147541, + "f1": 0.4934462264918993, + "precision": 0.7723214285714286, + "recall": 0.5819672131147541, + "auc": 0.708008599838753 + }, + { + "model_name": "mpt-30b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6065573770491803, + "f1": 0.5344992050874404, + "precision": 0.7798165137614679, + "recall": 0.6065573770491803, + "auc": 0.6883902176834185 + }, + { + "model_name": "mpt-30b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6147540983606558, + "f1": 0.5476134122287968, + "precision": 0.7824074074074074, + "recall": 0.6147540983606558, + "auc": 0.6490190808922334 + }, + { + "model_name": "mpt-30b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6229508196721312, + "f1": 0.5604636591478697, + "precision": 0.7850467289719626, + "recall": 0.6229508196721312, + "auc": 0.707874227358237 + }, + { + "model_name": "openllama-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5819672131147541, + "f1": 0.5226697353279632, + "precision": 0.6629273504273505, + "recall": 0.5819672131147541, + "auc": 0.6484815909701692 + }, + { + "model_name": "openllama-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5737704918032787, + "f1": 0.5566116857701985, + "precision": 0.587281399046105, + "recall": 0.5737704918032787, + "auc": 0.570276807309863 + }, + { + "model_name": "openllama-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6147540983606558, + "f1": 0.5709047369602633, + "precision": 0.6940909090909091, + "recall": 0.6147540983606558, + "auc": 0.6772373018005912 + }, + { + "model_name": "openllama-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6065573770491803, + "f1": 0.5413533834586466, + "precision": 0.7470404984423675, + "recall": 0.6065573770491803, + "auc": 0.6052136522440205 + }, + { + "model_name": "openllama-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6311475409836066, + "f1": 0.5788262370540851, + "precision": 0.7606837606837606, + "recall": 0.6311475409836065, + "auc": 0.5925826390755173 + }, + { + "model_name": "vicuna-33b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5901639344262295, + "f1": 0.5074289405684755, + "precision": 0.7747747747747749, + "recall": 0.5901639344262295, + "auc": 0.6834184359043268 + }, + { + "model_name": "vicuna-33b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.680327868852459, + "f1": 0.6439422285414952, + "precision": 0.8049999999999999, + "recall": 0.680327868852459, + "auc": 0.6049449072829884 + }, + { + "model_name": "vicuna-33b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6229508196721312, + "f1": 0.5604636591478697, + "precision": 0.7850467289719626, + "recall": 0.6229508196721312, + "auc": 0.8071754904595538 + }, + { + "model_name": "vicuna-33b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6229508196721312, + "f1": 0.5604636591478697, + "precision": 0.7850467289719626, + "recall": 0.6229508196721312, + "auc": 0.7786885245901639 + }, + { + "model_name": "vicuna-33b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5655737704918032, + "f1": 0.4645134575569358, + "precision": 0.7675438596491229, + "recall": 0.5655737704918032, + "auc": 0.8501746842246708 + }, + { + "model_name": "pythia-410m-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5901639344262295, + "f1": 0.5151033386327504, + "precision": 0.7367678193366267, + "recall": 0.5901639344262295, + "auc": 0.7304488040849234 + }, + { + "model_name": "pythia-410m-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6229508196721312, + "f1": 0.5665739882607352, + "precision": 0.7563025210084033, + "recall": 0.6229508196721312, + "auc": 0.7973662993818866 + }, + { + "model_name": "pythia-410m-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6065573770491803, + "f1": 0.5413533834586466, + "precision": 0.7470404984423675, + "recall": 0.6065573770491803, + "auc": 0.7041117979037892 + }, + { + "model_name": "pythia-410m-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5245901639344263, + "f1": 0.38576388888888885, + "precision": 0.7563025210084033, + "recall": 0.5245901639344263, + "auc": 0.6750873421123353 + }, + { + "model_name": "pythia-410m-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5327868852459017, + "f1": 0.4136099165190994, + "precision": 0.6752873563218391, + "recall": 0.5327868852459017, + "auc": 0.6057511421660844 + }, + { + "model_name": "alfred-40b-0723", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6065573770491803, + "f1": 0.5344992050874404, + "precision": 0.7798165137614679, + "recall": 0.6065573770491803, + "auc": 0.9684224670787422 + }, + { + "model_name": "alfred-40b-0723", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6065573770491803, + "f1": 0.5344992050874404, + "precision": 0.7798165137614679, + "recall": 0.6065573770491803, + "auc": 0.7125772641762966 + }, + { + "model_name": "alfred-40b-0723", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6147540983606558, + "f1": 0.5476134122287968, + "precision": 0.7824074074074074, + "recall": 0.6147540983606558, + "auc": 0.8250470303681807 + }, + { + "model_name": "alfred-40b-0723", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6065573770491803, + "f1": 0.5344992050874404, + "precision": 0.7798165137614679, + "recall": 0.6065573770491803, + "auc": 0.903520558989519 + }, + { + "model_name": "alfred-40b-0723", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5901639344262295, + "f1": 0.5074289405684755, + "precision": 0.7747747747747749, + "recall": 0.5901639344262295, + "auc": 0.7132491265788767 + }, + { + "model_name": "vicuna-13b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6147540983606558, + "f1": 0.5476134122287968, + "precision": 0.7824074074074074, + "recall": 0.6147540983606558, + "auc": 0.8238376780435367 + }, + { + "model_name": "vicuna-13b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5983606557377049, + "f1": 0.5211087078426659, + "precision": 0.7772727272727273, + "recall": 0.5983606557377049, + "auc": 0.8001881214727223 + }, + { + "model_name": "vicuna-13b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6311475409836066, + "f1": 0.573061668870052, + "precision": 0.7877358490566038, + "recall": 0.6311475409836066, + "auc": 0.5052405267401237 + }, + { + "model_name": "vicuna-13b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6065573770491803, + "f1": 0.5344992050874404, + "precision": 0.7798165137614679, + "recall": 0.6065573770491803, + "auc": 0.8797366299381887 + }, + { + "model_name": "vicuna-13b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6967213114754098, + "f1": 0.666000739918609, + "precision": 0.8112244897959184, + "recall": 0.6967213114754098, + "auc": 0.8152378392905133 + }, + { + "model_name": "pythia-2.8b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6065573770491803, + "f1": 0.5477293790546802, + "precision": 0.7221288515406162, + "recall": 0.6065573770491803, + "auc": 0.7187583982800322 + }, + { + "model_name": "pythia-2.8b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5655737704918032, + "f1": 0.4820155411359449, + "precision": 0.6848484848484848, + "recall": 0.5655737704918032, + "auc": 0.7571889277076055 + }, + { + "model_name": "pythia-2.8b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6229508196721312, + "f1": 0.5722560975609756, + "precision": 0.7337761880429228, + "recall": 0.6229508196721312, + "auc": 0.7511421660843859 + }, + { + "model_name": "pythia-2.8b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6229508196721312, + "f1": 0.5722560975609756, + "precision": 0.7337761880429228, + "recall": 0.6229508196721312, + "auc": 0.6691749529696318 + }, + { + "model_name": "pythia-2.8b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5655737704918032, + "f1": 0.4820155411359449, + "precision": 0.6848484848484848, + "recall": 0.5655737704918032, + "auc": 0.5747110991668907 + }, + { + "model_name": "bloom-7.1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5901639344262295, + "f1": 0.5074289405684755, + "precision": 0.7747747747747749, + "recall": 0.5901639344262295, + "auc": 0.6815372211771029 + }, + { + "model_name": "bloom-7.1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6147540983606558, + "f1": 0.5601074031453778, + "precision": 0.7280982905982906, + "recall": 0.6147540983606558, + "auc": 0.7308519215264714 + }, + { + "model_name": "bloom-7.1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5737704918032787, + "f1": 0.4877260981912145, + "precision": 0.7248157248157248, + "recall": 0.5737704918032787, + "auc": 0.6871808653587745 + }, + { + "model_name": "bloom-7.1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5901639344262295, + "f1": 0.5222431077694234, + "precision": 0.7090342679127726, + "recall": 0.5901639344262295, + "auc": 0.7199677506046762 + }, + { + "model_name": "bloom-7.1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.680327868852459, + "f1": 0.6479467258601554, + "precision": 0.7852891156462585, + "recall": 0.680327868852459, + "auc": 0.7274926095135715 + }, + { + "model_name": "pythia-160m-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5573770491803278, + "f1": 0.4591133004926109, + "precision": 0.7099311701081612, + "recall": 0.5573770491803278, + "auc": 0.779226014512228 + }, + { + "model_name": "pythia-160m-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6557377049180327, + "f1": 0.6142728093947607, + "precision": 0.7732201791607731, + "recall": 0.6557377049180327, + "auc": 0.8505778016662188 + }, + { + "model_name": "pythia-160m-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6065573770491803, + "f1": 0.5413533834586466, + "precision": 0.7470404984423675, + "recall": 0.6065573770491803, + "auc": 0.7669981187852728 + }, + { + "model_name": "pythia-160m-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5491803278688525, + "f1": 0.44430641821946165, + "precision": 0.700657894736842, + "recall": 0.5491803278688524, + "auc": 0.6178446654125235 + }, + { + "model_name": "pythia-160m-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5327868852459017, + "f1": 0.4136099165190994, + "precision": 0.6752873563218391, + "recall": 0.5327868852459017, + "auc": 0.7280300994356356 + }, + { + "model_name": "bloom-1.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6639344262295082, + "f1": 0.6256828556461873, + "precision": 0.7772727272727273, + "recall": 0.6639344262295082, + "auc": 0.7944101048105348 + }, + { + "model_name": "bloom-1.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5491803278688525, + "f1": 0.43418500716755204, + "precision": 0.7629310344827587, + "recall": 0.5491803278688525, + "auc": 0.7434829346949744 + }, + { + "model_name": "bloom-1.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6147540983606558, + "f1": 0.5540866319309432, + "precision": 0.7517688679245282, + "recall": 0.6147540983606558, + "auc": 0.7004837409298575 + }, + { + "model_name": "bloom-1.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5901639344262295, + "f1": 0.5074289405684755, + "precision": 0.7747747747747749, + "recall": 0.5901639344262295, + "auc": 0.7375705455522709 + }, + { + "model_name": "bloom-1.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6065573770491803, + "f1": 0.5413533834586466, + "precision": 0.7470404984423675, + "recall": 0.6065573770491803, + "auc": 0.6273851115291589 + }, + { + "model_name": "openllama-2-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5901639344262295, + "f1": 0.5151033386327504, + "precision": 0.7367678193366267, + "recall": 0.5901639344262295, + "auc": 0.7120397742542327 + }, + { + "model_name": "openllama-2-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5491803278688525, + "f1": 0.45371651876577385, + "precision": 0.6633928571428571, + "recall": 0.5491803278688524, + "auc": 0.6130072561139479 + }, + { + "model_name": "openllama-2-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5737704918032787, + "f1": 0.5031328320802004, + "precision": 0.6710280373831776, + "recall": 0.5737704918032787, + "auc": 0.6729373824240795 + }, + { + "model_name": "openllama-2-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5655737704918032, + "f1": 0.49716152111361694, + "precision": 0.6438679245283019, + "recall": 0.5655737704918034, + "auc": 0.5732330018812148 + }, + { + "model_name": "openllama-2-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6147540983606558, + "f1": 0.5709047369602633, + "precision": 0.6940909090909091, + "recall": 0.6147540983606558, + "auc": 0.6140822359580758 + }, + { + "model_name": "opt-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5491803278688525, + "f1": 0.43418500716755204, + "precision": 0.7629310344827587, + "recall": 0.5491803278688525, + "auc": 0.5853265251276539 + }, + { + "model_name": "opt-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5327868852459017, + "f1": 0.4136099165190994, + "precision": 0.6752873563218391, + "recall": 0.5327868852459017, + "auc": 0.4273044880408492 + }, + { + "model_name": "opt-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5409836065573771, + "f1": 0.429144385026738, + "precision": 0.6894409937888198, + "recall": 0.540983606557377, + "auc": 0.584251545283526 + }, + { + "model_name": "opt-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5245901639344263, + "f1": 0.40875668449197855, + "precision": 0.613664596273292, + "recall": 0.5245901639344263, + "auc": 0.4486697124428917 + }, + { + "model_name": "opt-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5491803278688525, + "f1": 0.46246895778258434, + "precision": 0.6386363636363637, + "recall": 0.5491803278688524, + "auc": 0.5111529158828272 + }, + { + "model_name": "opt-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5573770491803278, + "f1": 0.4495320855614974, + "precision": 0.7652173913043478, + "recall": 0.5573770491803278, + "auc": 0.6719967750604676 + }, + { + "model_name": "opt-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5245901639344263, + "f1": 0.3976847122914538, + "precision": 0.6564102564102564, + "recall": 0.5245901639344263, + "auc": 0.48938457403923674 + }, + { + "model_name": "opt-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5573770491803278, + "f1": 0.46802325581395354, + "precision": 0.6748566748566749, + "recall": 0.5573770491803278, + "auc": 0.4778285407148616 + }, + { + "model_name": "opt-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5327868852459017, + "f1": 0.42409937888198757, + "precision": 0.6337719298245614, + "recall": 0.5327868852459017, + "auc": 0.52848696586939 + }, + { + "model_name": "opt-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5409836065573771, + "f1": 0.429144385026738, + "precision": 0.6894409937888198, + "recall": 0.540983606557377, + "auc": 0.583042192958882 + }, + { + "model_name": "llama-2-70b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.680327868852459, + "f1": 0.6479467258601554, + "precision": 0.7852891156462585, + "recall": 0.680327868852459, + "auc": 0.7471109916689063 + }, + { + "model_name": "llama-2-70b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6721311475409836, + "f1": 0.6479076479076479, + "precision": 0.7374860956618465, + "recall": 0.6721311475409837, + "auc": 0.6390755173340499 + }, + { + "model_name": "llama-2-70b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.639344262295082, + "f1": 0.6049455401825139, + "precision": 0.7138144329896907, + "recall": 0.639344262295082, + "auc": 0.6890620800859983 + }, + { + "model_name": "llama-2-70b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6557377049180327, + "f1": 0.6418786692759295, + "precision": 0.6842607313195549, + "recall": 0.6557377049180327, + "auc": 0.625369524321419 + }, + { + "model_name": "llama-2-70b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6311475409836066, + "f1": 0.5788262370540851, + "precision": 0.7606837606837606, + "recall": 0.6311475409836065, + "auc": 0.7374361730717549 + }, + { + "model_name": "llama-2-70b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6721311475409836, + "f1": 0.6408595819841036, + "precision": 0.7641237113402062, + "recall": 0.6721311475409836, + "auc": 0.6365224402042462 + }, + { + "model_name": "llama-2-70b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.680327868852459, + "f1": 0.6516582473094663, + "precision": 0.7688301282051282, + "recall": 0.680327868852459, + "auc": 0.5325181402848697 + }, + { + "model_name": "llama-2-70b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6721311475409836, + "f1": 0.6408595819841036, + "precision": 0.7641237113402062, + "recall": 0.6721311475409836, + "auc": 0.6847621607094866 + }, + { + "model_name": "llama-2-70b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6967213114754098, + "f1": 0.6855451062347615, + "precision": 0.7293233082706767, + "recall": 0.6967213114754098, + "auc": 0.6886589626444504 + }, + { + "model_name": "llama-2-70b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6967213114754098, + "f1": 0.6727799927509968, + "precision": 0.7781155015197568, + "recall": 0.6967213114754098, + "auc": 0.7678043536683689 + }, + { + "model_name": "llama-2-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5983606557377049, + "f1": 0.5283629191321499, + "precision": 0.7420634920634921, + "recall": 0.5983606557377049, + "auc": 0.7651169040580489 + }, + { + "model_name": "llama-2-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6065573770491803, + "f1": 0.5690314983809244, + "precision": 0.6635051546391753, + "recall": 0.6065573770491803, + "auc": 0.5888202096210696 + }, + { + "model_name": "llama-2-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5737704918032787, + "f1": 0.4791461412151067, + "precision": 0.7699115044247787, + "recall": 0.5737704918032787, + "auc": 0.7668637463047567 + }, + { + "model_name": "llama-2-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6311475409836066, + "f1": 0.573061668870052, + "precision": 0.7877358490566038, + "recall": 0.6311475409836066, + "auc": 0.6726686374630476 + }, + { + "model_name": "llama-2-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5819672131147541, + "f1": 0.5015621244893054, + "precision": 0.731060606060606, + "recall": 0.5819672131147541, + "auc": 0.7143241064230045 + }, + { + "model_name": "gpt2", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5983606557377049, + "f1": 0.5283629191321499, + "precision": 0.7420634920634921, + "recall": 0.5983606557377049, + "auc": 0.5486428379467885 + }, + { + "model_name": "gpt2", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5901639344262295, + "f1": 0.5074289405684755, + "precision": 0.7747747747747749, + "recall": 0.5901639344262295, + "auc": 0.7868852459016393 + }, + { + "model_name": "gpt2", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5655737704918032, + "f1": 0.4735813726288366, + "precision": 0.7178571428571429, + "recall": 0.5655737704918032, + "auc": 0.694974469228702 + }, + { + "model_name": "gpt2", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5491803278688525, + "f1": 0.44430641821946165, + "precision": 0.700657894736842, + "recall": 0.5491803278688524, + "auc": 0.5154528352593388 + }, + { + "model_name": "gpt2", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5819672131147541, + "f1": 0.4934462264918993, + "precision": 0.7723214285714286, + "recall": 0.5819672131147541, + "auc": 0.6834184359043267 + }, + { + "model_name": "gpt2-large", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5655737704918032, + "f1": 0.4645134575569358, + "precision": 0.7675438596491229, + "recall": 0.5655737704918032, + "auc": 0.5912389142703574 + }, + { + "model_name": "gpt2-large", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5819672131147541, + "f1": 0.509112426035503, + "precision": 0.7017195767195767, + "recall": 0.5819672131147541, + "auc": 0.7073367374361731 + }, + { + "model_name": "gpt2-large", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5819672131147541, + "f1": 0.4934462264918993, + "precision": 0.7723214285714286, + "recall": 0.5819672131147541, + "auc": 0.6655468959957002 + }, + { + "model_name": "gpt2-large", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5409836065573771, + "f1": 0.4390804597701149, + "precision": 0.6499508357915438, + "recall": 0.540983606557377, + "auc": 0.6768341843590432 + }, + { + "model_name": "gpt2-large", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6065573770491803, + "f1": 0.5536585365853659, + "precision": 0.7026060296371998, + "recall": 0.6065573770491803, + "auc": 0.7256113947863477 + }, + { + "model_name": "pythia-1.4b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5901639344262295, + "f1": 0.5222431077694234, + "precision": 0.7090342679127726, + "recall": 0.5901639344262295, + "auc": 0.664606288632088 + }, + { + "model_name": "pythia-1.4b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5655737704918032, + "f1": 0.48986193293885605, + "precision": 0.6613756613756614, + "recall": 0.5655737704918032, + "auc": 0.6636656812684761 + }, + { + "model_name": "pythia-1.4b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5819672131147541, + "f1": 0.5226697353279632, + "precision": 0.6629273504273505, + "recall": 0.5819672131147541, + "auc": 0.5730986294006987 + }, + { + "model_name": "pythia-1.4b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5737704918032787, + "f1": 0.4957074721780603, + "precision": 0.6937191249117854, + "recall": 0.5737704918032787, + "auc": 0.7194302606826122 + }, + { + "model_name": "pythia-1.4b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5819672131147541, + "f1": 0.509112426035503, + "precision": 0.7017195767195767, + "recall": 0.5819672131147541, + "auc": 0.5950013437248052 + }, + { + "model_name": "gpt2-medium", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5983606557377049, + "f1": 0.5283629191321499, + "precision": 0.7420634920634921, + "recall": 0.5983606557377049, + "auc": 0.7125772641762966 + }, + { + "model_name": "gpt2-medium", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5655737704918032, + "f1": 0.4820155411359449, + "precision": 0.6848484848484848, + "recall": 0.5655737704918032, + "auc": 0.6558720773985487 + }, + { + "model_name": "gpt2-medium", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5819672131147541, + "f1": 0.5226697353279632, + "precision": 0.6629273504273505, + "recall": 0.5819672131147541, + "auc": 0.5919107766729373 + }, + { + "model_name": "gpt2-medium", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5573770491803278, + "f1": 0.46802325581395354, + "precision": 0.6748566748566749, + "recall": 0.5573770491803278, + "auc": 0.6076323568933082 + }, + { + "model_name": "gpt2-medium", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5573770491803278, + "f1": 0.4495320855614974, + "precision": 0.7652173913043478, + "recall": 0.5573770491803278, + "auc": 0.6408223595807578 + }, + { + "model_name": "pythia-2.8b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6065573770491803, + "f1": 0.5477293790546802, + "precision": 0.7221288515406162, + "recall": 0.6065573770491803, + "auc": 0.6428379467884977 + }, + { + "model_name": "pythia-2.8b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5737704918032787, + "f1": 0.4957074721780603, + "precision": 0.6937191249117854, + "recall": 0.5737704918032787, + "auc": 0.6195915076592313 + }, + { + "model_name": "pythia-2.8b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6147540983606558, + "f1": 0.5601074031453778, + "precision": 0.7280982905982906, + "recall": 0.6147540983606558, + "auc": 0.6062886320881483 + }, + { + "model_name": "pythia-2.8b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5819672131147541, + "f1": 0.5015621244893054, + "precision": 0.731060606060606, + "recall": 0.5819672131147541, + "auc": 0.7747917226552002 + }, + { + "model_name": "pythia-2.8b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6147540983606558, + "f1": 0.5540866319309432, + "precision": 0.7517688679245282, + "recall": 0.6147540983606558, + "auc": 0.48589088954582105 + }, + { + "model_name": "opt-6.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5409836065573771, + "f1": 0.41845420497105895, + "precision": 0.7606837606837606, + "recall": 0.5409836065573771, + "auc": 0.6909432948132223 + }, + { + "model_name": "opt-6.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5409836065573771, + "f1": 0.429144385026738, + "precision": 0.6894409937888198, + "recall": 0.540983606557377, + "auc": 0.5338618650900295 + }, + { + "model_name": "opt-6.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5245901639344263, + "f1": 0.38576388888888885, + "precision": 0.7563025210084033, + "recall": 0.5245901639344263, + "auc": 0.7308519215264715 + }, + { + "model_name": "opt-6.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5245901639344263, + "f1": 0.3976847122914538, + "precision": 0.6564102564102564, + "recall": 0.5245901639344263, + "auc": 0.6367911851652781 + }, + { + "model_name": "opt-6.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5491803278688525, + "f1": 0.43418500716755204, + "precision": 0.7629310344827587, + "recall": 0.5491803278688525, + "auc": 0.7231926901370599 + }, + { + "model_name": "falcon-40b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5737704918032787, + "f1": 0.5031328320802004, + "precision": 0.6710280373831776, + "recall": 0.5737704918032787, + "auc": 0.7186240257995162 + }, + { + "model_name": "falcon-40b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6229508196721312, + "f1": 0.5665739882607352, + "precision": 0.7563025210084033, + "recall": 0.6229508196721312, + "auc": 0.7162053211502284 + }, + { + "model_name": "falcon-40b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5901639344262295, + "f1": 0.5288847698486252, + "precision": 0.6879551820728291, + "recall": 0.5901639344262295, + "auc": 0.6510346680999731 + }, + { + "model_name": "falcon-40b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6229508196721312, + "f1": 0.5950937950937951, + "precision": 0.6696329254727476, + "recall": 0.6229508196721312, + "auc": 0.6557377049180328 + }, + { + "model_name": "falcon-40b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6065573770491803, + "f1": 0.5642857142857143, + "precision": 0.6741326306543698, + "recall": 0.6065573770491803, + "auc": 0.6566783122816446 + }, + { + "model_name": "pythia-1b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5819672131147541, + "f1": 0.5015621244893054, + "precision": 0.731060606060606, + "recall": 0.5819672131147541, + "auc": 0.6549314700349368 + }, + { + "model_name": "pythia-1b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5737704918032787, + "f1": 0.4877260981912145, + "precision": 0.7248157248157248, + "recall": 0.5737704918032787, + "auc": 0.7660575114216608 + }, + { + "model_name": "pythia-1b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5819672131147541, + "f1": 0.5226697353279632, + "precision": 0.6629273504273505, + "recall": 0.5819672131147541, + "auc": 0.5987637731792528 + }, + { + "model_name": "pythia-1b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5819672131147541, + "f1": 0.5015621244893054, + "precision": 0.731060606060606, + "recall": 0.5819672131147541, + "auc": 0.6272507390486428 + }, + { + "model_name": "pythia-1b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5655737704918032, + "f1": 0.4735813726288366, + "precision": 0.7178571428571429, + "recall": 0.5655737704918032, + "auc": 0.6736092448266596 + }, + { + "model_name": "vicuna-7b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6311475409836066, + "f1": 0.573061668870052, + "precision": 0.7877358490566038, + "recall": 0.6311475409836066, + "auc": 0.666353130878796 + }, + { + "model_name": "vicuna-7b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6229508196721312, + "f1": 0.5604636591478697, + "precision": 0.7850467289719626, + "recall": 0.6229508196721312, + "auc": 0.5658425154528353 + }, + { + "model_name": "vicuna-7b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6229508196721312, + "f1": 0.5604636591478697, + "precision": 0.7850467289719626, + "recall": 0.6229508196721312, + "auc": 0.7696855683955925 + }, + { + "model_name": "vicuna-7b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5983606557377049, + "f1": 0.5211087078426659, + "precision": 0.7772727272727273, + "recall": 0.5983606557377049, + "auc": 0.7334049986562752 + }, + { + "model_name": "vicuna-7b-v1.3", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6147540983606558, + "f1": 0.5476134122287968, + "precision": 0.7824074074074074, + "recall": 0.6147540983606558, + "auc": 0.7417360924482665 + }, + { + "model_name": "openllama-2-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5819672131147541, + "f1": 0.5343859911696476, + "precision": 0.6386363636363637, + "recall": 0.5819672131147541, + "auc": 0.666756248320344 + }, + { + "model_name": "openllama-2-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5655737704918032, + "f1": 0.4820155411359449, + "precision": 0.6848484848484848, + "recall": 0.5655737704918032, + "auc": 0.6182477828540714 + }, + { + "model_name": "openllama-2-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6311475409836066, + "f1": 0.5841854124062713, + "precision": 0.7392156862745098, + "recall": 0.6311475409836065, + "auc": 0.7313894114485352 + }, + { + "model_name": "openllama-2-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5901639344262295, + "f1": 0.5288847698486252, + "precision": 0.6879551820728291, + "recall": 0.5901639344262295, + "auc": 0.6490190808922333 + }, + { + "model_name": "openllama-2-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5737704918032787, + "f1": 0.4791461412151067, + "precision": 0.7699115044247787, + "recall": 0.5737704918032787, + "auc": 0.7278957269551196 + }, + { + "model_name": "opt-2.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5819672131147541, + "f1": 0.509112426035503, + "precision": 0.7017195767195767, + "recall": 0.5819672131147541, + "auc": 0.5204246170384306 + }, + { + "model_name": "opt-2.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5737704918032787, + "f1": 0.5031328320802004, + "precision": 0.6710280373831776, + "recall": 0.5737704918032787, + "auc": 0.6091104541789841 + }, + { + "model_name": "opt-2.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5819672131147541, + "f1": 0.5161365580527257, + "precision": 0.6798349056603774, + "recall": 0.5819672131147541, + "auc": 0.7471109916689063 + }, + { + "model_name": "opt-2.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5409836065573771, + "f1": 0.4390804597701149, + "precision": 0.6499508357915438, + "recall": 0.540983606557377, + "auc": 0.5815640956732061 + }, + { + "model_name": "opt-2.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5819672131147541, + "f1": 0.509112426035503, + "precision": 0.7017195767195767, + "recall": 0.5819672131147541, + "auc": 0.5174684224670787 + }, + { + "model_name": "opt-66b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5737704918032787, + "f1": 0.5164634146341464, + "precision": 0.6402657128257536, + "recall": 0.5737704918032787, + "auc": 0.5911045417898414 + }, + { + "model_name": "opt-66b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6147540983606558, + "f1": 0.5757306696263411, + "precision": 0.6815476190476191, + "recall": 0.6147540983606558, + "auc": 0.5382961569470572 + }, + { + "model_name": "opt-66b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5819672131147541, + "f1": 0.5161365580527257, + "precision": 0.6798349056603774, + "recall": 0.5819672131147541, + "auc": 0.6823434560601989 + }, + { + "model_name": "opt-66b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5573770491803278, + "f1": 0.46802325581395354, + "precision": 0.6748566748566749, + "recall": 0.5573770491803278, + "auc": 0.5163934426229508 + }, + { + "model_name": "opt-66b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5737704918032787, + "f1": 0.4877260981912145, + "precision": 0.7248157248157248, + "recall": 0.5737704918032787, + "auc": 0.6748185971513034 + }, + { + "model_name": "cerebras-gpt-1.3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6147540983606558, + "f1": 0.5540866319309432, + "precision": 0.7517688679245282, + "recall": 0.6147540983606558, + "auc": 0.7946788497715669 + }, + { + "model_name": "cerebras-gpt-1.3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6229508196721312, + "f1": 0.5665739882607352, + "precision": 0.7563025210084033, + "recall": 0.6229508196721312, + "auc": 0.806503628056974 + }, + { + "model_name": "cerebras-gpt-1.3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6147540983606558, + "f1": 0.5540866319309432, + "precision": 0.7517688679245282, + "recall": 0.6147540983606558, + "auc": 0.7625638269282451 + }, + { + "model_name": "cerebras-gpt-1.3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6311475409836066, + "f1": 0.5788262370540851, + "precision": 0.7606837606837606, + "recall": 0.6311475409836065, + "auc": 0.707605482397205 + }, + { + "model_name": "cerebras-gpt-1.3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5901639344262295, + "f1": 0.5151033386327504, + "precision": 0.7367678193366267, + "recall": 0.5901639344262295, + "auc": 0.6560064498790648 + }, + { + "model_name": "bloom-560m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6147540983606558, + "f1": 0.5476134122287968, + "precision": 0.7824074074074074, + "recall": 0.6147540983606558, + "auc": 0.846277882289707 + }, + { + "model_name": "bloom-560m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6557377049180327, + "f1": 0.6142728093947607, + "precision": 0.7732201791607731, + "recall": 0.6557377049180327, + "auc": 0.7846009137328676 + }, + { + "model_name": "bloom-560m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6639344262295082, + "f1": 0.6211467090812693, + "precision": 0.7990196078431373, + "recall": 0.6639344262295082, + "auc": 0.764579414135985 + }, + { + "model_name": "bloom-560m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5819672131147541, + "f1": 0.4934462264918993, + "precision": 0.7723214285714286, + "recall": 0.5819672131147541, + "auc": 0.777613544746036 + }, + { + "model_name": "bloom-560m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6229508196721312, + "f1": 0.5604636591478697, + "precision": 0.7850467289719626, + "recall": 0.6229508196721312, + "auc": 0.8129535071217413 + }, + { + "model_name": "cerebras-gpt-6.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5737704918032787, + "f1": 0.4957074721780603, + "precision": 0.6937191249117854, + "recall": 0.5737704918032787, + "auc": 0.8089223327062617 + }, + { + "model_name": "cerebras-gpt-6.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5983606557377049, + "f1": 0.5283629191321499, + "precision": 0.7420634920634921, + "recall": 0.5983606557377049, + "auc": 0.7440204246170384 + }, + { + "model_name": "cerebras-gpt-6.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5737704918032787, + "f1": 0.4957074721780603, + "precision": 0.6937191249117854, + "recall": 0.5737704918032787, + "auc": 0.6440472991131417 + }, + { + "model_name": "cerebras-gpt-6.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5409836065573771, + "f1": 0.4390804597701149, + "precision": 0.6499508357915438, + "recall": 0.540983606557377, + "auc": 0.7139209889814565 + }, + { + "model_name": "cerebras-gpt-6.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5819672131147541, + "f1": 0.5015621244893054, + "precision": 0.731060606060606, + "recall": 0.5819672131147541, + "auc": 0.5662456328943832 + }, + { + "model_name": "pythia-1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6065573770491803, + "f1": 0.5413533834586466, + "precision": 0.7470404984423675, + "recall": 0.6065573770491803, + "auc": 0.6879871002418705 + }, + { + "model_name": "pythia-1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5819672131147541, + "f1": 0.5015621244893054, + "precision": 0.731060606060606, + "recall": 0.5819672131147541, + "auc": 0.7390486428379467 + }, + { + "model_name": "pythia-1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6229508196721312, + "f1": 0.5665739882607352, + "precision": 0.7563025210084033, + "recall": 0.6229508196721312, + "auc": 0.6499596882558452 + }, + { + "model_name": "pythia-1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6065573770491803, + "f1": 0.5413533834586466, + "precision": 0.7470404984423675, + "recall": 0.6065573770491803, + "auc": 0.6961838215533459 + }, + { + "model_name": "pythia-1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5901639344262295, + "f1": 0.5222431077694234, + "precision": 0.7090342679127726, + "recall": 0.5901639344262295, + "auc": 0.5990325181402849 + }, + { + "model_name": "falcon-7b-instruct", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6229508196721312, + "f1": 0.5604636591478697, + "precision": 0.7850467289719626, + "recall": 0.6229508196721312, + "auc": 0.8481590970169308 + }, + { + "model_name": "falcon-7b-instruct", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6311475409836066, + "f1": 0.573061668870052, + "precision": 0.7877358490566038, + "recall": 0.6311475409836066, + "auc": 0.7617575920451491 + }, + { + "model_name": "falcon-7b-instruct", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6229508196721312, + "f1": 0.5604636591478697, + "precision": 0.7850467289719626, + "recall": 0.6229508196721312, + "auc": 0.6005106154259607 + }, + { + "model_name": "falcon-7b-instruct", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6475409836065574, + "f1": 0.5975450709627925, + "precision": 0.7932692307692308, + "recall": 0.6475409836065573, + "auc": 0.8382155334587477 + }, + { + "model_name": "falcon-7b-instruct", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5901639344262295, + "f1": 0.5074289405684755, + "precision": 0.7747747747747749, + "recall": 0.5901639344262295, + "auc": 0.7886320881483473 + }, + { + "model_name": "gpt2-xl", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5983606557377049, + "f1": 0.5283629191321499, + "precision": 0.7420634920634921, + "recall": 0.5983606557377049, + "auc": 0.6983337812416016 + }, + { + "model_name": "gpt2-xl", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5491803278688525, + "f1": 0.44430641821946165, + "precision": 0.700657894736842, + "recall": 0.5491803278688524, + "auc": 0.583310937919914 + }, + { + "model_name": "gpt2-xl", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5655737704918032, + "f1": 0.49716152111361694, + "precision": 0.6438679245283019, + "recall": 0.5655737704918034, + "auc": 0.5331900026874496 + }, + { + "model_name": "gpt2-xl", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5573770491803278, + "f1": 0.4911955514365153, + "precision": 0.6196078431372549, + "recall": 0.5573770491803278, + "auc": 0.5395055092717012 + }, + { + "model_name": "gpt2-xl", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5491803278688525, + "f1": 0.45371651876577385, + "precision": 0.6633928571428571, + "recall": 0.5491803278688524, + "auc": 0.7543671056167697 + }, + { + "model_name": "llama-2-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5819672131147541, + "f1": 0.4934462264918993, + "precision": 0.7723214285714286, + "recall": 0.5819672131147541, + "auc": 0.8804084923407686 + }, + { + "model_name": "llama-2-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5983606557377049, + "f1": 0.5283629191321499, + "precision": 0.7420634920634921, + "recall": 0.5983606557377049, + "auc": 0.5455522708949208 + }, + { + "model_name": "llama-2-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6311475409836066, + "f1": 0.5788262370540851, + "precision": 0.7606837606837606, + "recall": 0.6311475409836065, + "auc": 0.708680462241333 + }, + { + "model_name": "llama-2-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5983606557377049, + "f1": 0.5351115949918346, + "precision": 0.7158018867924528, + "recall": 0.5983606557377049, + "auc": 0.569739317387799 + }, + { + "model_name": "llama-2-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6147540983606558, + "f1": 0.5757306696263411, + "precision": 0.6815476190476191, + "recall": 0.6147540983606558, + "auc": 0.7049180327868851 + }, + { + "model_name": "pythia-12b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5901639344262295, + "f1": 0.5151033386327504, + "precision": 0.7367678193366267, + "recall": 0.5901639344262295, + "auc": 0.6510346680999731 + }, + { + "model_name": "pythia-12b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5655737704918032, + "f1": 0.4735813726288366, + "precision": 0.7178571428571429, + "recall": 0.5655737704918032, + "auc": 0.6038699274388606 + }, + { + "model_name": "pythia-12b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6229508196721312, + "f1": 0.5604636591478697, + "precision": 0.7850467289719626, + "recall": 0.6229508196721312, + "auc": 0.6659500134372481 + }, + { + "model_name": "pythia-12b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5901639344262295, + "f1": 0.5074289405684755, + "precision": 0.7747747747747749, + "recall": 0.5901639344262295, + "auc": 0.624025799516259 + }, + { + "model_name": "pythia-12b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5901639344262295, + "f1": 0.5074289405684755, + "precision": 0.7747747747747749, + "recall": 0.5901639344262295, + "auc": 0.6165009406073636 + }, + { + "model_name": "bloom-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.7049180327868853, + "f1": 0.6831168831168831, + "precision": 0.7827215424545791, + "recall": 0.7049180327868853, + "auc": 0.6734748723461434 + }, + { + "model_name": "bloom-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6639344262295082, + "f1": 0.6256828556461873, + "precision": 0.7772727272727273, + "recall": 0.6639344262295082, + "auc": 0.7714324106423005 + }, + { + "model_name": "bloom-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.7049180327868853, + "f1": 0.6859267734553776, + "precision": 0.7702942219071252, + "recall": 0.7049180327868853, + "auc": 0.7594732598763774 + }, + { + "model_name": "bloom-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5819672131147541, + "f1": 0.5161365580527257, + "precision": 0.6798349056603774, + "recall": 0.5819672131147541, + "auc": 0.625235151840903 + }, + { + "model_name": "bloom-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6721311475409836, + "f1": 0.6408595819841036, + "precision": 0.7641237113402062, + "recall": 0.6721311475409836, + "auc": 0.7198333781241602 + }, + { + "model_name": "llama-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5983606557377049, + "f1": 0.5351115949918346, + "precision": 0.7158018867924528, + "recall": 0.5983606557377049, + "auc": 0.8235689330825046 + }, + { + "model_name": "llama-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5983606557377049, + "f1": 0.5526453640649555, + "precision": 0.6663636363636364, + "recall": 0.5983606557377049, + "auc": 0.5597957538296157 + }, + { + "model_name": "llama-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6065573770491803, + "f1": 0.5642857142857143, + "precision": 0.6741326306543698, + "recall": 0.6065573770491803, + "auc": 0.6471378661650095 + }, + { + "model_name": "llama-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5655737704918032, + "f1": 0.48986193293885605, + "precision": 0.6613756613756614, + "recall": 0.5655737704918032, + "auc": 0.6366568126847622 + }, + { + "model_name": "llama-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5819672131147541, + "f1": 0.5287434673937742, + "precision": 0.6495098039215687, + "recall": 0.5819672131147541, + "auc": 0.7244020424617037 + }, + { + "model_name": "cerebras-gpt-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6065573770491803, + "f1": 0.5344992050874404, + "precision": 0.7798165137614679, + "recall": 0.6065573770491803, + "auc": 0.7867508734211233 + }, + { + "model_name": "cerebras-gpt-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6229508196721312, + "f1": 0.5665739882607352, + "precision": 0.7563025210084033, + "recall": 0.6229508196721312, + "auc": 0.6627250739048642 + }, + { + "model_name": "cerebras-gpt-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6229508196721312, + "f1": 0.5722560975609756, + "precision": 0.7337761880429228, + "recall": 0.6229508196721312, + "auc": 0.6474066111260414 + }, + { + "model_name": "cerebras-gpt-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6311475409836066, + "f1": 0.5891641098555713, + "precision": 0.7218181818181818, + "recall": 0.6311475409836065, + "auc": 0.7819134641225477 + }, + { + "model_name": "cerebras-gpt-13b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5983606557377049, + "f1": 0.5211087078426659, + "precision": 0.7772727272727273, + "recall": 0.5983606557377049, + "auc": 0.6861058855146466 + }, + { + "model_name": "pythia-1.4b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5983606557377049, + "f1": 0.5283629191321499, + "precision": 0.7420634920634921, + "recall": 0.5983606557377049, + "auc": 0.6159634506852997 + }, + { + "model_name": "pythia-1.4b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5737704918032787, + "f1": 0.4791461412151067, + "precision": 0.7699115044247787, + "recall": 0.5737704918032787, + "auc": 0.6412254770223058 + }, + { + "model_name": "pythia-1.4b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5819672131147541, + "f1": 0.5015621244893054, + "precision": 0.731060606060606, + "recall": 0.5819672131147541, + "auc": 0.6788497715667831 + }, + { + "model_name": "pythia-1.4b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6147540983606558, + "f1": 0.5540866319309432, + "precision": 0.7517688679245282, + "recall": 0.6147540983606558, + "auc": 0.6588282719699006 + }, + { + "model_name": "pythia-1.4b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6229508196721312, + "f1": 0.5665739882607352, + "precision": 0.7563025210084033, + "recall": 0.6229508196721312, + "auc": 0.6660843859177641 + }, + { + "model_name": "bloom-1.1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6311475409836066, + "f1": 0.5788262370540851, + "precision": 0.7606837606837606, + "recall": 0.6311475409836065, + "auc": 0.8325718892770761 + }, + { + "model_name": "bloom-1.1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6311475409836066, + "f1": 0.5841854124062713, + "precision": 0.7392156862745098, + "recall": 0.6311475409836065, + "auc": 0.6961838215533459 + }, + { + "model_name": "bloom-1.1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6065573770491803, + "f1": 0.5344992050874404, + "precision": 0.7798165137614679, + "recall": 0.6065573770491803, + "auc": 0.7702230583176565 + }, + { + "model_name": "bloom-1.1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6065573770491803, + "f1": 0.5477293790546802, + "precision": 0.7221288515406162, + "recall": 0.6065573770491803, + "auc": 0.7156678312281645 + }, + { + "model_name": "bloom-1.1b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.639344262295082, + "f1": 0.5854185974667903, + "precision": 0.7904761904761906, + "recall": 0.639344262295082, + "auc": 0.8121472722386456 + }, + { + "model_name": "llama-2-13b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6147540983606558, + "f1": 0.5476134122287968, + "precision": 0.7824074074074074, + "recall": 0.6147540983606558, + "auc": 0.778016662187584 + }, + { + "model_name": "llama-2-13b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6557377049180327, + "f1": 0.6187499999999999, + "precision": 0.7545015371102327, + "recall": 0.6557377049180327, + "auc": 0.7487234614350982 + }, + { + "model_name": "llama-2-13b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5655737704918032, + "f1": 0.4645134575569358, + "precision": 0.7675438596491229, + "recall": 0.5655737704918032, + "auc": 0.7725073904864284 + }, + { + "model_name": "llama-2-13b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5901639344262295, + "f1": 0.5074289405684755, + "precision": 0.7747747747747749, + "recall": 0.5901639344262295, + "auc": 0.5154528352593388 + }, + { + "model_name": "llama-2-13b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6311475409836066, + "f1": 0.573061668870052, + "precision": 0.7877358490566038, + "recall": 0.6311475409836066, + "auc": 0.6652781510346681 + }, + { + "model_name": "llama-65b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6229508196721312, + "f1": 0.5604636591478697, + "precision": 0.7850467289719626, + "recall": 0.6229508196721312, + "auc": 0.8497715667831229 + }, + { + "model_name": "llama-65b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6557377049180327, + "f1": 0.6335812356979404, + "precision": 0.705423608649415, + "recall": 0.6557377049180328, + "auc": 0.5665143778554151 + }, + { + "model_name": "llama-65b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6147540983606558, + "f1": 0.5757306696263411, + "precision": 0.6815476190476191, + "recall": 0.6147540983606558, + "auc": 0.7852727761354474 + }, + { + "model_name": "llama-65b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.639344262295082, + "f1": 0.5959048479373683, + "precision": 0.7444601603017444, + "recall": 0.639344262295082, + "auc": 0.6769685568395593 + }, + { + "model_name": "llama-65b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6147540983606558, + "f1": 0.584342152953969, + "precision": 0.6622340425531915, + "recall": 0.6147540983606558, + "auc": 0.6132760010749798 + }, + { + "model_name": "pythia-12b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6065573770491803, + "f1": 0.5477293790546802, + "precision": 0.7221288515406162, + "recall": 0.6065573770491803, + "auc": 0.680059123891427 + }, + { + "model_name": "pythia-12b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5655737704918032, + "f1": 0.4820155411359449, + "precision": 0.6848484848484848, + "recall": 0.5655737704918032, + "auc": 0.5999731255038967 + }, + { + "model_name": "pythia-12b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6065573770491803, + "f1": 0.5536585365853659, + "precision": 0.7026060296371998, + "recall": 0.6065573770491803, + "auc": 0.6796560064498791 + }, + { + "model_name": "pythia-12b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5737704918032787, + "f1": 0.5164634146341464, + "precision": 0.6402657128257536, + "recall": 0.5737704918032787, + "auc": 0.7188927707605481 + }, + { + "model_name": "pythia-12b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5573770491803278, + "f1": 0.4911955514365153, + "precision": 0.6196078431372549, + "recall": 0.5573770491803278, + "auc": 0.584923407686106 + }, + { + "model_name": "openllama-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5901639344262295, + "f1": 0.5288847698486252, + "precision": 0.6879551820728291, + "recall": 0.5901639344262295, + "auc": 0.6036011824778286 + }, + { + "model_name": "openllama-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5655737704918032, + "f1": 0.48986193293885605, + "precision": 0.6613756613756614, + "recall": 0.5655737704918032, + "auc": 0.49758129535071216 + }, + { + "model_name": "openllama-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5409836065573771, + "f1": 0.4390804597701149, + "precision": 0.6499508357915438, + "recall": 0.540983606557377, + "auc": 0.7339424885783391 + }, + { + "model_name": "openllama-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5327868852459017, + "f1": 0.433851664902711, + "precision": 0.6089285714285715, + "recall": 0.5327868852459016, + "auc": 0.6339693630744423 + }, + { + "model_name": "openllama-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5983606557377049, + "f1": 0.5283629191321499, + "precision": 0.7420634920634921, + "recall": 0.5983606557377049, + "auc": 0.6326256382692824 + }, + { + "model_name": "opt-125m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5655737704918032, + "f1": 0.4735813726288366, + "precision": 0.7178571428571429, + "recall": 0.5655737704918032, + "auc": 0.723327062617576 + }, + { + "model_name": "opt-125m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5409836065573771, + "f1": 0.429144385026738, + "precision": 0.6894409937888198, + "recall": 0.540983606557377, + "auc": 0.6902714324106423 + }, + { + "model_name": "opt-125m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5409836065573771, + "f1": 0.429144385026738, + "precision": 0.6894409937888198, + "recall": 0.540983606557377, + "auc": 0.6660843859177641 + }, + { + "model_name": "opt-125m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5655737704918032, + "f1": 0.4645134575569358, + "precision": 0.7675438596491229, + "recall": 0.5655737704918032, + "auc": 0.7484547164740661 + }, + { + "model_name": "opt-125m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5655737704918032, + "f1": 0.4735813726288366, + "precision": 0.7178571428571429, + "recall": 0.5655737704918032, + "auc": 0.6703843052942758 + }, + { + "model_name": "openllama-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5737704918032787, + "f1": 0.4791461412151067, + "precision": 0.7699115044247787, + "recall": 0.5737704918032787, + "auc": 0.7656543939801129 + }, + { + "model_name": "openllama-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5491803278688525, + "f1": 0.46246895778258434, + "precision": 0.6386363636363637, + "recall": 0.5491803278688524, + "auc": 0.5318462778822897 + }, + { + "model_name": "openllama-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5491803278688525, + "f1": 0.43418500716755204, + "precision": 0.7629310344827587, + "recall": 0.5491803278688525, + "auc": 0.832571889277076 + }, + { + "model_name": "openllama-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5491803278688525, + "f1": 0.45371651876577385, + "precision": 0.6633928571428571, + "recall": 0.5491803278688524, + "auc": 0.5812953507121741 + }, + { + "model_name": "openllama-3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5573770491803278, + "f1": 0.46802325581395354, + "precision": 0.6748566748566749, + "recall": 0.5573770491803278, + "auc": 0.5165278151034669 + }, + { + "model_name": "pythia-410m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5819672131147541, + "f1": 0.5015621244893054, + "precision": 0.731060606060606, + "recall": 0.5819672131147541, + "auc": 0.6229508196721312 + }, + { + "model_name": "pythia-410m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5409836065573771, + "f1": 0.429144385026738, + "precision": 0.6894409937888198, + "recall": 0.540983606557377, + "auc": 0.6890620800859983 + }, + { + "model_name": "pythia-410m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6065573770491803, + "f1": 0.5413533834586466, + "precision": 0.7470404984423675, + "recall": 0.6065573770491803, + "auc": 0.6863746304756786 + }, + { + "model_name": "pythia-410m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5737704918032787, + "f1": 0.4877260981912145, + "precision": 0.7248157248157248, + "recall": 0.5737704918032787, + "auc": 0.6327600107497984 + }, + { + "model_name": "pythia-410m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6147540983606558, + "f1": 0.5601074031453778, + "precision": 0.7280982905982906, + "recall": 0.6147540983606558, + "auc": 0.7167428110722923 + }, + { + "model_name": "llama-2-7b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.680327868852459, + "f1": 0.6439422285414952, + "precision": 0.8049999999999999, + "recall": 0.680327868852459, + "auc": 0.7976350443429187 + }, + { + "model_name": "llama-2-7b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6475409836065574, + "f1": 0.5975450709627925, + "precision": 0.7932692307692308, + "recall": 0.6475409836065573, + "auc": 0.7179521633969362 + }, + { + "model_name": "llama-2-7b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.639344262295082, + "f1": 0.5854185974667903, + "precision": 0.7904761904761906, + "recall": 0.639344262295082, + "auc": 0.7269551195915076 + }, + { + "model_name": "llama-2-7b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5983606557377049, + "f1": 0.5211087078426659, + "precision": 0.7772727272727273, + "recall": 0.5983606557377049, + "auc": 0.8016662187583983 + }, + { + "model_name": "llama-2-7b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6311475409836066, + "f1": 0.573061668870052, + "precision": 0.7877358490566038, + "recall": 0.6311475409836066, + "auc": 0.6991400161246976 + }, + { + "model_name": "mpt-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5655737704918032, + "f1": 0.48986193293885605, + "precision": 0.6613756613756614, + "recall": 0.5655737704918032, + "auc": 0.7256113947863478 + }, + { + "model_name": "mpt-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5737704918032787, + "f1": 0.4957074721780603, + "precision": 0.6937191249117854, + "recall": 0.5737704918032787, + "auc": 0.6851652781510347 + }, + { + "model_name": "mpt-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6065573770491803, + "f1": 0.5591689250225835, + "precision": 0.6869401225836869, + "recall": 0.6065573770491803, + "auc": 0.6088417092179522 + }, + { + "model_name": "mpt-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5409836065573771, + "f1": 0.4390804597701149, + "precision": 0.6499508357915438, + "recall": 0.540983606557377, + "auc": 0.861058855146466 + }, + { + "model_name": "mpt-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5737704918032787, + "f1": 0.5100401606425702, + "precision": 0.6537815126050419, + "recall": 0.5737704918032787, + "auc": 0.6517065305025531 + }, + { + "model_name": "cerebras-gpt-2.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6065573770491803, + "f1": 0.5413533834586466, + "precision": 0.7470404984423675, + "recall": 0.6065573770491803, + "auc": 0.7328675087342112 + }, + { + "model_name": "cerebras-gpt-2.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5819672131147541, + "f1": 0.4934462264918993, + "precision": 0.7723214285714286, + "recall": 0.5819672131147541, + "auc": 0.708546089760817 + }, + { + "model_name": "cerebras-gpt-2.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6065573770491803, + "f1": 0.5344992050874404, + "precision": 0.7798165137614679, + "recall": 0.6065573770491803, + "auc": 0.6690405804891159 + }, + { + "model_name": "cerebras-gpt-2.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6229508196721312, + "f1": 0.5604636591478697, + "precision": 0.7850467289719626, + "recall": 0.6229508196721312, + "auc": 0.6019887127116367 + }, + { + "model_name": "cerebras-gpt-2.7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6311475409836066, + "f1": 0.5841854124062713, + "precision": 0.7392156862745098, + "recall": 0.6311475409836065, + "auc": 0.6170384305294276 + }, + { + "model_name": "pythia-6.9b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5901639344262295, + "f1": 0.5074289405684755, + "precision": 0.7747747747747749, + "recall": 0.5901639344262295, + "auc": 0.6156947057242677 + }, + { + "model_name": "pythia-6.9b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5573770491803278, + "f1": 0.46802325581395354, + "precision": 0.6748566748566749, + "recall": 0.5573770491803278, + "auc": 0.568798710024187 + }, + { + "model_name": "pythia-6.9b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5737704918032787, + "f1": 0.4957074721780603, + "precision": 0.6937191249117854, + "recall": 0.5737704918032787, + "auc": 0.7940069873689868 + }, + { + "model_name": "pythia-6.9b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5983606557377049, + "f1": 0.5526453640649555, + "precision": 0.6663636363636364, + "recall": 0.5983606557377049, + "auc": 0.5176027949475948 + }, + { + "model_name": "pythia-6.9b-deduped", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5491803278688525, + "f1": 0.45371651876577385, + "precision": 0.6633928571428571, + "recall": 0.5491803278688524, + "auc": 0.48172534264982536 + }, + { + "model_name": "llama-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6639344262295082, + "f1": 0.6407383466206997, + "precision": 0.7210144927536233, + "recall": 0.6639344262295082, + "auc": 0.6748185971513034 + }, + { + "model_name": "llama-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6475409836065574, + "f1": 0.6074234827508793, + "precision": 0.7495454545454545, + "recall": 0.6475409836065573, + "auc": 0.6734748723461433 + }, + { + "model_name": "llama-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5983606557377049, + "f1": 0.5526453640649555, + "precision": 0.6663636363636364, + "recall": 0.5983606557377049, + "auc": 0.8099973125503896 + }, + { + "model_name": "llama-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6311475409836066, + "f1": 0.6121511833274462, + "precision": 0.6631016042780749, + "recall": 0.6311475409836065, + "auc": 0.6226820747110992 + }, + { + "model_name": "llama-30b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6475409836065574, + "f1": 0.629388908512893, + "precision": 0.6834893048128342, + "recall": 0.6475409836065573, + "auc": 0.7000806234883096 + }, + { + "model_name": "llama-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6147540983606558, + "f1": 0.5476134122287968, + "precision": 0.7824074074074074, + "recall": 0.6147540983606558, + "auc": 0.8019349637194303 + }, + { + "model_name": "llama-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6065573770491803, + "f1": 0.5477293790546802, + "precision": 0.7221288515406162, + "recall": 0.6065573770491803, + "auc": 0.5862671324912657 + }, + { + "model_name": "llama-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6229508196721312, + "f1": 0.5722560975609756, + "precision": 0.7337761880429228, + "recall": 0.6229508196721312, + "auc": 0.6752217145928514 + }, + { + "model_name": "llama-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6065573770491803, + "f1": 0.5591689250225835, + "precision": 0.6869401225836869, + "recall": 0.6065573770491803, + "auc": 0.6397473797366299 + }, + { + "model_name": "llama-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6147540983606558, + "f1": 0.5601074031453778, + "precision": 0.7280982905982906, + "recall": 0.6147540983606558, + "auc": 0.5939263638806772 + }, + { + "model_name": "falcon-40b-instruct", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6475409836065574, + "f1": 0.5975450709627925, + "precision": 0.7932692307692308, + "recall": 0.6475409836065573, + "auc": 0.5874764848159097 + }, + { + "model_name": "falcon-40b-instruct", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.7540983606557377, + "f1": 0.7441990494828068, + "precision": 0.8006359300476948, + "recall": 0.7540983606557377, + "auc": 0.6627250739048642 + }, + { + "model_name": "falcon-40b-instruct", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.7131147540983607, + "f1": 0.6904675607104024, + "precision": 0.8012917933130699, + "recall": 0.7131147540983607, + "auc": 0.7863477559795754 + }, + { + "model_name": "falcon-40b-instruct", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6639344262295082, + "f1": 0.6211467090812693, + "precision": 0.7990196078431373, + "recall": 0.6639344262295082, + "auc": 0.8234345606019886 + }, + { + "model_name": "falcon-40b-instruct", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.7131147540983607, + "f1": 0.6933132227249874, + "precision": 0.7873188405797101, + "recall": 0.7131147540983607, + "auc": 0.7320612738511153 + }, + { + "model_name": "mpt-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5901639344262295, + "f1": 0.5222431077694234, + "precision": 0.7090342679127726, + "recall": 0.5901639344262295, + "auc": 0.6847621607094867 + }, + { + "model_name": "mpt-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5409836065573771, + "f1": 0.4483204134366925, + "precision": 0.6248976248976249, + "recall": 0.540983606557377, + "auc": 0.5201558720773986 + }, + { + "model_name": "mpt-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6065573770491803, + "f1": 0.5591689250225835, + "precision": 0.6869401225836869, + "recall": 0.6065573770491803, + "auc": 0.4875033593120129 + }, + { + "model_name": "mpt-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5983606557377049, + "f1": 0.5472241157312732, + "precision": 0.6794117647058824, + "recall": 0.5983606557377049, + "auc": 0.5837140553614619 + }, + { + "model_name": "mpt-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5819672131147541, + "f1": 0.509112426035503, + "precision": 0.7017195767195767, + "recall": 0.5819672131147541, + "auc": 0.6025262026337007 + }, + { + "model_name": "mpt-7b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6229508196721312, + "f1": 0.5604636591478697, + "precision": 0.7850467289719626, + "recall": 0.6229508196721312, + "auc": 0.8902176834184359 + }, + { + "model_name": "mpt-7b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6639344262295082, + "f1": 0.6211467090812693, + "precision": 0.7990196078431373, + "recall": 0.6639344262295082, + "auc": 0.7312550389680194 + }, + { + "model_name": "mpt-7b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6229508196721312, + "f1": 0.5604636591478697, + "precision": 0.7850467289719626, + "recall": 0.6229508196721312, + "auc": 0.6617844665412523 + }, + { + "model_name": "mpt-7b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.6147540983606558, + "f1": 0.5476134122287968, + "precision": 0.7824074074074074, + "recall": 0.6147540983606558, + "auc": 0.8586401504971782 + }, + { + "model_name": "mpt-7b-chat", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.6229508196721312, + "f1": 0.5604636591478697, + "precision": 0.7850467289719626, + "recall": 0.6229508196721312, + "auc": 0.7890352055898953 + }, + { + "model_name": "pythia-6.9b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5983606557377049, + "f1": 0.5283629191321499, + "precision": 0.7420634920634921, + "recall": 0.5983606557377049, + "auc": 0.610857296425692 + }, + { + "model_name": "pythia-6.9b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6229508196721312, + "f1": 0.5775368864799759, + "precision": 0.7157001414427157, + "recall": 0.6229508196721312, + "auc": 0.6620532115022842 + }, + { + "model_name": "pythia-6.9b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6147540983606558, + "f1": 0.5657047640687722, + "precision": 0.709313725490196, + "recall": 0.6147540983606556, + "auc": 0.6621875839828003 + }, + { + "model_name": "pythia-6.9b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5983606557377049, + "f1": 0.5283629191321499, + "precision": 0.7420634920634921, + "recall": 0.5983606557377049, + "auc": 0.6556033324375168 + }, + { + "model_name": "pythia-6.9b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5983606557377049, + "f1": 0.5351115949918346, + "precision": 0.7158018867924528, + "recall": 0.5983606557377049, + "auc": 0.5610051061542596 + }, + { + "model_name": "falcon-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.5819672131147541, + "f1": 0.509112426035503, + "precision": 0.7017195767195767, + "recall": 0.5819672131147541, + "auc": 0.7235958075786079 + }, + { + "model_name": "falcon-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5655737704918032, + "f1": 0.5312794490757521, + "precision": 0.5927051671732523, + "recall": 0.5655737704918034, + "auc": 0.5540177371674281 + }, + { + "model_name": "falcon-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6229508196721312, + "f1": 0.5775368864799759, + "precision": 0.7157001414427157, + "recall": 0.6229508196721312, + "auc": 0.6457941413598496 + }, + { + "model_name": "falcon-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5819672131147541, + "f1": 0.5161365580527257, + "precision": 0.6798349056603774, + "recall": 0.5819672131147541, + "auc": 0.5921795216339694 + }, + { + "model_name": "falcon-7b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5409836065573771, + "f1": 0.4390804597701149, + "precision": 0.6499508357915438, + "recall": 0.540983606557377, + "auc": 0.7046492878258532 + }, + { + "model_name": "opt-1.3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6147540983606558, + "f1": 0.5476134122287968, + "precision": 0.7824074074074074, + "recall": 0.6147540983606558, + "auc": 0.7256113947863478 + }, + { + "model_name": "opt-1.3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.5819672131147541, + "f1": 0.5161365580527257, + "precision": 0.6798349056603774, + "recall": 0.5819672131147541, + "auc": 0.6788497715667832 + }, + { + "model_name": "opt-1.3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.5819672131147541, + "f1": 0.4934462264918993, + "precision": 0.7723214285714286, + "recall": 0.5819672131147541, + "auc": 0.5786079011018543 + }, + { + "model_name": "opt-1.3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5819672131147541, + "f1": 0.509112426035503, + "precision": 0.7017195767195767, + "recall": 0.5819672131147541, + "auc": 0.7042461703843053 + }, + { + "model_name": "opt-1.3b", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.5573770491803278, + "f1": 0.4495320855614974, + "precision": 0.7652173913043478, + "recall": 0.5573770491803278, + "auc": 0.6202633700618114 + }, + { + "model_name": "pythia-160m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1", + "accuracy": 0.6721311475409836, + "f1": 0.6445221445221445, + "precision": 0.7497076023391813, + "recall": 0.6721311475409836, + "auc": 0.8653587745229776 + }, + { + "model_name": "pythia-160m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "666", + "accuracy": 0.6639344262295082, + "f1": 0.6298927118017018, + "precision": 0.7593537414965986, + "recall": 0.6639344262295082, + "auc": 0.7406611126041386 + }, + { + "model_name": "pythia-160m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "42", + "accuracy": 0.6065573770491803, + "f1": 0.5413533834586466, + "precision": 0.7470404984423675, + "recall": 0.6065573770491803, + "auc": 0.7694168234345606 + }, + { + "model_name": "pythia-160m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "25", + "accuracy": 0.5983606557377049, + "f1": 0.5211087078426659, + "precision": 0.7772727272727273, + "recall": 0.5983606557377049, + "auc": 0.8133566245632894 + }, + { + "model_name": "pythia-160m", + "exp_name": "deberta__openwebtext-10k____float16__NO_SAMPLING__beams-5__max-new-tokens-256__repetition-penalty-1.0__seed-42", + "seed": "1337", + "accuracy": 0.7131147540983607, + "f1": 0.6983398092546802, + "precision": 0.7650401069518716, + "recall": 0.7131147540983607, + "auc": 0.721311475409836 + } +] \ No newline at end of file