Added results for 7B v1.1 model
Browse files
README.md
CHANGED
@@ -713,39 +713,49 @@ All results reported below are on a 5-shot setting.
|
|
713 |
</tr></thead>
|
714 |
<tbody>
|
715 |
<tr>
|
716 |
-
<td>Commonsense Reasoning</td>
|
|
|
|
|
|
|
|
|
|
|
717 |
<td>xstorycloze_es</td>
|
718 |
<td>acc</td>
|
719 |
-
<td>74.
|
720 |
</tr>
|
721 |
<tr>
|
722 |
<td rowspan="2">NLI</td>
|
723 |
<td>wnli_es</td>
|
724 |
<td>acc</td>
|
725 |
-
<td>
|
726 |
</tr>
|
727 |
<tr>
|
728 |
<td>xnli_es</td>
|
729 |
<td>acc</td>
|
730 |
-
<td>46.
|
731 |
</tr>
|
732 |
<tr>
|
733 |
<td>Paraphrasing</td>
|
734 |
<td>paws_es</td>
|
735 |
<td>acc</td>
|
736 |
-
<td>
|
|
|
|
|
|
|
|
|
|
|
|
|
737 |
</tr>
|
738 |
<tr>
|
739 |
-
<td>QA</td>
|
740 |
<td>xquad_es</td>
|
741 |
<td>acc</td>
|
742 |
-
<td>
|
743 |
</tr>
|
744 |
<tr>
|
745 |
<td>Translation</td>
|
746 |
<td>flores_es</td>
|
747 |
<td>bleu</td>
|
748 |
-
<td>23.
|
749 |
</tr>
|
750 |
</tbody>
|
751 |
</table>
|
@@ -764,66 +774,66 @@ All results reported below are on a 5-shot setting.
|
|
764 |
<td rowspan="2">Commonsense Reasoning</td>
|
765 |
<td>copa_ca</td>
|
766 |
<td>acc</td>
|
767 |
-
<td>
|
768 |
</tr>
|
769 |
<tr>
|
770 |
<td>xstorycloze_ca</td>
|
771 |
<td>acc</td>
|
772 |
-
<td>
|
773 |
</tr>
|
774 |
<tr>
|
775 |
<td rowspan="2">NLI</td>
|
776 |
<td>wnli_ca</td>
|
777 |
<td>acc</td>
|
778 |
-
<td>
|
779 |
</tr>
|
780 |
<tr>
|
781 |
<td>xnli_ca</td>
|
782 |
<td>acc</td>
|
783 |
-
<td>
|
784 |
</tr>
|
785 |
<tr>
|
786 |
<td rowspan="2">Paraphrasing</td>
|
787 |
<td>parafraseja</td>
|
788 |
<td>acc</td>
|
789 |
-
<td>
|
790 |
</tr>
|
791 |
<tr>
|
792 |
<td>paws_ca</td>
|
793 |
<td>acc</td>
|
794 |
-
<td>
|
795 |
</tr>
|
796 |
<tr>
|
797 |
<td rowspan="5">QA</td>
|
798 |
<td>arc_ca_easy</td>
|
799 |
<td>acc</td>
|
800 |
-
<td>
|
801 |
</tr>
|
802 |
<tr>
|
803 |
<td>arc_ca_challenge</td>
|
804 |
<td>acc</td>
|
805 |
-
<td>
|
806 |
</tr>
|
807 |
<tr>
|
808 |
<td>openbookqa_ca</td>
|
809 |
<td>acc</td>
|
810 |
-
<td>
|
811 |
</tr>
|
812 |
<tr>
|
813 |
<td>piqa_ca</td>
|
814 |
<td>acc</td>
|
815 |
-
<td>
|
816 |
</tr>
|
817 |
<tr>
|
818 |
<td>siqa_ca</td>
|
819 |
<td>acc</td>
|
820 |
-
<td>
|
821 |
</tr>
|
822 |
<tr>
|
823 |
<td>Translation</td>
|
824 |
<td>flores_ca</td>
|
825 |
<td>bleu</td>
|
826 |
-
<td>30.
|
827 |
</tr>
|
828 |
</tbody></table>
|
829 |
|
@@ -841,51 +851,51 @@ All results reported below are on a 5-shot setting.
|
|
841 |
<td rowspan="2">Commonsense Reasoning</td>
|
842 |
<td>xcopa_eu</td>
|
843 |
<td>acc</td>
|
844 |
-
<td>68</td>
|
845 |
</tr>
|
846 |
<tr>
|
847 |
<td>xstorycloze_eu</td>
|
848 |
<td>acc</td>
|
849 |
-
<td>
|
850 |
</tr>
|
851 |
<tr>
|
852 |
<td rowspan="2">NLI</td>
|
853 |
<td>wnli_eu</td>
|
854 |
<td>acc</td>
|
855 |
-
<td>
|
856 |
</tr>
|
857 |
<tr>
|
858 |
<td>xnli_eu</td>
|
859 |
<td>acc</td>
|
860 |
-
<td>
|
861 |
</tr>
|
862 |
<tr>
|
863 |
<td rowspan="3">QA</td>
|
864 |
<td>eus_exams</td>
|
865 |
<td>acc</td>
|
866 |
-
<td>
|
867 |
</tr>
|
868 |
<tr>
|
869 |
<td>eus_proficiency</td>
|
870 |
<td>acc</td>
|
871 |
-
<td>
|
872 |
</tr>
|
873 |
<tr>
|
874 |
<td>eus_trivia</td>
|
875 |
<td>acc</td>
|
876 |
-
<td>
|
877 |
</tr>
|
878 |
<tr>
|
879 |
<td>Reading Comprehension</td>
|
880 |
<td>eus_reading</td>
|
881 |
<td>acc</td>
|
882 |
-
<td>33.
|
883 |
</tr>
|
884 |
<tr>
|
885 |
<td>Translation</td>
|
886 |
<td>flores_eu</td>
|
887 |
<td>bleu</td>
|
888 |
-
<td>16.
|
889 |
</tr>
|
890 |
</tbody></table>
|
891 |
|
@@ -903,24 +913,24 @@ All results reported below are on a 5-shot setting.
|
|
903 |
<td rowspan="2">Paraphrasing</td>
|
904 |
<td>parafrases_gl</td>
|
905 |
<td>acc</td>
|
906 |
-
<td>
|
907 |
</tr>
|
908 |
<tr>
|
909 |
<td>paws_gl</td>
|
910 |
<td>acc</td>
|
911 |
-
<td>
|
912 |
</tr>
|
913 |
<tr>
|
914 |
<td>QA</td>
|
915 |
<td>openbookqa_gl</td>
|
916 |
<td>acc</td>
|
917 |
-
<td>34.
|
918 |
</tr>
|
919 |
<tr>
|
920 |
<td>Translation</td>
|
921 |
<td>flores_gl</td>
|
922 |
<td>bleu</td>
|
923 |
-
<td>27.
|
924 |
</tr>
|
925 |
</tbody>
|
926 |
</table>
|
@@ -939,60 +949,60 @@ All results reported below are on a 5-shot setting.
|
|
939 |
<td rowspan="2">Commonsense Reasoning</td>
|
940 |
<td>copa</td>
|
941 |
<td>acc</td>
|
942 |
-
<td>
|
943 |
</tr>
|
944 |
<tr>
|
945 |
<td>xstorycloze_en</td>
|
946 |
<td>acc</td>
|
947 |
-
<td>79.
|
948 |
</tr>
|
949 |
<tr>
|
950 |
<td rowspan="2">NLI</td>
|
951 |
<td>wnli</td>
|
952 |
<td>acc</td>
|
953 |
-
<td>
|
954 |
</tr>
|
955 |
<tr>
|
956 |
<td>xnli_en</td>
|
957 |
<td>acc</td>
|
958 |
-
<td>
|
959 |
</tr>
|
960 |
<tr>
|
961 |
<td>Paraphrasing</td>
|
962 |
<td>paws *</td>
|
963 |
<td>acc</td>
|
964 |
-
<td>
|
965 |
</tr>
|
966 |
<tr>
|
967 |
<td rowspan="6">QA</td>
|
968 |
<td>arc_easy</td>
|
969 |
<td>acc</td>
|
970 |
-
<td>
|
971 |
</tr>
|
972 |
<tr>
|
973 |
<td>arc_challenge</td>
|
974 |
<td>acc</td>
|
975 |
-
<td>
|
976 |
</tr>
|
977 |
<tr>
|
978 |
<td>openbookqa</td>
|
979 |
<td>acc</td>
|
980 |
-
<td>
|
981 |
</tr>
|
982 |
<tr>
|
983 |
<td>piqa</td>
|
984 |
<td>acc</td>
|
985 |
-
<td>
|
986 |
</tr>
|
987 |
<tr>
|
988 |
<td>social_iqa</td>
|
989 |
<td>acc</td>
|
990 |
-
<td>50.
|
991 |
</tr>
|
992 |
<tr>
|
993 |
-
<td>
|
994 |
<td>acc</td>
|
995 |
-
<td>
|
996 |
</tr>
|
997 |
</tbody></table>
|
998 |
|
|
|
713 |
</tr></thead>
|
714 |
<tbody>
|
715 |
<tr>
|
716 |
+
<td rowspan="2">Commonsense Reasoning</td>
|
717 |
+
<td>copa_es</td>
|
718 |
+
<td>acc</td>
|
719 |
+
<td>86</td>
|
720 |
+
</tr>
|
721 |
+
<tr>
|
722 |
<td>xstorycloze_es</td>
|
723 |
<td>acc</td>
|
724 |
+
<td>74.32</td>
|
725 |
</tr>
|
726 |
<tr>
|
727 |
<td rowspan="2">NLI</td>
|
728 |
<td>wnli_es</td>
|
729 |
<td>acc</td>
|
730 |
+
<td>59.15</td>
|
731 |
</tr>
|
732 |
<tr>
|
733 |
<td>xnli_es</td>
|
734 |
<td>acc</td>
|
735 |
+
<td>46.59</td>
|
736 |
</tr>
|
737 |
<tr>
|
738 |
<td>Paraphrasing</td>
|
739 |
<td>paws_es</td>
|
740 |
<td>acc</td>
|
741 |
+
<td>60.3</td>
|
742 |
+
</tr>
|
743 |
+
<tr>
|
744 |
+
<td rowspan="2">QA</td>
|
745 |
+
<td>openbookqa_es</td>
|
746 |
+
<td>acc</td>
|
747 |
+
<td>41.6</td>
|
748 |
</tr>
|
749 |
<tr>
|
|
|
750 |
<td>xquad_es</td>
|
751 |
<td>acc</td>
|
752 |
+
<td>72.26</td>
|
753 |
</tr>
|
754 |
<tr>
|
755 |
<td>Translation</td>
|
756 |
<td>flores_es</td>
|
757 |
<td>bleu</td>
|
758 |
+
<td>23.43</td>
|
759 |
</tr>
|
760 |
</tbody>
|
761 |
</table>
|
|
|
774 |
<td rowspan="2">Commonsense Reasoning</td>
|
775 |
<td>copa_ca</td>
|
776 |
<td>acc</td>
|
777 |
+
<td>84</td>
|
778 |
</tr>
|
779 |
<tr>
|
780 |
<td>xstorycloze_ca</td>
|
781 |
<td>acc</td>
|
782 |
+
<td>75.51</td>
|
783 |
</tr>
|
784 |
<tr>
|
785 |
<td rowspan="2">NLI</td>
|
786 |
<td>wnli_ca</td>
|
787 |
<td>acc</td>
|
788 |
+
<td>59.15</td>
|
789 |
</tr>
|
790 |
<tr>
|
791 |
<td>xnli_ca</td>
|
792 |
<td>acc</td>
|
793 |
+
<td>50.16</td>
|
794 |
</tr>
|
795 |
<tr>
|
796 |
<td rowspan="2">Paraphrasing</td>
|
797 |
<td>parafraseja</td>
|
798 |
<td>acc</td>
|
799 |
+
<td>65.83</td>
|
800 |
</tr>
|
801 |
<tr>
|
802 |
<td>paws_ca</td>
|
803 |
<td>acc</td>
|
804 |
+
<td>67.45</td>
|
805 |
</tr>
|
806 |
<tr>
|
807 |
<td rowspan="5">QA</td>
|
808 |
<td>arc_ca_easy</td>
|
809 |
<td>acc</td>
|
810 |
+
<td>71.72</td>
|
811 |
</tr>
|
812 |
<tr>
|
813 |
<td>arc_ca_challenge</td>
|
814 |
<td>acc</td>
|
815 |
+
<td>45.56</td>
|
816 |
</tr>
|
817 |
<tr>
|
818 |
<td>openbookqa_ca</td>
|
819 |
<td>acc</td>
|
820 |
+
<td>38.8</td>
|
821 |
</tr>
|
822 |
<tr>
|
823 |
<td>piqa_ca</td>
|
824 |
<td>acc</td>
|
825 |
+
<td>71.27</td>
|
826 |
</tr>
|
827 |
<tr>
|
828 |
<td>siqa_ca</td>
|
829 |
<td>acc</td>
|
830 |
+
<td>49.85</td>
|
831 |
</tr>
|
832 |
<tr>
|
833 |
<td>Translation</td>
|
834 |
<td>flores_ca</td>
|
835 |
<td>bleu</td>
|
836 |
+
<td>30.63</td>
|
837 |
</tr>
|
838 |
</tbody></table>
|
839 |
|
|
|
851 |
<td rowspan="2">Commonsense Reasoning</td>
|
852 |
<td>xcopa_eu</td>
|
853 |
<td>acc</td>
|
854 |
+
<td>68.8</td>
|
855 |
</tr>
|
856 |
<tr>
|
857 |
<td>xstorycloze_eu</td>
|
858 |
<td>acc</td>
|
859 |
+
<td>66.12</td>
|
860 |
</tr>
|
861 |
<tr>
|
862 |
<td rowspan="2">NLI</td>
|
863 |
<td>wnli_eu</td>
|
864 |
<td>acc</td>
|
865 |
+
<td>57.75</td>
|
866 |
</tr>
|
867 |
<tr>
|
868 |
<td>xnli_eu</td>
|
869 |
<td>acc</td>
|
870 |
+
<td>43.51</td>
|
871 |
</tr>
|
872 |
<tr>
|
873 |
<td rowspan="3">QA</td>
|
874 |
<td>eus_exams</td>
|
875 |
<td>acc</td>
|
876 |
+
<td>41.04</td>
|
877 |
</tr>
|
878 |
<tr>
|
879 |
<td>eus_proficiency</td>
|
880 |
<td>acc</td>
|
881 |
+
<td>39.72</td>
|
882 |
</tr>
|
883 |
<tr>
|
884 |
<td>eus_trivia</td>
|
885 |
<td>acc</td>
|
886 |
+
<td>52.36</td>
|
887 |
</tr>
|
888 |
<tr>
|
889 |
<td>Reading Comprehension</td>
|
890 |
<td>eus_reading</td>
|
891 |
<td>acc</td>
|
892 |
+
<td>33.52</td>
|
893 |
</tr>
|
894 |
<tr>
|
895 |
<td>Translation</td>
|
896 |
<td>flores_eu</td>
|
897 |
<td>bleu</td>
|
898 |
+
<td>16.95</td>
|
899 |
</tr>
|
900 |
</tbody></table>
|
901 |
|
|
|
913 |
<td rowspan="2">Paraphrasing</td>
|
914 |
<td>parafrases_gl</td>
|
915 |
<td>acc</td>
|
916 |
+
<td>54.42</td>
|
917 |
</tr>
|
918 |
<tr>
|
919 |
<td>paws_gl</td>
|
920 |
<td>acc</td>
|
921 |
+
<td>63.2</td>
|
922 |
</tr>
|
923 |
<tr>
|
924 |
<td>QA</td>
|
925 |
<td>openbookqa_gl</td>
|
926 |
<td>acc</td>
|
927 |
+
<td>34.4</td>
|
928 |
</tr>
|
929 |
<tr>
|
930 |
<td>Translation</td>
|
931 |
<td>flores_gl</td>
|
932 |
<td>bleu</td>
|
933 |
+
<td>27.75</td>
|
934 |
</tr>
|
935 |
</tbody>
|
936 |
</table>
|
|
|
949 |
<td rowspan="2">Commonsense Reasoning</td>
|
950 |
<td>copa</td>
|
951 |
<td>acc</td>
|
952 |
+
<td>91</td>
|
953 |
</tr>
|
954 |
<tr>
|
955 |
<td>xstorycloze_en</td>
|
956 |
<td>acc</td>
|
957 |
+
<td>79.09</td>
|
958 |
</tr>
|
959 |
<tr>
|
960 |
<td rowspan="2">NLI</td>
|
961 |
<td>wnli</td>
|
962 |
<td>acc</td>
|
963 |
+
<td>56.34</td>
|
964 |
</tr>
|
965 |
<tr>
|
966 |
<td>xnli_en</td>
|
967 |
<td>acc</td>
|
968 |
+
<td>50</td>
|
969 |
</tr>
|
970 |
<tr>
|
971 |
<td>Paraphrasing</td>
|
972 |
<td>paws *</td>
|
973 |
<td>acc</td>
|
974 |
+
<td>64.05</td>
|
975 |
</tr>
|
976 |
<tr>
|
977 |
<td rowspan="6">QA</td>
|
978 |
<td>arc_easy</td>
|
979 |
<td>acc</td>
|
980 |
+
<td>82.2</td>
|
981 |
</tr>
|
982 |
<tr>
|
983 |
<td>arc_challenge</td>
|
984 |
<td>acc</td>
|
985 |
+
<td>52.82</td>
|
986 |
</tr>
|
987 |
<tr>
|
988 |
<td>openbookqa</td>
|
989 |
<td>acc</td>
|
990 |
+
<td>36</td>
|
991 |
</tr>
|
992 |
<tr>
|
993 |
<td>piqa</td>
|
994 |
<td>acc</td>
|
995 |
+
<td>80.03</td>
|
996 |
</tr>
|
997 |
<tr>
|
998 |
<td>social_iqa</td>
|
999 |
<td>acc</td>
|
1000 |
+
<td>50.31</td>
|
1001 |
</tr>
|
1002 |
<tr>
|
1003 |
+
<td>xquad_en **</td>
|
1004 |
<td>acc</td>
|
1005 |
+
<td>77.74</td>
|
1006 |
</tr>
|
1007 |
</tbody></table>
|
1008 |
|