Text Generation · Transformers · Safetensors · llama · text-generation-inference
javier-ab-bsc committed (verified)
Commit b09c9dc · 1 parent: 74a64a1

Added results for 7B v1.1 model

Files changed (1): README.md (+55 −45)
README.md CHANGED
@@ -713,39 +713,49 @@ All results reported below are on a 5-shot setting.
  </tr></thead>
  <tbody>
  <tr>
- <td>Commonsense Reasoning</td>
+ <td rowspan="2">Commonsense Reasoning</td>
+ <td>copa_es</td>
+ <td>acc</td>
+ <td>86</td>
+ </tr>
+ <tr>
  <td>xstorycloze_es</td>
  <td>acc</td>
- <td>74.06</td>
+ <td>74.32</td>
  </tr>
  <tr>
  <td rowspan="2">NLI</td>
  <td>wnli_es</td>
  <td>acc</td>
- <td>46.48</td>
+ <td>59.15</td>
  </tr>
  <tr>
  <td>xnli_es</td>
  <td>acc</td>
- <td>46.47</td>
+ <td>46.59</td>
  </tr>
  <tr>
  <td>Paraphrasing</td>
  <td>paws_es</td>
  <td>acc</td>
- <td>57.65</td>
+ <td>60.3</td>
+ </tr>
+ <tr>
+ <td rowspan="2">QA</td>
+ <td>openbookqa_es</td>
+ <td>acc</td>
+ <td>41.6</td>
  </tr>
  <tr>
- <td>QA</td>
  <td>xquad_es</td>
  <td>acc</td>
- <td>71.48</td>
+ <td>72.26</td>
  </tr>
  <tr>
  <td>Translation</td>
  <td>flores_es</td>
  <td>bleu</td>
- <td>23.56</td>
+ <td>23.43</td>
  </tr>
  </tbody>
  </table>
@@ -764,66 +774,66 @@ All results reported below are on a 5-shot setting.
  <td rowspan="2">Commonsense Reasoning</td>
  <td>copa_ca</td>
  <td>acc</td>
- <td>80.8</td>
+ <td>84</td>
  </tr>
  <tr>
  <td>xstorycloze_ca</td>
  <td>acc</td>
- <td>73.73</td>
+ <td>75.51</td>
  </tr>
  <tr>
  <td rowspan="2">NLI</td>
  <td>wnli_ca</td>
  <td>acc</td>
- <td>56.34</td>
+ <td>59.15</td>
  </tr>
  <tr>
  <td>xnli_ca</td>
  <td>acc</td>
- <td>49.4</td>
+ <td>50.16</td>
  </tr>
  <tr>
  <td rowspan="2">Paraphrasing</td>
  <td>parafraseja</td>
  <td>acc</td>
- <td>64.88</td>
+ <td>65.83</td>
  </tr>
  <tr>
  <td>paws_ca</td>
  <td>acc</td>
- <td>61.5</td>
+ <td>67.45</td>
  </tr>
  <tr>
  <td rowspan="5">QA</td>
  <td>arc_ca_easy</td>
  <td>acc</td>
- <td>69.23</td>
+ <td>71.72</td>
  </tr>
  <tr>
  <td>arc_ca_challenge</td>
  <td>acc</td>
- <td>44.54</td>
+ <td>45.56</td>
  </tr>
  <tr>
  <td>openbookqa_ca</td>
  <td>acc</td>
- <td>36.8</td>
+ <td>38.8</td>
  </tr>
  <tr>
  <td>piqa_ca</td>
  <td>acc</td>
- <td>70.35</td>
+ <td>71.27</td>
  </tr>
  <tr>
  <td>siqa_ca</td>
  <td>acc</td>
- <td>48.26</td>
+ <td>49.85</td>
  </tr>
  <tr>
  <td>Translation</td>
  <td>flores_ca</td>
  <td>bleu</td>
- <td>30.34</td>
+ <td>30.63</td>
  </tr>
  </tbody></table>

@@ -841,51 +851,51 @@ All results reported below are on a 5-shot setting.
  <td rowspan="2">Commonsense Reasoning</td>
  <td>xcopa_eu</td>
  <td>acc</td>
- <td>68</td>
+ <td>68.8</td>
  </tr>
  <tr>
  <td>xstorycloze_eu</td>
  <td>acc</td>
- <td>64.79</td>
+ <td>66.12</td>
  </tr>
  <tr>
  <td rowspan="2">NLI</td>
  <td>wnli_eu</td>
  <td>acc</td>
- <td>38.03</td>
+ <td>57.75</td>
  </tr>
  <tr>
  <td>xnli_eu</td>
  <td>acc</td>
- <td>42.85</td>
+ <td>43.51</td>
  </tr>
  <tr>
  <td rowspan="3">QA</td>
  <td>eus_exams</td>
  <td>acc</td>
- <td>38.41</td>
+ <td>41.04</td>
  </tr>
  <tr>
  <td>eus_proficiency</td>
  <td>acc</td>
- <td>31.13</td>
+ <td>39.72</td>
  </tr>
  <tr>
  <td>eus_trivia</td>
  <td>acc</td>
- <td>45.36</td>
+ <td>52.36</td>
  </tr>
  <tr>
  <td>Reading Comprehension</td>
  <td>eus_reading</td>
  <td>acc</td>
- <td>33.24</td>
+ <td>33.52</td>
  </tr>
  <tr>
  <td>Translation</td>
  <td>flores_eu</td>
  <td>bleu</td>
- <td>16.29</td>
+ <td>16.95</td>
  </tr>
  </tbody></table>

@@ -903,24 +913,24 @@ All results reported below are on a 5-shot setting.
  <td rowspan="2">Paraphrasing</td>
  <td>parafrases_gl</td>
  <td>acc</td>
- <td>58.84</td>
+ <td>54.42</td>
  </tr>
  <tr>
  <td>paws_gl</td>
  <td>acc</td>
- <td>60.85</td>
+ <td>63.2</td>
  </tr>
  <tr>
  <td>QA</td>
  <td>openbookqa_gl</td>
  <td>acc</td>
- <td>34.6</td>
+ <td>34.4</td>
  </tr>
  <tr>
  <td>Translation</td>
  <td>flores_gl</td>
  <td>bleu</td>
- <td>27.98</td>
+ <td>27.75</td>
  </tr>
  </tbody>
  </table>
@@ -939,60 +949,60 @@ All results reported below are on a 5-shot setting.
  <td rowspan="2">Commonsense Reasoning</td>
  <td>copa</td>
  <td>acc</td>
- <td>90</td>
+ <td>91</td>
  </tr>
  <tr>
  <td>xstorycloze_en</td>
  <td>acc</td>
- <td>79.22</td>
+ <td>79.09</td>
  </tr>
  <tr>
  <td rowspan="2">NLI</td>
  <td>wnli</td>
  <td>acc</td>
- <td>52.11</td>
+ <td>56.34</td>
  </tr>
  <tr>
  <td>xnli_en</td>
  <td>acc</td>
- <td>47.27</td>
+ <td>50</td>
  </tr>
  <tr>
  <td>Paraphrasing</td>
  <td>paws *</td>
  <td>acc</td>
- <td>59.6</td>
+ <td>64.05</td>
  </tr>
  <tr>
  <td rowspan="6">QA</td>
  <td>arc_easy</td>
  <td>acc</td>
- <td>81.36</td>
+ <td>82.2</td>
  </tr>
  <tr>
  <td>arc_challenge</td>
  <td>acc</td>
- <td>50.6</td>
+ <td>52.82</td>
  </tr>
  <tr>
  <td>openbookqa</td>
  <td>acc</td>
- <td>34.4</td>
+ <td>36</td>
  </tr>
  <tr>
  <td>piqa</td>
  <td>acc</td>
- <td>78.78</td>
+ <td>80.03</td>
  </tr>
  <tr>
  <td>social_iqa</td>
  <td>acc</td>
- <td>50.15</td>
+ <td>50.31</td>
  </tr>
  <tr>
- <td>squad_en **</td>
+ <td>xquad_en **</td>
  <td>acc</td>
- <td>78.06</td>
+ <td>77.74</td>
  </tr>
  </tbody></table>
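All scores above are reported in a 5-shot setting. As a rough, non-authoritative sketch of how such numbers could be reproduced, the snippet below assumes the EleutherAI lm-evaluation-harness (this commit does not state which evaluation harness produced the results); the model id is a placeholder, and the task list is a small subset of the task names appearing in the tables, whose exact naming and availability depend on the harness version.

```python
# Hedged reproduction sketch, not the authors' pipeline.
# Assumptions: lm-evaluation-harness v0.4+ (`pip install lm-eval`);
# "<org>/<model-7b-v1.1>" is a placeholder for the actual model repo id.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",  # Hugging Face transformers backend
    model_args="pretrained=<org>/<model-7b-v1.1>,dtype=bfloat16",
    tasks=["xstorycloze_es", "paws_es", "arc_easy", "arc_challenge"],
    num_fewshot=5,  # all tables report a 5-shot setting
)

# Per-task metrics (e.g. acc, bleu) keyed by task name.
for task, metrics in results["results"].items():
    print(task, metrics)
```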