leaderboard-pt-pr-bot committed on
Commit
9543be0
1 Parent(s): 00ca6e1

Adding the Open Portuguese LLM Leaderboard Evaluation Results

Browse files

This is an automated PR created with https://huggingface.co/spaces/eduagarcia-temp/portuguese-leaderboard-results-to-modelcard

The purpose of this PR is to add evaluation results from the Open Portuguese LLM Leaderboard to your model card.

If you encounter any issues, please report them to https://huggingface.co/spaces/eduagarcia-temp/portuguese-leaderboard-results-to-modelcard/discussions

Files changed (1) hide show
  1. README.md +187 -49
README.md CHANGED
@@ -1,58 +1,179 @@
1
  ---
2
  language:
3
- - pt
4
- - en
5
  license: cc
6
  tags:
7
- - text-generation-inference
8
- - transformers
9
- - mistral
10
- - gguf
11
- - brazil
12
- - brasil
13
- - portuguese
14
  base_model: mistralai/Mistral-7B-Instruct-v0.2
15
- pipeline_tag: text-generation
16
  metrics:
17
- - name: assin2_rte f1_macro
18
- type: assin2_rte
19
- value: 90.13
20
- - name: assin2_rte acc
21
- type: assin2_rte
22
- value: 90.16
23
- - name: assin2_sts pearson
24
- type: assin2_sts
25
- value: 71.51
26
- - name: assin2_sts mse
27
- type: assin2_sts
28
- value: 68.03
29
- - name: bluex acc
30
- type: bluex
31
- value: 47.98
32
- - name: enem acc
33
- type: enem
34
- value: 58.43
35
- - name: faquad_nli f1_macro
36
- type: faquad_nli
37
- value: 64.24
38
- - name: faquad_nli acc
39
- type: faquad_nli
40
- value: 67.69
41
- - name: hatebr_offensive_binary f1_macro
42
- type: hatebr_offensive_binary
43
- value: 83.61
44
- - name: hatebr_offensive_binary acc
45
- type: hatebr_offensive_binary
46
- value: 83.71
47
- - name: oab_exams acc
48
- type: oab_exams
49
- value: 38.41
50
- - name: portuguese_hate_speech_binary f1_macro
51
- type: portuguese_hate_speech_binary
52
- value: 61.87
53
- - name: portuguese_hate_speech_binary acc
54
- type: portuguese_hate_speech_binary
55
- value: 63.22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  ---
57
  # Cabra Mistral 7b v2
58
  <img src="https://uploads-ssl.webflow.com/65f77c0240ae1c68f8192771/660b1a4d574293d8a1ce48ca_cabra1.png" width="400" height="400">
@@ -193,3 +314,20 @@ O modelo é destinado, por agora, a fins de pesquisa. As áreas e tarefas de pes
193
  | portuguese_hate_speech_binary | 1 | all | 25 | f1_macro | 0.6187 | ± 0.0119 |
194
  | | | all | 25 | acc | 0.6322 | ± 0.0117 |
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  language:
3
+ - pt
4
+ - en
5
  license: cc
6
  tags:
7
+ - text-generation-inference
8
+ - transformers
9
+ - mistral
10
+ - gguf
11
+ - brazil
12
+ - brasil
13
+ - portuguese
14
  base_model: mistralai/Mistral-7B-Instruct-v0.2
 
15
  metrics:
16
+ - name: assin2_rte f1_macro
17
+ type: assin2_rte
18
+ value: 90.13
19
+ - name: assin2_rte acc
20
+ type: assin2_rte
21
+ value: 90.16
22
+ - name: assin2_sts pearson
23
+ type: assin2_sts
24
+ value: 71.51
25
+ - name: assin2_sts mse
26
+ type: assin2_sts
27
+ value: 68.03
28
+ - name: bluex acc
29
+ type: bluex
30
+ value: 47.98
31
+ - name: enem acc
32
+ type: enem
33
+ value: 58.43
34
+ - name: faquad_nli f1_macro
35
+ type: faquad_nli
36
+ value: 64.24
37
+ - name: faquad_nli acc
38
+ type: faquad_nli
39
+ value: 67.69
40
+ - name: hatebr_offensive_binary f1_macro
41
+ type: hatebr_offensive_binary
42
+ value: 83.61
43
+ - name: hatebr_offensive_binary acc
44
+ type: hatebr_offensive_binary
45
+ value: 83.71
46
+ - name: oab_exams acc
47
+ type: oab_exams
48
+ value: 38.41
49
+ - name: portuguese_hate_speech_binary f1_macro
50
+ type: portuguese_hate_speech_binary
51
+ value: 61.87
52
+ - name: portuguese_hate_speech_binary acc
53
+ type: portuguese_hate_speech_binary
54
+ value: 63.22
55
+ pipeline_tag: text-generation
56
+ model-index:
57
+ - name: CabraMistral7b
58
+ results:
59
+ - task:
60
+ type: text-generation
61
+ name: Text Generation
62
+ dataset:
63
+ name: ENEM Challenge (No Images)
64
+ type: eduagarcia/enem_challenge
65
+ split: train
66
+ args:
67
+ num_few_shot: 3
68
+ metrics:
69
+ - type: acc
70
+ value: 60.81
71
+ name: accuracy
72
+ source:
73
+ url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=nicolasdec/CabraMistral7b
74
+ name: Open Portuguese LLM Leaderboard
75
+ - task:
76
+ type: text-generation
77
+ name: Text Generation
78
+ dataset:
79
+ name: BLUEX (No Images)
80
+ type: eduagarcia-temp/BLUEX_without_images
81
+ split: train
82
+ args:
83
+ num_few_shot: 3
84
+ metrics:
85
+ - type: acc
86
+ value: 46.87
87
+ name: accuracy
88
+ source:
89
+ url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=nicolasdec/CabraMistral7b
90
+ name: Open Portuguese LLM Leaderboard
91
+ - task:
92
+ type: text-generation
93
+ name: Text Generation
94
+ dataset:
95
+ name: OAB Exams
96
+ type: eduagarcia/oab_exams
97
+ split: train
98
+ args:
99
+ num_few_shot: 3
100
+ metrics:
101
+ - type: acc
102
+ value: 38.59
103
+ name: accuracy
104
+ source:
105
+ url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=nicolasdec/CabraMistral7b
106
+ name: Open Portuguese LLM Leaderboard
107
+ - task:
108
+ type: text-generation
109
+ name: Text Generation
110
+ dataset:
111
+ name: Assin2 RTE
112
+ type: assin2
113
+ split: test
114
+ args:
115
+ num_few_shot: 15
116
+ metrics:
117
+ - type: f1_macro
118
+ value: 90.27
119
+ name: f1-macro
120
+ - type: pearson
121
+ value: 72.25
122
+ name: pearson
123
+ source:
124
+ url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=nicolasdec/CabraMistral7b
125
+ name: Open Portuguese LLM Leaderboard
126
+ - task:
127
+ type: text-generation
128
+ name: Text Generation
129
+ dataset:
130
+ name: FaQuAD NLI
131
+ type: ruanchaves/faquad-nli
132
+ split: test
133
+ args:
134
+ num_few_shot: 15
135
+ metrics:
136
+ - type: f1_macro
137
+ value: 64.35
138
+ name: f1-macro
139
+ source:
140
+ url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=nicolasdec/CabraMistral7b
141
+ name: Open Portuguese LLM Leaderboard
142
+ - task:
143
+ type: text-generation
144
+ name: Text Generation
145
+ dataset:
146
+ name: HateBR Binary
147
+ type: eduagarcia/portuguese_benchmark
148
+ split: test
149
+ args:
150
+ num_few_shot: 25
151
+ metrics:
152
+ - type: f1_macro
153
+ value: 83.15
154
+ name: f1-macro
155
+ - type: f1_macro
156
+ value: 64.82
157
+ name: f1-macro
158
+ source:
159
+ url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=nicolasdec/CabraMistral7b
160
+ name: Open Portuguese LLM Leaderboard
161
+ - task:
162
+ type: text-generation
163
+ name: Text Generation
164
+ dataset:
165
+ name: tweetSentBR
166
+ type: eduagarcia-temp/tweetsentbr
167
+ split: test
168
+ args:
169
+ num_few_shot: 25
170
+ metrics:
171
+ - type: f1_macro
172
+ value: 64.8
173
+ name: f1-macro
174
+ source:
175
+ url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=nicolasdec/CabraMistral7b
176
+ name: Open Portuguese LLM Leaderboard
177
  ---
178
  # Cabra Mistral 7b v2
179
  <img src="https://uploads-ssl.webflow.com/65f77c0240ae1c68f8192771/660b1a4d574293d8a1ce48ca_cabra1.png" width="400" height="400">
 
314
  | portuguese_hate_speech_binary | 1 | all | 25 | f1_macro | 0.6187 | ± 0.0119 |
315
  | | | all | 25 | acc | 0.6322 | ± 0.0117 |
316
 
317
+
318
+ # [Open Portuguese LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard)
319
+ Detailed results can be found [here](https://huggingface.co/datasets/eduagarcia-temp/llm_pt_leaderboard_raw_results/tree/main/nicolasdec/CabraMistral7b)
320
+
321
+ | Metric | Value |
322
+ |--------------------------|--------|
323
+ |Average |**65.1**|
324
+ |ENEM Challenge (No Images)| 60.81|
325
+ |BLUEX (No Images) | 46.87|
326
+ |OAB Exams | 38.59|
327
+ |Assin2 RTE | 90.27|
328
+ |Assin2 STS | 72.25|
329
+ |FaQuAD NLI | 64.35|
330
+ |HateBR Binary | 83.15|
331
+ |PT Hate Speech Binary | 64.82|
332
+ |tweetSentBR | 64.80|
333
+