illorca committed on
Commit
6a56d7d
·
1 Parent(s): 68b945f

Update Readme

Browse files
Files changed (1) hide show
  1. README.md +29 -46
README.md CHANGED
@@ -3,7 +3,7 @@ title: FairEval
3
  tags:
4
  - evaluate
5
  - metric
6
- description: "TODO: add a description here"
7
  sdk: gradio
8
  sdk_version: 3.0.2
9
  app_file: app.py
@@ -82,59 +82,42 @@ Considering the following input annotated sentences:
82
  The output for different modes and error_formats is:
83
  ```python
84
  >>> faireval.compute(predictions=y_pred, references=y_true, mode='fair', error_format='count')
85
- {"PER": {"precision": 1.0,"recall": 0.5,"f1": 0.6666,
86
- "trad_prec": 0.5,"trad_rec": 0.5,"trad_f1": 0.5,
87
- "TP": 1,"FP": 0.0,"FN": 1.0,"LE": 0.0,"BE": 0.0,"LBE": 0.0},
88
- "INT": {"precision": 0.0,"recall": 0.0,"f1": 0.0,
89
- "trad_prec": 0.0,"trad_rec": 0.0,"trad_f1": 0.0,
90
- "TP": 0,"FP": 0.0,"FN": 0.0,"LE": 0.0,"BE": 1.0,"LBE": 1.0},
91
- "OUT": {"precision": 0.6666,"recall": 0.6666,"f1": 0.666,
92
- "trad_prec": 0.5,"trad_rec": 0.5,"trad_f1": 0.5,
93
- "TP": 1,"FP": 0.0,"FN": 0.0,"LE": 1.0,"BE": 0.0,"LBE": 0.0},
94
- "overall_precision": 0.5714,
95
- "overall_recall": 0.4444,
96
- "overall_f1": 0.5,
97
- "overall_trad_prec": 0.4,
98
- "overall_trad_rec": 0.3333,
99
- "overall_trad_f1": 0.3636,
100
- "TP": 2,
101
- "FP": 0.0,
102
- "FN": 1.0,
103
- "LE": 1.0,
104
- "BE": 1.0,
105
- "LBE": 1.0}
106
  ```
107
 
108
  ```python
109
  >>> faireval.compute(predictions=y_pred, references=y_true, mode='traditional', error_format='count')
110
- {"PER": {"precision": 0.5,"recall": 0.5,"f1": 0.5,
111
- "TP": 1,"FP": 1.0,"FN": 1.0},
112
- "INT": {"precision": 0.0,"recall": 0.0,"f1": 0.0,
113
- "TP": 0,"FP": 1.0,"FN": 2.0},
114
- "OUT": {"precision": 0.5,"recall": 0.5,"f1": 0.5,
115
- "TP": 1,"FP": 1.0,"FN": 1.0},
116
- "overall_precision": 0.4,
117
- "overall_recall": 0.3333,
118
- "overall_f1": 0.3636,
119
- "TP": 2,
120
- "FP": 3.0,
121
- "FN": 4.0}
122
  ```
123
 
124
  ```python
125
  >>> faireval.compute(predictions=y_pred, references=y_true, mode='traditional', error_format='error_ratio')
126
- {"PER": {"precision": 0.5,"recall": 0.5,"f1": 0.5,
127
- "TP": 1,"FP": 0.1428,"FN": 0.1428},
128
- "INT": {"precision": 0.0,"recall": 0.0,"f1": 0.0,
129
- "TP": 0,"FP": 0.14285714285714285,"FN": 0.2857},
130
- "OUT": {"precision": 0.5,"recall": 0.5,"f1": 0.5,
131
- "TP": 1,"FP": 0.1428,"FN": 0.1428},
132
- "overall_precision": 0.4,
133
- "overall_recall": 0.3333,
134
- "overall_f1": 0.3636,
135
- "TP": 2,
136
- "FP": 0.4285,
137
- "FN": 0.5714}
138
  ```
139
 
140
  #### Values from Popular Papers
 
3
  tags:
4
  - evaluate
5
  - metric
6
+ description: "Fair Evaluation for Sequence labeling"
7
  sdk: gradio
8
  sdk_version: 3.0.2
9
  app_file: app.py
 
82
  The output for different modes and error_formats is:
83
  ```python
84
  >>> faireval.compute(predictions=y_pred, references=y_true, mode='fair', error_format='count')
85
+ {"PER": {"precision": 1.0, "recall": 0.5, "f1": 0.6666,
86
+ "trad_prec": 0.5, "trad_rec": 0.5, "trad_f1": 0.5,
87
+ "TP": 1, "FP": 0.0, "FN": 1.0, "LE": 0.0, "BE": 0.0, "LBE": 0.0},
88
+ "INT": {"precision": 0.0, "recall": 0.0, "f1": 0.0,
89
+ "trad_prec": 0.0, "trad_rec": 0.0, "trad_f1": 0.0,
90
+ "TP": 0, "FP": 0.0, "FN": 0.0, "LE": 0.0, "BE": 1.0, "LBE": 1.0},
91
+ "OUT": {"precision": 0.6666, "recall": 0.6666, "f1": 0.6666,
92
+ "trad_prec": 0.5, "trad_rec": 0.5, "trad_f1": 0.5,
93
+ "TP": 1, "FP": 0.0, "FN": 0.0, "LE": 1.0, "BE": 0.0, "LBE": 0.0},
94
+ "overall_precision": 0.5714, "overall_recall": 0.4444, "overall_f1": 0.5,
95
+ "overall_trad_prec": 0.4, "overall_trad_rec": 0.3333, "overall_trad_f1": 0.3636,
96
+ "TP": 2, "FP": 0.0, "FN": 1.0, "LE": 1.0, "BE": 1.0, "LBE": 1.0}
 
 
 
 
 
 
 
 
 
97
  ```
98
 
99
  ```python
100
  >>> faireval.compute(predictions=y_pred, references=y_true, mode='traditional', error_format='count')
101
+ {"PER": {"precision": 0.5, "recall": 0.5, "f1": 0.5,
102
+ "TP": 1, "FP": 1.0, "FN": 1.0},
103
+ "INT": {"precision": 0.0, "recall": 0.0, "f1": 0.0,
104
+ "TP": 0, "FP": 1.0, "FN": 2.0},
105
+ "OUT": {"precision": 0.5, "recall": 0.5, "f1": 0.5,
106
+ "TP": 1, "FP": 1.0, "FN": 1.0},
107
+ "overall_precision": 0.4, "overall_recall": 0.3333, "overall_f1": 0.3636,
108
+ "TP": 2, "FP": 3.0, "FN": 4.0}
 
 
 
 
109
  ```
110
 
111
  ```python
112
  >>> faireval.compute(predictions=y_pred, references=y_true, mode='traditional', error_format='error_ratio')
113
+ {"PER": {"precision": 0.5, "recall": 0.5, "f1": 0.5,
114
+ "TP": 1, "FP": 0.1428, "FN": 0.1428},
115
+ "INT": {"precision": 0.0, "recall": 0.0, "f1": 0.0,
116
+ "TP": 0, "FP": 0.1428, "FN": 0.2857},
117
+ "OUT": {"precision": 0.5, "recall": 0.5, "f1": 0.5,
118
+ "TP": 1, "FP": 0.1428, "FN": 0.1428},
119
+ "overall_precision": 0.4, "overall_recall": 0.3333, "overall_f1": 0.3636,
120
+ "TP": 2, "FP": 0.4285, "FN": 0.5714}
 
 
 
 
121
  ```
122
 
123
  #### Values from Popular Papers