Commit 76e1a38 · 1 Parent(s): b97e015
updated README

Files changed:
- README.md +39 -4
- perplexity.py +6 -6
- requirements.txt +1 -0
README.md
CHANGED
@@ -29,11 +29,46 @@ It is defined as the exponentiated average negative log-likelihood of a sequence
 For more information, see https://huggingface.co/docs/transformers/perplexity
 
 ## How to Use
-At minimum, this metric requires the model and
+At minimum, this metric requires the model and data as inputs.
 ```python
->>>
+>>> import evaluate
+>>> perplexity = evaluate.load("perplexity", module_type="metric")
 >>> input_texts = ["lorem ipsum", "Happy Birthday!", "Bienvenue"]
->>> results = perplexity.compute(model='distilgpt2',
+>>> results = perplexity.compute(model='distilgpt2', data=input_texts)
 >>> print(results)
 {'accuracy': 1.0}
-```
+```
+
+### Inputs
+- **model** (`Union[str, AutoModelForCausalLM]`): model used for calculating perplexity.
+- **data** (`list` of `str`): input text; each separate text snippet is one list entry.
+- **device** (`str`): device to run on; defaults to 'cuda' when available.
+- **max_length** (`int`): maximum sequence length; defaults to 2048.
+
+### Output Values
+- **loss** (`float`): the loss of the model predictions compared to the reference.
+- **perplexity** (`float`): measures the uncertainty of a model predicting text; model performance is better when perplexity is lower.
+
+Output Example(s):
+```python
+{'loss': 3.8299286365509033, 'perplexity': 46.05925369262695}
+```
+This metric outputs a dictionary containing the loss and perplexity score.
+
+### Examples
+```python
+>>> import evaluate
+>>> from datasets import load_dataset
+>>> perplexity = evaluate.load("d-matrix/perplexity", module_type="metric")
+>>> input_texts = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")["text"][:10]
+>>> results = perplexity.compute(model='distilgpt2', data=input_texts)
+>>> print(list(results.keys()))
+['loss', 'perplexity']
+>>> print(results['loss'])
+3.8299286365509033
+>>> print(results['perplexity'])
+46.05925369262695
+```
+
+## Citation(s)
+https://huggingface.co/docs/transformers/perplexity
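Since perplexity is defined above as the exponentiated average negative log-likelihood, the two values in the output dictionary are related by a single `exp`. A quick sanity check, using the numbers from the README's own example:

```python
import math

# perplexity is the exponentiated average negative log-likelihood,
# so it should equal exp(loss) for the values reported above
loss = 3.8299286365509033
print(math.exp(loss))  # ~46.0593, matching results['perplexity']
```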
perplexity.py
CHANGED
@@ -20,7 +20,7 @@ Args:
         causal versions of t5, and more (the full list can be found
         in the AutoModelForCausalLM documentation here:
         https://huggingface.co/docs/transformers/master/en/model_doc/auto#transformers.AutoModelForCausalLM )
-
+    data (list of str): input text, each separate text snippet is one list entry.
     device (str): device to run on, defaults to 'cuda' when available.
     max_length (int): maximum sequence length, defaults to 2048.
 Returns:
@@ -31,7 +31,7 @@ Examples:
     >>> perplexity = evaluate.load("dmx_perplexity", module_type="metric")
     >>> input_texts = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")["text"][:10] # doctest: +SKIP
     >>> results = perplexity.compute(model='distilgpt2',
-    ...
+    ...                              data=input_texts)
     >>> print(list(results.keys()))
     ['loss', 'perplexity']
     >>> print(results['loss']) # doctest: +SKIP
@@ -40,8 +40,8 @@ Examples:
     46.05925369262695
 """
 
-
-class DmxPerplexity(evaluate.Metric):
+@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+class Perplexity(evaluate.Metric):
     def _info(self):
         return evaluate.MetricInfo(
             module_type="metric",
@@ -58,7 +58,7 @@ class DmxPerplexity(evaluate.Metric):
 
     def _compute(
         self,
-
+        data,
         model: Union[str, AutoModelForCausalLM],
         device=None,
         max_length=None,
@@ -91,7 +91,7 @@ class DmxPerplexity(evaluate.Metric):
             max_seq_len = 2048
 
         model = model.to(device)
-        encodings = tokenizer("\n\n".join(
+        encodings = tokenizer("\n\n".join(data), return_tensors="pt")
 
         stride = max_seq_len
         seq_len = encodings.input_ids.size(1)
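The fragments visible in the last hunk (`stride = max_seq_len`, `seq_len = encodings.input_ids.size(1)`) follow the strided sliding-window evaluation described in the transformers perplexity guide linked above. A minimal sketch of that loop, with a hypothetical function name (the module's actual `_compute` body is only partially shown in this diff):

```python
import torch

def windowed_perplexity(model, tokenizer, data, max_seq_len=2048, device="cpu"):
    # join the snippets and tokenize once, as in the diff above
    encodings = tokenizer("\n\n".join(data), return_tensors="pt")
    stride = max_seq_len
    seq_len = encodings.input_ids.size(1)

    nlls = []
    for begin in range(0, seq_len, stride):
        end = min(begin + max_seq_len, seq_len)
        input_ids = encodings.input_ids[:, begin:end].to(device)
        with torch.no_grad():
            # labels=input_ids makes the causal LM return the mean
            # cross-entropy over this window's next-token predictions
            out = model(input_ids, labels=input_ids)
        # weight each window's mean loss by its length (approximate)
        nlls.append(out.loss * (end - begin))

    loss = torch.stack(nlls).sum() / seq_len
    return {"loss": loss.item(), "perplexity": torch.exp(loss).item()}
```

With `stride` equal to `max_seq_len`, as in the diff, the windows do not overlap; a smaller stride would give each window more left context at a higher compute cost.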
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
+git+https://github.com/huggingface/evaluate@main
 evaluate
 transformers
 torch