ybelkada committed on
Commit
2eb6dd9
·
2 Parent(s): 661cf94 5b79adc

Merge branch 'main' of https://huggingface.co/ybelkada/flan-t5-large into main

Browse files
Files changed (2) hide show
  1. README.md +6 -11
  2. config.json +0 -29
README.md CHANGED
@@ -62,9 +62,7 @@ language:
62
  - no
63
 
64
  tags:
65
- - summarization
66
- - translation
67
- - text-generation
68
 
69
  datasets:
70
  - svakulenk0/qrecc
@@ -101,7 +99,7 @@ license: apache-2.0
101
 
102
  # TL;DR
103
 
104
- If you already know T5, FLAN-T5 is just better at everything. For the same number of parameters, these models have been fine-tuned on more than 1000 additional tasks covering also more languages.
105
  As mentioned in the first few lines of the abstract :
106
  > Flan-PaLM 540B achieves state-of-the-art performance on several benchmarks, such as 75.2% on five-shot MMLU. We also publicly release Flan-T5 checkpoints,1 which achieve strong few-shot performance even compared to much larger models, such as PaLM 62B. Overall, instruction finetuning is a general method for improving the performance and usability of pretrained language models.
107
 
@@ -155,7 +153,7 @@ print(tokenizer.decode(outputs[0]))
155
  <summary> Click to expand </summary>
156
 
157
  ```python
158
-
159
  from transformers import T5Tokenizer, T5ForConditionalGeneration
160
 
161
  tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
@@ -178,6 +176,7 @@ print(tokenizer.decode(outputs[0]))
178
  <summary> Click to expand </summary>
179
 
180
  ```python
 
181
  import torch
182
  from transformers import T5Tokenizer, T5ForConditionalGeneration
183
 
@@ -199,7 +198,7 @@ print(tokenizer.decode(outputs[0]))
199
  <summary> Click to expand </summary>
200
 
201
  ```python
202
- # pip install bitsandbytes
203
  from transformers import T5Tokenizer, T5ForConditionalGeneration
204
 
205
  tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
@@ -308,8 +307,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
308
 
309
  copyright = {Creative Commons Attribution 4.0 International}
310
  }
311
- ```
312
-
313
- # Model Card Authors
314
-
315
- This model card was written by the team at Hugging Face.
 
62
  - no
63
 
64
  tags:
65
+ - text2text-generation
 
 
66
 
67
  datasets:
68
  - svakulenk0/qrecc
 
99
 
100
  # TL;DR
101
 
102
+ If you already know T5, FLAN-T5 is just better at everything. For the same number of parameters, these models have been fine-tuned on more than 1000 additional tasks, also covering more languages.
103
  As mentioned in the first few lines of the abstract :
104
  > Flan-PaLM 540B achieves state-of-the-art performance on several benchmarks, such as 75.2% on five-shot MMLU. We also publicly release Flan-T5 checkpoints,1 which achieve strong few-shot performance even compared to much larger models, such as PaLM 62B. Overall, instruction finetuning is a general method for improving the performance and usability of pretrained language models.
105
 
 
153
  <summary> Click to expand </summary>
154
 
155
  ```python
156
+ # pip install accelerate
157
  from transformers import T5Tokenizer, T5ForConditionalGeneration
158
 
159
  tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
 
176
  <summary> Click to expand </summary>
177
 
178
  ```python
179
+ # pip install accelerate
180
  import torch
181
  from transformers import T5Tokenizer, T5ForConditionalGeneration
182
 
 
198
  <summary> Click to expand </summary>
199
 
200
  ```python
201
+ # pip install bitsandbytes accelerate
202
  from transformers import T5Tokenizer, T5ForConditionalGeneration
203
 
204
  tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
 
307
 
308
  copyright = {Creative Commons Attribution 4.0 International}
309
  }
310
+ ```
 
 
 
 
config.json CHANGED
@@ -23,35 +23,6 @@
23
  "pad_token_id": 0,
24
  "relative_attention_max_distance": 128,
25
  "relative_attention_num_buckets": 32,
26
- "task_specific_params": {
27
- "summarization": {
28
- "early_stopping": true,
29
- "length_penalty": 2.0,
30
- "max_length": 200,
31
- "min_length": 30,
32
- "no_repeat_ngram_size": 3,
33
- "num_beams": 4,
34
- "prefix": "summarize: "
35
- },
36
- "translation_en_to_de": {
37
- "early_stopping": true,
38
- "max_length": 300,
39
- "num_beams": 4,
40
- "prefix": "translate English to German: "
41
- },
42
- "translation_en_to_fr": {
43
- "early_stopping": true,
44
- "max_length": 300,
45
- "num_beams": 4,
46
- "prefix": "translate English to French: "
47
- },
48
- "translation_en_to_ro": {
49
- "early_stopping": true,
50
- "max_length": 300,
51
- "num_beams": 4,
52
- "prefix": "translate English to Romanian: "
53
- }
54
- },
55
  "tie_word_embeddings": false,
56
  "transformers_version": "4.23.1",
57
  "use_cache": true,
 
23
  "pad_token_id": 0,
24
  "relative_attention_max_distance": 128,
25
  "relative_attention_num_buckets": 32,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  "tie_word_embeddings": false,
27
  "transformers_version": "4.23.1",
28
  "use_cache": true,