Update README.md
README.md CHANGED
````diff
@@ -8,9 +8,9 @@ datasets:
 - twitter-api
 ---
 
-# Twitter
+# Twitter September 2022 (RoBERTa-base, 169M)
 
-This is a RoBERTa-base model trained on
+This is a RoBERTa-base model trained on 168.86M tweets until the end of September 2022 (15M tweets increment).
 More details and performance scores are available in the [TimeLMs paper](https://arxiv.org/abs/2202.03829).
 
 Below, we provide some usage examples using the standard Transformers interface. For another interface more suited to comparing predictions and perplexity scores between models trained at different temporal intervals, check the [TimeLMs repository](https://github.com/cardiffnlp/timelms).
````
````diff
@@ -36,7 +36,7 @@ def preprocess(text):
 ```python
 from transformers import pipeline, AutoTokenizer
 
-MODEL = "cardiffnlp/twitter-roberta-base-
+MODEL = "cardiffnlp/twitter-roberta-base-sep2022"
 fill_mask = pipeline("fill-mask", model=MODEL, tokenizer=MODEL)
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
 
````
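The hunk above only shows the renamed model identifier; the rest of the card's fill-mask example is cut off in this view. As a hedged illustration, a minimal self-contained version of that usage might look like the following (the example tweet and printed fields are assumptions, not part of this diff):

```python
from transformers import pipeline

# Model name as updated in this commit; RoBERTa checkpoints use "<mask>" as the mask token.
MODEL = "cardiffnlp/twitter-roberta-base-sep2022"
fill_mask = pipeline("fill-mask", model=MODEL, tokenizer=MODEL)

# The card preprocesses tweets (user handles -> "@user", links -> "http") before scoring;
# that step is omitted here for brevity.
for prediction in fill_mask("So glad I'm <mask> vaccinated."):
    print(f"{prediction['token_str']}\t{prediction['score']:.4f}")
```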
````diff
@@ -99,7 +99,7 @@ def get_embedding(text): # naive approach for demonstration
   return np.mean(features[0], axis=0)
 
 
-MODEL = "cardiffnlp/twitter-roberta-base-
+MODEL = "cardiffnlp/twitter-roberta-base-sep2022"
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
 model = AutoModel.from_pretrained(MODEL)
 
````
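Likewise, only the tail of `get_embedding` and the model setup are visible in this hunk. A minimal sketch of the naive mean-pooled tweet embedding it belongs to, assuming PyTorch weights for the updated checkpoint:

```python
import numpy as np
from transformers import AutoModel, AutoTokenizer

MODEL = "cardiffnlp/twitter-roberta-base-sep2022"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModel.from_pretrained(MODEL)

def get_embedding(text):  # naive approach for demonstration
    encoded_input = tokenizer(text, return_tensors="pt")
    features = model(**encoded_input)[0].detach().numpy()  # last hidden states, shape (1, seq_len, 768)
    return np.mean(features[0], axis=0)  # mean over token positions -> (768,)

print(get_embedding("Good night 😊").shape)
```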
````diff
@@ -136,7 +136,7 @@ Most similar to: The book was awesome
 from transformers import AutoTokenizer, AutoModel, TFAutoModel
 import numpy as np
 
-MODEL = "cardiffnlp/twitter-roberta-base-
+MODEL = "cardiffnlp/twitter-roberta-base-sep2022"
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
 
 text = "Good night 😊"
````
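The last hunk updates the TensorFlow variant of the same embedding example (note the `TFAutoModel` import). A corresponding sketch follows; the `from_pt=True` fallback is an assumption that may be needed if the repository publishes only PyTorch weights:

```python
import numpy as np
from transformers import AutoTokenizer, TFAutoModel

MODEL = "cardiffnlp/twitter-roberta-base-sep2022"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
tf_model = TFAutoModel.from_pretrained(MODEL)  # or from_pt=True if no TF weights are hosted

text = "Good night 😊"
encoded_input = tokenizer(text, return_tensors="tf")
features = tf_model(encoded_input)[0].numpy()  # last hidden states
embedding = np.mean(features[0], axis=0)  # mean-pooled, matching the PyTorch version
```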