Commit
·
861610c
1
Parent(s):
895197b
Update functions.py
Browse files- functions.py +2 -0
functions.py
CHANGED
@@ -1,11 +1,13 @@
|
|
1 |
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
|
2 |
from scipy.special import softmax
|
|
|
3 |
# Define the preprocess function
|
4 |
def preprocess(text):
    """Mask user mentions and links in ``text``.

    The input is split on single space characters. A token that starts
    with ``@`` and is longer than one character is replaced by ``@user``;
    a token that starts with ``http`` is replaced by ``http``. All other
    tokens (including empty ones produced by consecutive spaces) are kept
    unchanged, and the tokens are joined back with single spaces.
    """

    def _mask(token):
        # A lone "@" is not treated as a mention.
        if token.startswith('@') and len(token) > 1:
            return '@user'
        if token.startswith('http'):
            return 'http'
        return token

    return " ".join(_mask(token) for token in text.split(" "))
|
11 |
|
|
|
1 |
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
|
2 |
from scipy.special import softmax
|
3 |
+
import emoji
|
4 |
# Define the preprocess function
|
5 |
def preprocess(text):
    """Mask user mentions and links in ``text`` and spell out emojis.

    The input is split on single space characters. For each token:

    * a token starting with ``@`` and longer than one character becomes
      ``@user``;
    * a token starting with ``http`` becomes ``http``;
    * the resulting token is passed through ``emoji.demojize`` to convert
      emojis to their text representation.

    The processed tokens are joined back with single spaces. Relies on the
    module-level ``import emoji``.
    """
    tokens = []
    for word in text.split(" "):
        # A lone "@" is not treated as a mention.
        if word.startswith('@') and len(word) > 1:
            word = '@user'
        if word.startswith('http'):
            word = 'http'
        # Convert emojis to text representation
        tokens.append(emoji.demojize(word))
    return " ".join(tokens)
|
13 |
|