petermutwiri committed on
Commit
2e4daec
·
1 Parent(s): 861610c

add emoji preprocessor

Browse files
Files changed (1) hide show
  1. functions.py +17 -6
functions.py CHANGED
@@ -1,14 +1,25 @@
1
  from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
2
  from scipy.special import softmax
3
- import emoji
4
  # Define the preprocess function
5
- def preprocess(text):
6
  new_text = []
7
  for t in text.split(" "):
8
- t = '@user' if t.startswith('@') and len(t) > 1 else t
9
- t = 'http' if t.startswith('http') else t
10
- t = emoji.demojize(t) # Convert emojis to text representation
11
- new_text.append(t)
 
 
 
 
 
 
 
 
 
 
 
12
  return " ".join(new_text)
13
 
14
  # Define the sentiment_analysis function
 
1
  from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
2
  from scipy.special import softmax
3
+
4
  # Define the preprocess function
5
+ def process_text(text):
6
  new_text = []
7
  for t in text.split(" "):
8
+ # Check if the token is an emoji
9
+ if emoji.is_emoji(t):
10
+ # Add the emoji to the new text list
11
+ new_text.append(t)
12
+ else:
13
+ # Check if the token starts with '@' and has more than one character
14
+ if t.startswith('@') and len(t) > 1:
15
+ # Replace the token with '@user'
16
+ t = '@user'
17
+ # Check if the token starts with 'http'
18
+ if t.startswith('http'):
19
+ # Replace the token with 'http'
20
+ t = 'http'
21
+ # Add the processed token to the new text list
22
+ new_text.append(t)
23
  return " ".join(new_text)
24
 
25
  # Define the sentiment_analysis function