File size: 421 Bytes
905ea5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import re


def remove_twitter_handles(row):
    """Strip Twitter-style ``@handle`` mentions from ``row["text"]``.

    Every ``@`` followed by one or more non-whitespace characters is deleted,
    wherever it occurs (so the ``@domain`` part of an e-mail address is
    removed too). Surrounding whitespace is left untouched.

    Args:
        row: Mutable mapping holding a string under the ``"text"`` key.

    Returns:
        The same ``row`` object, with ``row["text"]`` updated in place.
    """
    # Raw string on purpose: "\s" inside a plain literal is an invalid escape
    # sequence (SyntaxWarning on Python 3.12+). r"\S" is equivalent to the
    # former "[^\s]" character class.
    row["text"] = re.sub(r"@\S+", "", row["text"])
    return row


def remove_urls(row):
    """Delete URL-like tokens from ``row["text"]``.

    Any run of characters that begins with ``http`` and continues up to the
    next whitespace is removed; this covers both ``http://`` and ``https://``
    links. Whitespace around the removed token is kept.

    Args:
        row: Mutable mapping holding a string under the ``"text"`` key.

    Returns:
        The same ``row`` object, with ``row["text"]`` updated in place.
    """
    url_pattern = r"http\S+"
    without_urls = re.sub(url_pattern, "", row["text"])
    row["text"] = without_urls
    return row


# Matches anything shaped like an HTML tag (``<...>``, shortest match within a
# single line) or a character reference: named (``&amp;``), decimal
# (``&#8217;``) or hexadecimal (``&#x1F600;``).
# IGNORECASE so that uppercase hex digits, the ``&#X...;`` spelling and
# named entities containing capitals (e.g. ``&Eacute;``) are matched as well.
CLEANR = re.compile(
    r"<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});",
    re.IGNORECASE,
)


def clean_html(row):
    """Remove HTML tags and character references from ``row["text"]``.

    Every match of the module-level ``CLEANR`` pattern is replaced with the
    empty string.

    Args:
        row: Mutable mapping holding a string under the ``"text"`` key.

    Returns:
        The same ``row`` object, with ``row["text"]`` updated in place.
    """
    stripped = CLEANR.sub("", row["text"])
    row["text"] = stripped
    return row