import re | |
def remove_twitter_handles(row):
    """Strip ``@handle`` mentions from ``row["text"]``.

    Every ``@`` together with the run of non-whitespace characters that
    follows it is deleted. Surrounding whitespace is left untouched, so a
    mention in the middle of a sentence leaves two adjacent spaces behind.
    Because the match runs up to the next whitespace, punctuation glued to
    a handle (``@bob,``) and the domain part of an e-mail address
    (``me@example.com`` -> ``me``) are removed as well.

    Args:
        row: Mutable mapping-like object holding the tweet under the
            ``"text"`` key. It is modified in place.

    Returns:
        The same ``row`` object, with ``row["text"]`` rewritten.
    """
    # Raw string: "\s" inside a normal string literal is an invalid escape
    # sequence and triggers a warning on modern Python versions.
    row["text"] = re.sub(r"@\S+", "", row["text"])
    return row
def remove_urls(row):
    """Delete URL-like tokens from ``row["text"]``.

    A token is removed starting at any occurrence of the literal ``http``
    and extending through every following non-whitespace character (at
    least one is required). Whitespace around the token is kept as-is.

    Args:
        row: Mutable mapping-like object holding the tweet under the
            ``"text"`` key. It is modified in place.

    Returns:
        The same ``row`` object, with ``row["text"]`` rewritten.
    """
    url_pattern = re.compile(r"http\S+")
    original_text = row["text"]
    stripped_text = url_pattern.sub("", original_text)
    row["text"] = stripped_text
    return row
# Matches either an HTML tag (shortest "<...>" run on a single line) or a
# character reference: named (&amp;, &Eacute;), decimal (&#169;) or
# hexadecimal (&#x1F600;, &#X41;). Letters are accepted in both cases so
# that upper-case entity names and hex digits are stripped too.
CLEANR = re.compile(
    r"<.*?>|&([a-zA-Z0-9]+|#[0-9]{1,6}|#[xX][0-9a-fA-F]{1,6});"
)


def clean_html(row):
    """Remove HTML tags and character references from ``row["text"]``.

    Everything matched by ``CLEANR`` is deleted outright; entities are
    dropped rather than decoded into the characters they stand for.

    Args:
        row: Mutable mapping-like object holding the tweet under the
            ``"text"`` key. It is modified in place.

    Returns:
        The same ``row`` object, with ``row["text"]`` rewritten.
    """
    row["text"] = CLEANR.sub("", row["text"])
    return row