File size: 421 Bytes
905ea5f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
import re
def remove_twitter_handles(row):
    """Strip @-mentions from the ``"text"`` field of *row*.

    Every ``@`` followed by one or more non-whitespace characters is
    deleted. Surrounding whitespace is left untouched, so removing a
    handle from the middle of a sentence leaves two adjacent spaces.

    Args:
        row: A mutable mapping with a string stored under ``"text"``
            (presumably a dataset record; confirm against the caller).

    Returns:
        The same *row* object, with ``row["text"]`` updated in place.
    """
    # Raw string: a plain "\s" literal is an invalid string escape and
    # triggers a SyntaxWarning on Python 3.12+.
    row["text"] = re.sub(r"@\S+", "", row["text"])
    return row
def remove_urls(row):
    """Delete URL-like tokens from the ``"text"`` field of *row*.

    Any run that starts with the literal ``http`` and continues up to the
    next whitespace character (or end of string) is removed. The row is
    modified in place and also returned.
    """
    without_links = re.sub(r"http\S+", "", row["text"])
    row["text"] = without_links
    return row
# Matches either an HTML tag (shortest "<...>" span on a single line) or a
# character reference: named (&amp;), decimal (&#123;) or hex (&#x1f600;).
# IGNORECASE is needed so that uppercase hex digits / entity names such as
# "&#x1F600;" or "&Eacute;" are recognised as well.
CLEANR = re.compile(
    r"<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});",
    re.IGNORECASE,
)


def clean_html(row):
    """Remove HTML tags and character references from ``row["text"]``.

    Matches of ``CLEANR`` are deleted outright (not decoded), so
    ``"a &amp; b"`` becomes ``"a  b"``. An ampersand that is not followed
    by a well-formed ``name;`` / ``#digits;`` reference is left alone.

    Args:
        row: A mutable mapping with a string stored under ``"text"``
            (presumably a dataset record; confirm against the caller).

    Returns:
        The same *row* object, with ``row["text"]`` updated in place.
    """
    row["text"] = CLEANR.sub("", row["text"])
    return row
|