francisco-perez-sorrosal's picture
First version
905ea5f
raw
history blame contribute delete
421 Bytes
import re
def remove_twitter_handles(row):
    """Strip Twitter-style ``@handle`` mentions from ``row["text"]``.

    Every ``@`` followed by one or more non-whitespace characters is
    deleted. Surrounding whitespace is left untouched, so removing a
    mention in the middle of a sentence leaves a double space behind.

    Args:
        row: Mapping with a ``"text"`` entry holding a string. It is
            modified in place.

    Returns:
        The same ``row`` object, with ``row["text"]`` replaced by the
        cleaned string.
    """
    # Raw string: "\s" in a plain literal is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+). r"@\S+" matches exactly what
    # "@[^\s]+" did.
    row["text"] = re.sub(r"@\S+", "", row["text"])
    return row
def remove_urls(row):
    """Delete URL-like tokens from ``row["text"]``.

    A token is removed when it starts with ``http`` and continues with at
    least one non-whitespace character; the match runs up to the next
    whitespace. Whitespace around the removed token is kept as is.

    Args:
        row: Mapping with a ``"text"`` entry holding a string. It is
            modified in place.

    Returns:
        The same ``row`` object, with ``row["text"]`` replaced by the
        cleaned string.
    """
    url_pattern = re.compile(r"http\S+")
    without_urls = url_pattern.sub("", row["text"])
    row["text"] = without_urls
    return row
# Matches either an HTML tag (shortest "<...>" run on a single line) or a
# character reference: named (&amp;), decimal (&#128512;) or hex (&#x1F600;).
# IGNORECASE is needed because hex digits and the "x" marker may be written
# in upper case, and some entity names contain capitals (e.g. &Eacute;).
CLEANR = re.compile(
    r"<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});",
    re.IGNORECASE,
)


def clean_html(row):
    """Strip HTML tags and character references from ``row["text"]``.

    Everything matched by ``CLEANR`` is deleted outright; entities are
    removed rather than decoded, and surrounding whitespace is preserved.

    Args:
        row: Mapping with a ``"text"`` entry holding a string. It is
            modified in place.

    Returns:
        The same ``row`` object, with ``row["text"]`` replaced by the
        cleaned string.
    """
    row["text"] = CLEANR.sub("", row["text"])
    return row