yitingliii committed on
Commit
b98218f
·
verified ·
1 Parent(s): cfabd2f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -18
README.md CHANGED
@@ -37,27 +37,14 @@ from sklearn.svm import SVC
37
 
38
 
39
  ```python
40
- def clean(df):
41
- stop_words = set(stopwords.words('english'))
42
- lemmatizer = WordNetLemmatizer()
43
- cleaned_headlines = []
44
 
45
- for headline in df['title']:
46
- headline = BeautifulSoup(headline, 'html.parser').get_text()
47
- headline = re.sub(r'[^a-zA-Z0-9\s]', '', headline)
48
- headline = re.sub(r'\s+', ' ', headline).strip()
49
- headline = headline.lower()
50
 
51
- words = headline.split()
52
- words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
53
 
54
- cleaned_headline = ' '.join(words)
55
- cleaned_headlines.append(cleaned_headline)
56
-
57
- df['title'] = cleaned_headlines
58
- df.drop_duplicates(subset=['title'], inplace=True)
59
-
60
- return df
61
  ```
62
 
63
  3. run the SVM model
 
37
 
38
 
39
  ```python
40
+ from clean_data import clean
 
 
 
41
 
42
+ # Load your data
43
+ df = pd.read_csv('your_dataset.csv')
 
 
 
44
 
45
+ # Clean the data
46
+ cleaned_df = clean(df)
47
 
 
 
 
 
 
 
 
48
  ```
49
 
50
  3. run the SVM model