yitingliii
committed on
Update README.md
Browse files
README.md
CHANGED
@@ -37,27 +37,14 @@ from sklearn.svm import SVC
|
|
37 |
|
38 |
|
39 |
```python
|
40 |
-
|
41 |
-
stop_words = set(stopwords.words('english'))
|
42 |
-
lemmatizer = WordNetLemmatizer()
|
43 |
-
cleaned_headlines = []
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
headline = re.sub(r'[^a-zA-Z0-9\s]', '', headline)
|
48 |
-
headline = re.sub(r'\s+', ' ', headline).strip()
|
49 |
-
headline = headline.lower()
|
50 |
|
51 |
-
|
52 |
-
|
53 |
|
54 |
-
cleaned_headline = ' '.join(words)
|
55 |
-
cleaned_headlines.append(cleaned_headline)
|
56 |
-
|
57 |
-
df['title'] = cleaned_headlines
|
58 |
-
df.drop_duplicates(subset=['title'], inplace=True)
|
59 |
-
|
60 |
-
return df
|
61 |
```
|
62 |
|
63 |
3. run the SVM model
|
|
|
37 |
|
38 |
|
39 |
```python
|
40 |
+
from clean_data import clean
|
|
|
|
|
|
|
41 |
|
42 |
+
# Load your data
|
43 |
+
df = pd.read_csv('your_dataset.csv')
|
|
|
|
|
|
|
44 |
|
45 |
+
# Clean the data
|
46 |
+
cleaned_df = clean(df)
|
47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
```
|
49 |
|
50 |
3. run the SVM model
|