CIS5190abcd
/

svm

yitingliii committed on Dec 13, 2024

Commit

b98218f

verified ·

1 Parent(s): cfabd2f

Update README.md

Files changed (1) hide show

README.md CHANGED Viewed

@@ -37,27 +37,14 @@ from sklearn.svm import SVC
 ```python
-def clean(df):
-    stop_words = set(stopwords.words('english'))
-    lemmatizer = WordNetLemmatizer()
-    cleaned_headlines = []
-    for headline in df['title']:
-        headline = BeautifulSoup(headline, 'html.parser').get_text()
-        headline = re.sub(r'[^a-zA-Z0-9\s]', '', headline)
-        headline = re.sub(r'\s+', ' ', headline).strip()
-        headline = headline.lower()
-        words = headline.split()
-        words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
-        cleaned_headline = ' '.join(words)
-        cleaned_headlines.append(cleaned_headline)
-    df['title'] = cleaned_headlines
-    df.drop_duplicates(subset=['title'], inplace=True)
-    return df
 ```
 3. run the SVM model

 ```python
+from clean_data import clean
+# Load your data
+df = pd.read_csv('your_dataset.csv')
+# Clean the data
+cleaned_df = clean(df)
 ```
 3. run the SVM model