omykhailiv
committed on
Commit
•
9b46208
1
Parent(s):
212b2b0
Update README.md
Browse files
README.md
CHANGED
@@ -106,7 +106,7 @@ def testing_data_prep(text):
|
|
106 |
# Removing stopwords, such as do, not, as, etc. (https://gist.github.com/sebleier/554280)
|
107 |
new_filtered_words = [
|
108 |
word for word in words if word not in stopwords.words('english')]
|
109 |
-
if
|
110 |
return ' '.join(new_filtered_words)
|
111 |
return ' '
|
112 |
```
|
@@ -131,6 +131,7 @@ The following hyperparameters were used during training:
|
|
131 |
https://huggingface.co/datasets/GonzaloA/fake_news
|
132 |
https://github.com/GeorgeMcIntire/fake_real_news_dataset
|
133 |
https://onlineacademiccommunity.uvic.ca/isot/2022/11/27/fake-news-detection-datasets/
|
|
|
134 |
|
135 |
|
136 |
#### Metrics
|
@@ -152,14 +153,15 @@ weighted avg 0.94 0.94 0.94 8117
|
|
152 |
|
153 |
For testing on https://github.com/GeorgeMcIntire/fake_real_news_dataset
|
154 |
```
|
155 |
-
|
|
|
|
|
|
|
156 |
|
157 |
-
|
158 |
-
|
|
|
159 |
|
160 |
-
accuracy 0.83 4594
|
161 |
-
macro avg 0.83 0.83 0.83 4594
|
162 |
-
weighted avg 0.83 0.83 0.83 4594
|
163 |
```
|
164 |
For testing on https://onlineacademiccommunity.uvic.ca/isot/2022/11/27/fake-news-detection-datasets/
|
165 |
```
|
@@ -173,6 +175,17 @@ For testing on https://onlineacademiccommunity.uvic.ca/isot/2022/11/27/fake-news
|
|
173 |
weighted avg 0.9731 0.9731 0.9731 19996
|
174 |
```
|
175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
#### Hardware
|
177 |
|
178 |
Tesla T4 GPU, available for free in Google Colab
|
|
|
106 |
# Removing stopwords, such as do, not, as, etc. (https://gist.github.com/sebleier/554280)
|
107 |
new_filtered_words = [
|
108 |
word for word in words if word not in stopwords.words('english')]
|
109 |
+
if 20 >= len(new_filtered_words) >= 6:
|
110 |
return ' '.join(new_filtered_words)
|
111 |
return ' '
|
112 |
```
|
|
|
131 |
https://huggingface.co/datasets/GonzaloA/fake_news
|
132 |
https://github.com/GeorgeMcIntire/fake_real_news_dataset
|
133 |
https://onlineacademiccommunity.uvic.ca/isot/2022/11/27/fake-news-detection-datasets/
|
134 |
+
https://www.kaggle.com/datasets/saurabhshahane/fake-news-classification/data
|
135 |
|
136 |
|
137 |
#### Metrics
|
|
|
153 |
|
154 |
For testing on https://github.com/GeorgeMcIntire/fake_real_news_dataset
|
155 |
```
|
156 |
+
precision recall f1-score support
|
157 |
+
|
158 |
+
0 0.93 0.88 0.90 2297
|
159 |
+
1 0.89 0.93 0.91 2297
|
160 |
|
161 |
+
accuracy 0.91 4594
|
162 |
+
macro avg 0.91 0.91 0.91 4594
|
163 |
+
weighted avg 0.91 0.91 0.91 4594
|
164 |
|
|
|
|
|
|
|
165 |
```
|
166 |
For testing on https://onlineacademiccommunity.uvic.ca/isot/2022/11/27/fake-news-detection-datasets/
|
167 |
```
|
|
|
175 |
weighted avg 0.9731 0.9731 0.9731 19996
|
176 |
```
|
177 |
|
178 |
+
For testing on random 1k rows of https://www.kaggle.com/datasets/saurabhshahane/fake-news-classification/data
|
179 |
+
```
|
180 |
+
precision recall f1-score support
|
181 |
+
|
182 |
+
0 0.87 0.80 0.84 492
|
183 |
+
1 0.82 0.89 0.85 508
|
184 |
+
|
185 |
+
accuracy 0.85 1000
|
186 |
+
macro avg 0.85 0.85 0.85 1000
|
187 |
+
weighted avg 0.85 0.85 0.85 1000
|
188 |
+
```
|
189 |
#### Hardware
|
190 |
|
191 |
Tesla T4 GPU, available for free in Google Colab
|