Spaces:
Runtime error
Runtime error
OwusuBlessing
committed on
Commit
·
33d6818
1
Parent(s):
9184004
new
Browse files- .gitattributes +0 -34
- .github/workflows/push_to_hgface.yml +20 -0
- .gitignore +163 -0
- Artifacts/catboost_info/catboost_training.json +104 -0
- Artifacts/catboost_info/learn/events.out.tfevents +0 -0
- Artifacts/catboost_info/learn_error.tsv +101 -0
- Artifacts/catboost_info/time_left.tsv +101 -0
- Artifacts/cb_fakes_news_model.cbm +0 -0
- Artifacts/classification_report.txt +8 -0
- Artifacts/tfidf_preprocessor.pkl +0 -0
- Notebooks/Fake_News_Classification.ipynb +0 -0
- README.md +2 -4
- app.py +73 -0
- requirements.txt +7 -0
- src/prediction.py +32 -0
- src/preprocessor.py +36 -0
.gitattributes
DELETED
@@ -1,34 +0,0 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.github/workflows/push_to_hgface.yml
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Sync to Hugging Face hub
|
2 |
+
on:
|
3 |
+
push:
|
4 |
+
branches: [master]
|
5 |
+
|
6 |
+
# to run this workflow manually from the Actions tab
|
7 |
+
workflow_dispatch:
|
8 |
+
|
9 |
+
jobs:
|
10 |
+
sync-to-hub:
|
11 |
+
runs-on: ubuntu-latest
|
12 |
+
steps:
|
13 |
+
- uses: actions/checkout@v3
|
14 |
+
with:
|
15 |
+
fetch-depth: 0
|
16 |
+
lfs: true
|
17 |
+
- name: Push to hub
|
18 |
+
env:
|
19 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
20 |
+
run: git push https://OwusuDynamo:$HF_TOKEN@huggingface.co/spaces/OwusuDynamo/fake-news-detection-app master
|
.gitignore
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
#Datasets
|
9 |
+
|
10 |
+
|
11 |
+
# Distribution / packaging
|
12 |
+
.Python
|
13 |
+
build/
|
14 |
+
develop-eggs/
|
15 |
+
dist/
|
16 |
+
downloads/
|
17 |
+
eggs/
|
18 |
+
.eggs/
|
19 |
+
lib/
|
20 |
+
lib64/
|
21 |
+
parts/
|
22 |
+
sdist/
|
23 |
+
var/
|
24 |
+
wheels/
|
25 |
+
share/python-wheels/
|
26 |
+
*.egg-info/
|
27 |
+
.installed.cfg
|
28 |
+
*.egg
|
29 |
+
MANIFEST
|
30 |
+
|
31 |
+
# PyInstaller
|
32 |
+
# Usually these files are written by a python script from a template
|
33 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
34 |
+
*.manifest
|
35 |
+
*.spec
|
36 |
+
|
37 |
+
# Installer logs
|
38 |
+
pip-log.txt
|
39 |
+
pip-delete-this-directory.txt
|
40 |
+
|
41 |
+
# Unit test / coverage reports
|
42 |
+
htmlcov/
|
43 |
+
.tox/
|
44 |
+
.nox/
|
45 |
+
.coverage
|
46 |
+
.coverage.*
|
47 |
+
.cache
|
48 |
+
nosetests.xml
|
49 |
+
coverage.xml
|
50 |
+
*.cover
|
51 |
+
*.py,cover
|
52 |
+
.hypothesis/
|
53 |
+
.pytest_cache/
|
54 |
+
cover/
|
55 |
+
|
56 |
+
# Translations
|
57 |
+
*.mo
|
58 |
+
*.pot
|
59 |
+
|
60 |
+
# Django stuff:
|
61 |
+
*.log
|
62 |
+
local_settings.py
|
63 |
+
db.sqlite3
|
64 |
+
db.sqlite3-journal
|
65 |
+
|
66 |
+
# Flask stuff:
|
67 |
+
instance/
|
68 |
+
.webassets-cache
|
69 |
+
|
70 |
+
# Scrapy stuff:
|
71 |
+
.scrapy
|
72 |
+
|
73 |
+
# Sphinx documentation
|
74 |
+
docs/_build/
|
75 |
+
|
76 |
+
# PyBuilder
|
77 |
+
.pybuilder/
|
78 |
+
target/
|
79 |
+
|
80 |
+
# Jupyter Notebook
|
81 |
+
.ipynb_checkpoints
|
82 |
+
|
83 |
+
# IPython
|
84 |
+
profile_default/
|
85 |
+
ipython_config.py
|
86 |
+
.ipynb_checkpoints/
|
87 |
+
|
88 |
+
# pyenv
|
89 |
+
# For a library or package, you might want to ignore these files since the code is
|
90 |
+
# intended to run in multiple environments; otherwise, check them in:
|
91 |
+
# .python-version
|
92 |
+
|
93 |
+
# pipenv
|
94 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
95 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
96 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
97 |
+
# install all needed dependencies.
|
98 |
+
#Pipfile.lock
|
99 |
+
|
100 |
+
# poetry
|
101 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
102 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
103 |
+
# commonly ignored for libraries.
|
104 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
105 |
+
#poetry.lock
|
106 |
+
|
107 |
+
# pdm
|
108 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
109 |
+
#pdm.lock
|
110 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
111 |
+
# in version control.
|
112 |
+
# https://pdm.fming.dev/#use-with-ide
|
113 |
+
.pdm.toml
|
114 |
+
|
115 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
116 |
+
__pypackages__/
|
117 |
+
|
118 |
+
# Celery stuff
|
119 |
+
celerybeat-schedule
|
120 |
+
celerybeat.pid
|
121 |
+
|
122 |
+
# SageMath parsed files
|
123 |
+
*.sage.py
|
124 |
+
|
125 |
+
# Environments
|
126 |
+
.env
|
127 |
+
.venv
|
128 |
+
env/
|
129 |
+
venv/
|
130 |
+
ENV/
|
131 |
+
env.bak/
|
132 |
+
venv.bak/
|
133 |
+
|
134 |
+
# Spyder project settings
|
135 |
+
.spyderproject
|
136 |
+
.spyproject
|
137 |
+
|
138 |
+
# Rope project settings
|
139 |
+
.ropeproject
|
140 |
+
|
141 |
+
# mkdocs documentation
|
142 |
+
/site
|
143 |
+
|
144 |
+
# mypy
|
145 |
+
.mypy_cache/
|
146 |
+
.dmypy.json
|
147 |
+
dmypy.json
|
148 |
+
|
149 |
+
# Pyre type checker
|
150 |
+
.pyre/
|
151 |
+
|
152 |
+
# pytype static type analyzer
|
153 |
+
.pytype/
|
154 |
+
|
155 |
+
# Cython debug symbols
|
156 |
+
cython_debug/
|
157 |
+
|
158 |
+
# PyCharm
|
159 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
160 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
161 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
162 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
163 |
+
#.idea/
|
Artifacts/catboost_info/catboost_training.json
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"meta":{"test_sets":[],"test_metrics":[],"learn_metrics":[{"best_value":"Min","name":"Logloss"}],"launch_mode":"Train","parameters":"","iteration_count":100,"learn_sets":["learn"],"name":"experiment"},
|
3 |
+
"iterations":[
|
4 |
+
{"learn":[0.4073772263],"iteration":0,"passed_time":0.1994576486,"remaining_time":19.74630721},
|
5 |
+
{"learn":[0.3137234975],"iteration":1,"passed_time":0.2859673858,"remaining_time":14.0124019},
|
6 |
+
{"learn":[0.284236184],"iteration":2,"passed_time":0.3706169391,"remaining_time":11.98328103},
|
7 |
+
{"learn":[0.2668503982],"iteration":3,"passed_time":0.4527206823,"remaining_time":10.86529638},
|
8 |
+
{"learn":[0.2593665744],"iteration":4,"passed_time":0.5364983441,"remaining_time":10.19346854},
|
9 |
+
{"learn":[0.2516184276],"iteration":5,"passed_time":0.6197788246,"remaining_time":9.709868252},
|
10 |
+
{"learn":[0.2445631076],"iteration":6,"passed_time":0.7044917529,"remaining_time":9.359676146},
|
11 |
+
{"learn":[0.2384404882],"iteration":7,"passed_time":0.7944588986,"remaining_time":9.136277334},
|
12 |
+
{"learn":[0.2360618876],"iteration":8,"passed_time":0.8773936264,"remaining_time":8.871424445},
|
13 |
+
{"learn":[0.2336608468],"iteration":9,"passed_time":0.9606759468,"remaining_time":8.646083521},
|
14 |
+
{"learn":[0.229775124],"iteration":10,"passed_time":1.045986758,"remaining_time":8.462983771},
|
15 |
+
{"learn":[0.2276650503],"iteration":11,"passed_time":1.128766468,"remaining_time":8.277620769},
|
16 |
+
{"learn":[0.2251991995],"iteration":12,"passed_time":1.210428476,"remaining_time":8.100559803},
|
17 |
+
{"learn":[0.2228250972],"iteration":13,"passed_time":1.29392577,"remaining_time":7.948401158},
|
18 |
+
{"learn":[0.2215833855],"iteration":14,"passed_time":1.376584169,"remaining_time":7.800643626},
|
19 |
+
{"learn":[0.2202659273],"iteration":15,"passed_time":1.459185374,"remaining_time":7.660723212},
|
20 |
+
{"learn":[0.2188989356],"iteration":16,"passed_time":1.541965914,"remaining_time":7.528421814},
|
21 |
+
{"learn":[0.2174759806],"iteration":17,"passed_time":1.626208659,"remaining_time":7.40828389},
|
22 |
+
{"learn":[0.2164400273],"iteration":18,"passed_time":1.707259685,"remaining_time":7.278317603},
|
23 |
+
{"learn":[0.2145296529],"iteration":19,"passed_time":1.80270204,"remaining_time":7.210808161},
|
24 |
+
{"learn":[0.2134351214],"iteration":20,"passed_time":1.884295734,"remaining_time":7.088541093},
|
25 |
+
{"learn":[0.2124248348],"iteration":21,"passed_time":1.971896535,"remaining_time":6.991269533},
|
26 |
+
{"learn":[0.2113669983],"iteration":22,"passed_time":2.057298229,"remaining_time":6.887476681},
|
27 |
+
{"learn":[0.2102720461],"iteration":23,"passed_time":2.139732996,"remaining_time":6.775821154},
|
28 |
+
{"learn":[0.2091248728],"iteration":24,"passed_time":2.224650768,"remaining_time":6.673952305},
|
29 |
+
{"learn":[0.2076362453],"iteration":25,"passed_time":2.322191089,"remaining_time":6.6093131},
|
30 |
+
{"learn":[0.2066888613],"iteration":26,"passed_time":2.40559888,"remaining_time":6.504026602},
|
31 |
+
{"learn":[0.2055625239],"iteration":27,"passed_time":2.493682384,"remaining_time":6.412326129},
|
32 |
+
{"learn":[0.2046374022],"iteration":28,"passed_time":2.575420346,"remaining_time":6.305339467},
|
33 |
+
{"learn":[0.2037215169],"iteration":29,"passed_time":2.658577386,"remaining_time":6.203347234},
|
34 |
+
{"learn":[0.2020789062],"iteration":30,"passed_time":2.750785096,"remaining_time":6.122715214},
|
35 |
+
{"learn":[0.2010651162],"iteration":31,"passed_time":2.835820729,"remaining_time":6.02611905},
|
36 |
+
{"learn":[0.2003155492],"iteration":32,"passed_time":2.916948599,"remaining_time":5.92228958},
|
37 |
+
{"learn":[0.199547054],"iteration":33,"passed_time":3.001835314,"remaining_time":5.82709208},
|
38 |
+
{"learn":[0.1989116811],"iteration":34,"passed_time":3.090939507,"remaining_time":5.740316228},
|
39 |
+
{"learn":[0.1977211783],"iteration":35,"passed_time":3.178372242,"remaining_time":5.650439541},
|
40 |
+
{"learn":[0.1968471172],"iteration":36,"passed_time":3.261604416,"remaining_time":5.553542655},
|
41 |
+
{"learn":[0.1957080812],"iteration":37,"passed_time":3.34268884,"remaining_time":5.453860738},
|
42 |
+
{"learn":[0.1950747179],"iteration":38,"passed_time":3.42343091,"remaining_time":5.354597064},
|
43 |
+
{"learn":[0.1941590906],"iteration":39,"passed_time":3.503865274,"remaining_time":5.255797911},
|
44 |
+
{"learn":[0.1930812766],"iteration":40,"passed_time":3.588701693,"remaining_time":5.164229265},
|
45 |
+
{"learn":[0.1924486569],"iteration":41,"passed_time":3.674558191,"remaining_time":5.074389883},
|
46 |
+
{"learn":[0.19179662],"iteration":42,"passed_time":3.766949766,"remaining_time":4.993398528},
|
47 |
+
{"learn":[0.1906261212],"iteration":43,"passed_time":3.849100966,"remaining_time":4.898855775},
|
48 |
+
{"learn":[0.1900373396],"iteration":44,"passed_time":3.931568651,"remaining_time":4.805250573},
|
49 |
+
{"learn":[0.189016182],"iteration":45,"passed_time":4.018057559,"remaining_time":4.716850178},
|
50 |
+
{"learn":[0.1880234969],"iteration":46,"passed_time":4.100924523,"remaining_time":4.624446802},
|
51 |
+
{"learn":[0.1874627296],"iteration":47,"passed_time":4.182183702,"remaining_time":4.53069901},
|
52 |
+
{"learn":[0.1864628676],"iteration":48,"passed_time":4.269557121,"remaining_time":4.443824759},
|
53 |
+
{"learn":[0.1859694423],"iteration":49,"passed_time":4.352617699,"remaining_time":4.352617699},
|
54 |
+
{"learn":[0.1854000284],"iteration":50,"passed_time":4.434282906,"remaining_time":4.260389459},
|
55 |
+
{"learn":[0.1847740363],"iteration":51,"passed_time":4.5168872,"remaining_time":4.169434339},
|
56 |
+
{"learn":[0.1839432146],"iteration":52,"passed_time":4.601568901,"remaining_time":4.080636573},
|
57 |
+
{"learn":[0.1834647897],"iteration":53,"passed_time":4.683102749,"remaining_time":3.989309749},
|
58 |
+
{"learn":[0.182940847],"iteration":54,"passed_time":4.770363468,"remaining_time":3.903024655},
|
59 |
+
{"learn":[0.1818482915],"iteration":55,"passed_time":4.855443107,"remaining_time":3.814991013},
|
60 |
+
{"learn":[0.181244672],"iteration":56,"passed_time":4.939164783,"remaining_time":3.726036591},
|
61 |
+
{"learn":[0.1804523567],"iteration":57,"passed_time":5.023063486,"remaining_time":3.6373908},
|
62 |
+
{"learn":[0.1792150047],"iteration":58,"passed_time":5.105549329,"remaining_time":3.54792411},
|
63 |
+
{"learn":[0.1787887539],"iteration":59,"passed_time":5.188931602,"remaining_time":3.459287735},
|
64 |
+
{"learn":[0.1783067348],"iteration":60,"passed_time":5.271667275,"remaining_time":3.370410225},
|
65 |
+
{"learn":[0.1779043172],"iteration":61,"passed_time":5.353906798,"remaining_time":3.281426747},
|
66 |
+
{"learn":[0.1771281345],"iteration":62,"passed_time":5.436227254,"remaining_time":3.192704895},
|
67 |
+
{"learn":[0.1759185832],"iteration":63,"passed_time":5.518357015,"remaining_time":3.104075821},
|
68 |
+
{"learn":[0.175523618],"iteration":64,"passed_time":5.600270793,"remaining_time":3.015530427},
|
69 |
+
{"learn":[0.1751159439],"iteration":65,"passed_time":5.682210089,"remaining_time":2.927199137},
|
70 |
+
{"learn":[0.1744740365],"iteration":66,"passed_time":5.765215792,"remaining_time":2.839583898},
|
71 |
+
{"learn":[0.173006689],"iteration":67,"passed_time":5.855969826,"remaining_time":2.755750506},
|
72 |
+
{"learn":[0.1721463838],"iteration":68,"passed_time":5.941702234,"remaining_time":2.669460424},
|
73 |
+
{"learn":[0.1697501862],"iteration":69,"passed_time":6.026043432,"remaining_time":2.582590042},
|
74 |
+
{"learn":[0.1693384959],"iteration":70,"passed_time":6.112568517,"remaining_time":2.496682916},
|
75 |
+
{"learn":[0.1689667046],"iteration":71,"passed_time":6.194277691,"remaining_time":2.408885769},
|
76 |
+
{"learn":[0.1683670976],"iteration":72,"passed_time":6.314849504,"remaining_time":2.335629268},
|
77 |
+
{"learn":[0.1674515062],"iteration":73,"passed_time":6.457696597,"remaining_time":2.268920426},
|
78 |
+
{"learn":[0.1667025898],"iteration":74,"passed_time":6.598154568,"remaining_time":2.199384856},
|
79 |
+
{"learn":[0.1659401991],"iteration":75,"passed_time":6.747145449,"remaining_time":2.13067751},
|
80 |
+
{"learn":[0.1655595047],"iteration":76,"passed_time":6.890559158,"remaining_time":2.058218969},
|
81 |
+
{"learn":[0.164865277],"iteration":77,"passed_time":7.034427652,"remaining_time":1.984069338},
|
82 |
+
{"learn":[0.1644866935],"iteration":78,"passed_time":7.181369464,"remaining_time":1.90897163},
|
83 |
+
{"learn":[0.1638354875],"iteration":79,"passed_time":7.338045702,"remaining_time":1.834511425},
|
84 |
+
{"learn":[0.1634971926],"iteration":80,"passed_time":7.497690556,"remaining_time":1.758717538},
|
85 |
+
{"learn":[0.1630796548],"iteration":81,"passed_time":7.661620144,"remaining_time":1.681819056},
|
86 |
+
{"learn":[0.1627086068],"iteration":82,"passed_time":7.824841608,"remaining_time":1.602678402},
|
87 |
+
{"learn":[0.1620494],"iteration":83,"passed_time":7.985147001,"remaining_time":1.520980381},
|
88 |
+
{"learn":[0.1610732023],"iteration":84,"passed_time":8.1240988,"remaining_time":1.433664494},
|
89 |
+
{"learn":[0.160270845],"iteration":85,"passed_time":8.285406684,"remaining_time":1.348787135},
|
90 |
+
{"learn":[0.1598202952],"iteration":86,"passed_time":8.436509479,"remaining_time":1.260627853},
|
91 |
+
{"learn":[0.1594650718],"iteration":87,"passed_time":8.591243189,"remaining_time":1.171533162},
|
92 |
+
{"learn":[0.1591502269],"iteration":88,"passed_time":8.739559663,"remaining_time":1.080170295},
|
93 |
+
{"learn":[0.1583429364],"iteration":89,"passed_time":8.889175115,"remaining_time":0.9876861239},
|
94 |
+
{"learn":[0.1579797457],"iteration":90,"passed_time":9.044786926,"remaining_time":0.8945393663},
|
95 |
+
{"learn":[0.1576658943],"iteration":91,"passed_time":9.194544297,"remaining_time":0.799525591},
|
96 |
+
{"learn":[0.157323332],"iteration":92,"passed_time":9.355725281,"remaining_time":0.704194376},
|
97 |
+
{"learn":[0.1570125802],"iteration":93,"passed_time":9.511794086,"remaining_time":0.6071357927},
|
98 |
+
{"learn":[0.1566978865],"iteration":94,"passed_time":9.663098075,"remaining_time":0.5085841092},
|
99 |
+
{"learn":[0.1564011047],"iteration":95,"passed_time":9.81229429,"remaining_time":0.4088455954},
|
100 |
+
{"learn":[0.1559077381],"iteration":96,"passed_time":9.909306273,"remaining_time":0.3064733899},
|
101 |
+
{"learn":[0.155400761],"iteration":97,"passed_time":9.994785321,"remaining_time":0.2039752106},
|
102 |
+
{"learn":[0.1551113733],"iteration":98,"passed_time":10.07576513,"remaining_time":0.1017754054},
|
103 |
+
{"learn":[0.154801749],"iteration":99,"passed_time":10.1571871,"remaining_time":0}
|
104 |
+
]}
|
Artifacts/catboost_info/learn/events.out.tfevents
ADDED
Binary file (5.4 kB). View file
|
|
Artifacts/catboost_info/learn_error.tsv
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
iter Logloss
|
2 |
+
0 0.4073772263
|
3 |
+
1 0.3137234975
|
4 |
+
2 0.284236184
|
5 |
+
3 0.2668503982
|
6 |
+
4 0.2593665744
|
7 |
+
5 0.2516184276
|
8 |
+
6 0.2445631076
|
9 |
+
7 0.2384404882
|
10 |
+
8 0.2360618876
|
11 |
+
9 0.2336608468
|
12 |
+
10 0.229775124
|
13 |
+
11 0.2276650503
|
14 |
+
12 0.2251991995
|
15 |
+
13 0.2228250972
|
16 |
+
14 0.2215833855
|
17 |
+
15 0.2202659273
|
18 |
+
16 0.2188989356
|
19 |
+
17 0.2174759806
|
20 |
+
18 0.2164400273
|
21 |
+
19 0.2145296529
|
22 |
+
20 0.2134351214
|
23 |
+
21 0.2124248348
|
24 |
+
22 0.2113669983
|
25 |
+
23 0.2102720461
|
26 |
+
24 0.2091248728
|
27 |
+
25 0.2076362453
|
28 |
+
26 0.2066888613
|
29 |
+
27 0.2055625239
|
30 |
+
28 0.2046374022
|
31 |
+
29 0.2037215169
|
32 |
+
30 0.2020789062
|
33 |
+
31 0.2010651162
|
34 |
+
32 0.2003155492
|
35 |
+
33 0.199547054
|
36 |
+
34 0.1989116811
|
37 |
+
35 0.1977211783
|
38 |
+
36 0.1968471172
|
39 |
+
37 0.1957080812
|
40 |
+
38 0.1950747179
|
41 |
+
39 0.1941590906
|
42 |
+
40 0.1930812766
|
43 |
+
41 0.1924486569
|
44 |
+
42 0.19179662
|
45 |
+
43 0.1906261212
|
46 |
+
44 0.1900373396
|
47 |
+
45 0.189016182
|
48 |
+
46 0.1880234969
|
49 |
+
47 0.1874627296
|
50 |
+
48 0.1864628676
|
51 |
+
49 0.1859694423
|
52 |
+
50 0.1854000284
|
53 |
+
51 0.1847740363
|
54 |
+
52 0.1839432146
|
55 |
+
53 0.1834647897
|
56 |
+
54 0.182940847
|
57 |
+
55 0.1818482915
|
58 |
+
56 0.181244672
|
59 |
+
57 0.1804523567
|
60 |
+
58 0.1792150047
|
61 |
+
59 0.1787887539
|
62 |
+
60 0.1783067348
|
63 |
+
61 0.1779043172
|
64 |
+
62 0.1771281345
|
65 |
+
63 0.1759185832
|
66 |
+
64 0.175523618
|
67 |
+
65 0.1751159439
|
68 |
+
66 0.1744740365
|
69 |
+
67 0.173006689
|
70 |
+
68 0.1721463838
|
71 |
+
69 0.1697501862
|
72 |
+
70 0.1693384959
|
73 |
+
71 0.1689667046
|
74 |
+
72 0.1683670976
|
75 |
+
73 0.1674515062
|
76 |
+
74 0.1667025898
|
77 |
+
75 0.1659401991
|
78 |
+
76 0.1655595047
|
79 |
+
77 0.164865277
|
80 |
+
78 0.1644866935
|
81 |
+
79 0.1638354875
|
82 |
+
80 0.1634971926
|
83 |
+
81 0.1630796548
|
84 |
+
82 0.1627086068
|
85 |
+
83 0.1620494
|
86 |
+
84 0.1610732023
|
87 |
+
85 0.160270845
|
88 |
+
86 0.1598202952
|
89 |
+
87 0.1594650718
|
90 |
+
88 0.1591502269
|
91 |
+
89 0.1583429364
|
92 |
+
90 0.1579797457
|
93 |
+
91 0.1576658943
|
94 |
+
92 0.157323332
|
95 |
+
93 0.1570125802
|
96 |
+
94 0.1566978865
|
97 |
+
95 0.1564011047
|
98 |
+
96 0.1559077381
|
99 |
+
97 0.155400761
|
100 |
+
98 0.1551113733
|
101 |
+
99 0.154801749
|
Artifacts/catboost_info/time_left.tsv
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
iter Passed Remaining
|
2 |
+
0 199 19746
|
3 |
+
1 285 14012
|
4 |
+
2 370 11983
|
5 |
+
3 452 10865
|
6 |
+
4 536 10193
|
7 |
+
5 619 9709
|
8 |
+
6 704 9359
|
9 |
+
7 794 9136
|
10 |
+
8 877 8871
|
11 |
+
9 960 8646
|
12 |
+
10 1045 8462
|
13 |
+
11 1128 8277
|
14 |
+
12 1210 8100
|
15 |
+
13 1293 7948
|
16 |
+
14 1376 7800
|
17 |
+
15 1459 7660
|
18 |
+
16 1541 7528
|
19 |
+
17 1626 7408
|
20 |
+
18 1707 7278
|
21 |
+
19 1802 7210
|
22 |
+
20 1884 7088
|
23 |
+
21 1971 6991
|
24 |
+
22 2057 6887
|
25 |
+
23 2139 6775
|
26 |
+
24 2224 6673
|
27 |
+
25 2322 6609
|
28 |
+
26 2405 6504
|
29 |
+
27 2493 6412
|
30 |
+
28 2575 6305
|
31 |
+
29 2658 6203
|
32 |
+
30 2750 6122
|
33 |
+
31 2835 6026
|
34 |
+
32 2916 5922
|
35 |
+
33 3001 5827
|
36 |
+
34 3090 5740
|
37 |
+
35 3178 5650
|
38 |
+
36 3261 5553
|
39 |
+
37 3342 5453
|
40 |
+
38 3423 5354
|
41 |
+
39 3503 5255
|
42 |
+
40 3588 5164
|
43 |
+
41 3674 5074
|
44 |
+
42 3766 4993
|
45 |
+
43 3849 4898
|
46 |
+
44 3931 4805
|
47 |
+
45 4018 4716
|
48 |
+
46 4100 4624
|
49 |
+
47 4182 4530
|
50 |
+
48 4269 4443
|
51 |
+
49 4352 4352
|
52 |
+
50 4434 4260
|
53 |
+
51 4516 4169
|
54 |
+
52 4601 4080
|
55 |
+
53 4683 3989
|
56 |
+
54 4770 3903
|
57 |
+
55 4855 3814
|
58 |
+
56 4939 3726
|
59 |
+
57 5023 3637
|
60 |
+
58 5105 3547
|
61 |
+
59 5188 3459
|
62 |
+
60 5271 3370
|
63 |
+
61 5353 3281
|
64 |
+
62 5436 3192
|
65 |
+
63 5518 3104
|
66 |
+
64 5600 3015
|
67 |
+
65 5682 2927
|
68 |
+
66 5765 2839
|
69 |
+
67 5855 2755
|
70 |
+
68 5941 2669
|
71 |
+
69 6026 2582
|
72 |
+
70 6112 2496
|
73 |
+
71 6194 2408
|
74 |
+
72 6314 2335
|
75 |
+
73 6457 2268
|
76 |
+
74 6598 2199
|
77 |
+
75 6747 2130
|
78 |
+
76 6890 2058
|
79 |
+
77 7034 1984
|
80 |
+
78 7181 1908
|
81 |
+
79 7338 1834
|
82 |
+
80 7497 1758
|
83 |
+
81 7661 1681
|
84 |
+
82 7824 1602
|
85 |
+
83 7985 1520
|
86 |
+
84 8124 1433
|
87 |
+
85 8285 1348
|
88 |
+
86 8436 1260
|
89 |
+
87 8591 1171
|
90 |
+
88 8739 1080
|
91 |
+
89 8889 987
|
92 |
+
90 9044 894
|
93 |
+
91 9194 799
|
94 |
+
92 9355 704
|
95 |
+
93 9511 607
|
96 |
+
94 9663 508
|
97 |
+
95 9812 408
|
98 |
+
96 9909 306
|
99 |
+
97 9994 203
|
100 |
+
98 10075 101
|
101 |
+
99 10157 0
|
Artifacts/cb_fakes_news_model.cbm
ADDED
Binary file (578 kB). View file
|
|
Artifacts/classification_report.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
precision recall f1-score support
|
2 |
+
|
3 |
+
0 0.99 0.88 0.93 2072
|
4 |
+
1 0.87 0.99 0.92 1585
|
5 |
+
|
6 |
+
accuracy 0.93 3657
|
7 |
+
macro avg 0.93 0.93 0.93 3657
|
8 |
+
weighted avg 0.94 0.93 0.93 3657
|
Artifacts/tfidf_preprocessor.pkl
ADDED
Binary file (561 kB). View file
|
|
Notebooks/Fake_News_Classification.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
|
2 |
title: Fake News Detection App
|
3 |
emoji: 💻
|
4 |
colorFrom: green
|
@@ -7,6 +7,4 @@ sdk: streamlit
|
|
7 |
sdk_version: 1.19.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
|
2 |
title: Fake News Detection App
|
3 |
emoji: 💻
|
4 |
colorFrom: green
|
|
|
7 |
sdk_version: 1.19.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
# Fake-news-classification
|
|
|
|
app.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Created on Mon May 8 23:58:36 2023
|
4 |
+
|
5 |
+
@author: ME
|
6 |
+
"""
|
7 |
+
import catboost
|
8 |
+
from src.prediction import Prediction
|
9 |
+
from src.preprocessor import Preprocessing
|
10 |
+
import streamlit as st
|
11 |
+
import joblib
|
12 |
+
import pandas as pd
|
13 |
+
import altair as alt
|
14 |
+
|
15 |
+
|
16 |
+
# Load the trained CatBoost classifier once, when the app module is imported.
model_path = "Artifacts/cb_fakes_news_model.cbm"
model = catboost.CatBoostClassifier()
model.load_model(model_path)
|
20 |
+
|
21 |
+
def predict_article(text):
    """Classify one article title with the module-level CatBoost model.

    Args:
        text: Raw article title entered by the user.

    Returns:
        A ``(label, probabilities)`` pair exactly as produced by
        ``Prediction.predict``.
    """
    label, probabilities = Prediction(text, model).predict()
    return label, probabilities
|
24 |
+
|
25 |
+
# Emoji displayed next to each prediction message; the dictionary is keyed by
# the message text that is looked up in main().
fake_emoji = "\U0001F925"
real_emoji = "\U0001F60A"
emoji_dict = {
    "The news is real": real_emoji,
    "The news is fake": fake_emoji,
}
|
29 |
+
def main():
    """Render the Streamlit page and show the prediction for a submitted title.

    The sidebar menu offers "Home", "Tracker" and "About"; only "Home" draws
    any content. On "Home" a form collects an article title. After a
    non-empty submission the left column shows the text, the predicted label
    with its emoji and the confidence, and the right column shows a bar chart
    of the class probabilities.
    """
    st.title("TruthFinder: Detecting Fake News through US Article Titles")
    menu = ["Home", "Tracker", "About"]
    choice = st.sidebar.selectbox("Menu", menu)

    # "Tracker" and "About" have no page yet, so nothing more is drawn for them.
    if choice != "Home":
        return

    st.subheader("Home - Article title In Text")
    with st.form(key="fake_news_form"):
        raw_text = st.text_area("Type Here")
        submit_text = st.form_submit_button(label="Submit")

    if not submit_text:
        return

    # Without this guard an empty title would still be sent to the classifier
    # and a label/confidence would be displayed for it.
    if not raw_text.strip():
        st.warning("Please enter an article title before submitting.")
        return

    col1, col2 = st.columns(2)

    # Predict the label and class probabilities for the article title.
    pred, proba = predict_article(raw_text)

    with col1:
        st.success("Original Text")
        st.write(raw_text)
        st.success("Prediction")
        st.write(f"{pred} {emoji_dict[pred]}")
        st.success("Prediction confidence")
        # Confidence is the largest class probability, shown as a percentage.
        st.write(f"{round(proba.max() * 100, 2)}%")

    with col2:
        st.success("Prediction Probability")

        # Assumes probability column 0 is "fake" and column 1 is "real",
        # matching the class-0 -> "fake" mapping used when labelling.
        proba_df = pd.DataFrame(proba, columns=["Fake", "Real"])
        # Reshape to one row per label so it can be plotted as a bar chart.
        proba_df_clean = proba_df.T.reset_index()
        proba_df_clean.columns = ["Label", "Probability"]

        fig = alt.Chart(proba_df_clean).mark_bar().encode(x="Label", y="Probability")
        st.altair_chart(fig, use_container_width=True)
|
67 |
+
|
68 |
+
|
69 |
+
|
70 |
+
|
71 |
+
|
72 |
+
if __name__ == "__main__":
|
73 |
+
main()
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
numpy==1.24.3
|
2 |
+
pandas==2.0.1
|
3 |
+
nltk==3.8.1
|
4 |
+
scikit-learn==1.2.2
|
5 |
+
catboost==1.2
|
6 |
+
joblib==1.2.0
|
7 |
+
streamlit==1.22.0
|
src/prediction.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Created on Mon May 8 23:57:50 2023
|
4 |
+
|
5 |
+
@author: ME
|
6 |
+
"""
|
7 |
+
|
8 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
9 |
+
from src.preprocessor import Preprocessing
|
10 |
+
import joblib
|
11 |
+
|
12 |
+
# Location of the fitted TF-IDF vectorizer that was saved with joblib.
path = "Artifacts/tfidf_preprocessor.pkl"


class Prediction:
    """Classify a single piece of text as fake or real news.

    Args:
        pred_data: Raw text to classify.
        model: Fitted classifier exposing ``predict`` and ``predict_proba``.
    """

    # Fitted vectorizer, loaded on first use and shared by every instance so
    # the pickle is not re-read from disk on each prediction.
    _tfidf = None

    def __init__(self, pred_data, model):
        self.pred_data = pred_data
        self.model = model

    @classmethod
    def _load_tfidf(cls):
        """Return the saved TF-IDF vectorizer, loading it from ``path`` once."""
        if cls._tfidf is None:
            cls._tfidf = joblib.load(path)
        return cls._tfidf

    def predict(self):
        """Preprocess, vectorize and classify the stored text.

        Returns:
            A ``(message, proba)`` tuple: ``message`` is "The news is fake"
            when the predicted class is 0 and "The news is real" otherwise;
            ``proba`` is the model's ``predict_proba`` output for the text.
        """
        preprocess_data = Preprocessing(self.pred_data).preprocess_text()

        data = self._load_tfidf().transform(preprocess_data)
        predicted = self.model.predict(data)
        proba = self.model.predict_proba(data)

        # Class 0 is treated as fake news; any other class as real.
        if predicted[0] == 0:
            return "The news is fake", proba
        return "The news is real", proba
|
src/preprocessor.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Created on Mon May 8 23:58:07 2023
|
4 |
+
|
5 |
+
@author: ME
|
6 |
+
"""
|
7 |
+
import re
|
8 |
+
import nltk
|
9 |
+
from nltk.corpus import stopwords
|
10 |
+
from nltk.stem import WordNetLemmatizer
|
11 |
+
|
12 |
def _with_nltk_data(probe, package):
    """Run ``probe``; if NLTK data is missing, download ``package`` and retry once.

    NLTK raises ``LookupError`` when a required corpus is not installed, which
    happens on a fresh deployment where the data was never downloaded.
    """
    try:
        return probe()
    except LookupError:
        nltk.download(package, quiet=True)
        return probe()


# English stop words, kept as a set for fast membership tests.
stop_words = _with_nltk_data(lambda: set(stopwords.words("english")), "stopwords")

lm = WordNetLemmatizer()
# Lemmatize one word up front so a missing WordNet corpus is fetched at import
# time rather than failing on the first prediction.
_with_nltk_data(lambda: lm.lemmatize("news"), "wordnet")
|
17 |
+
|
18 |
+
class Preprocessing:
    """Clean one piece of text before it is vectorized.

    Args:
        data: Raw text (a single string) to clean.
    """

    def __init__(self, data):
        self.data = data

    def preprocess_text(self):
        """Return a one-element list holding the cleaned text.

        The text is lower-cased, split on whitespace, stripped of English
        stop words, lemmatized and re-joined with single spaces. The result
        is wrapped in a list so it can be passed straight to a vectorizer's
        ``transform``.
        """
        # NOTE(review): this pattern is the literal string "a-zA-Z0-9", so the
        # substitution is effectively a no-op; "[^a-zA-Z0-9]" was probably
        # intended. Left unchanged because the saved TF-IDF vectorizer and
        # model were presumably fitted on text cleaned this way -- confirm
        # against the training notebook before changing it.
        review = re.sub("a-zA-Z0-9", " ", self.data)

        tokens = review.lower().split()
        # stop_words is already a set, so test membership on it directly.
        kept = [lm.lemmatize(token) for token in tokens if token not in stop_words]
        return [" ".join(kept)]
|