Spaces:

OwusuDynamo
/

fake-news-detection-app

Runtime error

App Files Files Community

OwusuBlessing committed on May 12, 2023

Commit

33d6818

1 Parent(s): 9184004

new

Browse files

Files changed (16) hide show

.gitattributes +0 -34
.github/workflows/push_to_hgface.yml +20 -0
.gitignore +163 -0
Artifacts/catboost_info/catboost_training.json +104 -0
Artifacts/catboost_info/learn/events.out.tfevents +0 -0
Artifacts/catboost_info/learn_error.tsv +101 -0
Artifacts/catboost_info/time_left.tsv +101 -0
Artifacts/cb_fakes_news_model.cbm +0 -0
Artifacts/classification_report.txt +8 -0
Artifacts/tfidf_preprocessor.pkl +0 -0
Notebooks/Fake_News_Classification.ipynb +0 -0
README.md +2 -4
app.py +73 -0
requirements.txt +7 -0
src/prediction.py +32 -0
src/preprocessor.py +36 -0

.gitattributes DELETED Viewed

@@ -1,34 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

.github/workflows/push_to_hgface.yml ADDED Viewed

	@@ -0,0 +1,20 @@

+name: Sync to Hugging Face hub
+on:
+  push:
+    branches: [master]
+  # to run this workflow manually from the Actions tab
+  workflow_dispatch:
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          lfs: true
+      - name: Push to hub
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: git push https://OwusuDynamo:$HF_TOKEN@huggingface.co/spaces/OwusuDynamo/fake-news-detection-app master

.gitignore ADDED Viewed

	@@ -0,0 +1,163 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+#Datasets
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+.ipynb_checkpoints/
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

Artifacts/catboost_info/catboost_training.json ADDED Viewed

	@@ -0,0 +1,104 @@

+{
+"meta":{"test_sets":[],"test_metrics":[],"learn_metrics":[{"best_value":"Min","name":"Logloss"}],"launch_mode":"Train","parameters":"","iteration_count":100,"learn_sets":["learn"],"name":"experiment"},
+"iterations":[
+{"learn":[0.4073772263],"iteration":0,"passed_time":0.1994576486,"remaining_time":19.74630721},
+{"learn":[0.3137234975],"iteration":1,"passed_time":0.2859673858,"remaining_time":14.0124019},
+{"learn":[0.284236184],"iteration":2,"passed_time":0.3706169391,"remaining_time":11.98328103},
+{"learn":[0.2668503982],"iteration":3,"passed_time":0.4527206823,"remaining_time":10.86529638},
+{"learn":[0.2593665744],"iteration":4,"passed_time":0.5364983441,"remaining_time":10.19346854},
+{"learn":[0.2516184276],"iteration":5,"passed_time":0.6197788246,"remaining_time":9.709868252},
+{"learn":[0.2445631076],"iteration":6,"passed_time":0.7044917529,"remaining_time":9.359676146},
+{"learn":[0.2384404882],"iteration":7,"passed_time":0.7944588986,"remaining_time":9.136277334},
+{"learn":[0.2360618876],"iteration":8,"passed_time":0.8773936264,"remaining_time":8.871424445},
+{"learn":[0.2336608468],"iteration":9,"passed_time":0.9606759468,"remaining_time":8.646083521},
+{"learn":[0.229775124],"iteration":10,"passed_time":1.045986758,"remaining_time":8.462983771},
+{"learn":[0.2276650503],"iteration":11,"passed_time":1.128766468,"remaining_time":8.277620769},
+{"learn":[0.2251991995],"iteration":12,"passed_time":1.210428476,"remaining_time":8.100559803},
+{"learn":[0.2228250972],"iteration":13,"passed_time":1.29392577,"remaining_time":7.948401158},
+{"learn":[0.2215833855],"iteration":14,"passed_time":1.376584169,"remaining_time":7.800643626},
+{"learn":[0.2202659273],"iteration":15,"passed_time":1.459185374,"remaining_time":7.660723212},
+{"learn":[0.2188989356],"iteration":16,"passed_time":1.541965914,"remaining_time":7.528421814},
+{"learn":[0.2174759806],"iteration":17,"passed_time":1.626208659,"remaining_time":7.40828389},
+{"learn":[0.2164400273],"iteration":18,"passed_time":1.707259685,"remaining_time":7.278317603},
+{"learn":[0.2145296529],"iteration":19,"passed_time":1.80270204,"remaining_time":7.210808161},
+{"learn":[0.2134351214],"iteration":20,"passed_time":1.884295734,"remaining_time":7.088541093},
+{"learn":[0.2124248348],"iteration":21,"passed_time":1.971896535,"remaining_time":6.991269533},
+{"learn":[0.2113669983],"iteration":22,"passed_time":2.057298229,"remaining_time":6.887476681},
+{"learn":[0.2102720461],"iteration":23,"passed_time":2.139732996,"remaining_time":6.775821154},
+{"learn":[0.2091248728],"iteration":24,"passed_time":2.224650768,"remaining_time":6.673952305},
+{"learn":[0.2076362453],"iteration":25,"passed_time":2.322191089,"remaining_time":6.6093131},
+{"learn":[0.2066888613],"iteration":26,"passed_time":2.40559888,"remaining_time":6.504026602},
+{"learn":[0.2055625239],"iteration":27,"passed_time":2.493682384,"remaining_time":6.412326129},
+{"learn":[0.2046374022],"iteration":28,"passed_time":2.575420346,"remaining_time":6.305339467},
+{"learn":[0.2037215169],"iteration":29,"passed_time":2.658577386,"remaining_time":6.203347234},
+{"learn":[0.2020789062],"iteration":30,"passed_time":2.750785096,"remaining_time":6.122715214},
+{"learn":[0.2010651162],"iteration":31,"passed_time":2.835820729,"remaining_time":6.02611905},
+{"learn":[0.2003155492],"iteration":32,"passed_time":2.916948599,"remaining_time":5.92228958},
+{"learn":[0.199547054],"iteration":33,"passed_time":3.001835314,"remaining_time":5.82709208},
+{"learn":[0.1989116811],"iteration":34,"passed_time":3.090939507,"remaining_time":5.740316228},
+{"learn":[0.1977211783],"iteration":35,"passed_time":3.178372242,"remaining_time":5.650439541},
+{"learn":[0.1968471172],"iteration":36,"passed_time":3.261604416,"remaining_time":5.553542655},
+{"learn":[0.1957080812],"iteration":37,"passed_time":3.34268884,"remaining_time":5.453860738},
+{"learn":[0.1950747179],"iteration":38,"passed_time":3.42343091,"remaining_time":5.354597064},
+{"learn":[0.1941590906],"iteration":39,"passed_time":3.503865274,"remaining_time":5.255797911},
+{"learn":[0.1930812766],"iteration":40,"passed_time":3.588701693,"remaining_time":5.164229265},
+{"learn":[0.1924486569],"iteration":41,"passed_time":3.674558191,"remaining_time":5.074389883},
+{"learn":[0.19179662],"iteration":42,"passed_time":3.766949766,"remaining_time":4.993398528},
+{"learn":[0.1906261212],"iteration":43,"passed_time":3.849100966,"remaining_time":4.898855775},
+{"learn":[0.1900373396],"iteration":44,"passed_time":3.931568651,"remaining_time":4.805250573},
+{"learn":[0.189016182],"iteration":45,"passed_time":4.018057559,"remaining_time":4.716850178},
+{"learn":[0.1880234969],"iteration":46,"passed_time":4.100924523,"remaining_time":4.624446802},
+{"learn":[0.1874627296],"iteration":47,"passed_time":4.182183702,"remaining_time":4.53069901},
+{"learn":[0.1864628676],"iteration":48,"passed_time":4.269557121,"remaining_time":4.443824759},
+{"learn":[0.1859694423],"iteration":49,"passed_time":4.352617699,"remaining_time":4.352617699},
+{"learn":[0.1854000284],"iteration":50,"passed_time":4.434282906,"remaining_time":4.260389459},
+{"learn":[0.1847740363],"iteration":51,"passed_time":4.5168872,"remaining_time":4.169434339},
+{"learn":[0.1839432146],"iteration":52,"passed_time":4.601568901,"remaining_time":4.080636573},
+{"learn":[0.1834647897],"iteration":53,"passed_time":4.683102749,"remaining_time":3.989309749},
+{"learn":[0.182940847],"iteration":54,"passed_time":4.770363468,"remaining_time":3.903024655},
+{"learn":[0.1818482915],"iteration":55,"passed_time":4.855443107,"remaining_time":3.814991013},
+{"learn":[0.181244672],"iteration":56,"passed_time":4.939164783,"remaining_time":3.726036591},
+{"learn":[0.1804523567],"iteration":57,"passed_time":5.023063486,"remaining_time":3.6373908},
+{"learn":[0.1792150047],"iteration":58,"passed_time":5.105549329,"remaining_time":3.54792411},
+{"learn":[0.1787887539],"iteration":59,"passed_time":5.188931602,"remaining_time":3.459287735},
+{"learn":[0.1783067348],"iteration":60,"passed_time":5.271667275,"remaining_time":3.370410225},
+{"learn":[0.1779043172],"iteration":61,"passed_time":5.353906798,"remaining_time":3.281426747},
+{"learn":[0.1771281345],"iteration":62,"passed_time":5.436227254,"remaining_time":3.192704895},
+{"learn":[0.1759185832],"iteration":63,"passed_time":5.518357015,"remaining_time":3.104075821},
+{"learn":[0.175523618],"iteration":64,"passed_time":5.600270793,"remaining_time":3.015530427},
+{"learn":[0.1751159439],"iteration":65,"passed_time":5.682210089,"remaining_time":2.927199137},
+{"learn":[0.1744740365],"iteration":66,"passed_time":5.765215792,"remaining_time":2.839583898},
+{"learn":[0.173006689],"iteration":67,"passed_time":5.855969826,"remaining_time":2.755750506},
+{"learn":[0.1721463838],"iteration":68,"passed_time":5.941702234,"remaining_time":2.669460424},
+{"learn":[0.1697501862],"iteration":69,"passed_time":6.026043432,"remaining_time":2.582590042},
+{"learn":[0.1693384959],"iteration":70,"passed_time":6.112568517,"remaining_time":2.496682916},
+{"learn":[0.1689667046],"iteration":71,"passed_time":6.194277691,"remaining_time":2.408885769},
+{"learn":[0.1683670976],"iteration":72,"passed_time":6.314849504,"remaining_time":2.335629268},
+{"learn":[0.1674515062],"iteration":73,"passed_time":6.457696597,"remaining_time":2.268920426},
+{"learn":[0.1667025898],"iteration":74,"passed_time":6.598154568,"remaining_time":2.199384856},
+{"learn":[0.1659401991],"iteration":75,"passed_time":6.747145449,"remaining_time":2.13067751},
+{"learn":[0.1655595047],"iteration":76,"passed_time":6.890559158,"remaining_time":2.058218969},
+{"learn":[0.164865277],"iteration":77,"passed_time":7.034427652,"remaining_time":1.984069338},
+{"learn":[0.1644866935],"iteration":78,"passed_time":7.181369464,"remaining_time":1.90897163},
+{"learn":[0.1638354875],"iteration":79,"passed_time":7.338045702,"remaining_time":1.834511425},
+{"learn":[0.1634971926],"iteration":80,"passed_time":7.497690556,"remaining_time":1.758717538},
+{"learn":[0.1630796548],"iteration":81,"passed_time":7.661620144,"remaining_time":1.681819056},
+{"learn":[0.1627086068],"iteration":82,"passed_time":7.824841608,"remaining_time":1.602678402},
+{"learn":[0.1620494],"iteration":83,"passed_time":7.985147001,"remaining_time":1.520980381},
+{"learn":[0.1610732023],"iteration":84,"passed_time":8.1240988,"remaining_time":1.433664494},
+{"learn":[0.160270845],"iteration":85,"passed_time":8.285406684,"remaining_time":1.348787135},
+{"learn":[0.1598202952],"iteration":86,"passed_time":8.436509479,"remaining_time":1.260627853},
+{"learn":[0.1594650718],"iteration":87,"passed_time":8.591243189,"remaining_time":1.171533162},
+{"learn":[0.1591502269],"iteration":88,"passed_time":8.739559663,"remaining_time":1.080170295},
+{"learn":[0.1583429364],"iteration":89,"passed_time":8.889175115,"remaining_time":0.9876861239},
+{"learn":[0.1579797457],"iteration":90,"passed_time":9.044786926,"remaining_time":0.8945393663},
+{"learn":[0.1576658943],"iteration":91,"passed_time":9.194544297,"remaining_time":0.799525591},
+{"learn":[0.157323332],"iteration":92,"passed_time":9.355725281,"remaining_time":0.704194376},
+{"learn":[0.1570125802],"iteration":93,"passed_time":9.511794086,"remaining_time":0.6071357927},
+{"learn":[0.1566978865],"iteration":94,"passed_time":9.663098075,"remaining_time":0.5085841092},
+{"learn":[0.1564011047],"iteration":95,"passed_time":9.81229429,"remaining_time":0.4088455954},
+{"learn":[0.1559077381],"iteration":96,"passed_time":9.909306273,"remaining_time":0.3064733899},
+{"learn":[0.155400761],"iteration":97,"passed_time":9.994785321,"remaining_time":0.2039752106},
+{"learn":[0.1551113733],"iteration":98,"passed_time":10.07576513,"remaining_time":0.1017754054},
+{"learn":[0.154801749],"iteration":99,"passed_time":10.1571871,"remaining_time":0}
+]}

Artifacts/catboost_info/learn/events.out.tfevents ADDED Viewed

Binary file (5.4 kB). View file

Artifacts/catboost_info/learn_error.tsv ADDED Viewed

	@@ -0,0 +1,101 @@

+iter	Logloss
+0	0.4073772263
+1	0.3137234975
+2	0.284236184
+3	0.2668503982
+4	0.2593665744
+5	0.2516184276
+6	0.2445631076
+7	0.2384404882
+8	0.2360618876
+9	0.2336608468
+10	0.229775124
+11	0.2276650503
+12	0.2251991995
+13	0.2228250972
+14	0.2215833855
+15	0.2202659273
+16	0.2188989356
+17	0.2174759806
+18	0.2164400273
+19	0.2145296529
+20	0.2134351214
+21	0.2124248348
+22	0.2113669983
+23	0.2102720461
+24	0.2091248728
+25	0.2076362453
+26	0.2066888613
+27	0.2055625239
+28	0.2046374022
+29	0.2037215169
+30	0.2020789062
+31	0.2010651162
+32	0.2003155492
+33	0.199547054
+34	0.1989116811
+35	0.1977211783
+36	0.1968471172
+37	0.1957080812
+38	0.1950747179
+39	0.1941590906
+40	0.1930812766
+41	0.1924486569
+42	0.19179662
+43	0.1906261212
+44	0.1900373396
+45	0.189016182
+46	0.1880234969
+47	0.1874627296
+48	0.1864628676
+49	0.1859694423
+50	0.1854000284
+51	0.1847740363
+52	0.1839432146
+53	0.1834647897
+54	0.182940847
+55	0.1818482915
+56	0.181244672
+57	0.1804523567
+58	0.1792150047
+59	0.1787887539
+60	0.1783067348
+61	0.1779043172
+62	0.1771281345
+63	0.1759185832
+64	0.175523618
+65	0.1751159439
+66	0.1744740365
+67	0.173006689
+68	0.1721463838
+69	0.1697501862
+70	0.1693384959
+71	0.1689667046
+72	0.1683670976
+73	0.1674515062
+74	0.1667025898
+75	0.1659401991
+76	0.1655595047
+77	0.164865277
+78	0.1644866935
+79	0.1638354875
+80	0.1634971926
+81	0.1630796548
+82	0.1627086068
+83	0.1620494
+84	0.1610732023
+85	0.160270845
+86	0.1598202952
+87	0.1594650718
+88	0.1591502269
+89	0.1583429364
+90	0.1579797457
+91	0.1576658943
+92	0.157323332
+93	0.1570125802
+94	0.1566978865
+95	0.1564011047
+96	0.1559077381
+97	0.155400761
+98	0.1551113733
+99	0.154801749

Artifacts/catboost_info/time_left.tsv ADDED Viewed

	@@ -0,0 +1,101 @@

+iter	Passed	Remaining
+0	199	19746
+1	285	14012
+2	370	11983
+3	452	10865
+4	536	10193
+5	619	9709
+6	704	9359
+7	794	9136
+8	877	8871
+9	960	8646
+10	1045	8462
+11	1128	8277
+12	1210	8100
+13	1293	7948
+14	1376	7800
+15	1459	7660
+16	1541	7528
+17	1626	7408
+18	1707	7278
+19	1802	7210
+20	1884	7088
+21	1971	6991
+22	2057	6887
+23	2139	6775
+24	2224	6673
+25	2322	6609
+26	2405	6504
+27	2493	6412
+28	2575	6305
+29	2658	6203
+30	2750	6122
+31	2835	6026
+32	2916	5922
+33	3001	5827
+34	3090	5740
+35	3178	5650
+36	3261	5553
+37	3342	5453
+38	3423	5354
+39	3503	5255
+40	3588	5164
+41	3674	5074
+42	3766	4993
+43	3849	4898
+44	3931	4805
+45	4018	4716
+46	4100	4624
+47	4182	4530
+48	4269	4443
+49	4352	4352
+50	4434	4260
+51	4516	4169
+52	4601	4080
+53	4683	3989
+54	4770	3903
+55	4855	3814
+56	4939	3726
+57	5023	3637
+58	5105	3547
+59	5188	3459
+60	5271	3370
+61	5353	3281
+62	5436	3192
+63	5518	3104
+64	5600	3015
+65	5682	2927
+66	5765	2839
+67	5855	2755
+68	5941	2669
+69	6026	2582
+70	6112	2496
+71	6194	2408
+72	6314	2335
+73	6457	2268
+74	6598	2199
+75	6747	2130
+76	6890	2058
+77	7034	1984
+78	7181	1908
+79	7338	1834
+80	7497	1758
+81	7661	1681
+82	7824	1602
+83	7985	1520
+84	8124	1433
+85	8285	1348
+86	8436	1260
+87	8591	1171
+88	8739	1080
+89	8889	987
+90	9044	894
+91	9194	799
+92	9355	704
+93	9511	607
+94	9663	508
+95	9812	408
+96	9909	306
+97	9994	203
+98	10075	101
+99	10157	0

Artifacts/cb_fakes_news_model.cbm ADDED Viewed

Binary file (578 kB). View file

Artifacts/classification_report.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+              precision    recall  f1-score   support
+           0       0.99      0.88      0.93      2072
+           1       0.87      0.99      0.92      1585
+    accuracy                           0.93      3657
+   macro avg       0.93      0.93      0.93      3657
+weighted avg       0.94      0.93      0.93      3657

Artifacts/tfidf_preprocessor.pkl ADDED Viewed

Binary file (561 kB). View file

Notebooks/Fake_News_Classification.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

README.md CHANGED Viewed

@@ -1,4 +1,4 @@
----
 title: Fake News Detection App
 emoji: 💻
 colorFrom: green
@@ -7,6 +7,4 @@ sdk: streamlit
 sdk_version: 1.19.0
 app_file: app.py
 pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 title: Fake News Detection App
 emoji: 💻
 colorFrom: green
 sdk_version: 1.19.0
 app_file: app.py
 pinned: false
+# Fake-news-classification

app.py ADDED Viewed

	@@ -0,0 +1,73 @@

+# -*- coding: utf-8 -*-
+"""
+Created on Mon May  8 23:58:36 2023
+@author: ME
+"""
+import catboost
+from src.prediction import Prediction
+from src.preprocessor import Preprocessing
+import streamlit as st
+import joblib
+import pandas as pd
+import altair as alt
+#load saved model
+model = catboost.CatBoostClassifier()
+model_path = "Artifacts/cb_fakes_news_model.cbm"
+model.load_model(model_path)
+def predict_article(text):
+    pred_,conf = Prediction(text,model).predict()
+    return pred_,conf
+# Emoji shown next to the prediction. The keys of emoji_dict are the label
+# strings that main() looks up after calling predict_article().
+fake_emoji = "\U0001F925"
+real_emoji = "\U0001F60A"
+emoji_dict = {"The news is real":real_emoji,"The news is fake":fake_emoji}
+def main():
+    st.title("TruthFinder: Detecting Fake News through US Article Titles")
+    menu = ["Home","Tracker","About"]
+    choice = st.sidebar.selectbox("Menu",menu)
+    if choice == "Home":
+        st.subheader("Home - Article title In Text")
+        with st.form(key="fake_news_form"):
+            raw_text = st.text_area("Type Here")
+            submit_text = st.form_submit_button(label="Submit")
+        if submit_text:
+            col1, col2 = st.columns(2)
+            #predict article title
+            pred,proba = predict_article(raw_text)
+            with col1:
+                st.success("Original Text")
+                st.write(raw_text)
+                st.success("Prediction")
+                emoji_icon = emoji_dict[pred]
+                st.write("{} {}".format(pred,emoji_icon))
+                confidence =  proba.max()
+                st.success("Prediction confidence")
+                confidence = f"{round(confidence* 100,2)}%"
+                st.write(confidence)
+            with col2:
+               st.success("Prediction Probability")
+               proba_df = pd.DataFrame(proba,columns=["Fake","Real"])
+               #st.write(proba_df.T)
+               proba_df_clean = proba_df.T.reset_index()
+               proba_df_clean.columns = ["Label","Probability"]
+               fig = alt.Chart(proba_df_clean).mark_bar().encode(x="Label",y="Probability")
+               st.altair_chart(fig,use_container_width=True)
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+numpy==1.24.3
+pandas==2.0.1
+nltk==3.8.1
+scikit-learn==1.2.2
+catboost==1.2
+joblib==1.2.0
+streamlit==1.22.0

src/prediction.py ADDED Viewed

	@@ -0,0 +1,32 @@

+# -*- coding: utf-8 -*-
+"""
+Created on Mon May  8 23:57:50 2023
+@author: ME
+"""
+from sklearn.feature_extraction.text import TfidfVectorizer
+from src.preprocessor import Preprocessing
+import joblib
+# Load the saved TF-IDF preprocessor using joblib
+path = "Artifacts/tfidf_preprocessor.pkl"
+class Prediction:
+  def __init__(self,pred_data,model):
+      self.pred_data = pred_data
+      self.model = model
+  def predict(self):
+    preprocess_data = Preprocessing(self.pred_data).preprocess_text()
+    loaded_tfidf = joblib.load(path)
+    data = loaded_tfidf.transform(preprocess_data)
+    predicted = self.model.predict(data)
+    proba = self.model.predict_proba(data)
+    if predicted[0] == 0:
+      return "The news is fake",proba
+    else:
+      return "The news is real",proba

src/preprocessor.py ADDED Viewed

	@@ -0,0 +1,36 @@

+# -*- coding: utf-8 -*-
+"""
+Created on Mon May  8 23:58:07 2023
+@author: ME
+"""
+import re
+import nltk
+from nltk.corpus import stopwords
+from nltk.stem import WordNetLemmatizer
+"""
+nltk.download('wordnet')
+nltk.download('stopwords')"""
+lm = WordNetLemmatizer()
+stop_words = set(stopwords.words("english"))
+class Preprocessing:
+  def __init__(self,data):
+     self.data = data
+  def preprocess_text(self):
+        lm = WordNetLemmatizer()
+        #initialise corpus to store texts p
+        pred_data = [self.data]
+        preprocessed_data = []
+        for data in pred_data:
+            review = re.sub("a-zA-Z0-9"," ",data)
+            review = review.lower() #convert to lower case
+            review = review.split() #Tokenize text
+            review = [lm.lemmatize(x) for x in review if x not in list(stop_words)] #lemmatize and removing stopwords
+            review  = " ".join(review) #join as text
+            preprocessed_data.append(review)
+        return preprocessed_data