Amiruzzaman committed on
Commit
edf0e0d
·
verified ·
1 Parent(s): 766486d

Upload 3 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Cleaned_Hinglish_dataset.xlsx filter=lfs diff=lfs merge=lfs -text
Cleaned_Hinglish_dataset.xlsx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50522beac6bdd99e328fc48a8a4d2cc27c044419659c6f67cb52e7d87a53a870
3
+ size 1370811
CyberBullyingHinglish.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.linear_model import LogisticRegression
5
+ import re
6
+
7
+
8
def predict_bullying_type(input_text, bad_words):
    """Classify ``input_text`` and mask every listed bad word found in it.

    A TF-IDF vectorizer and a logistic-regression classifier are fitted on
    the ``cleaned_text`` (features) and ``type`` (labels) columns of
    ``Cleaned_Hinglish_dataset.xlsx``; rows without ``cleaned_text`` are
    dropped first.  The fitted model then predicts the type of the
    unmodified ``input_text``.

    Args:
        input_text: Text to classify and filter.
        bad_words: Iterable of words/phrases to look for.  Matching is
            case-insensitive and only accepts whole-token hits, i.e. the
            match may not be directly preceded or followed by a word
            character.

    Returns:
        A 4-tuple ``(prediction, predicted_type, filtered_text,
        detected_bad_words)``:

        * ``prediction`` -- ``"Cyberbullying"`` when the predicted type is
          ``"OAG"``, otherwise ``"Not Cyberbullying"``.
        * ``predicted_type`` -- the raw label returned by the classifier.
        * ``filtered_text`` -- ``input_text`` with each detected bad word
          replaced by ``*******``.
        * ``detected_bad_words`` -- the entries of ``bad_words`` that
          matched, in iteration order.
    """
    # NOTE(review): the dataset is re-read and the model re-fitted on every
    # call; consider caching the fitted vectorizer/classifier if this
    # becomes a latency problem.
    df = pd.read_excel("Cleaned_Hinglish_dataset.xlsx")
    df = df.dropna(subset=["cleaned_text"])

    tfidf_vectorizer = TfidfVectorizer()
    X_tfidf = tfidf_vectorizer.fit_transform(df["cleaned_text"])
    logistic_regression_classifier = LogisticRegression(max_iter=1000)
    logistic_regression_classifier.fit(X_tfidf, df["type"])

    # Predict on the original text, before any masking is applied.
    text_tfidf = tfidf_vectorizer.transform([input_text])
    predicted_type = logistic_regression_classifier.predict(text_tfidf)[0]

    detected_bad_words = []
    for bad_word in bad_words:
        # Lookarounds instead of ``\b``: a trailing ``\b`` after a
        # non-word character (e.g. the final '.' of 'b.c.') only matches
        # when a word character follows, so such entries could never be
        # detected as stand-alone tokens.  For ordinary words the two
        # forms are equivalent.
        pattern = re.compile(
            r"(?<!\w){}(?!\w)".format(re.escape(bad_word)),
            flags=re.IGNORECASE,
        )
        input_text, hit_count = pattern.subn("*******", input_text)
        if hit_count:
            detected_bad_words.append(bad_word)

    # NOTE(review): only the "OAG" label is reported as cyberbullying;
    # confirm against the dataset's label set that this is intended.
    prediction = "Cyberbullying" if predicted_type == "OAG" else "Not Cyberbullying"
    return prediction, predicted_type, input_text, detected_bad_words
30
+
31
# --- Streamlit UI -----------------------------------------------------------
# Imported here because this section embeds user-supplied text in HTML and
# must escape it first.
import html

st.title("Cyberbullying Detection App (Hinglish)")

input_text = st.text_area("Enter a text for cyberbullying detection:")


if st.button("Predict"):
    if not input_text.strip():
        # Nothing to classify: avoid fitting the model just to label an
        # empty string.
        st.warning("Please enter some text first.")
    else:
        # Words/phrases that are reported and masked when they appear as
        # whole tokens in the input.
        bad_words = [
            'bahenchod', 'peshan', 'behenchod', 'bhenchod', 'bhenchodd', 'b.c.', 'bc',
            'bakchod', 'bakchodd', 'bakchodi', 'bevda', 'bewda', 'bevdey', 'bewday',
            'bhadwaa', 'bhosada', 'bhosda', 'bhosdaa', 'bhosdike', 'bhonsdike',
            'bhosdiki', 'bhosdiwala', 'bhosdiwale', 'Bhosadchodal', 'Bhosadchod',
            'Bhosadchodal', 'Bhosadchod', 'babbe', 'babbey', 'bube', 'bubey', 'bur',
            'burr', 'buurr', 'buur', 'charsi', 'chooche', 'choochi', 'chuchi', 'chhod',
            'chod', 'chodd', 'chudne', 'chudney', 'chudwa', 'chudwaa', 'chudwane',
            'chudwaane', 'chaat', 'choot', 'chut', 'chute', 'chutia', 'chutiya',
            'chutiye', 'dalaal', 'dalal', 'dalle', 'dalley', 'fattu', 'gadha', 'gadhe',
            'gadhalund', 'gaand', 'gand', 'gandu', 'gandfat', 'gandfut', 'gandiya',
            'gandiye', 'goo', 'gu', 'gote', 'gotey', 'gotte', 'hag', 'haggu', 'hagne',
            'hagney', 'harami', 'haramjada', 'haraamjaada', 'haramzyada',
            'haraamzyaada', 'haraamjaade', 'haraamzaade', 'haraamkhor', 'haramkhor',
            'jhat', 'jhaat', 'jhaatu', 'jhatu', 'kutta', 'kutte', 'kuttey', 'kutia',
            'kutiya', 'kuttiya', 'kutti', 'landi', 'landy', 'laude', 'laudey', 'laura',
            'lora', 'lauda', 'ling', 'loda', 'lode', 'lund', 'launda', 'lounde',
            'laundey', 'laundi', 'loundi', 'laundiya', 'loundiya', 'lulli', 'maar ja',
            'madarchod', 'madarchodd', 'madarchood', 'madarchoot', 'madarchut', 'm.c.',
            'mc', 'mamme', 'mammey', 'moot', 'mut', 'mootne', 'mutne', 'mooth', 'muth',
            'nunnu', 'pesaab', 'pesab', 'peshaab', 'peshab', 'pillay', 'pille',
            'pilley', 'pisaab', 'pisab', 'porkistan', 'raand', 'rand', 'randi', 'randy',
            'suar', 'tatti', 'tatty', 'ullu', 'pappu',
        ]
        prediction, bullying_type, filtered_text, detected_bad_words = predict_bullying_type(input_text, bad_words)

        st.write("Prediction:", prediction)
        st.write("Cyberbullying Type: ", bullying_type)
        if detected_bad_words:
            st.write("Bad Words:", ', '.join(detected_bad_words))
            st.write("Filtered Text:")
            # The text comes from the user: escape it before embedding it
            # in markup rendered with unsafe_allow_html=True.
            st.write(
                f"<span style='color:red; font-weight:bold'>{html.escape(filtered_text)}</span>",
                unsafe_allow_html=True,
            )
        else:
            st.write("<span style='color:cyan;'>No bad words found.</span>", unsafe_allow_html=True)
            st.write("Original Text:")
            # Raw user input is rendered without unsafe_allow_html so any
            # HTML in it is shown as text instead of being interpreted.
            st.write(input_text)

# Static examples; the highlighted spans are hard-coded markup, not user input.
st.header("Sample Texts")
st.write("es<span style='color:red; font-weight:bold'> kutte</span> ko jail daal desh drohi hai", unsafe_allow_html=True)
st.write("<span style='color:red; font-weight:bold'>pappu </span>gandhi", unsafe_allow_html=True)
st.write("bhaijan mera business partner ka naam alisher h hum aaps ak dusre par aankh band krke vishvash krte h")
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ scikit-learn
2
+ regex
3
+ keras
4
+ tensorflow
5
+ numpy
6
+ streamlit
7
+ pandas
8
+ openpyxl