Amiruzzaman committed
Commit 1f3f8ce · verified · 1 Parent(s): 9bbf738
Files changed (5)
  1. app.py +88 -0
  2. cyberbullying_model.h5 +3 -0
  3. label_encoder.pkl +3 -0
  4. requirements.txt +8 -0
  5. tokenizer.pkl +3 -0
app.py ADDED
@@ -0,0 +1,88 @@
+ import streamlit as st
+ import numpy as np
+ import joblib
+ import re
+ from keras.models import load_model
+ from keras.preprocessing.sequence import pad_sequences
+
+ # Define max_sequence_length (same as what was used during training)
+ max_sequence_length = 186
+
+ # Define bad_words list
+ bad_words = ["মাদারচুদ", "বাইন চুদ", "জুকার", "ফালতু", "শালা", "লেংটা", "বাটপার", "মাদারচুদ", "ফালতু", "শালা", "নাস্তিকের বাচ্চা", "শুয়ার", "কুত্তা", "পুটকি", "নগ্নতায়", "সমকামি", "চুদছে", "চুদতে", "চুদা", "আবাল চোদা", "শুয়োরের বাচ্চা", "কুত্তার বাচ্চা", "হারামির বাচ্চা", "হারামজাদা", "শালার পো", "চুতমারানি", "চুদির ভাই", "হাউয়ার নাতি", "খানকি", "মারা", "হোগা", "খানকির পোলা", "চোদা", "মিয়া খলিফা", "জনি সিন্স", "মাগির পোলা", "মাগি", "মাগী", "পর্ণ", "গরুচোদা", "হিজরার", "হিজরা"]
+
+ # Load the model
+ loaded_model = load_model('cyberbullying_model.h5')
+
+ # Load the tokenizer and label encoder
+ loaded_tokenizer = joblib.load('tokenizer.pkl')
+ loaded_label_encoder = joblib.load('label_encoder.pkl')
+
+ # Function to filter out bad words from text and return the filtered words
+ def filter_bad_words_with_model(text):
+     filtered_words = []
+     for bad_word in bad_words:
+         # Create a regular expression pattern for the bad word, ignoring case
+         pattern = re.compile(re.escape(bad_word), re.IGNORECASE)
+         # Replace occurrences of the bad word with asterisks (*) of the same length
+         text, num_replacements = pattern.subn('*' * len(bad_word), text)
+         if num_replacements > 0:
+             filtered_words.append(bad_word)
+     return text, filtered_words
+
+
+ # Streamlit UI
+ st.title("Cyberbullying Detection App (Bangla)")
+
+ # Input text for prediction
+ input_text = st.text_area("Enter Text:")
+
+ if st.button("Predict"):
+     if input_text:
+         # Mask bad words using the bad_words list
+         filtered_text, filtered_bad_words = filter_bad_words_with_model(input_text)
+
+         # Tokenize and pad the filtered input text
+         input_sequence = loaded_tokenizer.texts_to_sequences([filtered_text])
+         input_sequence = pad_sequences(input_sequence, maxlen=max_sequence_length)
+
+         # Make a prediction using the loaded model
+         predicted_probabilities = loaded_model.predict(input_sequence)
+
+         # Get the class with the highest probability as the predicted class
+         predicted_class = np.argmax(predicted_probabilities)
+
+         # Decode the predicted class back to the original label using the loaded label encoder
+         predicted_label = loaded_label_encoder.classes_[predicted_class]
+
+         st.subheader("Prediction Result:")
+         if predicted_label == "not bully":
+             st.write("Prediction: Not Cyberbullying")
+             st.write("No bad words found.")
+         else:
+             st.write("Prediction: Cyberbullying")
+             st.write(f"Cyberbullying Type: {predicted_label}")
+
+         if filtered_bad_words:
+             st.write(f"Bad Words: {', '.join(filtered_bad_words)}")
+         else:
+             st.write("<span style='color:cyan;'>No bad words found.</span>", unsafe_allow_html=True)
+
+         if filtered_bad_words:
+             st.write("Filtered Text:")
+             filtered_text_with_asterisks = filtered_text
+             for bad_word in filtered_bad_words:
+                 filtered_text_with_asterisks = re.sub(re.escape(bad_word), '*' * len(bad_word), filtered_text_with_asterisks, flags=re.IGNORECASE)
+
+             st.write(f"<span style='color:red; font-weight:bold'>{filtered_text_with_asterisks}</span>", unsafe_allow_html=True)
+         else:
+             st.write("Original Text:")
+             st.write(f"{input_text}", unsafe_allow_html=True)
+
+
+
+
+
+ st.header("Sample Texts")
+ st.write("ভেবেছিলাম তুই একটা ছেলে!!! এখন দেখি এটা একটা" + "<span style='color:red; font-weight:bold'> হিজরা</span>?", unsafe_allow_html=True)
+ st.write("প্রতিটি নাটক কয়েকবার করে দেখা হয়ে গেছে")
cyberbullying_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:afc432e360c18e5cf63c4dee6cf0d3120fddf09189baefa229b7968c5c98d6be
+ size 8645176
label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d9eed52d22388d26d10e2032d8aaf6210f839b604f77fd039a8e2f6220e8eabc
+ size 582
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ scikit-learn
+ joblib
+ regex
+ keras
+ tensorflow
+ numpy
+ streamlit
+ dill
tokenizer.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a77dc749ed802387c54a13ff7af56dc210594b1b9b03797da5de60615048a733
+ size 3424620