Tirath5504 committed on
Commit
4bbce2a
·
verified ·
1 Parent(s): 24c0c0d

Upload 4 files

Browse files
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import string
4
+ import nltk
5
+ from nltk.tokenize import word_tokenize
6
+ from nltk.corpus import stopwords
7
+ from nltk.stem import WordNetLemmatizer
8
+ import tensorflow as tf
9
+ from tensorflow import keras
10
+ from keras import layers
11
+ from tensorflow.keras.preprocessing.text import Tokenizer
12
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
13
+ from tensorflow.keras.models import load_model
14
+ from joblib import load
15
+ import pickle
16
+
17
# Fetch the NLTK data packages at startup, in the same order as before:
# stop-word lists, the Open Multilingual WordNet, WordNet itself, and the
# Punkt tokenizer models.
for resource in ('stopwords', 'omw-1.4', 'wordnet', 'punkt'):
    nltk.download(resource)
21
+
22
# Load the trained Keras classifier from the HDF5 file shipped with the app.
try:
    model = load_model('shubham_english_text_model.h5')
except ValueError as e:
    print(f"Error: {e}")
    # Re-raise after logging: if loading fails, `model` is never bound and
    # every later prediction would die with an unrelated NameError. Failing
    # here keeps the real cause visible in the startup log.
    raise

# Load the tokenizer that was pickled alongside the model.
# NOTE(review): pickle.load can execute arbitrary code from the file; this is
# only safe because the .pkl is a trusted artifact bundled with the app.
with open('shubham_english_text_tokenizer.pkl', 'rb') as handle:
    tokenizer = pickle.load(handle)
28
+
29
def preprocess(text, tokenizer):
    """Convert raw text into a padded batch of token-id sequences.

    The text is word-tokenized, English stop words and punctuation tokens
    are dropped, and the remaining words are lower-cased and lemmatized.
    The cleaned text is then mapped to integer ids with ``tokenizer``.

    Args:
        text: Raw input string.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences``.

    Returns:
        A 2-D array holding a single padded sequence (one row for the whole
        input text). The row always has at least one column, so input that
        reduces to no known tokens yields a single padding id (0).
    """
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words('english'))

    tokens = [
        lemmatizer.lemmatize(word.lower())
        for word in word_tokenize(text)
        if word.lower() not in stop_words and word not in string.punctuation
    ]
    preprocessed_text = ' '.join(tokens)

    # texts_to_sequences expects a *list* of texts. Passing the bare string
    # makes it iterate character by character, turning every character into
    # its own one-token "document" instead of encoding the sentence.
    sequences = tokenizer.texts_to_sequences([preprocessed_text])

    # Keep at least one time step so an input made only of stop words,
    # punctuation or unknown words still produces a valid (1, 1) batch.
    # NOTE(review): length is per-input, as in the original; if the model was
    # trained with a fixed sequence length, use that value here instead.
    max_len = max(1, len(sequences[0]))
    return pad_sequences(sequences, maxlen=max_len)
42
+
43
def predict(text):
    """Classify ``text`` and return a verdict plus the class probabilities.

    Args:
        text: The string entered by the user.

    Returns:
        A ``(message, probabilities)`` tuple. ``message`` is the
        human-readable verdict and ``probabilities`` is the list of
        per-class scores averaged over the rows of the model output. For
        blank input the message asks for text and the list is empty.
    """
    # Blank input has nothing to classify; answer directly instead of
    # sending an empty batch through preprocessing and the model.
    if text is None or not text.strip():
        return "Please enter some text to classify.", []

    X = preprocess(text, tokenizer)
    pred = model.predict(X)
    # Average over the batch axis to get one score per class.
    probabilities = np.mean(pred, axis=0)
    final_class = int(np.argmax(probabilities))

    # NOTE(review): assumes class index 0 is "hate speech", as in the
    # original code — confirm against the training label encoding.
    if final_class == 0:
        prediction = "The string is classified as hate speech."
    else:
        prediction = "The string is classified as normal speech."
    return prediction, probabilities.tolist()
53
+
54
# Gradio front end: one free-text input wired to `predict`, with two text
# outputs (the verdict and the class probabilities).
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(placeholder="Enter text here...", lines=2),
    outputs=[
        gr.Textbox(label="Prediction"),
        gr.Textbox(label="Probabilities"),
    ],
    description="A classifier to detect hate speech in a given text.",
    title="Hate Speech Classifier",
)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    iface.launch()
requirements (1).txt ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.1.0
2
+ astunparse==1.6.3
3
+ blinker==1.8.2
4
+ certifi==2024.7.4
5
+ charset-normalizer==3.3.2
6
+ click==8.1.7
7
+ colorama==0.4.6
8
+ filelock==3.15.4
9
+ Flask==3.0.3
10
+ flatbuffers==24.3.25
11
+ fsspec==2024.6.1
12
+ gast==0.6.0
13
+ google-pasta==0.2.0
14
+ grpcio==1.65.4
15
+ h5py==3.11.0
16
+ huggingface-hub==0.24.5
17
+ idna==3.7
18
+ itsdangerous==2.2.0
19
+ Jinja2==3.1.4
20
+ joblib==1.4.2
21
+ keras==3.4.1
22
+ libclang==18.1.1
23
+ Markdown==3.6
24
+ markdown-it-py==3.0.0
25
+ MarkupSafe==2.1.5
26
+ mdurl==0.1.2
27
+ ml-dtypes==0.4.0
28
+ namex==0.0.8
29
+ nltk==3.8.1
30
+ numpy==1.26.4
31
+ opt-einsum==3.3.0
32
+ optree==0.12.1
33
+ packaging==24.1
34
+ pickle5
35
+ pip==24.2
36
+ protobuf==4.25.4
37
+ Pygments==2.18.0
38
+ PyYAML==6.0.1
39
+ regex==2024.7.24
40
+ requests==2.32.3
41
+ rich==13.7.1
42
+ safetensors==0.4.3
43
+ six==1.16.0
44
+ tensorboard==2.17.0
45
+ tensorboard-data-server==0.7.2
46
+ tensorflow==2.17.0
47
+ tensorflow-io-gcs-filesystem==0.31.0
48
+ termcolor==2.4.0
49
+ tokenizers==0.19.1
50
+ tqdm==4.66.5
51
+ transformers==4.43.3
52
+ typing_extensions==4.12.2
53
+ urllib3==2.2.2
54
+ Werkzeug==3.0.3
55
+ wrapt==1.16.0
shubham_english_text_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:127425b47a8a3060e4bbcc08a8afd3054abbccee1c6438718558d93baa758b4c
3
+ size 238550656
shubham_english_text_tokenizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:631166443d3b25b4d14ff50c9fb6501b5a6e605daf2690733bc8cf8f0edd452d
3
+ size 1495518