Tirath5504 committed: Upload 4 files
- app.py +63 -0
- requirements (1).txt +55 -0
- shubham_english_text_model.h5 +3 -0
- shubham_english_text_tokenizer.pkl +3 -0
app.py
ADDED
@@ -0,0 +1,63 @@
import gradio as gr
import numpy as np
import string
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
import pickle

# NLTK data needed by the preprocessing pipeline below.
nltk.download('stopwords')
nltk.download('omw-1.4')
nltk.download('wordnet')
nltk.download('punkt')

try:
    model = load_model('shubham_english_text_model.h5')
except ValueError as e:
    print(f"Error: {e}")
    raise  # without the model the app cannot serve predictions

with open('shubham_english_text_tokenizer.pkl', 'rb') as handle:
    tokenizer = pickle.load(handle)

def preprocess(text, tokenizer):
    # Drop stopwords and punctuation, lowercase, and lemmatize.
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(text)
    tokens = [word for word in tokens
              if word.lower() not in stop_words and word not in string.punctuation]
    tokens = [lemmatizer.lemmatize(word.lower()) for word in tokens]
    preprocessed_text = ' '.join(tokens)
    # texts_to_sequences expects a list of texts; a bare string would be
    # iterated character by character.
    X = tokenizer.texts_to_sequences([preprocessed_text])
    # NOTE: pads to this input's own length; a model trained with a fixed
    # maxlen may expect that training-time length instead.
    max_len = max(len(y) for y in X)
    X = pad_sequences(X, maxlen=max_len)
    return X

def predict(text):
    X = preprocess(text, tokenizer)
    pred = model.predict(X)
    probabilities = np.mean(pred, axis=0)
    final_class = np.argmax(probabilities)
    if final_class == 0:
        prediction = "The string is classified as hate speech."
    else:
        prediction = "The string is classified as normal speech."
    return prediction, probabilities.tolist()

iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
    outputs=[gr.Textbox(label="Prediction"), gr.Textbox(label="Probabilities")],
    title="Hate Speech Classifier",
    description="A classifier to detect hate speech in a given text.",
)

if __name__ == "__main__":
    iface.launch()
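A quick local smoke test (hypothetical usage, not part of the commit; it assumes the .h5 model and .pkl tokenizer files have been fetched from the Space):

    label, probs = predict("I hope you have a great day!")
    print(label)   # e.g. "The string is classified as normal speech."
    print(probs)   # per-class probabilities as a plain list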
requirements (1).txt
ADDED
@@ -0,0 +1,55 @@
absl-py==2.1.0
astunparse==1.6.3
blinker==1.8.2
certifi==2024.7.4
charset-normalizer==3.3.2
click==8.1.7
colorama==0.4.6
filelock==3.15.4
Flask==3.0.3
flatbuffers==24.3.25
fsspec==2024.6.1
gast==0.6.0
google-pasta==0.2.0
grpcio==1.65.4
h5py==3.11.0
huggingface-hub==0.24.5
idna==3.7
itsdangerous==2.2.0
Jinja2==3.1.4
joblib==1.4.2
keras==3.4.1
libclang==18.1.1
Markdown==3.6
markdown-it-py==3.0.0
MarkupSafe==2.1.5
mdurl==0.1.2
ml-dtypes==0.4.0
namex==0.0.8
nltk==3.8.1
numpy==1.26.4
opt-einsum==3.3.0
optree==0.12.1
packaging==24.1
pickle5
pip==24.2
protobuf==4.25.4
Pygments==2.18.0
PyYAML==6.0.1
regex==2024.7.24
requests==2.32.3
rich==13.7.1
safetensors==0.4.3
six==1.16.0
tensorboard==2.17.0
tensorboard-data-server==0.7.2
tensorflow==2.17.0
tensorflow-io-gcs-filesystem==0.31.0
termcolor==2.4.0
tokenizers==0.19.1
tqdm==4.66.5
transformers==4.43.3
typing_extensions==4.12.2
urllib3==2.2.2
Werkzeug==3.0.3
wrapt==1.16.0
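To reproduce this environment locally (standard pip usage; the Space installs the file automatically at build time):

    pip install -r "requirements (1).txt"

Note that gradio itself is not pinned here; on a Gradio-SDK Space the platform supplies it, but a local run would need it installed as well.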
shubham_english_text_model.h5
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:127425b47a8a3060e4bbcc08a8afd3054abbccee1c6438718558d93baa758b4c
size 238550656
shubham_english_text_tokenizer.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:631166443d3b25b4d14ff50c9fb6501b5a6e605daf2690733bc8cf8f0edd452d
size 1495518
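Both binary files are stored as Git LFS pointer stubs; the actual contents (the ~239 MB model and ~1.5 MB tokenizer) live in LFS storage. After cloning the Space, the standard git-lfs commands fetch them:

    git lfs install
    git lfs pull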