|
import gradio as gr |
|
import re |
|
from nltk.tokenize import word_tokenize |
|
from nltk.corpus import stopwords |
|
from nltk.stem import WordNetLemmatizer |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
|
|
import joblib |
|
|
|
import nltk |
|
|
|
# Fetch the NLTK data the preprocessing pipeline depends on:
# 'punkt' for word_tokenize, 'stopwords' for the English stop-word list,
# and 'wordnet' for WordNetLemmatizer. download() is a no-op if already cached.
nltk.download('punkt')

nltk.download('stopwords')

nltk.download('wordnet')
|
|
|
|
|
# Load the fitted TF-IDF vectorizer and the trained classifier from disk.
# NOTE(review): paths are relative to the current working directory — confirm
# the app is always launched from the directory containing these .pkl files.
tfidf_vectorizer = joblib.load('tfidf_vectorizer.pkl')

classifier = joblib.load('classifier.pkl')
|
|
|
|
|
# Built once at import time: the original rebuilt the stop-word set and a
# fresh WordNetLemmatizer on every call, which is pure wasted work per request.
_STOP_WORDS = frozenset(stopwords.words('english'))
_LEMMATIZER = WordNetLemmatizer()


def preprocess_text(text):
    """Normalize raw text for TF-IDF vectorization.

    Steps: strip punctuation, tokenize, drop English stop words
    (matched case-insensitively), then lemmatize the surviving tokens.

    Args:
        text: Raw input string.

    Returns:
        A single space-joined string of lemmatized, stop-word-free tokens.
    """
    # Remove everything except word characters and whitespace.
    text = re.sub(r'[^\w\s]', '', text)

    tokens = word_tokenize(text)

    # NOTE(review): tokens keep their original case — the stop-word test is
    # case-insensitive but lemmatization is not. Confirm the vectorizer was
    # fitted on text preprocessed identically (e.g. whether it lowercases).
    return ' '.join(
        _LEMMATIZER.lemmatize(token)
        for token in tokens
        if token.lower() not in _STOP_WORDS
    )
|
|
|
|
|
def classify_threat(text):
    """Predict the threat label for a raw text description.

    The input is cleaned via preprocess_text, vectorized with the fitted
    TF-IDF vectorizer, and classified by the loaded model; the single
    predicted label is returned.
    """
    cleaned = preprocess_text(text)
    features = tfidf_vectorizer.transform([cleaned])
    prediction = classifier.predict(features)
    return prediction[0]
|
|
|
|
|
# Gradio UI: a single text box in, the predicted threat label out.
iface = gr.Interface(
    fn=classify_threat,
    inputs="text",
    outputs="text",
    title="Threat Classification",
    description="Enter a text description to classify the threat (e.g., oil, chemical).",
)

# Guard the launch so importing this module (e.g. from tests or another app)
# does not start the web server as an import-time side effect; running the
# file as a script behaves exactly as before.
if __name__ == "__main__":
    iface.launch()
|
|