import gradio as gr
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
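# Load the fitted TF-IDF vectorizer and the trained classifier
# (assumption: both .pkl files sit next to this script).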
tfidf_vectorizer = joblib.load('tfidf_vectorizer.pkl')
classifier = joblib.load('classifier.pkl')
# Preprocess function
def preprocess_text(text):
    # Text cleaning: strip everything except word characters and whitespace
    text = re.sub(r'[^\w\s]', '', text)
    # Tokenization
    tokens = word_tokenize(text)
    # Stop word removal
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token.lower() not in stop_words]
    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    # Join tokens back into a single string
    preprocessed_text = ' '.join(tokens)
    return preprocessed_text
# Gradio Function
def classify_threat(text):
    # Preprocess the input text
    preprocessed_text = preprocess_text(text)
    # Convert to a TF-IDF vector
    tfidf_text = tfidf_vectorizer.transform([preprocessed_text])
    # Predict the threat class
    threat = classifier.predict(tfidf_text)
    return threat[0]
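# Quick sanity check without the UI (a minimal sketch; the example input and
# the resulting label are illustrative, not taken from the training data):
#   print(classify_threat("Oil spill reported near the coastline"))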
# Create a Gradio interface
iface = gr.Interface(
    fn=classify_threat,
    inputs="text",
    outputs="text",
    title="Threat Classification",
    description="Enter a text description to classify the threat (e.g., oil, chemical).",
)
# Launch the interface
iface.launch()
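# Note: when running locally, iface.launch() also accepts options such as
# share=True to expose a temporary public link (optional).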