import gradio as gr
import re
import joblib
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer

# Download the NLTK resources needed for tokenization, stop-word removal, and lemmatization
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Load the pre-trained TF-IDF vectorizer and classifier
tfidf_vectorizer = joblib.load('tfidf_vectorizer.pkl')
classifier = joblib.load('classifier.pkl')


# Preprocess function
def preprocess_text(text):
    # Text cleaning: strip punctuation and other non-word characters
    text = re.sub(r'[^\w\s]', '', text)

    # Tokenization
    tokens = word_tokenize(text)

    # Stop word removal
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token.lower() not in stop_words]

    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(token) for token in tokens]

    # Join tokens back into a single string
    return ' '.join(tokens)


# Gradio function
def classify_threat(text):
    # Preprocess the input text
    preprocessed_text = preprocess_text(text)

    # Convert to a TF-IDF vector
    tfidf_text = tfidf_vectorizer.transform([preprocessed_text])

    # Predict the threat category
    threat = classifier.predict(tfidf_text)
    return threat[0]


# Create the Gradio interface
iface = gr.Interface(
    fn=classify_threat,
    inputs="text",
    outputs="text",
    title="Threat Classification",
    description="Enter a text description to classify the threat (e.g., oil, chemical).",
)

# Launch the interface
iface.launch()
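

# --- Hedged sketch (an assumption, not part of the original app) ---------------
# The app above loads 'tfidf_vectorizer.pkl' and 'classifier.pkl' but never
# creates them. The function below is one plausible way those artifacts could be
# produced in a separate, one-off training script: the CSV path and the
# 'description'/'threat_type' column names are hypothetical, and LinearSVC is
# only an illustrative model choice.
def train_and_save_artifacts(csv_path='threat_reports.csv'):
    import pandas as pd
    from sklearn.svm import LinearSVC

    df = pd.read_csv(csv_path)                        # hypothetical labelled dataset
    texts = df['description'].apply(preprocess_text)  # reuse the app's preprocessing
    labels = df['threat_type']

    vectorizer = TfidfVectorizer()
    features = vectorizer.fit_transform(texts)        # fit TF-IDF on the training texts
    model = LinearSVC()
    model.fit(features, labels)                       # train a linear classifier

    joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')   # save the artifacts the app loads
    joblib.dump(model, 'classifier.pkl')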