gridflowai's picture
Upload 4 files
c752674
import gradio as gr
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib
import nltk
# NLTK data needed at request time: tokenizer models, the English stop-word
# list, and the WordNet data used by the lemmatizer. 'punkt_tab' is requested
# alongside 'punkt' because newer NLTK releases make word_tokenize() load its
# models from 'punkt_tab'; with only 'punkt' installed, tokenization raises
# LookupError there.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(_resource)

# NOTE(security): joblib.load() unpickles the file, which can execute
# arbitrary code. Only load artifacts that ship with this app or come from a
# trusted source.
# Both paths are relative, so they resolve against the current working
# directory.
tfidf_vectorizer = joblib.load('tfidf_vectorizer.pkl')
classifier = joblib.load('classifier.pkl')
# Preprocess function
def preprocess_text(text):
    """Clean *text* and return its lemmatized, stop-word-free tokens.

    The steps are: strip punctuation, tokenize, drop English stop words
    (compared case-insensitively), and lemmatize what remains. The case of
    the surviving tokens is left as-is.

    Args:
        text: Raw input string. ``None`` or an empty string is accepted.

    Returns:
        The processed tokens joined by single spaces, or ``''`` when the
        input is empty or missing.
    """
    # A missing value would make re.sub() raise TypeError; an empty string
    # produces no tokens anyway, so both short-circuit to the empty result.
    if not text:
        return ''

    # Remove every character that is neither a word character nor whitespace.
    stripped = re.sub(r'[^\w\s]', '', text)

    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # Filter stop words and lemmatize in a single pass over the tokens.
    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in word_tokenize(stripped)
        if token.lower() not in stop_words
    )
# Gradio Function
def classify_threat(text):
    """Predict the threat label for a free-text description.

    The text is run through ``preprocess_text``, vectorized with the loaded
    TF-IDF vectorizer, and passed to the loaded classifier.

    Args:
        text: Description entered by the user.

    Returns:
        The first (and only) element of the classifier's prediction for
        the single input sample.
    """
    cleaned = preprocess_text(text)
    # transform() expects a collection of documents, hence the 1-item list.
    features = tfidf_vectorizer.transform([cleaned])
    return classifier.predict(features)[0]
# Create a Gradio interface
# Single text box in, single text box out; classify_threat does the work.
iface = gr.Interface(
    classify_threat,
    "text",
    "text",
    title="Threat Classification",
    description="Enter a text description to classify the threat (e.g., oil, chemical).",
)

# Start serving the UI.
iface.launch()