# Spaces: Sleeping  (appears to be status text captured from the hosting page; not part of the program)
# Imports
import gradio as gr | |
from sklearn.linear_model import LogisticRegression | |
import pickle5 as pickle | |
import re | |
import string | |
import nltk | |
from nltk.corpus import stopwords | |
nltk.download('stopwords') | |
from sklearn.feature_extraction.text import CountVectorizer | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
# File name of the pickled model, resolved relative to the working directory.
lr_filename = 'lr_021223.pkl'

# Load model from pickle file. The context manager closes the file handle as
# soon as unpickling finishes instead of leaving it open for the process
# lifetime.
# NOTE(review): unpickling can execute arbitrary code; only load a pickle file
# that comes from a trusted source.
with open(lr_filename, 'rb') as model_file:
    model = pickle.load(model_file)
# Text preprocessing: process_text removes stopwords, converts to lowercase,
# and removes punctuation. `stop` is the NLTK English stopword list that
# process_text filters against.
stop = stopwords.words('english')
def process_text(text, stop_words=None):
    """Normalize raw text: drop stopwords, lowercase, and strip punctuation.

    The steps run in this order:

    1. Split on whitespace and drop every word found in the stopword
       collection. The comparison is case-sensitive and happens before
       lowercasing, so a capitalised word such as "The" is kept.
    2. Join the remaining words with single spaces and lowercase them.
    3. Replace each ``string.punctuation`` character with a space.
    4. Collapse runs of whitespace into single spaces and trim the ends.

    Args:
        text: Raw input string.
        stop_words: Optional iterable of words to remove. When omitted, the
            module-level ``stop`` list is used.

    Returns:
        The cleaned text as a single space-separated string; an empty string
        when nothing is left.
    """
    if stop_words is None:
        stop_words = stop
    # Set membership is O(1) per word; the stopword list would be scanned
    # linearly for every word otherwise.
    blocked = frozenset(stop_words)
    kept = [word for word in text.split() if word not in blocked]
    # Join the words rather than calling str() on the list: the list repr
    # escapes non-printable characters (e.g. a zero-width space becomes the
    # text "\u200b"), and those escape letters would survive as junk tokens
    # once the backslash is stripped as punctuation.
    lowered = " ".join(kept).lower()
    without_punct = re.sub(
        f"[{re.escape(string.punctuation)}]", " ", lowered
    )
    return " ".join(without_punct.split())
# Vectorize input text
# NOTE(review): this CountVectorizer starts unfitted and vectorize_text fits it
# on each incoming text, so its vocabulary comes from the request itself, not
# from the data the pickled model was trained on. Presumably the fitted
# training-time vectorizer should be loaded and used with transform() instead;
# it is not available in this file, so the behavior is left as-is. Confirm.
vectorizer = CountVectorizer()


def vectorize_text(text):
    """Clean the text with process_text and turn it into a count matrix.

    Args:
        text: Raw input string.

    Returns:
        The result of ``vectorizer.fit_transform`` on a one-document list
        containing the cleaned text. The module-level vectorizer is re-fitted
        on every call.
    """
    cleaned = process_text(text)
    # A single-element list: the vectorizer is given one document.
    return vectorizer.fit_transform([cleaned])
# Dense, single-row version of the vectorized input.
# NOTE(review): the original comment said this makes the number of features
# match the model, but nothing here pads or aligns columns to the model's
# vocabulary; the width is whatever vectorize_text produced. It is also not
# called by predict in this file. Confirm intent.
def create_valid_input(text):
    """Vectorize the text and return it as a dense array with one row.

    Args:
        text: Raw input string.

    Returns:
        The output of vectorize_text converted with ``toarray()`` and
        reshaped to ``(1, -1)``.
    """
    counts = vectorize_text(text)
    dense = counts.toarray()
    return dense.reshape(1, -1)
# Prediction function
def predict(text):
    """Vectorize the raw text and return the model's prediction for it.

    This is the callback wired into the Gradio interface below.

    Args:
        text: Raw input string.

    Returns:
        The value returned by ``model.predict`` for the vectorized text,
        passed through unchanged. The interface label lists the classes as
        Other: 1, Healthcare: 2, Technology: 3.
    """
    # NOTE(review): vectorize_text re-fits its vectorizer on this one text, so
    # the feature count may not match what the loaded model expects; confirm
    # against the training pipeline.
    features = vectorize_text(text)
    return model.predict(features)
# Define interface
# One multi-line text input is passed to predict(); its return value is shown
# in a two-line text output. Flagging is turned off.
demo = gr.Interface(
    fn=predict,
    title="Text Classification Demo",
    description="This is a demo of a text classification model using Logistic Regression.",
    inputs=gr.Textbox(lines=10, placeholder='Input text here...', label="Input Text"),
    outputs=gr.Textbox(
        label="Predicted Label: Other: 1, Healthcare: 2, Technology: 3",
        lines=2,
        placeholder='Predicted label will appear here...',
    ),
    allow_flagging='never',
)

# Launch the interface when the script is executed (module-level call, as in
# the original).
demo.launch()