import gradio as gr
import joblib
import pandas as pd
import re
import nltk
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
import numpy as np
nltk.download('punkt')
nltk.download('stopwords')
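# Note: newer NLTK releases may also need the 'punkt_tab' resource for
# word_tokenize; if tokenization raises a LookupError, additionally run
# nltk.download('punkt_tab').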

def text_preprocessing(df):
    """
    Clean the 'Text' column of df in place; the function returns nothing.
    """
    # Convert to lower case
    df['Text'] = df['Text'].str.lower()
    # Remove punctuation
    df['Text'] = df['Text'].apply(lambda doc: re.sub(r'[^\w\s]+', '', doc))
    # Remove stopwords
    stop_words = set(nltk.corpus.stopwords.words('english'))
    df['Text'] = df['Text'].apply(lambda doc: ' '.join(word for word in doc.split() if word not in stop_words))
    # Collapse repeated spaces
    df['Text'] = df['Text'].apply(lambda doc: re.sub(' +', ' ', doc))
    # Stem each token, then rejoin the tokens into a single string
    porter_stemmer = PorterStemmer()
    df['Text'] = df['Text'].apply(lambda doc: [porter_stemmer.stem(word) for word in word_tokenize(doc)])
    df['Text'] = df['Text'].apply(lambda words: ' '.join(words))
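    # Illustrative example (not from the original code): "The markets rallied."
    # becomes "market ralli" after the steps above.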

def predict_user_input(paragraph, tfidf, nmf, label_mapping_yp):
    # Wrap the raw paragraph in a one-row DataFrame so it can reuse text_preprocessing
    data = pd.DataFrame({'Text': [paragraph]})
    text_preprocessing(data)
    # Vectorize with the fitted TF-IDF vectorizer, then project onto the NMF topics
    tfidf_transformed = tfidf.transform(data['Text'])
    nmf_transformed = nmf.transform(tfidf_transformed)
    # Pick the dominant topic index and map it to its category label
    y_pred = np.argmax(nmf_transformed, axis=1)
    y_pred = [label_mapping_yp[y] for y in y_pred]
    return y_pred[0]
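    # Note (assumption): label_mapping_yp is expected to map NMF topic indices to
    # category names, e.g. {0: 'business', 1: 'tech', ...}; the actual mapping
    # comes from label_mapping.pkl and is not shown here.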

def process_paragraph(paragraph):
    # Load the saved TF-IDF vectorizer, NMF model and topic-to-label mapping
    tfidf = joblib.load('tfidf_vectorizer.pkl')
    nmf = joblib.load('nmf_model.pkl')
    label_mapping_yp = joblib.load('label_mapping.pkl')
    predicted_class = predict_user_input(paragraph, tfidf, nmf, label_mapping_yp)
    print(f"The predicted class for the input paragraph is: {predicted_class}")
    return predicted_class
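    # Note: the three artifacts are reloaded from disk on every call; for a
    # long-running app they could instead be loaded once at module level and reused.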

def paragraph_processing_app(paragraph):
    # Thin wrapper used as the Gradio callback
    processed_text = process_paragraph(paragraph)
    return processed_text

input_text = gr.Textbox(lines=10, label="Enter an article:")
output_text = gr.Textbox(label="Category (one of Business, Tech, Sport, Politics or Entertainment)")
gr.Interface(fn=paragraph_processing_app, inputs=input_text, outputs=output_text).launch(share=True)
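# Note: share=True creates a temporary public link when the app is run locally;
# when hosted on a Hugging Face Space the app is already publicly reachable.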