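"""Streamlit text-classification app.

Loads a trained Keras model together with its preprocessing artifacts
(tokenizer, label encoder, padding length, stop words) and predicts a
main / sub / lowest category for user-entered text.
"""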
import pickle
import re

import numpy as np
import streamlit as st
import nltk
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Download the WordNet data required by the lemmatizer
nltk.download('wordnet')
# Load the trained Keras model
model = load_model('best_model.keras')

# Load the fitted tokenizer
with open('tokenizer.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

# Load the label encoder
with open('label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

# Load max_length
with open('max_length.pkl', 'rb') as f:
    max_length = pickle.load(f)

# Load stop words
with open('stop_words.pkl', 'rb') as f:
    stop_words = pickle.load(f)
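
# Note: the pickled artifacts above (tokenizer.pkl, label_encoder.pkl,
# max_length.pkl, stop_words.pkl) are assumed to have been written out by the
# training script, e.g. with pickle.dump(tokenizer, open('tokenizer.pkl', 'wb')).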

lemmatizer = WordNetLemmatizer()


def preprocess_text(text):
    """Lowercase, strip non-letters, drop stop words, and lemmatize."""
    text = str(text).lower()
    text = re.sub(r'[^a-z\s]', '', text)  # keep only letters and whitespace
    words = [word for word in text.split() if word not in stop_words]
    words = [lemmatizer.lemmatize(word) for word in words]
    return ' '.join(words)
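
# Illustrative example (exact output depends on the pickled stop-word list):
#   preprocess_text("The cats are running!")  ->  "cat running"
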
def classify_text(text):
    """Predict the main, sub, and lowest category for a piece of text."""
    text = preprocess_text(text)
    seq = tokenizer.texts_to_sequences([text])
    # Pad (or truncate) to the sequence length the model was trained with
    padded_seq = pad_sequences(seq, maxlen=max_length, padding='post')

    prediction = model.predict(padded_seq)
    predicted_label_index = np.argmax(prediction, axis=1)[0]
    predicted_label = label_encoder.inverse_transform([predicted_label_index])[0]

    # Labels are encoded as "main|sub|lowest"
    categories = predicted_label.split('|')
    if len(categories) == 3:
        main_category, sub_category, lowest_category = categories
    else:
        main_category = sub_category = lowest_category = "Unknown"
    return main_category, sub_category, lowest_category


# Streamlit UI
def main():
    st.title("Text Classifier")
    
    # Text input
    user_input = st.text_input("Enter text to classify")
    
    if st.button("Classify"):
        if user_input:
            # Classify the input text and show the predicted hierarchy
            main_category, sub_category, lowest_category = classify_text(user_input)
            st.success(f"Main Category: {main_category}, Sub Category: {sub_category}, Lowest Category: {lowest_category}")
        else:
            st.warning("Please enter some text.")

if __name__ == '__main__':
    main()
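
# To run locally (assuming this script is saved as app.py and the model /
# pickle artifacts live in the same directory):
#   streamlit run app.py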