import re
import string
from functools import lru_cache

import joblib
import nltk
from flask import Flask, request, render_template
from nltk.corpus import stopwords
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from sklearn.feature_extraction.text import TfidfVectorizer
from tqdm import tqdm

# Fetch the NLTK stop-word corpus only when it is not already installed, so
# that a normal startup does not require network access.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

app = Flask(__name__)
# app.config['APPLICATION_ROOT'] = '/klasifikasi-berita'

# Deletes every ASCII punctuation character; built once instead of per call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


# Load pre-trained model and vectorizer
def load_model(model_path="logistic_regression_model.pkl",
               vectorizer_path="content_vectorizer.pkl"):
    """Load the serialized classifier and text vectorizer from disk.

    Args:
        model_path: Path of the joblib file holding the classifier.
        vectorizer_path: Path of the joblib file holding the vectorizer.

    Returns:
        A ``(model, vectorizer)`` tuple.
    """
    # NOTE: joblib.load unpickles the file and can therefore execute arbitrary
    # code; only point these paths at files from a trusted source.
    model = joblib.load(model_path)
    vectorizer = joblib.load(vectorizer_path)
    return model, vectorizer


model, vectorizer = load_model()  # Load model and vectorizer once on startup


@lru_cache(maxsize=1)
def _get_stop_words():
    """Return the Indonesian stop words as a set, built on first use."""
    return frozenset(stopwords.words('indonesian'))


@lru_cache(maxsize=1)
def _get_stemmer():
    """Return a shared Sastrawi stemmer, created on first use."""
    return StemmerFactory().create_stemmer()


# Function to clean the input string
def clean_string(text):
    """Normalize raw text before stemming.

    The steps run in this fixed order: lowercase, replace line breaks with
    spaces, delete ASCII punctuation, delete digits, collapse whitespace,
    replace runs of non-ASCII characters with a space, and finally drop
    Indonesian stop words.

    Args:
        text: The raw input string.

    Returns:
        The remaining words joined by single spaces.
    """
    text = text.lower()
    text = re.sub(r'\n', ' ', text)
    text = text.translate(_PUNCTUATION_TABLE)
    text = re.sub(r'\d+', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)
    stop_words = _get_stop_words()
    # split()/join also removes the extra spaces introduced by the previous step.
    return ' '.join(word for word in text.split() if word not in stop_words)


# Function to stem the input string using Sastrawi
def sastrawi_stemmer(text):
    """Stem every whitespace-separated word of ``text`` with Sastrawi.

    Args:
        text: The cleaned input string.

    Returns:
        The stemmed words joined by single spaces.
    """
    stemmer = _get_stemmer()
    # Words are stemmed one at a time, matching the original per-word behavior.
    return ' '.join(stemmer.stem(word) for word in text.split())


# Function to classify news article
def classify_news(text, model, vectorizer):
    """Preprocess ``text`` and classify it.

    Args:
        text: The raw news text.
        model: Classifier exposing ``predict`` and ``predict_proba``.
        vectorizer: Fitted vectorizer exposing ``transform``.

    Returns:
        A tuple of the first element of ``model.predict`` and the first row
        of ``model.predict_proba`` for the vectorized text.
    """
    # Clean and preprocess the text
    cleaned_text = clean_string(text)
    stemmed_text = sastrawi_stemmer(cleaned_text)

    # Vectorize the text (the vectorizer expects a sequence of documents)
    text_vectorized = vectorizer.transform([stemmed_text])

    # Get prediction and probabilities
    prediction = model.predict(text_vectorized)
    prediction_proba = model.predict_proba(text_vectorized)
    return prediction[0], prediction_proba[0]
# Maps the label returned by the classifier to the name shown on the page.
_CATEGORY_NAMES = {0: "Ekonomi", 1: "Politik"}


# Flask route for the main page
@app.route("/", methods=["GET", "POST"])
def home():
    """Render the main page and, on POST, classify the submitted text.

    The text is read from the ``news_text`` form field. Classification is
    skipped when the field is missing or contains only whitespace, in which
    case the page is rendered without a result.

    Returns:
        The rendered ``index.html`` template, given ``category_name``,
        ``probabilities`` and ``user_input``.
    """
    category_name = None
    probabilities = None
    user_input = ""

    if request.method == "POST":
        # A missing field is treated as empty input instead of aborting with
        # a 400 Bad Request.
        user_input = request.form.get("news_text", "")
        if user_input.strip():
            # Classify the text
            category, probabilities = classify_news(user_input, model, vectorizer)
            # Map category to string; labels outside the table stay None.
            category_name = _CATEGORY_NAMES.get(category)

    return render_template(
        "index.html",
        category_name=category_name,
        probabilities=probabilities,
        user_input=user_input,
    )


# Run the Flask app
if __name__ == "__main__":
    # NOTE: debug=True enables the interactive Werkzeug debugger, which allows
    # arbitrary code execution; use it for local development only.
    app.run(debug=True)