from flask import Flask, request, render_template
import pickle5 as pkl
from preprocessing import clean_text, clean_stopword, preprocess_text
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re
import string
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

nltk.download('stopwords')
nltk.download('punkt_tab')

# Initialize Flask app
app = Flask(__name__)

# Minimum cosine similarity for two sentences to be linked in the graph.
SIMILARITY_THRESHOLD = 0.1
# Number of top-ranked sentences kept in the summary.
TOP_N_SENTENCES = 3


def process_input(text):
    """Summarize *text* by ranking its sentences with closeness centrality.

    The text is preprocessed, split into sentences, vectorized with a
    pre-fitted TF-IDF vectorizer, and turned into a directed similarity
    graph. The top-ranked sentences (by closeness centrality) are joined
    into the returned summary string.

    :param text: raw input text to summarize.
    :return: summary string — the top sentences concatenated with spaces,
             or "" when the input produces no sentences.
    """
    prep_result = preprocess_text(text)
    kalimat = nltk.sent_tokenize(prep_result)
    if not kalimat:
        # Guard: empty/whitespace input yields no sentences to rank.
        return ""

    # Load the pre-fitted vectorizer; the context manager closes the file
    # (the original leaked the handle from a bare open()).
    with open('./tfidf_vectorizer.pkl', 'rb') as f:
        tfidf_vectorizer = pkl.load(f)

    # BUG FIX: vectorize the *current* input's sentences instead of loading
    # the pickled training matrix ('tfidf_preprocessing.pkl'). The pickled
    # matrix's row count only matches the text it was originally fitted on,
    # so indexing it with range(len(kalimat)) crashed (or misindexed
    # kalimat[node]) for any other input.
    tfidf_matrix = tfidf_vectorizer.transform(kalimat)
    # cosine_similarity accepts the sparse matrix directly; no need to
    # round-trip through a dense DataFrame.
    cossim = cosine_similarity(tfidf_matrix, tfidf_matrix)

    # Build a directed graph: one node per sentence, an edge wherever the
    # pairwise similarity clears the threshold.
    G = nx.DiGraph()
    G.add_nodes_from(range(len(cossim)))
    for i in range(len(cossim)):
        for j in range(len(cossim)):
            if i != j and cossim[i][j] > SIMILARITY_THRESHOLD:
                G.add_edge(i, j)

    # Rank sentences by closeness centrality, highest first.
    closeness = nx.closeness_centrality(G)
    ranked = sorted(closeness.items(), key=lambda item: item[1], reverse=True)

    # BUG FIX: return the accumulated summary of the top sentences; the
    # original built `ringkasan_closeness` but then returned only
    # `top_sentence` — the *last* sentence of the top 3.
    top_sentences = [kalimat[node] for node, _ in ranked[:TOP_N_SENTENCES]]
    return " ".join(top_sentences)


@app.route('/', methods=['GET', 'POST'])
def summarize():
    """Render the input form; on POST, also render the generated summary."""
    result = None
    if request.method == 'POST':
        # .get avoids Flask's automatic 400 when the field is missing.
        text = request.form.get('text', '')
        if text:
            result = process_input(text)
    return render_template('index.html', result=result)


if __name__ == '__main__':
    app.run(debug=True)