# Hosting-page header captured with the file ("Spaces: Sleeping"); it is not
# part of the program.
from flask import Flask, request, render_template | |
import pickle5 as pkl | |
from preprocessing import clean_text, clean_stopword, preprocess_text | |
import nltk | |
from nltk.corpus import stopwords | |
from nltk.tokenize import word_tokenize | |
import re | |
import string | |
import pandas as pd | |
import networkx as nx | |
import matplotlib.pyplot as plt | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.metrics.pairwise import cosine_similarity | |
# Initialize Flask app
app = Flask(__name__)

# Download the NLTK resources this module relies on at import time:
# the 'stopwords' corpus and the 'punkt_tab' tokenizer data.
for _resource in ('stopwords', 'punkt_tab'):
    nltk.download(_resource)
def process_input(text):
    """Build an extractive summary of *text* from its most central sentences.

    The text is preprocessed, split into sentences, and each sentence is
    turned into a TF-IDF vector with the fitted vectorizer stored in
    ``./tfidf_vectorizer.pkl``. Sentences whose cosine similarity exceeds
    0.1 are linked in a directed graph, and the three sentences with the
    highest closeness centrality are joined (best first) into the summary.

    Args:
        text: Raw input text to summarize.

    Returns:
        The summary as a single string: up to three sentences separated by
        a space, or an empty string when no sentence could be extracted.
    """
    prep_result = preprocess_text(text)
    # NOTE(review): sentences are split from the *preprocessed* text; if
    # preprocess_text removes sentence-ending punctuation this yields a
    # single sentence -- confirm against the preprocessing module.
    kalimat = nltk.sent_tokenize(prep_result)
    if not kalimat:
        # Nothing to rank; avoids vectorizing an empty collection.
        return ""

    # The pickle is a local, trusted artifact; never point this at
    # user-supplied files. The context manager closes the handle promptly.
    with open('./tfidf_vectorizer.pkl', 'rb') as vectorizer_file:
        tfidf_vectorizer = pkl.load(vectorizer_file)

    # Vectorize the sentences of THIS input. The previous version ranked the
    # rows of a precomputed matrix loaded from disk, so the graph ignored the
    # input and its node ids did not correspond to entries of `kalimat`.
    tfidf = tfidf_vectorizer.transform(kalimat)
    cossim = cosine_similarity(tfidf, tfidf)

    sentence_count = len(kalimat)
    G = nx.DiGraph()
    G.add_nodes_from(range(sentence_count))
    for i in range(sentence_count):
        for j in range(sentence_count):
            # Skip self-similarity; link only sufficiently similar sentences.
            if i != j and cossim[i][j] > 0.1:
                G.add_edge(i, j)

    closeness = nx.closeness_centrality(G)
    sorted_closeness = sorted(
        closeness.items(), key=lambda item: item[1], reverse=True
    )
    print(sorted_closeness)

    # Return the whole summary; the previous version returned only the last
    # of the selected sentences and discarded the assembled summary.
    ringkasan_closeness = " ".join(
        kalimat[node] for node, _ in sorted_closeness[:3]
    )
    print(ringkasan_closeness)
    return ringkasan_closeness
# Register the view; without a route the app has no URL rules and every
# request is answered with 404.
@app.route('/', methods=['GET', 'POST'])
def summarize():
    """Render the summarizer page and, on POST, the summary of the form text.

    On a POST request the ``text`` form field is read and, when it is
    non-empty, passed to ``process_input``. The ``index.html`` template is
    rendered with ``result`` set to that output, or ``None`` when nothing
    was summarized (GET request or empty text).

    Returns:
        The rendered ``index.html`` response.
    """
    result = None
    if request.method == 'POST':
        text = request.form['text']
        if text:
            result = process_input(text)
    return render_template('index.html', result=result)
if __name__ == '__main__':
    import os

    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary code, so debug mode is opt-in (FLASK_DEBUG=1) instead of
    # being hard-coded on.
    app.run(debug=os.environ.get('FLASK_DEBUG') == '1')