# wiraindrak's picture
# Update app.py
# 455960a
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, pipeline
import nltk.data
import pandas as pd
import matplotlib.pyplot as plt
# Fetch NLTK's 'punkt' resource at import time; nltk.data.load() later in
# this file reads a tokenizer from 'tokenizers/punkt/'.
nltk.download('punkt')
import gradio as gr
from gradio.mix import Parallel
# Checkpoint identifiers handed to the sentiment and NER pipelines below.
pretrained_sentiment = "w11wo/indonesian-roberta-base-sentiment-classifier"
pretrained_ner = "cahya/bert-base-indonesian-NER"

# Tokenizer/model pair used by summ_t5(); both come from the same checkpoint.
tokenizer_t5 = T5Tokenizer.from_pretrained(
    "panggi/t5-base-indonesian-summarization-cased"
)
model_t5 = T5ForConditionalGeneration.from_pretrained(
    "panggi/t5-base-indonesian-summarization-cased"
)

# Sentence splitter used by sentiment_df().
# NOTE(review): this loads the *English* Punkt model although the checkpoints
# above are named as Indonesian models -- confirm this is intended.
sentence_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

# return_all_scores=True: sentiment_analysis() iterates over one label/score
# entry per class rather than reading a single top prediction.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=pretrained_sentiment,
    tokenizer=pretrained_sentiment,
    return_all_scores=True,
)

# grouped_entities=True: ner() reads the 'entity_group' key from each result.
ner_pipeline = pipeline(
    "ner",
    model=pretrained_ner,
    tokenizer=pretrained_ner,
    grouped_entities=True,
)
def summ_t5(text):
    """Summarize ``text`` with the module-level T5 tokenizer and model.

    Args:
        text: The text to summarize.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    # Generation settings are gathered in one place so they are easy to scan.
    generation_options = dict(
        max_length=100,
        num_beams=2,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True,
        no_repeat_ngram_size=2,
        use_cache=True,
    )
    encoded = tokenizer_t5.encode(text, return_tensors='pt')
    generated = model_t5.generate(encoded, **generation_options)
    return tokenizer_t5.decode(generated[0], skip_special_tokens=True)
def sentiment_analysis(text):
    """Score ``text`` with the sentiment pipeline.

    Args:
        text: The text to classify.

    Returns:
        A dict mapping each entry's ``"label"`` to its ``"score"``, built from
        the first element of the pipeline output.
    """
    predictions = sentiment_pipeline(text)
    # Only the first element of the pipeline output is used.
    label_entries = predictions[0]
    return dict((entry["label"], entry["score"]) for entry in label_entries)
def ner(text):
    """Run the NER pipeline on ``text``.

    Each result dict is modified in place: its ``'entity_group'`` value is
    also stored under the key ``'entity'``.

    Args:
        text: The text to tag.

    Returns:
        A dict with the original ``"text"`` and the list of ``"entities"``.
    """
    entities = ner_pipeline(text)
    for entity in entities:
        # presumably the key the HighlightedText UI component reads -- verify
        entity.update(entity=entity['entity_group'])
    return dict(text=text, entities=entities)
def sentiment_df(text):
    """Classify every sentence of ``text`` and tabulate the top prediction.

    The text is split with the module-level sentence tokenizer; each sentence
    is scored by ``sentiment_analysis`` and only its highest-scoring label is
    kept (the first one wins on ties).

    Args:
        text: The text to analyse.

    Returns:
        A DataFrame with columns ``Text`` (the sentence), ``Label`` (the
        winning label) and ``Score`` (its score rounded to 3 decimals).
    """
    sentences = sentence_tokenizer.tokenize(text)

    top_labels, top_scores = [], []
    for sentence in sentences:
        label_scores = sentiment_analysis(sentence)
        # max() returns the first maximal key, matching a first-index lookup.
        best_label = max(label_scores, key=label_scores.get)
        top_labels.append(best_label)
        top_scores.append(round(label_scores[best_label], 3))

    frame = pd.DataFrame(columns=['Text', 'Label', 'Score'])
    frame['Text'] = sentences
    frame['Label'] = top_labels
    frame['Score'] = top_scores
    return frame
def run(text):
    """Run the full analysis for one piece of text.

    Args:
        text: The text to analyse.

    Returns:
        A 6-tuple, in this order:
        the summary string, the sentiment scores of the summary, the NER
        result of the summary, a matplotlib figure with a pie chart of the
        per-sentence sentiment labels, the NER result of the full text, and
        the per-sentence sentiment DataFrame.
    """
    summ_ = summ_t5(text)
    sent_ = sentiment_analysis(summ_)
    ner_ = ner(summ_)
    df_ = sentiment_df(text)
    ner_all = ner(text)

    # Number of sentences per predicted label.
    label_counts = df_.groupby(["Label"])["Text"].count()

    # Draw on an explicitly created axes instead of relying on pyplot's
    # implicit "current figure", so the pie is guaranteed to land on `fig`.
    fig, ax = plt.subplots(figsize=(6, 6))
    try:
        label_counts.plot.pie(autopct="%.1f%%", ax=ax)
    finally:
        # Unregister the figure from pyplot so repeated calls do not pile up
        # open figures; the Figure object itself is still returned.
        plt.close(fig)

    return summ_, sent_, ner_, fig, ner_all, df_
if __name__ == "__main__":
    # Build the page: a title and credit line, then two columns. The left
    # column holds the input, the trigger button and the summary-level
    # outputs; the right column holds the article-level outputs.
    with gr.Blocks() as demo:
        gr.Markdown("""<h1 style="text-align:center">News Analyzer - Indonesia</h1>""")
        gr.Markdown("\nCreator: Wira Indra Kusuma\n")
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input Text")
                # The button caption is the `value` argument; `label` does
                # not set the text shown on a Button.
                analyze_button = gr.Button(value="Analyze")
                summ_output = gr.Textbox(label="Article Summary")
                ner_output = gr.HighlightedText(label="NER Summary")
                sent_output = gr.Label(label="Sentiment Summary")
            with gr.Column():
                plot_component = gr.Plot(label="Pie Chart of Sentiments of Article")
                ner_all_output = gr.HighlightedText(label="NER Article")
                # NOTE(review): max_rows is given a (count, 'fixed') tuple --
                # confirm against the installed Gradio Dataframe signature.
                dataframe_component = gr.DataFrame(
                    type="pandas",
                    label="Dataframe",
                    max_rows=(20, 'fixed'),
                    overflow_row_behaviour='paginate',
                    wrap=True,
                )
        # The output order must match the order of the tuple returned by run().
        analyze_button.click(
            run,
            inputs=input_text,
            outputs=[
                summ_output,
                sent_output,
                ner_output,
                plot_component,
                ner_all_output,
                dataframe_component,
            ],
        )
    demo.launch()