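"""News Analyzer - Indonesia.

Gradio app that summarizes an Indonesian news article with a T5 model, runs
sentiment analysis and named-entity recognition on both the summary and the
full article, and plots the per-sentence sentiment split as a pie chart.
"""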
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, pipeline
import nltk.data
import pandas as pd
import matplotlib.pyplot as plt
nltk.download('punkt')  # Punkt models are required by the sentence tokenizer loaded below
import gradio as gr
# from gradio.mix import Parallel  # unused here; gradio.mix was removed in Gradio 4.x

# Indonesian T5 summarization model
tokenizer_t5 = T5Tokenizer.from_pretrained("panggi/t5-base-indonesian-summarization-cased")
model_t5 = T5ForConditionalGeneration.from_pretrained("panggi/t5-base-indonesian-summarization-cased")

# Indonesian sentiment-classification and NER checkpoints
pretrained_sentiment = "w11wo/indonesian-roberta-base-sentiment-classifier"
pretrained_ner = "cahya/bert-base-indonesian-NER"

# Punkt sentence splitter used to break the article into sentences
sentence_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
# Pipeline that returns a score for every sentiment label of a text
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=pretrained_sentiment,
    tokenizer=pretrained_sentiment,
    return_all_scores=True
)

# NER pipeline; grouped_entities merges word pieces into whole entity spans
ner_pipeline = pipeline(
    "ner",
    model=pretrained_ner,
    tokenizer=pretrained_ner,
    grouped_entities=True
)
def summ_t5(text):
    """Summarize Indonesian text with the T5 model."""
    input_ids = tokenizer_t5.encode(text, return_tensors='pt')
    summary_ids = model_t5.generate(input_ids,
                                    max_length=100,
                                    num_beams=2,
                                    repetition_penalty=2.5,
                                    length_penalty=1.0,
                                    early_stopping=True,
                                    no_repeat_ngram_size=2,
                                    use_cache=True)
    summary_text = tokenizer_t5.decode(summary_ids[0], skip_special_tokens=True)
    return summary_text
def sentiment_analysis(text):
    """Return a {label: score} dict covering every sentiment label."""
    output = sentiment_pipeline(text)
    return {elm["label"]: elm["score"] for elm in output[0]}


def ner(text):
    """Run NER and reshape the result into the format gr.HighlightedText expects."""
    output = ner_pipeline(text)
    for elm in output:
        elm['entity'] = elm['entity_group']  # HighlightedText looks for an 'entity' key
    return {"text": text, "entities": output}
def sentiment_df(text):
    """Build a per-sentence sentiment DataFrame with Text, Label and Score columns."""
    df = pd.DataFrame(columns=['Text', 'Label', 'Score'])
    text_list = sentence_tokenizer.tokenize(text)
    result = [sentiment_analysis(sentence) for sentence in text_list]
    labels = []
    scores = []
    for pred in result:
        # keep only the highest-scoring label for each sentence
        idx = list(pred.values()).index(max(list(pred.values())))
        labels.append(list(pred.keys())[idx])
        scores.append(round(list(pred.values())[idx], 3))
    df['Text'] = text_list
    df['Label'] = labels
    df['Score'] = scores
    return df
def run(text):
    """Summarize the article, then analyze sentiment and entities for the summary and the full text."""
    summ_ = summ_t5(text)
    sent_ = sentiment_analysis(summ_)
    ner_ = ner(summ_)
    df_ = sentiment_df(text)
    ner_all = ner(text)
    fig = plt.figure()
    df_.groupby(["Label"])["Text"].count().plot.pie(autopct="%.1f%%", figsize=(6, 6))
    return summ_, sent_, ner_, fig, ner_all, df_
if __name__ == "__main__":
    with gr.Blocks() as demo:
        gr.Markdown("""<h1 style="text-align:center">News Analyzer - Indonesia</h1>""")
        gr.Markdown(
            """
            Creator: Wira Indra Kusuma
            """
        )
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input Text")
                analyze_button = gr.Button("Analyze")  # button text is set via its value; label= is not displayed on a Button
                summ_output = gr.Textbox(label="Article Summary")
                ner_output = gr.HighlightedText(label="NER Summary")
                sent_output = gr.Label(label="Sentiment Summary")
            with gr.Column():
                plot_component = gr.Plot(label="Pie Chart of Sentiments of Article")
                ner_all_output = gr.HighlightedText(label="NER Article")
                dataframe_component = gr.DataFrame(type="pandas",
                                                   label="Dataframe",
                                                   max_rows=20,  # Gradio 3.x expects an int here, not (20, 'fixed')
                                                   overflow_row_behaviour='paginate',
                                                   wrap=True)
        analyze_button.click(run,
                             inputs=input_text,
                             outputs=[summ_output, sent_output, ner_output,
                                      plot_component, ner_all_output, dataframe_component])
    demo.launch()
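# Note: written against the Gradio 3.x Blocks API. Approximate (unpinned) dependencies:
# gradio, transformers, torch, sentencepiece, nltk, pandas, matplotlib.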