from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline
import nltk
import nltk.data

import pandas as pd
import matplotlib.pyplot as plt

import gradio as gr

# Indonesian model checkpoints: T5 summarizer, RoBERTa sentiment classifier,
# and BERT NER tagger.
tokenizer_t5 = T5Tokenizer.from_pretrained("panggi/t5-base-indonesian-summarization-cased")
model_t5 = T5ForConditionalGeneration.from_pretrained("panggi/t5-base-indonesian-summarization-cased")

pretrained_sentiment = "w11wo/indonesian-roberta-base-sentiment-classifier"
pretrained_ner = "cahya/bert-base-indonesian-NER"

# Punkt sentence splitter; NLTK ships no Indonesian model, so the English one
# is used. Its data may need to be downloaded on first run.
nltk.download('punkt', quiet=True)
sentence_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
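# For example:
#   sentence_tokenizer.tokenize("Kalimat satu. Kalimat dua.")
#   -> ["Kalimat satu.", "Kalimat dua."]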

# return_all_scores=True returns a score for every label rather than just the top one.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=pretrained_sentiment,
    tokenizer=pretrained_sentiment,
    return_all_scores=True
)

# grouped_entities=True merges wordpiece tokens back into whole entity spans.
ner_pipeline = pipeline(
    "ner",
    model=pretrained_ner,
    tokenizer=pretrained_ner,
    grouped_entities=True
)


def summ_t5(text):
    # Summarize with beam search, capping output at 100 tokens and penalizing
    # repetition (repetition_penalty, no_repeat_ngram_size).
    input_ids = tokenizer_t5.encode(text, return_tensors='pt')
    summary_ids = model_t5.generate(input_ids,
                                    max_length=100,
                                    num_beams=2,
                                    repetition_penalty=2.5,
                                    length_penalty=1.0,
                                    early_stopping=True,
                                    no_repeat_ngram_size=2,
                                    use_cache=True)
    summary_text = tokenizer_t5.decode(summary_ids[0], skip_special_tokens=True)
    return summary_text
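
# Illustrative usage (hypothetical input; the exact wording of the summary
# will vary with model version):
#   summ_t5("Pemerintah mengumumkan kebijakan ekonomi baru pada hari Senin. "
#           "Kebijakan tersebut bertujuan menekan inflasi.")
#   -> a short Indonesian summary string of at most ~100 tokens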
    
def sentiment_analysis(text):
    # Return a {label: score} dict covering all sentiment classes.
    output = sentiment_pipeline(text)
    return {elm["label"]: elm["score"] for elm in output[0]}
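
# Example output shape (scores illustrative, not actual model output):
#   sentiment_analysis("Filmnya bagus sekali!")
#   -> {'positive': 0.97, 'neutral': 0.02, 'negative': 0.01}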

def ner(text):
    # gr.HighlightedText expects an "entity" key, so mirror the pipeline's
    # "entity_group" field into it.
    output = ner_pipeline(text)
    for elm in output:
        elm['entity'] = elm['entity_group']
    return {"text": text, "entities": output}
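
# Example output shape (entity labels and grouping are illustrative):
#   ner("Joko Widodo berkunjung ke Surabaya.")
#   -> {"text": "...", "entities": [{"entity": "PER", "word": "joko widodo", ...},
#                                   {"entity": "LOC", "word": "surabaya", ...}]}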
    
def sentiment_df(text):
    # Split the article into sentences and classify each one, keeping only
    # the highest-scoring label per sentence.
    text_list = sentence_tokenizer.tokenize(text)
    result = [sentiment_analysis(sentence) for sentence in text_list]
    labels = []
    scores = []
    for pred in result:
        idx = list(pred.values()).index(max(pred.values()))
        labels.append(list(pred.keys())[idx])
        scores.append(round(list(pred.values())[idx], 3))
    df = pd.DataFrame({'Text': text_list, 'Label': labels, 'Score': scores})
    return df
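
# The resulting frame has one row per sentence (values illustrative):
#      Text              Label     Score
#   0  Kalimat satu ...  positive  0.953
#   1  Kalimat dua ...   negative  0.812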
    
def run(text):
    # Full analysis: summary, sentiment and NER of the summary, per-sentence
    # sentiment over the original article, and NER over the original article.
    summ_ = summ_t5(text)
    sent_ = sentiment_analysis(summ_)
    ner_ = ner(summ_)
    df_ = sentiment_df(text)
    ner_all = ner(text)
    fig = plt.figure()
    df_.groupby(["Label"])["Text"].count().plot.pie(autopct="%.1f%%", figsize=(6, 6))
    return summ_, sent_, ner_, fig, ner_all

if __name__ == "__main__":
    with gr.Blocks() as demo:
        gr.Markdown("""<h1 style="text-align:center">News Analyzer - Indonesia</h1>""")

        gr.Markdown(
            """
            Creator: wiraindrak
            """
            )
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input Text")
                analyze_button = gr.Button("Analyze")
                
                summ_output = gr.Textbox(label="Article Summary")
                ner_output = gr.HighlightedText(label="NER Summary")
                sent_output = gr.Textbox(label="Sentiment Summary")
                
            with gr.Column():
                plot_component = gr.Plot(label="Pie Chart of Sentiments")
                ner_all_output = gr.HighlightedText(label="NER Article")
                
        analyze_button.click(run,
                             inputs=input_text,
                             outputs=[summ_output, sent_output, ner_output,
                                      plot_component, ner_all_output])
    demo.launch()