Spaces:
Runtime error
Runtime error
File size: 3,886 Bytes
ee90915 71dcf62 9cd47c7 ee90915 2f9d0c2 0fd3cfb 4b9ceb2 b6adfce 0fd3cfb ee90915 3995371 ee90915 b020e81 0fd3cfb d8843f7 3995371 ee90915 74899d4 ee90915 d3f7143 ee90915 b980ccf 17aedb0 ee90915 2f9d0c2 0fd3cfb 1b9db30 ee90915 34b3fd1 b309611 ee90915 b309611 ee90915 17aedb0 2f9d0c2 1b9db30 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, pipeline
import nltk.data
import pandas as pd
import matplotlib.pyplot as plt
nltk.download('punkt')
import gradio as gr
from gradio.mix import Parallel
# Hugging Face checkpoint ids for the classification pipelines.
pretrained_sentiment = "w11wo/indonesian-roberta-base-sentiment-classifier"
pretrained_ner = "cahya/bert-base-indonesian-NER"

# Tokenizer/model pair used by summ_t5() for summarization.
tokenizer_t5 = T5Tokenizer.from_pretrained(
    "panggi/t5-base-indonesian-summarization-cased"
)
model_t5 = T5ForConditionalGeneration.from_pretrained(
    "panggi/t5-base-indonesian-summarization-cased"
)

# Punkt sentence splitter; note that the English model is the one loaded here.
sentence_tokenizer = nltk.data.load("tokenizers/punkt/english.pickle")

# sentiment_analysis() reads a label/score entry per class from this
# pipeline's output, hence return_all_scores=True.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=pretrained_sentiment,
    tokenizer=pretrained_sentiment,
    return_all_scores=True,
)

# ner() reads the "entity_group" key from each item this pipeline returns,
# which is what grouped_entities=True provides.
ner_pipeline = pipeline(
    "ner",
    model=pretrained_ner,
    tokenizer=pretrained_ner,
    grouped_entities=True,
)
def summ_t5(text):
    """Summarize ``text`` with the module-level T5 tokenizer and model.

    Args:
        text: The article text to summarize.

    Returns:
        The decoded summary string, with special tokens stripped.
    """
    generation_options = {
        "max_length": 100,
        "num_beams": 2,
        "repetition_penalty": 2.5,
        "length_penalty": 1.0,
        "early_stopping": True,
        "no_repeat_ngram_size": 2,
        "use_cache": True,
    }
    encoded = tokenizer_t5.encode(text, return_tensors="pt")
    generated = model_t5.generate(encoded, **generation_options)
    # Only the first generated sequence is decoded and returned.
    return tokenizer_t5.decode(generated[0], skip_special_tokens=True)
def sentiment_analysis(text):
    """Run the sentiment pipeline on ``text`` and return its scores by label.

    Args:
        text: The string to classify.

    Returns:
        A dict mapping each label in the pipeline's first result to its score.
    """
    # The pipeline result is indexed at 0 to get the per-label entries
    # for the single input string.
    per_label = sentiment_pipeline(text)[0]
    scores_by_label = {}
    for entry in per_label:
        scores_by_label[entry["label"]] = entry["score"]
    return scores_by_label
def ner(text):
    """Run the NER pipeline on ``text`` and package the result with the text.

    Each item returned by the pipeline gets an ``"entity"`` key that copies
    its ``"entity_group"`` value (the items are updated in place).

    Args:
        text: The string to tag.

    Returns:
        A dict with keys ``"text"`` (the input) and ``"entities"`` (the
        pipeline output list).
    """
    spans = ner_pipeline(text)
    for span in spans:
        span.update(entity=span["entity_group"])
    return dict(text=text, entities=spans)
def sentiment_df(text):
    """Build a per-sentence sentiment table for ``text``.

    The text is split with the module-level sentence tokenizer, each sentence
    is scored with ``sentiment_analysis``, and the highest-scoring label is
    kept for each one.

    Args:
        text: The article text to analyze.

    Returns:
        A DataFrame with columns ``Text`` (sentence), ``Label`` (top label)
        and ``Score`` (top score rounded to 3 decimals).
    """
    sentences = sentence_tokenizer.tokenize(text)

    top_labels = []
    top_scores = []
    for sentence in sentences:
        prediction = sentiment_analysis(sentence)
        # max() returns the first key with the highest score, matching a
        # first-occurrence lookup of the maximum value.
        best = max(prediction, key=prediction.get)
        top_labels.append(best)
        top_scores.append(round(prediction[best], 3))

    df = pd.DataFrame(columns=["Text", "Label", "Score"])
    df["Text"] = sentences
    df["Label"] = top_labels
    df["Score"] = top_scores
    return df
def run(text):
    """Run the full analysis for one article and return all UI outputs.

    Args:
        text: The article text entered by the user.

    Returns:
        A 6-tuple, in the order the UI wires its outputs:
        ``(summary, summary_sentiment, summary_ner, pie_figure,
        article_ner, sentence_sentiment_df)``.
    """
    summ_ = summ_t5(text)
    sent_ = sentiment_analysis(summ_)
    ner_ = ner(summ_)
    df_ = sentiment_df(text)
    ner_all = ner(text)

    # Draw on an explicit Axes so the chart is guaranteed to land on the
    # figure that is returned, rather than relying on pyplot's current axes.
    fig, ax = plt.subplots(figsize=(6, 6))
    # The original referenced an undefined name `df` here and in the return
    # statement; the sentence table is `df_`.
    if not df_.empty:
        # Skip plotting when no sentences were found; there is nothing to
        # chart and the figure is returned blank.
        df_.groupby(["Label"])["Text"].count().plot.pie(autopct="%.1f%%", ax=ax)
    # Deregister the figure from pyplot so figures do not accumulate across
    # requests; the Figure object itself remains usable by the caller.
    plt.close(fig)
    return summ_, sent_, ner_, fig, ner_all, df_
if __name__ == "__main__":
    # Build the two-column UI: inputs and summary-level results on the left,
    # article-level results on the right.
    with gr.Blocks() as demo:
        gr.Markdown("""<h1 style="text-align:center">News Analyzer - Indonesia</h1>""")
        gr.Markdown("\nCreator: wiraindrak\n")
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input Text")
                # A button's caption is its `value`; `label` is not the
                # caption and is not accepted by newer Gradio releases.
                analyze_button = gr.Button(value="Analyze")
                summ_output = gr.Textbox(label="Article Summary")
                ner_output = gr.HighlightedText(label="NER Summary")
                sent_output = gr.Textbox(label="Sentiment Summary")
            with gr.Column():
                plot_component = gr.Plot(label="Pie Chart of Sentiments")
                ner_all_output = gr.HighlightedText(label="NER Article")
                # `max_rows` takes an int; the (count, "fixed") tuple form
                # belongs to `row_count`, not `max_rows`.
                dataframe_component = gr.DataFrame(
                    type="pandas",
                    label="Dataframe",
                    max_rows=20,
                    overflow_row_behaviour="paginate",
                    wrap=True,
                )
        # Output order must match the tuple returned by run().
        analyze_button.click(
            run,
            inputs=input_text,
            outputs=[
                summ_output,
                sent_output,
                ner_output,
                plot_component,
                ner_all_output,
                dataframe_component,
            ],
        )
    demo.launch()