|
import streamlit as st |
|
from transformers import pipeline |
|
import torch |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from transformers import BertForSequenceClassification, BertTokenizer |
|
|
|
# Hugging Face repository id shared by the classifier weights and the tokenizer.
_MODEL_ID = "RuudVelo/dutch_news_clf_bert_finetuned"


def _cache_resource(loader):
    """Wrap *loader* with Streamlit's cache for unserializable resources.

    Uses ``st.cache_resource`` when the installed Streamlit provides it and
    falls back to the legacy ``st.cache(allow_output_mutation=True)`` otherwise.
    """
    decorator = getattr(st, "cache_resource", None)
    if decorator is None:
        decorator = st.cache(allow_output_mutation=True)
    return decorator(loader)


@_cache_resource
def _load_classifier():
    """Return ``(model, tokenizer)`` loaded from ``_MODEL_ID``."""
    return (
        BertForSequenceClassification.from_pretrained(_MODEL_ID),
        BertTokenizer.from_pretrained(_MODEL_ID),
    )


# Streamlit re-executes this script on every widget interaction; going through
# the cached loader avoids calling from_pretrained again on each rerun.
model, tokenizer = _load_classifier()
|
|
|
|
|
|
|
# Page header and input widget. Elements are emitted in this order: title,
# one-line description, cover image, then the article text box.
_PAGE_TITLE = "Dutch news article classification"
_PAGE_INTRO = "This app classifies a Dutch news article into one of 9 pre-defined* article categories"
_COVER_IMAGE_PATH = 'dataset-cover_articles.jpeg'
_INPUT_PROMPT = 'Please type/copy/paste text of the Dutch article and click Submit'

st.title(_PAGE_TITLE)
st.write(_PAGE_INTRO)
st.image(_COVER_IMAGE_PATH, width=150)

# `text` is the value returned by the text area; it is what gets tokenized
# when the Submit button is pressed.
text = st.text_area(_INPUT_PROMPT)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Classify the entered article and plot the per-category probabilities when
# the user presses Submit.
if st.button('Submit'):
    if not text.strip():
        # Nothing to classify: ask for input instead of scoring an empty string.
        st.warning('Please enter the text of a Dutch article before clicking Submit')
    else:
        with st.spinner('Generating a response...'):
            # Truncate to the model's position-embedding limit; without this an
            # article longer than that limit makes the forward pass fail.
            encoding = tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=model.config.max_position_embeddings,
            )

            # Inference only, so skip building the autograd graph.
            with torch.no_grad():
                outputs = model(**encoding)
            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)

            # NOTE(review): this order is assumed to match the model's class
            # indices -- confirm against model.config.id2label.
            labels_plot = [
                'Binnenland', 'Buitenland', 'Cultuur & Media', 'Economie',
                'Koningshuis', 'Opmerkelijk', 'Politiek', 'Regionaal nieuws',
                'Tech',
            ]
            # Single input, so row 0 holds its probability for every category.
            probs_plot = probabilities[0].cpu().numpy()

            fig = plt.figure()
            try:
                ax = fig.add_axes([0, 0, 1, 1])
                ax.barh(labels_plot, probs_plot)
                ax.set_title("Predicted article category probability")
                ax.set_xlabel("Probability")
                ax.set_ylabel("Predicted category")
                st.pyplot(fig)
            finally:
                # pyplot keeps every figure it creates registered until it is
                # closed; release it so figures do not pile up across reruns.
                plt.close(fig)
|
|
|
|
|
|
|
|
|
|
|
# Footnote for the asterisk in the intro line. st.write renders strings as
# Markdown, where a leading "* " would start a bullet list, so the asterisk is
# escaped to show up as a literal footnote marker.
st.write("\\* The predefined categories are Binnenland, Buitenland, Cultuur & Media, Economie, Koningshuis, Opmerkelijk, Politiek, Regionaal nieuws en Tech")

# Pointer to the training data.
st.write("The model for this app has been trained using data from Dutch news articles published by NOS. More information regarding the dataset can be found at https://www.kaggle.com/maxscheijen/dutch-news-articles")

# NOTE(review): this URL names "dutch_news_classifier_bert_finetuned" while the
# model is loaded from "RuudVelo/dutch_news_clf_bert_finetuned" -- confirm the
# link resolves to the intended model page.
st.write('The model performance details can be found at https://huggingface.co/RuudVelo/dutch_news_classifier_bert_finetuned')
|
|