Mongolian-GPT2 / app.py
bayartsogt's picture
get rid of state + some intro
3e5d9ea
raw
history blame
2.01 kB
import re
import streamlit as st
from wordcloud import WordCloud
from googletrans import Translator
from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline
from enums import MODEL_NAME, MESSAGES, DESCRIPTION
def iso2lang(iso):
    """Return the entry stored under ``iso`` in ``MESSAGES["iso"]``.

    Passed as ``format_func`` to the language radio button, so the value
    returned here is what the widget shows for each option.
    Raises ``KeyError`` if ``iso`` has no entry in the table.
    """
    labels = MESSAGES["iso"]
    return labels[iso]
def simple_clean(text: str) -> str:
    """Lower-case ``text`` and blank out a fixed set of punctuation marks.

    Each of ``! @ # $ . , ? -`` and the newline character is replaced by a
    single space (one space per removed character, so the length of the
    string is preserved). All other characters, including digits and
    existing spaces, are kept as they are.

    The hyphen is deliberately placed last in the character class: written
    as ``\\n-?`` it is parsed as a *range* from newline to ``?``, which
    silently swallows digits, quotes, brackets and more.
    """
    return re.sub(r'[!@#$.,\n?-]', ' ', text.lower())
def load_tokenizer():
    """Build the tokenizer for ``MODEL_NAME`` with ``AutoTokenizer.from_pretrained``."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return tokenizer
@st.cache(allow_output_mutation=True)
def load_model():
    """Load the language model for ``MODEL_NAME``.

    Wrapped in ``st.cache`` with ``allow_output_mutation=True`` so the
    returned object is reused through Streamlit's cache instead of being
    rebuilt on every call.
    """
    model = AutoModelWithLMHead.from_pretrained(MODEL_NAME)
    return model
def load_pipe():
    """Assemble a ``"text-generation"`` pipeline from the model and tokenizer.

    The model is loaded first, then the tokenizer; both are handed to
    ``transformers.pipeline`` as keyword arguments.
    """
    # Dict literals evaluate in order, so the model is still loaded before
    # the tokenizer.
    components = {
        "model": load_model(),
        "tokenizer": load_tokenizer(),
    }
    return pipeline("text-generation", **components)
# ---------------------------------------------------------------------- #
# Page body: pick a language, load the pipeline, generate text from the
# user's prompt, then draw a word cloud of the generated text.
st.write(DESCRIPTION)
lang = st.radio('Хэл / Language', ('mn', 'en'), format_func=iso2lang)
translator = Translator()

# The same spinner caption is shown for every slow step below.
loading_caption = MESSAGES["loading_text"][lang]

with st.spinner(loading_caption):
    pipe = load_pipe()
st.success(MESSAGES["success_model_load"][lang])

input_label = MESSAGES["input_description"][lang]
input_default = MESSAGES["input_default"][lang]
text = st.text_input(input_label, input_default)

with st.spinner(loading_caption):
    if lang == "en":
        # The pipeline is fed Mongolian text: translate the English prompt
        # to Mongolian, generate, then translate the output back.
        text = translator.translate(text, src='en', dest='mn').text
        generations = pipe(text)
        result = generations[0]['generated_text']
        result_en = translator.translate(result, src='mn', dest='en').text
        st.write(f"*Translated:* {result_en}")
        st.write(f"> *Original:* {result}")
        st.warning('Translation is done by [`googletrans`](https://github.com/ssut/py-googletrans). Please check out the usage. https://github.com/ssut/py-googletrans#note-on-library-usage')
    elif lang == "mn":
        # Mongolian prompt: generate directly, no translation step.
        generations = pipe(text)
        result = generations[0]['generated_text']
        st.write(result)

st.write("### WordCloud:")
with st.spinner(loading_caption):
    # The cloud is always built from the untranslated generated text.
    cloud_builder = WordCloud(
        width=300,
        height=300,
        background_color='white',
        min_font_size=5,
    )
    wordcloud = cloud_builder.generate(result)
    st.image(wordcloud.to_array())