"""Streamlit demo app for a pretrained text-generation model.

Shows a language selector (Mongolian / English), generates a continuation of
the entered text, optionally round-trips it through ``googletrans`` for
English users, and renders a word cloud of the generated text.
"""
import re
import streamlit as st
from wordcloud import WordCloud
from googletrans import Translator
from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline

from enums import MODEL_NAME, MESSAGES, DESCRIPTION

def iso2lang(iso):
    """Return the display label stored under ``MESSAGES["iso"]`` for *iso*.

    Raises ``KeyError`` when *iso* has no entry in that table.
    """
    iso_labels = MESSAGES["iso"]
    return iso_labels[iso]

def simple_clean(text):
    """Lower-case *text* and replace basic punctuation with spaces.

    Each of the characters ``! @ # $ . , ? -`` and the newline is replaced
    by a single space. Every other character (letters, digits, existing
    spaces, other symbols) is kept unchanged.

    Args:
        text: The string to normalise.

    Returns:
        The lower-cased string with the listed characters blanked out.
    """
    # The hyphen is placed last in the character class so it is a literal
    # '-'.  Written as '\n-?' it formed a range from newline to '?', which
    # also matched digits, spaces, quotes, brackets and most ASCII symbols.
    return re.sub(r'[!@#$.,\n?-]', ' ', text.lower())

def load_tokenizer():
    """Load the pretrained tokenizer identified by ``MODEL_NAME``."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return tokenizer

@st.cache(allow_output_mutation=True)
def load_model():
    """Load the pretrained language model identified by ``MODEL_NAME``.

    The result is memoised through ``st.cache``; ``allow_output_mutation``
    is enabled so the cached model object may be mutated by its users.
    """
    model = AutoModelWithLMHead.from_pretrained(MODEL_NAME)
    return model

def load_pipe():
    """Build a ``text-generation`` pipeline from the model and tokenizer.

    The model is loaded first (via ``load_model``), then the tokenizer
    (via ``load_tokenizer``).
    """
    return pipeline(
        "text-generation",
        model=load_model(),
        tokenizer=load_tokenizer(),
    )
# ---------------------------------------------------------------------- #
# Page body. Streamlit re-executes this script top to bottom on every widget
# interaction, so the statement order below is also the on-page order.
st.write(DESCRIPTION)

# Language selector; options are ISO codes, shown to the user via iso2lang.
lang = st.radio('Хэл / Language', ('mn', 'en'), format_func=iso2lang)
translator = Translator()

with st.spinner(MESSAGES["loading_text"][lang]):
    pipe = load_pipe()
st.success(MESSAGES["success_model_load"][lang])

text = st.text_input(
    MESSAGES["input_description"][lang], MESSAGES["input_default"][lang])

if not text.strip():
    # A blank prompt gives the generator and translator nothing to work on,
    # and WordCloud.generate raises ValueError on text without words.
    # Skip generation and repeat the localized input hint instead.
    st.info(MESSAGES["input_description"][lang])
else:
    with st.spinner(MESSAGES["loading_text"][lang]):
        if lang == "mn":
            # The prompt is fed to the pipeline as entered.
            result = pipe(text)[0]['generated_text']
            st.write(result)
        elif lang == "en":
            # English prompt: translate to Mongolian, generate, then
            # translate the generated text back for display.
            text = translator.translate(text, src='en', dest='mn').text
            result = pipe(text)[0]['generated_text']
            result_en = translator.translate(result, src='mn', dest='en').text
            st.write(f"*Translated:* {result_en}")
            st.write(f"> *Original:* {result}")
            st.warning('Translation is done by [`googletrans`](https://github.com/ssut/py-googletrans). Please check out the usage. https://github.com/ssut/py-googletrans#note-on-library-usage')

    st.write("### WordCloud:")
    with st.spinner(MESSAGES["loading_text"][lang]):
        # The cloud is built from the untranslated pipeline output.
        wordcloud = WordCloud(
            width=300,
            height=300,
            background_color='white',
            min_font_size=5,
        ).generate(result)

        st.image(wordcloud.to_array())