Spaces:
Runtime error
Runtime error
File size: 2,708 Bytes
8028fa1 84a03d5 8028fa1 84a03d5 8028fa1 84a03d5 8028fa1 84a03d5 8028fa1 84a03d5 8028fa1 84a03d5 8028fa1 84a03d5 8028fa1 84a03d5 8028fa1 84a03d5 8028fa1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import re
import time
import streamlit as st
import pandas as pd
from wordcloud import WordCloud
from googletrans import Translator
from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline
from enums import MODEL_NAME, MESSAGES, DESCRIPTION
def iso2lang(iso):
    """Return the display label stored under MESSAGES["iso"] for a language code.

    Used as the ``format_func`` of the language radio button.

    Raises:
        KeyError: if ``iso`` has no entry in MESSAGES["iso"].
    """
    labels_by_iso = MESSAGES["iso"]
    return labels_by_iso[iso]
def create_df_from_io(input, output):
    """Build a one-row history frame for a single prompt/generation pair.

    Args:
        input: The text that was fed to the generator.
        output: The text the generator produced.

    Returns:
        A DataFrame with columns "input", "output" and "timestamp", where
        "timestamp" is the current ``time.time()`` value.
    """
    column_names = ["input", "output", "timestamp"]
    row = [input, output, time.time()]
    return pd.DataFrame([row], columns=column_names)
def simple_clean(text):
    """Lower-case ``text`` and replace basic punctuation with spaces.

    The characters replaced are ``! @ # $ . , ? -`` and the newline. Each
    occurrence becomes a single space, so the result has the same length as
    the input.

    Args:
        text: The string to normalise.

    Returns:
        The lower-cased string with the listed characters replaced by spaces.
    """
    # The hyphen must be last in the class. Written as "\n-?" it forms a
    # range from newline to "?" that also swallows digits, quotes, brackets
    # and other characters that were never meant to be stripped.
    return re.sub(r"[!@#$.,\n?-]", " ", text.lower())
def load_tokenizer():
    """Load and return the tokenizer published under MODEL_NAME.

    NOTE(review): unlike load_model, this function carries no Streamlit cache
    decorator, so it runs again on every call.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return tokenizer
@st.cache(allow_output_mutation=True)
def load_model():
    """Load and return the language model published under MODEL_NAME.

    The Streamlit cache decorator keeps the loaded model between script
    reruns; ``allow_output_mutation=True`` tells Streamlit not to hash the
    returned object.

    NOTE(review): ``st.cache`` and ``AutoModelWithLMHead`` are both marked
    deprecated in recent releases of their libraries - confirm the pinned
    versions before upgrading.
    """
    model = AutoModelWithLMHead.from_pretrained(MODEL_NAME)
    return model
def load_pipe():
    """Assemble a "text-generation" pipeline from the model and tokenizer.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        "text-generation" task.
    """
    # Keyword arguments are evaluated left to right, so the model is still
    # loaded before the tokenizer.
    return pipeline(
        "text-generation",
        model=load_model(),
        tokenizer=load_tokenizer(),
    )
# ---------------------------------------------------------------------- #
# Page header, language choice and per-session state.
st.write(DESCRIPTION)
lang = st.radio('Хэл / Language', ('mn', 'en'), format_func=iso2lang)
translator = Translator()

# Start with an empty history table when the session has none yet.
if "df" not in st.session_state:
    history_columns = ["input", "output", "timestamp"]
    st.session_state.df = pd.DataFrame(columns=history_columns)

# Build the generation pipeline while showing a spinner, then confirm.
with st.spinner(MESSAGES["loading_text"][lang]):
    pipe = load_pipe()
st.success(MESSAGES["success_model_load"][lang])

# Prompt box, pre-filled with the default text for the chosen language.
text = st.text_input(
    label=MESSAGES["input_description"][lang],
    value=MESSAGES["input_default"][lang],
)
# Generate a continuation for the current prompt and add it to the history.
with st.spinner(MESSAGES["loading_text"][lang]):
    if lang == "mn":
        # The pipeline is fed the prompt directly in this mode.
        result = pipe(text)[0]['generated_text']
        st.write(result)
    elif lang == "en":
        # Translate the prompt en -> mn, generate, then translate the
        # generated text back mn -> en for display. `text` is rebound on
        # purpose: the history stores the prompt the model actually saw.
        text = translator.translate(text, src='en', dest='mn').text
        result = pipe(text)[0]['generated_text']
        result_en = translator.translate(result, src='mn', dest='en').text
        st.write(f"*Translated:* {result_en}")
        st.write(f"> *Original:* {result}")
        st.warning('Translation is done by [`googletrans`](https://github.com/ssut/py-googletrans). Please check out the usage. https://github.com/ssut/py-googletrans#note-on-library-usage')

    # DataFrame.append was removed in pandas 2.0; pd.concat works on both old
    # and new versions. An empty history is replaced outright so the new
    # row's dtypes are kept and pandas does not warn about concatenating
    # empty frames.
    new_row = create_df_from_io(text, result)
    if st.session_state.df.empty:
        st.session_state.df = new_row
    else:
        st.session_state.df = pd.concat(
            [st.session_state.df, new_row],
            ignore_index=True,  # give every history row its own index
        )
# Word cloud over every output recorded in this session.
st.write("### WordCloud based on previous outputs")
with st.spinner(MESSAGES["loading_text"][lang]):
    # Join with a space so the last word of one output does not fuse with
    # the first word of the next. A generator with its own loop name also
    # leaves the global `text` untouched.
    wordcloud_input = " ".join(
        simple_clean(previous_output)
        for previous_output in st.session_state.df.output.tolist()
    )
    wordcloud = WordCloud(
        width=800,
        height=800,
        background_color='white',
        min_font_size=10,
    ).generate(wordcloud_input)
    st.image(wordcloud.to_array())
# History table, most recent generation first.
st.write("### Түүх / History")
with st.spinner(MESSAGES["loading_text"][lang]):
    newest_first = st.session_state.df.sort_values(by="timestamp", ascending=False)
    st.table(newest_first)
|