Spaces:
Runtime error
Runtime error
import re | |
import time | |
import streamlit as st | |
import pandas as pd | |
from wordcloud import WordCloud | |
from googletrans import Translator | |
from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline | |
from enums import MODEL_NAME, MESSAGES, DESCRIPTION | |
def iso2lang(iso):
    """Return the display label registered for the language code *iso*.

    The label is looked up in ``MESSAGES["iso"]``; an unknown code raises
    ``KeyError``.
    """
    labels_by_iso = MESSAGES["iso"]
    return labels_by_iso[iso]
def create_df_from_io(input, output):
    """Build a one-row history frame for a single generation.

    Args:
        input: The prompt that was fed to the generator.
        output: The text the generator produced.

    Returns:
        A ``pandas.DataFrame`` with columns ``input``, ``output`` and
        ``timestamp``; the timestamp is ``time.time()`` at call time.
    """
    column_names = ["input", "output", "timestamp"]
    record = [input, output, time.time()]
    return pd.DataFrame([record], columns=column_names)
def simple_clean(text):
    """Lower-case *text* and replace basic punctuation with spaces.

    The characters ``! @ # $ . , - ?`` and newlines are each replaced by a
    single space; every other character is kept as is.

    Args:
        text: The string to normalise.

    Returns:
        The lower-cased string with the listed characters blanked out.
    """
    # The hyphen is escaped on purpose: written as ``\n-?`` it forms a range
    # from newline to "?" and would also wipe digits, quotes, "+", ":" etc.
    return re.sub(r'[!@#$.,\n\-?]', ' ', text.lower())
def load_tokenizer():
    """Load the pretrained tokenizer identified by ``MODEL_NAME``."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return tokenizer
def load_model():
    """Load the pretrained causal language model identified by ``MODEL_NAME``.

    ``AutoModelWithLMHead`` is deprecated in transformers; because the model
    is consumed by a ``"text-generation"`` pipeline, the causal-LM auto class
    is the documented replacement.
    """
    # Imported locally so this function does not depend on the file-level
    # import list being changed.
    from transformers import AutoModelForCausalLM

    return AutoModelForCausalLM.from_pretrained(MODEL_NAME)
def load_pipe():
    """Assemble the text-generation pipeline from the model and tokenizer.

    The model is loaded first and the tokenizer second (keyword arguments
    are evaluated left to right).
    """
    return pipeline(
        "text-generation",
        model=load_model(),
        tokenizer=load_tokenizer(),
    )
# ---------------------------------------------------------------------- #
# Streamlit page body: description, language choice, generation, word cloud
# and history table.
st.write(DESCRIPTION)

# UI language selector; iso2lang turns each code into its display label.
lang = st.radio('Хэл / Language', ('mn', 'en'), format_func=iso2lang)
translator = Translator()

# The history frame lives in session_state so it is created only once per
# session and is still there when the script body runs again.
if "df" not in st.session_state:
    st.session_state.df = pd.DataFrame(columns=["input", "output", "timestamp"])

with st.spinner(MESSAGES["loading_text"][lang]):
    pipe = load_pipe()
st.success(MESSAGES["success_model_load"][lang])

text = st.text_input(
    MESSAGES["input_description"][lang], MESSAGES["input_default"][lang])

with st.spinner(MESSAGES["loading_text"][lang]):
    if lang == "mn":
        result = pipe(text)[0]['generated_text']
        st.write(result)
    elif lang == "en":
        # The generator is fed Mongolian text: translate the prompt in,
        # generate, then translate the result back out for display.
        text = translator.translate(text, src='en', dest='mn').text
        result = pipe(text)[0]['generated_text']
        result_en = translator.translate(result, src='mn', dest='en').text
        st.write(f"*Translated:* {result_en}")
        st.write(f"> *Original:* {result}")
        st.warning('Translation is done by [`googletrans`](https://github.com/ssut/py-googletrans). Please check out the usage. https://github.com/ssut/py-googletrans#note-on-library-usage')

    # NOTE: in the 'en' branch `text` has been rebound to the translated
    # prompt, so the history stores what was actually fed to the generator.
    new_row = create_df_from_io(text, result)
    history = st.session_state.df
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement. The empty initial frame is skipped so concat never has to
    # infer dtypes from an all-empty input.
    if history.empty:
        st.session_state.df = new_row
    else:
        st.session_state.df = pd.concat([history, new_row], ignore_index=True)

st.write("### WordCloud based on previous outputs")
with st.spinner(MESSAGES["loading_text"][lang]):
    # Join with a space so the last word of one output does not fuse with
    # the first word of the next one.
    wordcloud_input = " ".join(
        simple_clean(previous_output)
        for previous_output in st.session_state.df["output"].tolist()
    )
    wordcloud = WordCloud(
        width=800,
        height=800,
        background_color='white',
        min_font_size=10,
    ).generate(wordcloud_input)
    st.image(wordcloud.to_array())

st.write("### Түүх / History")
with st.spinner(MESSAGES["loading_text"][lang]):
    # Newest generation first.
    st.table(st.session_state.df.sort_values(by="timestamp", ascending=False))