bayartsogt committed on
Commit
8028fa1
·
1 Parent(s): cb2703f

wordcloud + history addition

Browse files
Files changed (2) hide show
  1. app.py +36 -7
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,13 +1,22 @@
 
1
  import time
2
  import streamlit as st
3
- from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline
 
4
  from googletrans import Translator
 
5
 
6
  from enums import MODEL_NAME, MESSAGES, DESCRIPTION
7
 
8
  def iso2lang(iso):
9
  return MESSAGES["iso"][iso]
10
 
 
 
 
 
 
 
11
  def load_tokenizer():
12
  return AutoTokenizer.from_pretrained(MODEL_NAME)
13
 
@@ -19,17 +28,19 @@ def load_pipe():
19
  model = load_model()
20
  tokenizer = load_tokenizer()
21
  return pipeline("text-generation", model=model, tokenizer=tokenizer)
22
-
23
  st.write(DESCRIPTION)
24
 
25
  lang = st.radio('Хэл / Language', ('mn', 'en'), format_func=iso2lang)
26
  translator = Translator()
 
 
27
 
28
  with st.spinner(MESSAGES["loading_text"][lang]):
29
  pipe = load_pipe()
30
  st.success(MESSAGES["success_model_load"][lang])
31
 
32
- text = st.text_area(
33
  MESSAGES["input_description"][lang], MESSAGES["input_default"][lang])
34
 
35
  with st.spinner(MESSAGES["loading_text"][lang]):
@@ -38,9 +49,27 @@ with st.spinner(MESSAGES["loading_text"][lang]):
38
  st.write(result)
39
  elif lang == "en":
40
  text = translator.translate(text, src='en', dest='mn').text
41
- result_mn = pipe(text)[0]['generated_text']
42
- result_en = translator.translate(result_mn, src='mn', dest='en').text
43
  st.write(f"*Translated:* {result_en}")
44
- st.write(f"> *Original:* {result_mn}")
 
 
 
45
 
46
- st.warning('Translation is done by [`googletrans`](https://github.com/ssut/py-googletrans). Please check out the usage. https://github.com/ssut/py-googletrans#note-on-library-usage')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
  import time
3
  import streamlit as st
4
+ import pandas as pd
5
+ from wordcloud import WordCloud
6
  from googletrans import Translator
7
+ from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline
8
 
9
  from enums import MODEL_NAME, MESSAGES, DESCRIPTION
10
 
11
  def iso2lang(iso):
12
  return MESSAGES["iso"][iso]
13
 
14
+ def create_df_from_io(input, output):
15
+ return pd.DataFrame([[input, output, time.time()]], columns=["input", "output", "timestamp"])
16
+
17
def simple_clean(text):
    """Lower-case *text* and replace selected punctuation with spaces.

    The characters replaced are ``! @ # $ . , ? -`` and the newline. Each
    occurrence becomes a single space; nothing is collapsed or stripped.

    Args:
        text: The string to clean.

    Returns:
        The lower-cased string with the listed characters turned into spaces.
    """
    # The hyphen is placed last in the character class so it is a literal.
    # Written as ``\n-?`` it formed a range from newline to '?', which also
    # removed digits, quotes, colons, brackets and other characters.
    return re.sub(r'[!@#$.,\n?-]', ' ', text.lower())
19
+
20
  def load_tokenizer():
21
  return AutoTokenizer.from_pretrained(MODEL_NAME)
22
 
 
28
  model = load_model()
29
  tokenizer = load_tokenizer()
30
  return pipeline("text-generation", model=model, tokenizer=tokenizer)
31
+ # ---------------------------------------------------------------------- #
32
  st.write(DESCRIPTION)
33
 
34
  lang = st.radio('Хэл / Language', ('mn', 'en'), format_func=iso2lang)
35
  translator = Translator()
36
+ if "df" not in st.session_state:
37
+ st.session_state.df = pd.DataFrame(columns=["input", "output", "timestamp"])
38
 
39
  with st.spinner(MESSAGES["loading_text"][lang]):
40
  pipe = load_pipe()
41
  st.success(MESSAGES["success_model_load"][lang])
42
 
43
+ text = st.text_input(
44
  MESSAGES["input_description"][lang], MESSAGES["input_default"][lang])
45
 
46
  with st.spinner(MESSAGES["loading_text"][lang]):
 
49
  st.write(result)
50
  elif lang == "en":
51
  text = translator.translate(text, src='en', dest='mn').text
52
+ result = pipe(text)[0]['generated_text']
53
+ result_en = translator.translate(result, src='mn', dest='en').text
54
  st.write(f"*Translated:* {result_en}")
55
+ st.write(f"> *Original:* {result}")
56
+ st.warning('Translation is done by [`googletrans`](https://github.com/ssut/py-googletrans). Please check out the usage. https://github.com/ssut/py-googletrans#note-on-library-usage')
57
+
58
# Record this input/output pair in the per-session history.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat with default arguments builds the same frame.
st.session_state.df = pd.concat(
    [st.session_state.df, create_df_from_io(text, result)]
)
59
 
60
+
61
+ st.write("### WordCloud based on previous outputs")
62
with st.spinner(MESSAGES["loading_text"][lang]):
    # Join the cleaned outputs with a space so the last word of one output
    # does not fuse with the first word of the next. A separate loop name
    # is used so the script-level `text` is not overwritten.
    wordcloud_input = " ".join(
        simple_clean(output) for output in st.session_state.df.output.tolist()
    )

    # WordCloud.generate raises ValueError when it is given no words, so
    # skip rendering when only whitespace is left after cleaning.
    if wordcloud_input.strip():
        wordcloud = WordCloud(width=800, height=800,
                              background_color='white',
                              min_font_size=10).generate(wordcloud_input)

        st.image(wordcloud.to_array())
72
+
73
+ st.write("### Түүх / History")
74
+ with st.spinner(MESSAGES["loading_text"][lang]):
75
+ st.table(st.session_state.df.sort_values(by="timestamp", ascending=False))
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  torch
 
 
2
  transformers
3
  streamlit
4
  googletrans==3.1.0a
 
1
  torch
2
+ pandas
3
+ wordcloud
4
  transformers
5
  streamlit
6
  googletrans==3.1.0a