snoop2head committed on
Commit cffbd0f · 1 Parent(s): 2beda2c
Files changed (1)
  1. app.py +50 -50
app.py CHANGED
@@ -8,9 +8,14 @@ st.set_page_config(
      page_title="KoQuillBot", layout="wide", initial_sidebar_state="expanded"
  )

+ @st.cache
+ def load_model(model_name):
+     model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+     return model
+
  tokenizer = AutoTokenizer.from_pretrained("QuoQA-NLP/KE-T5-Ko2En-Base")
- ko2en_model = AutoModelForSeq2SeqLM.from_pretrained("QuoQA-NLP/KE-T5-Ko2En-Base")
- en2ko_model = AutoModelForSeq2SeqLM.from_pretrained("QuoQA-NLP/KE-T5-En2Ko-Base")
+ ko2en_model = load_model("QuoQA-NLP/KE-T5-Ko2En-Base")
+ en2ko_model = load_model("QuoQA-NLP/KE-T5-En2Ko-Base")


  st.title("🤖 KoQuillBot")
@@ -27,57 +32,52 @@ print(src_text)



- if st.button("문장 변환") or src_text == default_value:
-     if src_text == "":
-         st.warning("Please **enter text** for translation")
-
-     else:
-         # translate into english sentence
-         english_translation = ko2en_model.generate(
-             **tokenizer(
-                 src_text,
-                 return_tensors="pt",
-                 padding="max_length",
-                 truncation=True,
-                 max_length=64,
-             ),
-             max_length=64,
-             num_beams=5,
-             repetition_penalty=1.3,
-             no_repeat_ngram_size=3,
-             num_return_sequences=1,
-         )
-         english_translation = tokenizer.decode(
-             english_translation[0],
-             clean_up_tokenization_spaces=True,
-             skip_special_tokens=True,
-         )
+ if src_text == "":
+     st.warning("Please **enter text** for translation")

-         # translate back to korean
-         korean_translation = en2ko_model.generate(
-             **tokenizer(
-                 english_translation,
-                 return_tensors="pt",
-                 padding="max_length",
-                 truncation=True,
-                 max_length=64,
-             ),
-             max_length=64,
-             num_beams=5,
-             repetition_penalty=1.3,
-             no_repeat_ngram_size=3,
-             num_return_sequences=1,
-         )
+ # translate into english sentence
+ english_translation = ko2en_model.generate(
+     **tokenizer(
+         src_text,
+         return_tensors="pt",
+         padding="max_length",
+         truncation=True,
+         max_length=64,
+     ),
+     max_length=64,
+     num_beams=5,
+     repetition_penalty=1.3,
+     no_repeat_ngram_size=3,
+     num_return_sequences=1,
+ )
+ english_translation = tokenizer.decode(
+     english_translation[0],
+     clean_up_tokenization_spaces=True,
+     skip_special_tokens=True,
+ )

-         korean_translation = tokenizer.decode(
-             korean_translation[0],
-             clean_up_tokenization_spaces=True,
-             skip_special_tokens=True,
-         )
-         print(f"{src_text} -> {english_translation} -> {korean_translation}")
- else:
-     pass
+ # translate back to korean
+ korean_translation = en2ko_model.generate(
+     **tokenizer(
+         english_translation,
+         return_tensors="pt",
+         padding="max_length",
+         truncation=True,
+         max_length=64,
+     ),
+     max_length=64,
+     num_beams=5,
+     repetition_penalty=1.3,
+     no_repeat_ngram_size=3,
+     num_return_sequences=1,
+ )

+ korean_translation = tokenizer.decode(
+     korean_translation[0],
+     clean_up_tokenization_spaces=True,
+     skip_special_tokens=True,
+ )
+ print(f"{src_text} -> {english_translation} -> {korean_translation}")

  st.write(korean_translation)
  print(korean_translation)
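
The point of the change is to memoize the two heavyweight from_pretrained() calls so the checkpoints are not reloaded on every Streamlit rerun of the script. A minimal sketch of the same caching pattern follows; it assumes the legacy st.cache API used in this commit (newer Streamlit releases replace it with st.cache_resource), and the allow_output_mutation=True flag is an added assumption, commonly used so Streamlit skips re-hashing the returned torch module when validating the cache.

import streamlit as st
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Sketch of the commit's caching pattern, assuming legacy Streamlit.
# allow_output_mutation=True is an assumption (not in the commit) that
# stops Streamlit from hashing the cached torch module on each rerun.
@st.cache(allow_output_mutation=True)
def load_model(model_name):
    # Runs once per model name; later reruns reuse the in-memory model.
    return AutoModelForSeq2SeqLM.from_pretrained(model_name)

tokenizer = AutoTokenizer.from_pretrained("QuoQA-NLP/KE-T5-Ko2En-Base")
ko2en_model = load_model("QuoQA-NLP/KE-T5-Ko2En-Base")
en2ko_model = load_model("QuoQA-NLP/KE-T5-En2Ko-Base")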