snoop2head committed on
Commit
dc6144b
·
1 Parent(s): dcbf031

initial commit

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. README.md +2 -2
  3. app.py +72 -0
  4. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .python-version
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: Translation
3
- emoji: 📉
4
  colorFrom: gray
5
  colorTo: blue
6
  sdk: streamlit
 
1
  ---
2
+ title: Ko-En Translation
3
+ emoji: 💬
4
  colorFrom: gray
5
  colorTo: blue
6
  sdk: streamlit
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ import numpy as np
3
+ import streamlit as st
4
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
5
+
6
+
7
# Page-level settings. Streamlit documents set_page_config as having to be
# the first Streamlit command on a page, so keep this ahead of any other st.* call.
# The title is Korean for "Translator"; the file is declared UTF-8, so the
# literal is stored as real Hangul rather than mis-decoded bytes.
st.set_page_config(
    page_title="번역기",
    layout="wide",
    initial_sidebar_state="expanded",
)
10
+
11
# A plain ``@st.cache`` hashes the cached return value on every hit to detect
# mutation. For a large torch model that is slow and can warn or fail, so use
# the resource cache where Streamlit provides it and otherwise the legacy
# cache with output-mutation checks switched off.
if hasattr(st, "cache_resource"):
    _cache_model = st.cache_resource
else:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def load_model(model_name):
    """Load a pretrained seq2seq model once and reuse it across script reruns.

    Args:
        model_name: Hugging Face Hub identifier (or local path) passed to
            ``AutoModelForSeq2SeqLM.from_pretrained``.

    Returns:
        The loaded model. The same object is handed back on later calls with
        the same ``model_name``, so callers must not modify it in place.
    """
    return AutoModelForSeq2SeqLM.from_pretrained(model_name)
15
+
16
# One tokenizer serves both translation directions.
# NOTE(review): assumes the Ko2En and En2Ko checkpoints share the same
# vocabulary - confirm against the model cards.
tokenizer = AutoTokenizer.from_pretrained("QuoQA-NLP/KE-T5-Ko2En-Base")
ko2en_model = load_model("QuoQA-NLP/KE-T5-Ko2En-Base")
en2ko_model = load_model("QuoQA-NLP/KE-T5-En2Ko-Base")

# Token budget applied both to the encoded input and to the generated output.
# Input longer than this is truncated by the tokenizer before translation.
MAX_TOKENS = 64


def _translate(text, model):
    """Translate ``text`` with ``model`` and return the decoded string.

    Args:
        text: Source sentence(s) typed by the user.
        model: One of the seq2seq models loaded above.

    Returns:
        The first beam-search hypothesis, decoded with special tokens removed.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=MAX_TOKENS,
    )
    generated = model.generate(
        **encoded,
        max_length=MAX_TOKENS,
        num_beams=5,
        repetition_penalty=1.3,
        no_repeat_ngram_size=3,
        num_return_sequences=1,
    )
    return tokenizer.decode(
        generated[0],
        clean_up_tokenization_spaces=True,
        skip_special_tokens=True,
    )


st.title("🤖 번역기")
st.write("좌측에 번역 모드를 선택하고, CTRL+Enter(CMD+Enter)를 누르세요 🤗")
st.write("Select Translation Mode at the left and press CTRL+Enter(CMD+Enter)🤗")

# Index 0 is Korean -> English, index 1 is English -> Korean.
translation_list = [
    "한국어에서 영어 | Korean to English",
    "영어에서 한국어 | English to Korean",
]
translation_mode = st.sidebar.radio(
    "번역 모드를 선택(Translation Mode):", translation_list
)

# Sample Korean paragraph pre-filled in the input box.
default_value = (
    "프로젝트 가치가 미화 1백만 달러 이상인 공공 파트너가 시작한 PPP 프로젝트에 대해 "
    "2단계 입찰이 실시됩니다. 입찰을 전자 방식으로 진행하는 것이 허용됩니다. "
    "(즉, 신청서 및 입찰 제안의 전자 제출). COVID-19 전염병과 그에 따른 여행 "
    "제한으로 인해 오늘날에는 일반적인 관행이 되었습니다."
)
src_text = st.text_area(
    "번역하고 싶은 문장을 입력하세요:",
    default_value,
    height=100,
    max_chars=200,
)

if not src_text.strip():
    # Nothing to translate: tell the user and skip the model call.
    st.warning("Please **enter text** for translation")
else:
    # Pick the model that matches the direction chosen in the sidebar.
    if translation_mode == translation_list[0]:
        model = ko2en_model
    else:
        model = en2ko_model

    translation_result = _translate(src_text, model)

    # Single server-side log line pairing the input with its translation.
    print(f"{src_text} -> {translation_result}")

    st.write(translation_result)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
2
+ streamlit
3
+ torch
4
+ numpy