Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,9 @@ from PIL import Image
|
|
5 |
import os
|
6 |
|
7 |
|
8 |
-
def
|
|
|
|
|
9 |
"Converts a sequence of words into a list of tokens"
|
10 |
tokens_list = []
|
11 |
for word in word_list:
|
@@ -14,22 +16,22 @@ def get_tokens_as_list(word_list):
|
|
14 |
return tokens_list
|
15 |
|
16 |
|
17 |
-
def translate(text):
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
|
32 |
-
|
33 |
|
34 |
st.title("LLM Translate for ko->eng")
|
35 |
|
|
|
5 |
import os
|
6 |
|
7 |
|
8 |
+
def main():
|
9 |
+
|
10 |
+
def get_tokens_as_list(word_list):
|
11 |
"Converts a sequence of words into a list of tokens"
|
12 |
tokens_list = []
|
13 |
for word in word_list:
|
|
|
16 |
return tokens_list
|
17 |
|
18 |
|
19 |
+
def translate(text):
|
20 |
+
# Prepare the prompt
|
21 |
+
messages = f"Translate from Korean to English: {text}"
|
22 |
+
input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
|
23 |
+
prompt_padded_len = len(input_ids[0])
|
24 |
+
|
25 |
+
# Generate the translation
|
26 |
+
gen_tokens = model.generate(input_ids, max_length=max_new_tokens, temperature=temperature, top_k=top_k, top_p=top_p, bad_words_ids = bad_words_ids)
|
27 |
+
gen_tokens = [
|
28 |
+
gt[prompt_padded_len:] for gt in gen_tokens
|
29 |
+
]
|
30 |
+
translation = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)
|
31 |
+
|
32 |
+
return translation
|
33 |
|
34 |
+
|
35 |
|
36 |
st.title("LLM Translate for ko->eng")
|
37 |
|