import os

import spaces  # used by the @spaces.GPU decorator if it is re-enabled below
import streamlit as st
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
# Sanity-check the GPU environment (sample outputs shown in comments)
print(f"Is CUDA available: {torch.cuda.is_available()}")
# e.g. True
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    # e.g. Tesla T4

# @spaces.GPU
def main():

    def get_tokens_as_list(word_list):
        """Convert a list of words into per-word token-id lists (the format
        expected by model.generate's bad_words_ids argument)."""
        tokens_list = []
        for word in word_list:
            # Tokenize with a prefix space so mid-sentence occurrences match
            tokenized_word = tokenizer_with_prefix_space([word], add_special_tokens=False).input_ids[0]
            tokens_list.append(tokenized_word)
        return tokens_list
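    # Example (token ids here are hypothetical and vocabulary-dependent):
    #   get_tokens_as_list(["\n", "\\"]) -> e.g. [[206], [5854]]
    # i.e. one list of token ids per banned word.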


    def translate(text, tokenizer, model, do_sample, max_new_tokens, temperature, top_k, top_p, bad_words_ids):
        # Prepare the chat-formatted prompt
        prompt = f"Translate from Korean to English: {text}"
        messages = [{"role": "user", "content": prompt}]

        input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
        input_ids = input_ids.to("cuda")

        prompt_len = input_ids.shape[1]

        # Generate the translation. Note: max_new_tokens (not max_length) caps
        # only the generated tokens, so long prompts are not silently truncated.
        gen_tokens = model.generate(
            input_ids,
            do_sample=do_sample,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            bad_words_ids=bad_words_ids,
        )
        # Drop the prompt tokens, keeping only the newly generated ones
        gen_tokens = [gt[prompt_len:] for gt in gen_tokens]
        translation = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)

        return translation
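    # Hypothetical direct call (outside the Streamlit UI), assuming the model
    # and tokenizers loaded below; the exact output depends on sampling:
    #   translate("안녕하세요", tokenizer, model, do_sample=False,
    #             max_new_tokens=64, temperature=0.3, top_k=0, top_p=0.9,
    #             bad_words_ids=bad_words_ids)  # -> e.g. ["Hello"]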

    

    st.title("LLM Translation: Korean -> English")

    # Default sample shown in the input box: a Korean fantasy-novel passage
    # (roughly: "Immense mana swirled around him. Five flames bloomed in the
    # air, swelled, and shot out like cannonballs. Whoooosh, clang! The black
    # wave of tens of thousands of monsters split apart. Searing heat burned
    # flesh and bone and melted the ground.")
    text_default = """
    그를 중심으로 휘몰아치는 막대한 마나. 허공에서 피어오른 다섯 개의 불꽃이 크기를 부풀리고, 이내 포탄처럼 쏘아졌다.

    후우우우웅, 까앙!

    수만의 몬스터로 이루어진 검은 파도가 갈라졌다. 초고온의 열기가 살과 뼈를 태우고 지면을 녹였다."""
    
    # Hugging Face token for gated/private model access
    hf_token = os.getenv("HF_ACCESS_TOKEN")

    # Optional flash-attention toggle, kept for reference
    # attn_implementation = None
    # USE_FLASH_ATTENTION = False
    # if USE_FLASH_ATTENTION:
    #     attn_implementation = "flash_attention_2"

    model_id = "r1208/c4ai-command-r-v01-4bit_32r"
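    # The repo name suggests a 4-bit quantized Command-R v01 base with a
    # rank-32 LoRA adapter (inferred from the name, not stated in this file)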

    
    # Load the LoRA-adapted model and tokenizer. "token" is the current auth
    # kwarg in transformers/peft (use_auth_token is deprecated).
    model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, token=hf_token)
    model = model.to("cuda")
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)

    # Second tokenizer with add_prefix_space=True, used only for bad_words_ids
    tokenizer_with_prefix_space = AutoTokenizer.from_pretrained(model_id, add_prefix_space=True, token=hf_token)
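
    # Sketch (an assumption, not part of the original app): Streamlit reruns
    # this script on every widget interaction, so the weights above are
    # reloaded on each rerun. Caching the loader once per process avoids that:
    #
    # @st.cache_resource
    # def load_model_and_tokenizer(model_id, hf_token):
    #     model = AutoPeftModelForCausalLM.from_pretrained(
    #         model_id, torch_dtype=torch.bfloat16, token=hf_token
    #     ).to("cuda")
    #     tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    #     return model, tokenizer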




    # Token sequences the model must never generate (newline and backslash
    # variants); backslashes are doubled so "\ " is a literal backslash-space
    bad_words_ids = get_tokens_as_list(word_list=["\n", "\n\n", "\\ ", " \\ ", "\\", "'\n'"])

    # Generation controls ("Max Length" sets max_new_tokens, i.e. a cap on
    # generated tokens only, excluding the prompt)
    max_new_tokens = st.sidebar.slider("Max Length", value=400, min_value=10, max_value=1000)
    temperature = st.sidebar.slider("Temperature", value=0.3, min_value=0.0, max_value=1.0, step=0.05)
    top_k = st.sidebar.slider("Top-k", min_value=0, max_value=50, value=0)
    top_p = st.sidebar.slider("Top-p", min_value=0.75, max_value=1.0, step=0.05, value=0.9)
    # Real booleans, not the strings 'True'/'False' (both of which are truthy,
    # so sampling could never be switched off). With do_sample=False the model
    # decodes greedily and temperature/top-k/top-p are ignored.
    do_sample = st.selectbox("do_sample:", [True, False])


    st.subheader("Enter text to translate")
    # Pre-fill the input box with the sample Korean passage defined above
    input_text = st.text_area("Text to Translate", value=text_default, height=300)

    if st.button("Translate"):
        if input_text:
            translation = translate(input_text, tokenizer, model, do_sample=do_sample, max_new_tokens=max_new_tokens, temperature=temperature, top_k=top_k, top_p=top_p, bad_words_ids=bad_words_ids)
            translation = translation[0]  # batch_decode returns a list; one prompt was sent
            st.text_area("Translated Text", value=translation, height=300)
        else:
            st.error("Please enter some text to translate.")

            

if __name__ == "__main__":
    main()