r1208 committed on
Commit
8ea5435
·
verified ·
1 Parent(s): a517f63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -1
app.py CHANGED
@@ -2,4 +2,73 @@ import streamlit as st
2
  from transformers import pipeline
3
  from PIL import Image
4
 
5
- pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from transformers import pipeline
3
  from PIL import Image
4
 
5
+ st.title("LLM Translate for ko->eng")
6
+
7
+ # adding the text that will show in the text box as default
8
+
9
+ text_default = """
10
+ ๊ทธ๋ฅผ ์ค‘์‹ฌ์œผ๋กœ ํœ˜๋ชฐ์•„์น˜๋Š” ๋ง‰๋Œ€ํ•œ ๋งˆ๋‚˜. ํ—ˆ๊ณต์—์„œ ํ”ผ์–ด์˜ค๋ฅธ ๋‹ค์„ฏ ๊ฐœ์˜๋ถˆ๊ฝ‚์ด ํฌ๊ธฐ๋ฅผ ๋ถ€ํ’€๋ฆฌ๊ณ , ์ด๋‚ด ํฌํƒ„์ฒ˜๋Ÿผ ์˜์•„์กŒ๋‹ค.
11
+
12
+ ํ›„์šฐ์šฐ์šฐ์›…, ๊นŒ์•™!
13
+
14
+ ์ˆ˜๋งŒ์˜ ๋ชฌ์Šคํ„ฐ๋กœ ์ด๋ฃจ์–ด์ง„ ๊ฒ€์€ ํŒŒ๋„๊ฐ€ ๊ฐˆ๋ผ์กŒ๋‹ค. ์ดˆ๊ณ ์˜จ์˜ ์—ด๊ธฐ๊ฐ€ ์‚ด๊ณผ ๋ผˆ๋ฅผ ํƒœ์šฐ๊ณ  ์ง€๋ฉด์„ ๋…น์˜€๋‹ค."""
15
+
16
+
17
+ from peft import AutoPeftModelForCausalLM
18
+ from transformers import AutoTokenizer
19
+ import torch
20
+
21
+ attn_implementation = None
22
+ if USE_FLASH_ATTENTION:
23
+ attn_implementation="flash_attention_2"
24
+
25
+
26
+ model_id = "r1208/c4ai-command-r-v01-4bit_32r"
27
+
28
+ model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, attn_implementation=attn_implementation,)
29
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
30
+
31
+ tokenizer_with_prefix_space = AutoTokenizer.from_pretrained(model_id, add_prefix_space=True)
32
+
33
+ def get_tokens_as_list(word_list):
34
+ "Converts a sequence of words into a list of tokens"
35
+ tokens_list = []
36
+ for word in word_list:
37
+ tokenized_word = tokenizer_with_prefix_space([word], add_special_tokens=False).input_ids[0]
38
+ tokens_list.append(tokenized_word)
39
+ return tokens_list
40
+ bad_words_ids = get_tokens_as_list( word_list=["\n", "\n\n", "\ ", " \ ", "\\", "'\n'"] )
41
+
42
+ max_new_tokens = st.sidebar.slider("Max Length", value=100, min_value=10, max_value=1000)
43
+ temperature = st.sidebar.slider("Temperature", value=0.3, min_value=0.0, max_value=1.0, step=0.05)
44
+ top_k = st.sidebar.slider("Top-k", min_value=0, max_value=50, value=0)
45
+ top_p = st.sidebar.slider("Top-p", min_value=0.75, max_value=1.0, step=0.05, value=0.9)
46
+
47
+ def translate(text):
48
+ # Prepare the prompt
49
+ messages = f"Translate from Korean to English: {text}"
50
+ input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
51
+
52
+ # Generate the translation
53
+ outputs = model.generate(input_ids, max_length=max_new_tokens, temperature=temperature, top_k=top_k, top_p=top_p, bad_words_ids = bad_words_ids)
54
+
55
+ translation = tokenizer.decode(outputs[0], skip_special_tokens=True)
56
+
57
+ return translation
58
+
59
+ def main():
60
+ st.subheader("Enter text to translate")
61
+ input_text = st.text_area("", height=300)
62
+
63
+ if st.button("Translate"):
64
+ if input_text:
65
+ translation = translate(input_text)
66
+ st.text_area("Translated Text", value=translation, height=300)
67
+ else:
68
+ st.error("Please enter some text to translate.")
69
+
70
+ if __name__ == "__main__":
71
+ main()
72
+
73
+
74
+