chobob311 committed on
Commit
fefdb18
·
verified ·
1 Parent(s): 43dcf0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -11
app.py CHANGED
@@ -2,20 +2,19 @@ import gradio as gr
2
  from transformers import LlamaTokenizer, LlamaForCausalLM
3
  import torch
4
 
5
- model_repo_id = "Bllossom/llama-3-Korean-Bllossom-70B"
 
6
 
7
- # 토크나이저 로드
8
  tokenizer = LlamaTokenizer.from_pretrained(
9
- model_repo_id,
10
- use_auth_token='your_hf_access_token' # 필요한 경우 액세스 토큰 추가
11
  )
12
-
13
- # 모델 로드
14
  model = LlamaForCausalLM.from_pretrained(
15
- model_repo_id,
16
- torch_dtype=torch.float16, # 또는 torch.bfloat16
17
- device_map="auto", # 가능한 경우 GPU에 자동 할당
18
- use_auth_token='your_hf_access_token' # 필요한 경우 액세스 토큰 추가
19
  )
20
 
21
  def respond(
@@ -32,8 +31,10 @@ def respond(
32
  prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
33
  prompt += f"User: {message}\nAssistant:"
34
 
 
35
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
36
 
 
37
  outputs = model.generate(
38
  **inputs,
39
  max_new_tokens=max_tokens,
@@ -44,17 +45,23 @@ def respond(
44
  pad_token_id=tokenizer.eos_token_id,
45
  )
46
 
 
47
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
48
  response = response[len(prompt):].strip()
49
 
 
50
  history.append((message, response))
51
 
52
  return history
53
 
 
54
  demo = gr.ChatInterface(
55
  fn=respond,
56
  additional_inputs=[
57
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
 
 
 
58
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
59
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
60
  gr.Slider(
 
2
  from transformers import LlamaTokenizer, LlamaForCausalLM
3
  import torch
4
 
5
+ model_id = 'Bllossom/llama-3-Korean-Bllossom-70B'
6
+ hf_access_token = 'γ…—γ…—' # 실제 허깅페이스 액세스 토큰으로 교체하세요
7
 
8
+ # 토크나이저와 모델 로드
9
  tokenizer = LlamaTokenizer.from_pretrained(
10
+ model_id,
11
+ use_auth_token=hf_access_token
12
  )
 
 
13
  model = LlamaForCausalLM.from_pretrained(
14
+ model_id,
15
+ torch_dtype=torch.bfloat16,
16
+ device_map="auto",
17
+ use_auth_token=hf_access_token
18
  )
19
 
20
  def respond(
 
31
  prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
32
  prompt += f"User: {message}\nAssistant:"
33
 
34
+ # 입력 토큰화
35
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
36
 
37
+ # 모델 응답 생성
38
  outputs = model.generate(
39
  **inputs,
40
  max_new_tokens=max_tokens,
 
45
  pad_token_id=tokenizer.eos_token_id,
46
  )
47
 
48
+ # 응답 디코딩
49
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
50
  response = response[len(prompt):].strip()
51
 
52
+ # 히스토리에 추가
53
  history.append((message, response))
54
 
55
  return history
56
 
57
+ # Gradio 인터페이스 생성
58
  demo = gr.ChatInterface(
59
  fn=respond,
60
  additional_inputs=[
61
+ gr.Textbox(
62
+ value="You are a friendly Chatbot.",
63
+ label="System message"
64
+ ),
65
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
66
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
67
  gr.Slider(