hunkim committed on
Commit
b97592a
1 Parent(s): 060762d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -8,18 +8,21 @@ tokenizer = AutoTokenizer.from_pretrained(
8
  'kakaobrain/kogpt', revision='KoGPT6B-ryan1.5b',
9
  bos_token='[BOS]', eos_token='[EOS]', unk_token='[UNK]', pad_token='[PAD]', mask_token='[MASK]'
10
  )
 
 
 
11
  model = AutoModelForCausalLM.from_pretrained(
12
  'kakaobrain/kogpt', revision='KoGPT6B-ryan1.5b',
13
  pad_token_id=tokenizer.eos_token_id,
14
  torch_dtype=torch.float16, low_cpu_mem_usage=False
15
- ).to(device='cpu', non_blocking=True)
16
  _ = model.eval()
17
 
18
  print("Model loading done!")
19
 
20
  def gpt(prompt):
21
  with torch.no_grad():
22
- tokens = tokenizer.encode(prompt, return_tensors='pt').to(device='cpu', non_blocking=True)
23
  gen_tokens = model.generate(tokens, do_sample=True, temperature=0.8, max_length=256)
24
  generated = tokenizer.batch_decode(gen_tokens)[0]
25
 
 
8
  'kakaobrain/kogpt', revision='KoGPT6B-ryan1.5b',
9
  bos_token='[BOS]', eos_token='[EOS]', unk_token='[UNK]', pad_token='[PAD]', mask_token='[MASK]'
10
  )
11
+
12
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
13
+
14
  model = AutoModelForCausalLM.from_pretrained(
15
  'kakaobrain/kogpt', revision='KoGPT6B-ryan1.5b',
16
  pad_token_id=tokenizer.eos_token_id,
17
  torch_dtype=torch.float16, low_cpu_mem_usage=False
18
+ ).to(device=device, non_blocking=True)
19
  _ = model.eval()
20
 
21
  print("Model loading done!")
22
 
23
  def gpt(prompt):
24
  with torch.no_grad():
25
+ tokens = tokenizer.encode(prompt, return_tensors='pt').to(device=device, non_blocking=True)
26
  gen_tokens = model.generate(tokens, do_sample=True, temperature=0.8, max_length=256)
27
  generated = tokenizer.batch_decode(gen_tokens)[0]
28