Tonic committed on
Commit
55ca4f6
·
verified ·
1 Parent(s): 297570c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -8
app.py CHANGED
@@ -48,16 +48,12 @@ def convert_history(chat_history, max_input_length=1024):
48
  return history_text
49
 
50
  @spaces.GPU
 
51
  def instruct(instruction, max_token_output=1024):
52
  input_text = instruction
53
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
54
- input_ids = tokenizer(input_text, return_tensors='pt', truncation=False)
55
- input_ids["input_ids"] = input_ids["input_ids"].cuda()
56
- input_ids["attention_mask"] = input_ids["attention_mask"].cuda()
57
- generation_kwargs = dict(input_ids, streamer=streamer, max_new_tokens=max_token_output, do_sample=False)
58
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
59
- thread.start()
60
- return streamer
61
 
62
 
63
  with gr.Blocks() as demo:
 
48
  return history_text
49
 
50
  @spaces.GPU
51
+ @torch.inference_mode()
52
  def instruct(instruction, max_token_output=1024):
53
  input_text = instruction
54
+ input_ids = tokenizer(input_text, return_tensors='pt', truncation=True).to('cuda')
55
+ outputs = model.generate(**input_ids, max_length=max_token_output, do_sample=False)
56
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
57
 
58
 
59
  with gr.Blocks() as demo: