mamkkl committed on
Commit 456ee4b · verified · 1 Parent(s): e012f60

Update app.py

Files changed (1)
  1. app.py +13 -10
app.py CHANGED
@@ -69,6 +69,18 @@ def generate_prompt(instruction, input=None):
     return PROMPT_DICT["prompt_no_input"].format(instruction=instruction)
 
 @spaces.GPU
+def generator(input_ids, generation_config, max_new_tokens):
+    # Without streaming
+    with torch.no_grad():
+        generation_output = model.generate(
+            input_ids=input_ids,
+            generation_config=generation_config,
+            return_dict_in_generate=True,
+            output_scores=False,
+            max_new_tokens=max_new_tokens,
+        )
+    return generation_output
+
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -89,16 +101,7 @@ def respond(
         num_beams=1,
         max_new_tokens = max_new_tokens
     )
-
-    # Without streaming
-    with torch.no_grad():
-        generation_output = model.generate(
-            input_ids=input_ids,
-            generation_config=generation_config,
-            return_dict_in_generate=True,
-            output_scores=False,
-            max_new_tokens=max_new_tokens,
-        )
+    generation_output = generator(input_ids, generation_config, max_new_tokens)
     s = generation_output.sequences[0]
     output = tokenizer.decode(s)
     response = output.split("Response:")[1].strip()
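After this commit, the unchanged @spaces.GPU decorator (context line 71) sits above the new generator helper rather than respond, so only the model.generate call runs inside the GPU-allocated scope while respond keeps the prompt building and decoding. Below is a minimal, self-contained sketch of that pattern, not the actual app.py: the model name, generation settings, and the simplified respond signature are placeholders.

# Minimal sketch of the pattern adopted by this commit: the GPU-bound
# generate() call lives in its own @spaces.GPU-decorated helper, and
# respond() calls that helper instead of invoking model.generate() directly.
# The model name and generation settings below are placeholders, not app.py's.
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

tokenizer = AutoTokenizer.from_pretrained("gpt2")    # placeholder model
model = AutoModelForCausalLM.from_pretrained("gpt2")


@spaces.GPU
def generator(input_ids, generation_config, max_new_tokens):
    # Without streaming: run the whole generation inside the GPU scope
    with torch.no_grad():
        return model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=False,
            max_new_tokens=max_new_tokens,
        )


def respond(message, max_new_tokens=64):
    input_ids = tokenizer(message, return_tensors="pt").input_ids
    generation_config = GenerationConfig(num_beams=1)    # placeholder settings
    generation_output = generator(input_ids, generation_config, max_new_tokens)
    return tokenizer.decode(generation_output.sequences[0])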