Tonic committed
Commit 5736661 · unverified · 1 parent: 55b91e5

fix model max length and ZeroGPU timeout

Files changed (1): app.py (+6, −2)
app.py CHANGED
@@ -31,14 +31,18 @@ def create_prompt(system_message, user_message, tool_definition="", context=""):
     else:
         return f"<extra_id_0>System\n{system_message}\n\n<extra_id_1>User\n{user_message}\n<extra_id_1>Assistant\n"
 
-@spaces.GPU
+@spaces.GPU(duration=94)
 def generate_response(message, history, system_message, max_tokens, temperature, top_p, use_pipeline=False, tool_definition="", context=""):
     full_prompt = create_prompt(system_message, message, tool_definition, context)
 
     if use_pipeline:
         response = pipe(full_prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=True)[0]['generated_text']
     else:
-        inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True)
+        max_model_length = model.config.max_position_embeddings if hasattr(model.config, 'max_position_embeddings') else 8192
+
+        max_length = max_model_length - max_tokens
+
+        inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
         input_ids = inputs['input_ids'].to(model.device)
         attention_mask = inputs['attention_mask'].to(model.device)