fix model length gpuzero timeout
app.py  CHANGED
@@ -31,14 +31,18 @@ def create_prompt(system_message, user_message, tool_definition="", context=""):
     else:
         return f"<extra_id_0>System\n{system_message}\n\n<extra_id_1>User\n{user_message}\n<extra_id_1>Assistant\n"
 
-@spaces.GPU
+@spaces.GPU(duration=94)
 def generate_response(message, history, system_message, max_tokens, temperature, top_p, use_pipeline=False, tool_definition="", context=""):
     full_prompt = create_prompt(system_message, message, tool_definition, context)
 
     if use_pipeline:
         response = pipe(full_prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=True)[0]['generated_text']
     else:
-
+        max_model_length = model.config.max_position_embeddings if hasattr(model.config, 'max_position_embeddings') else 8192
+
+        max_length = max_model_length - max_tokens
+
+        inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
         input_ids = inputs['input_ids'].to(model.device)
         attention_mask = inputs['attention_mask'].to(model.device)
 
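Taken together, the added lines cap the tokenized prompt at the model's context window minus the requested completion length, so generation no longer fails when a long prompt plus max_tokens would exceed max_position_embeddings; the decorator change appears to request a ZeroGPU allocation of up to 94 seconds per call instead of the default, which matches the "gpuzero timeout" part of the commit title. Below is a minimal, self-contained sketch of the same clamping logic, assuming any Hugging Face causal-LM checkpoint; the gpt2 checkpoint and the clamp_prompt helper name are illustrative choices, not part of the Space's code.

# Minimal sketch of the prompt-clamping logic added above.
# Assumptions: "gpt2" is used only because it is small to download;
# the clamp_prompt helper name is illustrative and does not appear in the Space.
from transformers import AutoModelForCausalLM, AutoTokenizer

def clamp_prompt(tokenizer, model, prompt, max_new_tokens):
    # Fall back to 8192 when the config does not expose max_position_embeddings,
    # mirroring the fallback in the diff.
    max_model_length = getattr(model.config, "max_position_embeddings", 8192)
    # Reserve room for the tokens we intend to generate.
    max_length = max_model_length - max_new_tokens
    return tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length,
    )

if __name__ == "__main__":
    tok = AutoTokenizer.from_pretrained("gpt2")
    tok.pad_token = tok.eos_token  # gpt2 has no pad token; reuse EOS so padding=True is valid
    mdl = AutoModelForCausalLM.from_pretrained("gpt2")
    inputs = clamp_prompt(tok, mdl, "Hello " * 5000, max_new_tokens=256)
    # gpt2's window is 1024 tokens, so the prompt is truncated to 1024 - 256 = 768.
    print(inputs["input_ids"].shape)

Budgeting the truncation as max_model_length - max_tokens, rather than tokenizing without a cap, is what keeps model.generate from being asked to extend a sequence that is already at or over the position-embedding limit.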