fix model length gpuzero timeout
app.py  CHANGED
@@ -31,14 +31,18 @@ def create_prompt(system_message, user_message, tool_definition="", context=""):
     else:
         return f"<extra_id_0>System\n{system_message}\n\n<extra_id_1>User\n{user_message}\n<extra_id_1>Assistant\n"
 
-@spaces.GPU
+@spaces.GPU(duration=94)
 def generate_response(message, history, system_message, max_tokens, temperature, top_p, use_pipeline=False, tool_definition="", context=""):
     full_prompt = create_prompt(system_message, message, tool_definition, context)
 
     if use_pipeline:
         response = pipe(full_prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=True)[0]['generated_text']
     else:
-
+        max_model_length = model.config.max_position_embeddings if hasattr(model.config, 'max_position_embeddings') else 8192
+
+        max_length = max_model_length - max_tokens
+
+        inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
         input_ids = inputs['input_ids'].to(model.device)
         attention_mask = inputs['attention_mask'].to(model.device)
 
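Taken together, the added lines cap the tokenized prompt at the model's context window minus the requested completion length, so generation no longer fails when a long prompt plus max_tokens would exceed max_position_embeddings; the decorator change appears to request a ZeroGPU allocation of up to 94 seconds per call instead of the default, which matches the "gpuzero timeout" part of the commit title. Below is a minimal, self-contained sketch of the same clamping logic, assuming any Hugging Face causal-LM checkpoint; the gpt2 checkpoint and the clamp_prompt helper name are illustrative choices, not part of the Space's code.

# Minimal sketch of the prompt-clamping logic added above.
# Assumptions: "gpt2" is used only because it is small to download;
# the clamp_prompt helper name is illustrative and does not appear in the Space.
from transformers import AutoModelForCausalLM, AutoTokenizer

def clamp_prompt(tokenizer, model, prompt, max_new_tokens):
    # Fall back to 8192 when the config does not expose max_position_embeddings,
    # mirroring the fallback in the diff.
    max_model_length = getattr(model.config, "max_position_embeddings", 8192)
    # Reserve room for the tokens we intend to generate.
    max_length = max_model_length - max_new_tokens
    return tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length,
    )

if __name__ == "__main__":
    tok = AutoTokenizer.from_pretrained("gpt2")
    tok.pad_token = tok.eos_token  # gpt2 has no pad token; reuse EOS so padding=True is valid
    mdl = AutoModelForCausalLM.from_pretrained("gpt2")
    inputs = clamp_prompt(tok, mdl, "Hello " * 5000, max_new_tokens=256)
    # gpt2's window is 1024 tokens, so the prompt is truncated to 1024 - 256 = 768.
    print(inputs["input_ids"].shape)

Budgeting the truncation as max_model_length - max_tokens, rather than tokenizing without a cap, is what keeps model.generate from being asked to extend a sequence that is already at or over the position-embedding limit.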