# Triton Inference Server model configuration (protobuf text format).
# Declares an ensemble: a pipeline that has no weights of its own and only
# routes tensors between the models listed under ensemble_scheduling.
name: "ensemble"
platform: "ensemble"
# Because max_batch_size is greater than zero, the dims declared for the
# inputs and outputs of this model describe a single request; Triton adds the
# leading batch dimension itself.
max_batch_size: 1024
# Tensors a client may send to the ensemble.
#
# INPUT_0 .. INPUT_3 are consumed by the "preprocessing" step (see the
# input_map entries under ensemble_scheduling):
#   INPUT_0 -> QUERY
#   INPUT_1 -> REQUEST_OUTPUT_LEN
#   INPUT_2 -> BAD_WORDS_DICT
#   INPUT_3 -> STOP_WORDS_DICT
#
# Every remaining input is marked optional and is forwarded, under the same
# name, to the "codegen-350M-mono-gptj" step.
input [
  {
    name: "INPUT_0"
    data_type: TYPE_STRING
    dims: [ -1 ]
  },
  {
    name: "INPUT_1"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "INPUT_2"
    data_type: TYPE_STRING
    dims: [ -1 ]
  },
  {
    name: "INPUT_3"
    data_type: TYPE_STRING
    dims: [ -1 ]
  },
  # --- Optional tensors forwarded to the codegen-350M-mono-gptj step ---
  {
    name: "runtime_top_k"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "runtime_top_p"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "beam_search_diversity_rate"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "temperature"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "len_penalty"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "repetition_penalty"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "random_seed"
    data_type: TYPE_UINT64
    dims: [ 1 ]
    optional: true
  },
  {
    name: "is_return_log_probs"
    data_type: TYPE_BOOL
    dims: [ 1 ]
    optional: true
  },
  {
    name: "beam_width"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "start_id"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "end_id"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "prompt_learning_task_name_ids"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "top_p_decay"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "top_p_min"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "top_p_reset_ids"
    data_type: TYPE_UINT32
    dims: [ 1 ]
    optional: true
  }
]
# Tensors returned to the client.
#
# OUTPUT_0 is produced by the "postprocessing" step (its OUTPUT tensor).
# sequence_length, cum_log_probs and output_log_probs are produced directly by
# the "codegen-350M-mono-gptj" step and exposed under the same names.
output [
  {
    name: "OUTPUT_0"
    data_type: TYPE_STRING
    dims: [ -1, -1 ]
  },
  {
    name: "sequence_length"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "cum_log_probs"
    data_type: TYPE_FP32
    dims: [ -1 ]
  },
  {
    name: "output_log_probs"
    data_type: TYPE_FP32
    dims: [ -1, -1 ]
  }
]
# Data flow of the ensemble. In every input_map / output_map entry, "key" is
# the tensor name inside the step's model and "value" is the tensor name inside
# the ensemble. Names starting with "_" are intermediate tensors that exist
# only between steps; they are not declared as ensemble inputs or outputs.
# model_version: -1 selects the latest available version of the step's model.
ensemble_scheduling {
  step [
    {
      # Step 1: consumes the four INPUT_* ensemble inputs and produces the
      # intermediate tensors fed to the generation model.
      model_name: "preprocessing"
      model_version: -1
      input_map {
        key: "QUERY"
        value: "INPUT_0"
      }
      input_map {
        key: "REQUEST_OUTPUT_LEN"
        value: "INPUT_1"
      }
      input_map {
        key: "BAD_WORDS_DICT"
        value: "INPUT_2"
      }
      input_map {
        key: "STOP_WORDS_DICT"
        value: "INPUT_3"
      }
      output_map {
        key: "INPUT_ID"
        value: "_INPUT_ID"
      }
      output_map {
        key: "BAD_WORDS_IDS"
        value: "_BAD_WORDS_IDS"
      }
      output_map {
        key: "STOP_WORDS_IDS"
        value: "_STOP_WORDS_IDS"
      }
      output_map {
        key: "REQUEST_INPUT_LEN"
        value: "_REQUEST_INPUT_LEN"
      }
      output_map {
        key: "REQUEST_OUTPUT_LEN"
        value: "_REQUEST_OUTPUT_LEN"
      }
    },
    {
      # Step 2: the generation model. Takes the intermediate tensors from
      # step 1 plus the optional ensemble inputs (forwarded under identical
      # names) and emits _TOKENS_BATCH for step 3, along with three tensors
      # that are exposed directly as ensemble outputs.
      model_name: "codegen-350M-mono-gptj"
      model_version: -1
      # Intermediate tensors produced by the preprocessing step.
      input_map {
        key: "input_ids"
        value: "_INPUT_ID"
      }
      input_map {
        key: "input_lengths"
        value: "_REQUEST_INPUT_LEN"
      }
      input_map {
        key: "request_output_len"
        value: "_REQUEST_OUTPUT_LEN"
      }
      # Optional ensemble inputs, passed through by name.
      input_map {
        key: "prompt_learning_task_name_ids"
        value: "prompt_learning_task_name_ids"
      }
      input_map {
        key: "runtime_top_k"
        value: "runtime_top_k"
      }
      input_map {
        key: "runtime_top_p"
        value: "runtime_top_p"
      }
      input_map {
        key: "beam_search_diversity_rate"
        value: "beam_search_diversity_rate"
      }
      input_map {
        key: "temperature"
        value: "temperature"
      }
      input_map {
        key: "len_penalty"
        value: "len_penalty"
      }
      input_map {
        key: "repetition_penalty"
        value: "repetition_penalty"
      }
      input_map {
        key: "random_seed"
        value: "random_seed"
      }
      input_map {
        key: "is_return_log_probs"
        value: "is_return_log_probs"
      }
      input_map {
        key: "beam_width"
        value: "beam_width"
      }
      input_map {
        key: "start_id"
        value: "start_id"
      }
      input_map {
        key: "end_id"
        value: "end_id"
      }
      # Stop/bad word id lists come from the preprocessing step.
      input_map {
        key: "stop_words_list"
        value: "_STOP_WORDS_IDS"
      }
      input_map {
        key: "bad_words_list"
        value: "_BAD_WORDS_IDS"
      }
      input_map {
        key: "top_p_decay"
        value: "top_p_decay"
      }
      input_map {
        key: "top_p_min"
        value: "top_p_min"
      }
      input_map {
        key: "top_p_reset_ids"
        value: "top_p_reset_ids"
      }
      output_map {
        key: "output_ids"
        value: "_TOKENS_BATCH"
      }
      output_map {
        key: "sequence_length"
        value: "sequence_length"
      }
      output_map {
        key: "cum_log_probs"
        value: "cum_log_probs"
      }
      output_map {
        key: "output_log_probs"
        value: "output_log_probs"
      }
    },
    {
      # Step 3: consumes _TOKENS_BATCH from step 2 and produces the
      # TYPE_STRING ensemble output OUTPUT_0.
      model_name: "postprocessing"
      model_version: -1
      input_map {
        key: "TOKENS_BATCH"
        value: "_TOKENS_BATCH"
      }
      output_map {
        key: "OUTPUT"
        value: "OUTPUT_0"
      }
    }
  ]
}