# Curt-Park
# Add fastertransformer model
# 966f777
# Top-level settings of the Triton model configuration.
# The "ensemble" platform means this model has no backend of its own; it only
# wires together the models listed under ensemble_scheduling.
platform: "ensemble"
name: "ensemble"
# With a non-zero max_batch_size, the dims declared for inputs and outputs do
# not include the leading batch dimension (per Triton's model configuration rules).
max_batch_size: 1024
# Tensors a client may send to the ensemble.
input [
  # --- Consumed by the "preprocessing" step -------------------------------
  { name: "INPUT_0" data_type: TYPE_STRING dims: [ -1 ] },  # -> QUERY
  { name: "INPUT_1" data_type: TYPE_UINT32 dims: [ -1 ] },  # -> REQUEST_OUTPUT_LEN
  { name: "INPUT_2" data_type: TYPE_STRING dims: [ -1 ] },  # -> BAD_WORDS_DICT
  { name: "INPUT_3" data_type: TYPE_STRING dims: [ -1 ] },  # -> STOP_WORDS_DICT

  # --- Optional single-element tensors -----------------------------------
  # Each of these is passed through, under the same name, to the
  # "codegen-350M-mono-gptj" step.
  { name: "runtime_top_k" data_type: TYPE_UINT32 dims: [ 1 ] optional: true },
  { name: "runtime_top_p" data_type: TYPE_FP32 dims: [ 1 ] optional: true },
  { name: "beam_search_diversity_rate" data_type: TYPE_FP32 dims: [ 1 ] optional: true },
  { name: "temperature" data_type: TYPE_FP32 dims: [ 1 ] optional: true },
  { name: "len_penalty" data_type: TYPE_FP32 dims: [ 1 ] optional: true },
  { name: "repetition_penalty" data_type: TYPE_FP32 dims: [ 1 ] optional: true },
  { name: "random_seed" data_type: TYPE_UINT64 dims: [ 1 ] optional: true },
  { name: "is_return_log_probs" data_type: TYPE_BOOL dims: [ 1 ] optional: true },
  { name: "beam_width" data_type: TYPE_UINT32 dims: [ 1 ] optional: true },
  { name: "start_id" data_type: TYPE_UINT32 dims: [ 1 ] optional: true },
  { name: "end_id" data_type: TYPE_UINT32 dims: [ 1 ] optional: true },
  { name: "prompt_learning_task_name_ids" data_type: TYPE_UINT32 dims: [ 1 ] optional: true },
  { name: "top_p_decay" data_type: TYPE_FP32 dims: [ 1 ] optional: true },
  { name: "top_p_min" data_type: TYPE_FP32 dims: [ 1 ] optional: true },
  { name: "top_p_reset_ids" data_type: TYPE_UINT32 dims: [ 1 ] optional: true }
]
# Tensors the ensemble returns to the client.
output [
  # Produced by the "postprocessing" step (its OUTPUT tensor).
  { name: "OUTPUT_0" data_type: TYPE_STRING dims: [ -1, -1 ] },

  # Produced by the "codegen-350M-mono-gptj" step and exposed under the same names.
  { name: "sequence_length" data_type: TYPE_UINT32 dims: [ -1 ] },
  { name: "cum_log_probs" data_type: TYPE_FP32 dims: [ -1 ] },
  { name: "output_log_probs" data_type: TYPE_FP32 dims: [ -1, -1 ] }
]
# Dataflow of the ensemble: preprocessing -> codegen-350M-mono-gptj -> postprocessing.
# In every map entry, `key` is the tensor name inside the step's model and
# `value` is the ensemble-level tensor name. Names starting with "_" are
# intermediate tensors that exist only between steps; all other values refer
# to the ensemble's declared inputs/outputs.
# model_version: -1 selects the latest available version of the step's model
# (per Triton's model_config.proto).
ensemble_scheduling {
  step [
    {
      # Step 1: consumes the four INPUT_* request tensors and emits the
      # intermediate tensors used by the next step.
      model_name: "preprocessing"
      model_version: -1

      input_map { key: "QUERY" value: "INPUT_0" }
      input_map { key: "REQUEST_OUTPUT_LEN" value: "INPUT_1" }
      input_map { key: "BAD_WORDS_DICT" value: "INPUT_2" }
      input_map { key: "STOP_WORDS_DICT" value: "INPUT_3" }

      output_map { key: "INPUT_ID" value: "_INPUT_ID" }
      output_map { key: "BAD_WORDS_IDS" value: "_BAD_WORDS_IDS" }
      output_map { key: "STOP_WORDS_IDS" value: "_STOP_WORDS_IDS" }
      output_map { key: "REQUEST_INPUT_LEN" value: "_REQUEST_INPUT_LEN" }
      output_map { key: "REQUEST_OUTPUT_LEN" value: "_REQUEST_OUTPUT_LEN" }
    },
    {
      # Step 2: takes the preprocessing outputs plus the optional request
      # tensors, and emits _TOKENS_BATCH along with three ensemble outputs.
      model_name: "codegen-350M-mono-gptj"
      model_version: -1

      # Fed from step 1.
      input_map { key: "input_ids" value: "_INPUT_ID" }
      input_map { key: "input_lengths" value: "_REQUEST_INPUT_LEN" }
      input_map { key: "request_output_len" value: "_REQUEST_OUTPUT_LEN" }

      # Fed straight from the client request (same name on both sides).
      input_map { key: "prompt_learning_task_name_ids" value: "prompt_learning_task_name_ids" }
      input_map { key: "runtime_top_k" value: "runtime_top_k" }
      input_map { key: "runtime_top_p" value: "runtime_top_p" }
      input_map { key: "beam_search_diversity_rate" value: "beam_search_diversity_rate" }
      input_map { key: "temperature" value: "temperature" }
      input_map { key: "len_penalty" value: "len_penalty" }
      input_map { key: "repetition_penalty" value: "repetition_penalty" }
      input_map { key: "random_seed" value: "random_seed" }
      input_map { key: "is_return_log_probs" value: "is_return_log_probs" }
      input_map { key: "beam_width" value: "beam_width" }
      input_map { key: "start_id" value: "start_id" }
      input_map { key: "end_id" value: "end_id" }

      # Fed from step 1.
      input_map { key: "stop_words_list" value: "_STOP_WORDS_IDS" }
      input_map { key: "bad_words_list" value: "_BAD_WORDS_IDS" }

      # Fed straight from the client request (same name on both sides).
      input_map { key: "top_p_decay" value: "top_p_decay" }
      input_map { key: "top_p_min" value: "top_p_min" }
      input_map { key: "top_p_reset_ids" value: "top_p_reset_ids" }

      output_map { key: "output_ids" value: "_TOKENS_BATCH" }
      output_map { key: "sequence_length" value: "sequence_length" }
      output_map { key: "cum_log_probs" value: "cum_log_probs" }
      output_map { key: "output_log_probs" value: "output_log_probs" }
    },
    {
      # Step 3: converts _TOKENS_BATCH into the ensemble's OUTPUT_0 tensor.
      model_name: "postprocessing"
      model_version: -1

      input_map { key: "TOKENS_BATCH" value: "_TOKENS_BATCH" }
      output_map { key: "OUTPUT" value: "OUTPUT_0" }
    }
  ]
}