import os
import sys
import json

import numpy as np
import triton_python_backend_utils as pb_utils

# Make the bundled IndicTrans2 inference package importable.
PWD = os.path.dirname(__file__)
INFERENCE_MODULE_DIR = "/home/indicTrans2/"
sys.path.insert(0, INFERENCE_MODULE_DIR)

from inference.engine import Model, iso_to_flores

INDIC_LANGUAGES = set(iso_to_flores)
ALLOWED_DIRECTION_STRINGS = {"en-indic", "indic-en", "indic-indic"}
FORCE_PIVOTING = False
DEFAULT_PIVOT_LANG = "en"
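# Pivoting: when only `en-indic` and `indic-en` checkpoints are available,
# indic-indic requests are served by translating through English in two hops.
# Setting FORCE_PIVOTING to True drops a native `indic-indic` model (if any)
# and always takes the two-hop route.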

class TritonPythonModel:
    def initialize(self, args):
        self.model_config = json.loads(args["model_config"])
        self.model_instance_device_id = json.loads(args["model_instance_device_id"])
        self.output_name = "OUTPUT_TEXT"
        self.output_dtype = pb_utils.triton_string_to_numpy(
            pb_utils.get_output_config_by_name(self.model_config, self.output_name)["data_type"]
        )

        # checkpoints_root_dir = os.path.join(PWD, "checkpoints")
        checkpoints_root_dir = "/models/checkpoints"
        # Each sub-folder must be named after a translation direction, with the
        # CTranslate2 weights nested inside it, e.g.:
        #   /models/checkpoints/
        #     en-indic/ct2_fp16_model/
        #     indic-en/ct2_fp16_model/
        #     indic-indic/ct2_fp16_model/   (optional; absent => pivot via English)
        checkpoint_folders = [f.path for f in os.scandir(checkpoints_root_dir) if f.is_dir()]
        if not checkpoint_folders:
            raise RuntimeError(f"No checkpoint folders in: {checkpoints_root_dir}")

        self.models = {}
        for checkpoint_folder in checkpoint_folders:
            direction_string = os.path.basename(checkpoint_folder)
            assert (
                direction_string in ALLOWED_DIRECTION_STRINGS
            ), f"Checkpoint folder-name `{direction_string}` not allowed"
            self.models[direction_string] = Model(
                os.path.join(checkpoint_folder, "ct2_fp16_model"),
                input_lang_code_format="iso",
                model_type="ctranslate2",
            )
            # self.models[direction_string] = Model(checkpoint_folder, input_lang_code_format="iso", model_type="fairseq")

        self.pivot_lang = None
        if "en-indic" in self.models and "indic-en" in self.models:
            if "indic-indic" not in self.models:
                self.pivot_lang = DEFAULT_PIVOT_LANG
            elif FORCE_PIVOTING:
                del self.models["indic-indic"]
                self.pivot_lang = DEFAULT_PIVOT_LANG
    def get_direction_string(self, input_language_id, output_language_id):
        direction_string = None
        if input_language_id == DEFAULT_PIVOT_LANG and output_language_id in INDIC_LANGUAGES:
            direction_string = "en-indic"
        elif input_language_id in INDIC_LANGUAGES:
            if output_language_id == DEFAULT_PIVOT_LANG:
                direction_string = "indic-en"
            elif output_language_id in INDIC_LANGUAGES:
                direction_string = "indic-indic"
        return direction_string

    def get_model(self, input_language_id, output_language_id):
        direction_string = self.get_direction_string(input_language_id, output_language_id)
        if direction_string in self.models:
            return self.models[direction_string]
        raise RuntimeError(f"Language-pair not supported: {input_language_id}-{output_language_id}")
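    # Illustrative resolution, assuming "hi" and "ta" are ISO codes known to
    # iso_to_flores (hypothetical inputs, shown for documentation only):
    #   get_direction_string("en", "hi") -> "en-indic"
    #   get_direction_string("hi", "en") -> "indic-en"
    #   get_direction_string("hi", "ta") -> "indic-indic"
    #   get_direction_string("fr", "hi") -> None  (unsupported source language)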
    def execute(self, requests):
        # Group inputs from all pending requests into per-direction batches so
        # that each underlying model is invoked only once per execute() call.
        modelwise_batches = {}
        responses = []
        for request_id, request in enumerate(requests):
            input_text_batch = pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT").as_numpy()
            input_language_id_batch = pb_utils.get_input_tensor_by_name(request, "INPUT_LANGUAGE_ID").as_numpy()
            output_language_id_batch = pb_utils.get_input_tensor_by_name(request, "OUTPUT_LANGUAGE_ID").as_numpy()

            input_text_batch = [input_text[0].decode("utf-8", "ignore") for input_text in input_text_batch]
            input_language_id_batch = [input_language_id[0].decode("utf-8", "ignore") for input_language_id in input_language_id_batch]
            output_language_id_batch = [output_language_id[0].decode("utf-8", "ignore") for output_language_id in output_language_id_batch]

            # Pre-allocate one placeholder row per input; avoid `[['']] * n`,
            # which would alias a single inner list across all rows.
            responses.append([[''] for _ in range(len(input_text_batch))])

            for input_id, (input_text, input_language_id, output_language_id) in enumerate(
                zip(input_text_batch, input_language_id_batch, output_language_id_batch)
            ):
                direction_string = self.get_direction_string(input_language_id, output_language_id)
                if direction_string not in self.models and not (
                    direction_string == "indic-indic" and self.pivot_lang
                ):
                    raise RuntimeError(f"Language-pair not supported: {input_language_id}-{output_language_id}")
                if direction_string not in modelwise_batches:
                    modelwise_batches[direction_string] = {
                        "payloads": [],
                        "text_id_to_req_id_input_id": [],
                    }
                modelwise_batches[direction_string]["payloads"].append([input_text, input_language_id, output_language_id])
                modelwise_batches[direction_string]["text_id_to_req_id_input_id"].append((request_id, input_id))

        for direction_string, batch in modelwise_batches.items():
            if direction_string == "indic-indic" and self.pivot_lang:
                # Two-hop translation: indic -> pivot (English) -> indic.
                # "hi" is only a representative Indic code used to look up the
                # right model; the per-payload language IDs drive the actual
                # translation.
                model = self.get_model("hi", self.pivot_lang)
                original_langs = []
                for i in range(len(batch["payloads"])):
                    original_langs.append(batch["payloads"][i][2])
                    batch["payloads"][i][2] = self.pivot_lang
                pivot_texts = model.paragraphs_batch_translate__multilingual(batch["payloads"])

                for i in range(len(batch["payloads"])):
                    batch["payloads"][i][0] = pivot_texts[i]
                    batch["payloads"][i][1] = self.pivot_lang
                    batch["payloads"][i][2] = original_langs[i]
                model = self.get_model(self.pivot_lang, "hi")
                translations = model.paragraphs_batch_translate__multilingual(batch["payloads"])
            else:
                model = self.models[direction_string]
                translations = model.paragraphs_batch_translate__multilingual(batch["payloads"])

            # Scatter the batched outputs back to their originating requests.
            for translation, (request_id, output_id) in zip(translations, batch["text_id_to_req_id_input_id"]):
                responses[request_id][output_id] = [translation]

        for i in range(len(responses)):
            responses[i] = pb_utils.InferenceResponse(output_tensors=[
                pb_utils.Tensor(
                    self.output_name,
                    np.array(responses[i], dtype=self.output_dtype),
                )
            ])
        return responses
    def execute_sequential(self, requests):
        # Simpler, unbatched variant kept for reference: translates each input
        # one at a time instead of grouping by direction. Not invoked by Triton
        # while execute() is defined.
        responses = []
        for request in requests:
            input_text_batch = pb_utils.get_input_tensor_by_name(request, "INPUT_TEXT").as_numpy()
            input_language_id_batch = pb_utils.get_input_tensor_by_name(request, "INPUT_LANGUAGE_ID").as_numpy()
            output_language_id_batch = pb_utils.get_input_tensor_by_name(request, "OUTPUT_LANGUAGE_ID").as_numpy()

            input_text_batch = [input_text[0].decode("utf-8", "ignore") for input_text in input_text_batch]
            input_language_id_batch = [input_language_id[0].decode("utf-8", "ignore") for input_language_id in input_language_id_batch]
            output_language_id_batch = [output_language_id[0].decode("utf-8", "ignore") for output_language_id in output_language_id_batch]

            generated_outputs = []
            for input_text, input_language_id, output_language_id in zip(input_text_batch, input_language_id_batch, output_language_id_batch):
                if self.pivot_lang and (input_language_id != self.pivot_lang and output_language_id != self.pivot_lang):
                    # Two-hop translation through the pivot language.
                    model = self.get_model(input_language_id, self.pivot_lang)
                    pivot_text = model.translate_paragraph(input_text, input_language_id, self.pivot_lang)
                    model = self.get_model(self.pivot_lang, output_language_id)
                    translation = model.translate_paragraph(pivot_text, self.pivot_lang, output_language_id)
                else:
                    model = self.get_model(input_language_id, output_language_id)
                    translation = model.translate_paragraph(input_text, input_language_id, output_language_id)
                generated_outputs.append([translation])

            inference_response = pb_utils.InferenceResponse(output_tensors=[
                pb_utils.Tensor(
                    self.output_name,
                    np.array(generated_outputs, dtype=self.output_dtype),
                )
            ])
            responses.append(inference_response)
        return responses
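

# A minimal client sketch (never executed by Triton, which only calls the class
# above; runnable standalone against a server that serves this model). The
# model name "indictrans2" and the server address are assumptions -- substitute
# whatever your deployment actually uses.
if __name__ == "__main__":
    import tritonclient.http as httpclient

    client = httpclient.InferenceServerClient(url="localhost:8000")

    def _bytes_input(name, value):
        # Each input is a [1, 1] BYTES tensor, matching the `[0]`-indexing and
        # utf-8 decoding done in execute() above.
        tensor = httpclient.InferInput(name, [1, 1], "BYTES")
        tensor.set_data_from_numpy(np.array([[value.encode("utf-8")]], dtype=object))
        return tensor

    response = client.infer(
        model_name="indictrans2",  # assumed deployment name
        inputs=[
            _bytes_input("INPUT_TEXT", "How are you?"),
            _bytes_input("INPUT_LANGUAGE_ID", "en"),
            _bytes_input("OUTPUT_LANGUAGE_ID", "hi"),
        ],
    )
    print(response.as_numpy("OUTPUT_TEXT")[0][0].decode("utf-8"))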