|
import json |
|
import struct |
|
|
|
import requests |
|
|
|
|
|
# Input text to send to the model.
text: str = "My name is Wolfgang and I live in Berlin"

# Triton's binary-data extension packs a BYTES element as a 4-byte
# little-endian length prefix followed by the raw UTF-8 payload.
context_text: bytes = text.encode("UTF-8")
length_prefix: bytes = struct.pack("<I", len(context_text))
context_text_struct: bytes = length_prefix + context_text

len_context_text_struct = len(context_text_struct)
data_struct = context_text_struct

# v2 inference-request header. The TEXT input carries no inline "data"
# field; instead binary_data_size announces how many bytes of binary
# payload follow the JSON header. OUTPUT_TEXT is requested as plain JSON.
text_input = {
    "name": "TEXT",
    "shape": [1],
    "datatype": "BYTES",
    "parameters": {"binary_data_size": len_context_text_struct},
}
request_data = {
    "inputs": [text_input],
    "outputs": [{"name": "OUTPUT_TEXT", "parameters": {"binary_data": False}}],
}

# Complete HTTP body: JSON header immediately followed by the packed tensor.
json_header: bytes = json.dumps(request_data).encode()
data = json_header + data_struct
print(data)

# Persist the body so it can be replayed later (e.g. with curl).
with open("t5_query_body.bin", "wb") as f:
    f.write(data)
|
|
|
|
|
# Print a curl command that replays the saved body. The
# Inference-Header-Content-Length header tells Triton how many leading
# bytes of the body are the JSON inference header; the remainder is the
# binary tensor payload.
json_header_len = len(json.dumps(request_data).encode())
curl = f"""

curl -X POST http://localhost:8000/v2/models/t5-dec-if-node_onnx_generate/versions/1/infer \

--data-binary "@demo/generative-model/t5_query_body.bin" \

--header "Inference-Header-Content-Length: {json_header_len}"

"""
print(curl)
|
|
|
|
|
# POST the same request with `requests`. Unlike curl, requests has no
# "@file" shorthand: passing "@demo/..." would send that literal string
# as the request body, so we send the prebuilt body bytes directly.
# Header values must be decimal strings — the original's
# int.to_bytes(5, "little") produced raw little-endian bytes, which is
# not a valid value for Inference-Header-Content-Length.
res = requests.post(
    url="http://localhost:8000/v2/models/t5-dec-if-node_onnx_generate/versions/1/infer",
    data=data,  # JSON inference header + binary tensor payload built above
    headers={
        "Inference-Header-Content-Length": str(len(json.dumps(request_data).encode())),
    },
    timeout=60,  # don't hang forever if the server is unreachable
)
|
|