Skip to content

Commit c434d47

Browse files
Tabrizian and tanmayv25 authored
Implement the decoupled API (triton-inference-server#136)
* Some examples for using decoupled API (triton-inference-server#137) * Add repeat model demonstrating the decoupled API * Add the square model for testing decoupled API * Add the model configs * Some doc fixes * Addressed the review comments * Add clients in the example * Improve comments * Some model fixes * Initial decoupled implementation * Add response sender * Fix bugs and add response sender close * Bug fixes * Fix example models * Review edits * Fix ci Co-authored-by: Tanmay Verma <[email protected]>
1 parent 98ffa0a commit c434d47

19 files changed

+1990
-312
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,8 @@ set(
181181
PYTHNON_BACKEND_STUB_SRCS
182182
src/pb_stub_utils.h
183183
src/pb_stub_utils.cc
184+
src/response_sender.cc
185+
src/response_sender.h
184186
src/pb_stub.h
185187
src/pb_stub.cc
186188
)
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Redistribution and use in source and binary forms, with or without
4+
# modification, are permitted provided that the following conditions
5+
# are met:
6+
# * Redistributions of source code must retain the above copyright
7+
# notice, this list of conditions and the following disclaimer.
8+
# * Redistributions in binary form must reproduce the above copyright
9+
# notice, this list of conditions and the following disclaimer in the
10+
# documentation and/or other materials provided with the distribution.
11+
# * Neither the name of NVIDIA CORPORATION nor the names of its
12+
# contributors may be used to endorse or promote products derived
13+
# from this software without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
import queue
import sys
from functools import partial

import numpy as np

from tritonclient.utils import *
import tritonclient.grpc as grpcclient
33+
34+
35+
class UserData:
    """Container for everything the stream callback hands back.

    `callback` pushes each inference result (or error) onto
    `_completed_requests`; the main script drains the queue with `get()`.
    """

    def __init__(self):
        # FIFO of results/errors, filled by `callback` in arrival order.
        # Requires the file-level `import queue`.
        self._completed_requests = queue.Queue()
39+
40+
41+
def callback(user_data, result, error):
    """Record the outcome of one streamed response.

    Puts `error` on `user_data._completed_requests` when it is truthy,
    otherwise puts `result`.
    """
    outcome = error if error else result
    user_data._completed_requests.put(outcome)
46+
47+
48+
# A single request carrying the tensor data below is sent to the
# repeat_int32 model. Per that model's behavior, the client expects
# 4 responses whose outputs are [4], [2], [0] and [1], in that order.
model_name = "repeat_int32"
in_value = [4, 2, 0, 1]
delay_value = 2
wait_value = 5

# Input tensor descriptors; the actual data is attached later, just
# before the request is sent.
inputs = [
    grpcclient.InferInput('IN', [len(in_value)], "INT32"),
    grpcclient.InferInput('DELAY', [1], "UINT32"),
    grpcclient.InferInput('WAIT', [1], "UINT32"),
]

# Output tensors requested from the model.
outputs = [
    grpcclient.InferRequestedOutput(output_name)
    for output_name in ('OUT', 'IDX')
]
65+
66+
with grpcclient.InferenceServerClient(url="localhost:8001",
                                      verbose=True) as triton_client:
    # Holder for the responses/errors that `callback` pushes onto its
    # queue. It must exist before the stream is started.
    user_data = UserData()

    # Establish stream
    triton_client.start_stream(callback=partial(callback, user_data))

    # Attach the tensor data to the previously declared inputs.
    in_data = np.array(in_value, dtype=np.int32)
    inputs[0].set_data_from_numpy(in_data)
    delay_data = np.array([delay_value], dtype=np.uint32)
    inputs[1].set_data_from_numpy(delay_data)
    wait_data = np.array([wait_value], dtype=np.uint32)
    inputs[2].set_data_from_numpy(wait_data)

    request_id = "0"
    triton_client.async_stream_infer(model_name=model_name,
                                     inputs=inputs,
                                     request_id=request_id,
                                     outputs=outputs)

    # Retrieve results: one response is expected per element of in_value.
    recv_count = 0
    expected_count = len(in_value)
    result_dict = {}
    while recv_count < expected_count:
        data_item = user_data._completed_requests.get()
        # The callback enqueues the error object itself on failure.
        if isinstance(data_item, InferenceServerException):
            raise data_item

        # Group responses by the id of the request that produced them,
        # remembering the order in which they arrived.
        this_id = data_item.get_response().id
        result_dict.setdefault(this_id, []).append((recv_count, data_item))
        recv_count += 1

    # Validate results. A missing request id is treated as zero responses
    # so that it is reported below instead of raising KeyError.
    result_list = result_dict.get(request_id, [])
    if len(result_list) != len(in_value):
        print("expected {} many responses for request id {}, got {}".format(
            len(in_value), request_id, len(result_list)))
        sys.exit(1)

    # The i-th response must carry the i-th input value in 'OUT'.
    for expected_value, (_, result) in zip(in_value, result_list):
        expected_data = np.array([expected_value], dtype=np.int32)
        this_data = result.as_numpy('OUT')
        if not np.array_equal(expected_data, this_data):
            print("incorrect data: expected {}, got {}".format(
                expected_data, this_data))
            sys.exit(1)

    print('PASS: repeat_int32')
    sys.exit(0)
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Redistribution and use in source and binary forms, with or without
4+
# modification, are permitted provided that the following conditions
5+
# are met:
6+
# * Redistributions of source code must retain the above copyright
7+
# notice, this list of conditions and the following disclaimer.
8+
# * Redistributions in binary form must reproduce the above copyright
9+
# notice, this list of conditions and the following disclaimer in the
10+
# documentation and/or other materials provided with the distribution.
11+
# * Neither the name of NVIDIA CORPORATION nor the names of its
12+
# contributors may be used to endorse or promote products derived
13+
# from this software without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
# Model configuration for the "repeat_int32" Python-backend example.
name: "repeat_int32"
backend: "python"
max_batch_size: 0

# The model is served with the decoupled transaction policy.
model_transaction_policy {
  decoupled: True
}

# Request tensors.
input [
  {
    name: "IN"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    name: "DELAY"
    data_type: TYPE_UINT32
    dims: [ -1 ]
  },
  {
    name: "WAIT"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]

# Response tensors.
output [
  {
    name: "OUT"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "IDX"
    data_type: TYPE_UINT32
    dims: [ 1 ]
  }
]

instance_group [{ kind: KIND_CPU }]
63+

0 commit comments

Comments
 (0)