Skip to content

Commit 884ff52

Browse files
authored
Add SaveToSharedMemory and LoadFromSharedMemory to InferRequest and InferResponse objects (triton-inference-server#67)
* Move InferenceRequest implementation to C++
* Move InferResponse to C++
* Add save-to-shared-memory methods for pb_tensor
* Disable copy and assignment constructors for PbTensor
* Refactor input tensor creation on the main process
* Refactor tensor creation on the main process
* Add LoadFromSharedMemory to infer_request
* Add LoadFromSharedMemory to infer_response
* Add BLS exec
* Fix identity tensor
* Fix TYPE_BYTES tensor
* Remove BLS-related changes
* Fix the ifdef flags for CPU-only build
* Review edits
* Rebase fix
1 parent 82d4db8 commit 884ff52

17 files changed

+1649
-1151
lines changed

CMakeLists.txt

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,10 +128,20 @@ configure_file(src/libtriton_python.ldscript libtriton_python.ldscript COPYONLY)
128128
add_library(
129129
triton-python-backend SHARED
130130
src/python.cc
131-
src/pb_utils.cc
132-
src/pb_utils.h
133131
src/pb_env.cc
132+
src/infer_response.cc
133+
src/infer_response.h
134+
src/pb_main_utils.cc
135+
src/pb_main_utils.h
136+
src/infer_request.cc
137+
src/infer_request.h
134138
src/pb_env.h
139+
src/pb_error.cc
140+
src/pb_error.h
141+
src/pb_tensor.cc
142+
src/pb_tensor.h
143+
src/pb_utils.cc
144+
src/pb_utils.h
135145
src/shm_manager.cc
136146
src/shm_manager.h
137147
)
@@ -140,7 +150,14 @@ add_executable(
140150
triton-python-backend-stub
141151
src/pb_stub_utils.cc
142152
src/pb_stub_utils.h
153+
src/infer_request.cc
154+
src/infer_request.h
155+
src/infer_response.cc
156+
src/infer_response.h
143157
src/pb_stub.cc
158+
src/pb_stub.h
159+
src/pb_error.cc
160+
src/pb_error.h
144161
src/pb_utils.cc
145162
src/pb_utils.h
146163
src/pb_tensor.cc
@@ -171,10 +188,12 @@ target_compile_options(
171188
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
172189
-fvisibility=hidden -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
173190
)
191+
target_compile_definitions(triton-python-backend-stub PRIVATE TRITON_PB_STUB)
174192

175193
target_link_libraries(
176194
triton-python-backend
177195
PRIVATE
196+
dlpack
178197
triton-core-serverstub # from repo-core
179198
triton-backend-utils # from repo-backend
180199
ZLIB::ZLIB

src/infer_request.cc

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
//
3+
// Redistribution and use in source and binary forms, with or without
4+
// modification, are permitted provided that the following conditions
5+
// are met:
6+
// * Redistributions of source code must retain the above copyright
7+
// notice, this list of conditions and the following disclaimer.
8+
// * Redistributions in binary form must reproduce the above copyright
9+
// notice, this list of conditions and the following disclaimer in the
10+
// documentation and/or other materials provided with the distribution.
11+
// * Neither the name of NVIDIA CORPORATION nor the names of its
12+
// contributors may be used to endorse or promote products derived
13+
// from this software without specific prior written permission.
14+
//
15+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
#include "infer_request.h"
28+
29+
#ifdef TRITON_PB_STUB
30+
#include "infer_response.h"
31+
#include "pb_stub.h"
32+
#endif
33+
34+
namespace triton { namespace backend { namespace python {
35+
36+
// Construct an inference request from its already-decoded pieces. All
// containers are copied; the caller keeps ownership of its arguments.
InferRequest::InferRequest(
    const std::string& request_id, uint64_t correlation_id,
    const std::vector<std::shared_ptr<PbTensor>>& inputs,
    const std::vector<std::string>& requested_output_names,
    const std::string& model_name, const int64_t model_version)
    : request_id_(request_id), correlation_id_(correlation_id),
      inputs_(inputs), requested_output_names_(requested_output_names),
      model_name_(model_name), model_version_(model_version)
{
}
46+
47+
const std::vector<std::shared_ptr<PbTensor>>&
48+
InferRequest::Inputs()
49+
{
50+
return inputs_;
51+
}
52+
53+
const std::string&
54+
InferRequest::RequestId()
55+
{
56+
return request_id_;
57+
}
58+
59+
uint64_t
60+
InferRequest::CorrelationId()
61+
{
62+
return correlation_id_;
63+
}
64+
65+
const std::vector<std::string>&
66+
InferRequest::RequestedOutputNames()
67+
{
68+
return requested_output_names_;
69+
}
70+
71+
const std::string&
72+
InferRequest::ModelName()
73+
{
74+
return model_name_;
75+
}
76+
77+
int64_t
78+
InferRequest::ModelVersion()
79+
{
80+
return model_version_;
81+
}
82+
83+
void
84+
InferRequest::SaveToSharedMemory(
85+
std::unique_ptr<SharedMemory>& shm_pool, Request* request_shm)
86+
{
87+
request_shm->correlation_id = this->CorrelationId();
88+
off_t id_offset;
89+
SaveStringToSharedMemory(shm_pool, id_offset, this->RequestId().c_str());
90+
request_shm->id = id_offset;
91+
request_shm->requested_output_count = this->RequestedOutputNames().size();
92+
off_t requested_output_names_offset;
93+
off_t* requested_output_names;
94+
shm_pool->Map(
95+
(char**)&requested_output_names,
96+
sizeof(off_t) * request_shm->requested_output_count,
97+
requested_output_names_offset);
98+
99+
request_shm->requested_output_names = requested_output_names_offset;
100+
size_t i = 0;
101+
for (auto& requested_output_name : requested_output_names_) {
102+
SaveStringToSharedMemory(
103+
shm_pool, requested_output_names[i], requested_output_name.c_str());
104+
i++;
105+
}
106+
107+
request_shm->requested_input_count = this->Inputs().size();
108+
request_shm->model_version = this->model_version_;
109+
SaveStringToSharedMemory(
110+
shm_pool, request_shm->model_name, this->model_name_.c_str());
111+
}
112+
113+
std::unique_ptr<InferRequest>
114+
InferRequest::LoadFromSharedMemory(
115+
std::unique_ptr<SharedMemory>& shm_pool, off_t request_offset)
116+
{
117+
Request* request;
118+
shm_pool->MapOffset((char**)&request, request_offset);
119+
120+
char* id = nullptr;
121+
LoadStringFromSharedMemory(shm_pool, request->id, id);
122+
123+
uint32_t requested_input_count = request->requested_input_count;
124+
125+
std::vector<std::shared_ptr<PbTensor>> py_input_tensors;
126+
for (size_t input_idx = 0; input_idx < requested_input_count; ++input_idx) {
127+
std::shared_ptr<PbTensor> pb_input_tensor = PbTensor::LoadFromSharedMemory(
128+
shm_pool, request->inputs + sizeof(Tensor) * input_idx);
129+
py_input_tensors.emplace_back(std::move(pb_input_tensor));
130+
}
131+
132+
std::vector<std::string> requested_output_names;
133+
uint32_t requested_output_count = request->requested_output_count;
134+
off_t* output_names;
135+
shm_pool->MapOffset((char**)&output_names, request->requested_output_names);
136+
137+
for (size_t output_idx = 0; output_idx < requested_output_count;
138+
++output_idx) {
139+
char* output_name = nullptr;
140+
LoadStringFromSharedMemory(shm_pool, output_names[output_idx], output_name);
141+
requested_output_names.emplace_back(output_name);
142+
}
143+
144+
char* model_name;
145+
LoadStringFromSharedMemory(shm_pool, request->model_name, model_name);
146+
return std::make_unique<InferRequest>(
147+
id, request->correlation_id, std::move(py_input_tensors),
148+
requested_output_names, model_name, request->model_version);
149+
}
150+
151+
}}} // namespace triton::backend::python

src/infer_request.h

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
//
3+
// Redistribution and use in source and binary forms, with or without
4+
// modification, are permitted provided that the following conditions
5+
// are met:
6+
// * Redistributions of source code must retain the above copyright
7+
// notice, this list of conditions and the following disclaimer.
8+
// * Redistributions in binary form must reproduce the above copyright
9+
// notice, this list of conditions and the following disclaimer in the
10+
// documentation and/or other materials provided with the distribution.
11+
// * Neither the name of NVIDIA CORPORATION nor the names of its
12+
// contributors may be used to endorse or promote products derived
13+
// from this software without specific prior written permission.
14+
//
15+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
#pragma once
28+
29+
#include <string>
30+
#include "pb_tensor.h"
31+
32+
namespace triton { namespace backend { namespace python {
33+
class InferRequest {
34+
std::string request_id_;
35+
uint64_t correlation_id_;
36+
std::vector<std::shared_ptr<PbTensor>> inputs_;
37+
std::vector<std::string> requested_output_names_;
38+
std::string model_name_;
39+
int64_t model_version_;
40+
41+
public:
42+
InferRequest(
43+
const std::string& request_id, uint64_t correlation_id,
44+
const std::vector<std::shared_ptr<PbTensor>>& inputs,
45+
const std::vector<std::string>& requested_output_names,
46+
const std::string& model_name, const int64_t model_version);
47+
48+
const std::vector<std::shared_ptr<PbTensor>>& Inputs();
49+
const std::string& RequestId();
50+
uint64_t CorrelationId();
51+
const std::string& ModelName();
52+
int64_t ModelVersion();
53+
const std::vector<std::string>& RequestedOutputNames();
54+
55+
/// Save an Inference Request to shared memory.
56+
/// \param shm_pool Shared memory pool to save the inference request.
57+
/// \param request_shm A pointer to a location in shared memory with enough
58+
/// space to save the inference request.
59+
void SaveToSharedMemory(
60+
std::unique_ptr<SharedMemory>& shm_pool, Request* request_shm);
61+
62+
/// Create an Inference Request object from shared memory.
63+
/// \param shm_pool Shared memory pool
64+
/// \param request_offset Shared memory offset of the request.
65+
static std::unique_ptr<InferRequest> LoadFromSharedMemory(
66+
std::unique_ptr<SharedMemory>& shm_pool,
67+
off_t request_offset);
68+
};
69+
}}}; // namespace triton::backend::python

0 commit comments

Comments
 (0)