Skip to content

Commit 474fb30

Browse files
authored
Exposing trace context to python backend (triton-inference-server#346)
Exposing the trace context to the Python backend
1 parent 537571b commit 474fb30

File tree

8 files changed

+236
-31
lines changed

8 files changed

+236
-31
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ set(
155155
src/infer_response.h
156156
src/infer_request.cc
157157
src/infer_request.h
158+
src/infer_trace.cc
159+
src/infer_trace.h
158160
src/message_queue.h
159161
src/ipc_message.cc
160162
src/ipc_message.h

src/infer_request.cc

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ InferRequest::GetPreferredMemory()
170170
}
171171

172172
InferenceTrace&
173-
InferRequest::Trace()
173+
InferRequest::GetTrace()
174174
{
175175
return trace_;
176176
}
@@ -210,7 +210,6 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
210210
infer_request_shm_ptr_->is_decoupled = is_decoupled_;
211211
infer_request_shm_ptr_->timeout = timeout_;
212212
infer_request_shm_ptr_->preferred_memory = preferred_memory_;
213-
infer_request_shm_ptr_->trace = trace_;
214213
infer_request_shm_ptr_->request_release_flags = request_release_flags_;
215214

216215
output_names_handle_shm_ptr_ =
@@ -258,6 +257,9 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
258257
PbString::Create(shm_pool, Parameters());
259258
infer_request_shm_ptr_->parameters_shm_handle = parameters_shm->ShmHandle();
260259

260+
trace_.SaveToSharedMemory(shm_pool);
261+
infer_request_shm_ptr_->trace_shm_handle = trace_.ShmHandle();
262+
261263
// Save the references to shared memory.
262264
infer_request_shm_ = std::move(infer_request_shm);
263265
request_id_shm_ = std::move(request_id_shm);
@@ -312,6 +314,10 @@ InferRequest::LoadFromSharedMemory(
312314
CorrelationId::LoadFromSharedMemory(
313315
shm_pool, infer_request_shm_ptr->correlation_id_shm_handle);
314316

317+
std::unique_ptr<InferenceTrace> infer_trace_shm =
318+
InferenceTrace::LoadFromSharedMemory(
319+
shm_pool, infer_request_shm_ptr->trace_shm_handle);
320+
315321
std::unique_ptr<PbString> model_name_shm = PbString::LoadFromSharedMemory(
316322
shm_pool, infer_request_shm_ptr->model_name_shm_handle);
317323
std::unique_ptr<PbString> request_id_shm = PbString::LoadFromSharedMemory(
@@ -321,8 +327,8 @@ InferRequest::LoadFromSharedMemory(
321327

322328
return std::unique_ptr<InferRequest>(new InferRequest(
323329
infer_request_shm, request_id_shm, correlation_id_shm,
324-
requested_output_names_shm, model_name_shm, input_tensors,
325-
parameters_shm));
330+
requested_output_names_shm, model_name_shm, input_tensors, parameters_shm,
331+
infer_trace_shm));
326332
}
327333

328334
InferRequest::InferRequest(
@@ -332,7 +338,8 @@ InferRequest::InferRequest(
332338
std::vector<std::unique_ptr<PbString>>& requested_output_names_shm,
333339
std::unique_ptr<PbString>& model_name_shm,
334340
std::vector<std::shared_ptr<PbTensor>>& input_tensors,
335-
std::unique_ptr<PbString>& parameters_shm)
341+
std::unique_ptr<PbString>& parameters_shm,
342+
std::unique_ptr<InferenceTrace>& infer_trace_shm)
336343
: infer_request_shm_(std::move(infer_request_shm)),
337344
request_id_shm_(std::move(request_id_shm)),
338345
requested_output_names_shm_(std::move(requested_output_names_shm)),
@@ -373,7 +380,7 @@ InferRequest::InferRequest(
373380
is_decoupled_ = infer_request_shm_ptr_->is_decoupled;
374381
timeout_ = infer_request_shm_ptr_->timeout;
375382
preferred_memory_ = infer_request_shm_ptr_->preferred_memory;
376-
trace_ = infer_request_shm_ptr_->trace;
383+
trace_ = InferenceTrace(infer_trace_shm);
377384
request_release_flags_ = infer_request_shm_ptr_->request_release_flags;
378385

379386
#ifdef TRITON_PB_STUB

src/infer_request.h

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
#include "correlation_id.h"
3333
#include "infer_response.h"
34+
#include "infer_trace.h"
3435
#include "pb_preferred_memory.h"
3536
#include "pb_tensor.h"
3637

@@ -43,22 +44,6 @@ namespace triton { namespace backend { namespace python {
4344

4445
class Stub;
4546

46-
//
47-
// Inference Trace
48-
//
49-
struct InferenceTrace {
50-
#ifndef TRITON_PB_STUB
51-
TRITONSERVER_InferenceTrace* triton_trace_;
52-
InferenceTrace(TRITONSERVER_InferenceTrace* triton_trace)
53-
: triton_trace_(triton_trace)
54-
{
55-
}
56-
#else
57-
void* triton_trace_;
58-
#endif
59-
InferenceTrace() : triton_trace_(nullptr) {}
60-
};
61-
6247
//
6348
// Inference Request
6449
//
@@ -72,7 +57,7 @@ struct InferRequestShm {
7257
bool is_decoupled;
7358
uint64_t timeout;
7459
PreferredMemory preferred_memory;
75-
InferenceTrace trace;
60+
bi::managed_external_buffer::handle_t trace_shm_handle;
7661
uint32_t request_release_flags;
7762
bi::managed_external_buffer::handle_t correlation_id_shm_handle;
7863
bi::managed_external_buffer::handle_t model_name_shm_handle;
@@ -108,7 +93,7 @@ class InferRequest {
10893
bool IsDecoupled();
10994
void SetIsDecoupled(const bool is_decoupled);
11095
PreferredMemory& GetPreferredMemory();
111-
InferenceTrace& Trace();
96+
InferenceTrace& GetTrace();
11297
uint32_t ReleaseFlags();
11398
void SetReleaseFlags(const uint32_t& flags);
11499

@@ -149,7 +134,8 @@ class InferRequest {
149134
std::vector<std::unique_ptr<PbString>>& requested_output_names_shm,
150135
std::unique_ptr<PbString>& model_name_shm,
151136
std::vector<std::shared_ptr<PbTensor>>& input_tensors,
152-
std::unique_ptr<PbString>& parameters_shm);
137+
std::unique_ptr<PbString>& parameters_shm,
138+
std::unique_ptr<InferenceTrace>& infer_trace_shm);
153139

154140
std::string request_id_;
155141
CorrelationId correlation_id_;

src/infer_trace.cc

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
//
3+
// Redistribution and use in source and binary forms, with or without
4+
// modification, are permitted provided that the following conditions
5+
// are met:
6+
// * Redistributions of source code must retain the above copyright
7+
// notice, this list of conditions and the following disclaimer.
8+
// * Redistributions in binary form must reproduce the above copyright
9+
// notice, this list of conditions and the following disclaimer in the
10+
// documentation and/or other materials provided with the distribution.
11+
// * Neither the name of NVIDIA CORPORATION nor the names of its
12+
// contributors may be used to endorse or promote products derived
13+
// from this software without specific prior written permission.
14+
//
15+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
#include "infer_trace.h"
28+
29+
namespace triton { namespace backend { namespace python {
30+
31+
InferenceTrace::InferenceTrace(const InferenceTrace& rhs)
32+
{
33+
triton_trace_ = rhs.triton_trace_;
34+
trace_context_ = rhs.trace_context_;
35+
}
36+
37+
InferenceTrace&
38+
InferenceTrace::operator=(const InferenceTrace& rhs)
39+
{
40+
triton_trace_ = rhs.triton_trace_;
41+
trace_context_ = rhs.trace_context_;
42+
return *this;
43+
}
44+
45+
InferenceTrace::InferenceTrace(std::unique_ptr<InferenceTrace>& trace_shm)
46+
{
47+
triton_trace_ = trace_shm->triton_trace_;
48+
trace_context_ = trace_shm->trace_context_;
49+
}
50+
51+
void
52+
InferenceTrace::SaveToSharedMemory(
53+
std::unique_ptr<SharedMemoryManager>& shm_pool)
54+
{
55+
AllocatedSharedMemory<InferenceTraceShm> infer_trace_shm =
56+
shm_pool->Construct<InferenceTraceShm>();
57+
infer_trace_shm_ptr_ = infer_trace_shm.data_.get();
58+
59+
infer_trace_shm_ptr_->triton_trace = triton_trace_;
60+
61+
std::unique_ptr<PbString> trace_context_shm =
62+
PbString::Create(shm_pool, trace_context_);
63+
64+
infer_trace_shm_ptr_->trace_context_shm_handle =
65+
trace_context_shm->ShmHandle();
66+
67+
// Save the references to shared memory.
68+
trace_context_shm_ = std::move(trace_context_shm);
69+
infer_trace_shm_ = std::move(infer_trace_shm);
70+
shm_handle_ = infer_trace_shm_.handle_;
71+
}
72+
73+
std::unique_ptr<InferenceTrace>
74+
InferenceTrace::LoadFromSharedMemory(
75+
std::unique_ptr<SharedMemoryManager>& shm_pool,
76+
bi::managed_external_buffer::handle_t handle)
77+
{
78+
AllocatedSharedMemory<InferenceTraceShm> infer_trace_shm =
79+
shm_pool->Load<InferenceTraceShm>(handle);
80+
InferenceTraceShm* infer_trace_shm_ptr = infer_trace_shm.data_.get();
81+
82+
std::unique_ptr<PbString> trace_context_shm = PbString::LoadFromSharedMemory(
83+
shm_pool, infer_trace_shm_ptr->trace_context_shm_handle);
84+
85+
return std::unique_ptr<InferenceTrace>(
86+
new InferenceTrace(infer_trace_shm, trace_context_shm));
87+
}
88+
89+
InferenceTrace::InferenceTrace(
90+
AllocatedSharedMemory<InferenceTraceShm>& infer_trace_shm,
91+
std::unique_ptr<PbString>& trace_context_shm)
92+
: infer_trace_shm_(std::move(infer_trace_shm)),
93+
trace_context_shm_(std::move(trace_context_shm))
94+
{
95+
infer_trace_shm_ptr_ = infer_trace_shm_.data_.get();
96+
shm_handle_ = infer_trace_shm_.handle_;
97+
triton_trace_ = infer_trace_shm_ptr_->triton_trace;
98+
trace_context_ = trace_context_shm_->String();
99+
}
100+
101+
}}}; // namespace triton::backend::python

src/infer_trace.h

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
//
3+
// Redistribution and use in source and binary forms, with or without
4+
// modification, are permitted provided that the following conditions
5+
// are met:
6+
// * Redistributions of source code must retain the above copyright
7+
// notice, this list of conditions and the following disclaimer.
8+
// * Redistributions in binary form must reproduce the above copyright
9+
// notice, this list of conditions and the following disclaimer in the
10+
// documentation and/or other materials provided with the distribution.
11+
// * Neither the name of NVIDIA CORPORATION nor the names of its
12+
// contributors may be used to endorse or promote products derived
13+
// from this software without specific prior written permission.
14+
//
15+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
#pragma once
28+
29+
#include <string>
30+
31+
#include "pb_string.h"
32+
#include "pb_utils.h"
33+
34+
namespace triton { namespace backend { namespace python {
35+
36+
struct InferenceTraceShm {
37+
bi::managed_external_buffer::handle_t trace_context_shm_handle;
38+
// The address of the 'TRITONSERVER_InferTrace' object.
39+
void* triton_trace;
40+
};
41+
42+
//
43+
// Inference Trace
44+
//
45+
class InferenceTrace {
46+
public:
47+
InferenceTrace(void* triton_trace, const std::string& ctxt)
48+
: triton_trace_(triton_trace), trace_context_(ctxt)
49+
{
50+
}
51+
InferenceTrace() : triton_trace_(nullptr), trace_context_("") {}
52+
InferenceTrace(const InferenceTrace& rhs);
53+
InferenceTrace(std::unique_ptr<InferenceTrace>& trace_shm);
54+
InferenceTrace& operator=(const InferenceTrace& rhs);
55+
/// Save InferenceTrace object to shared memory.
56+
/// \param shm_pool Shared memory pool to save the InferenceTrace object.
57+
void SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool);
58+
59+
/// Create a InferenceTrace object from shared memory.
60+
/// \param shm_pool Shared memory pool
61+
/// \param handle Shared memory handle of the InferenceTrace.
62+
/// \return Returns the InferenceTrace in the specified handle
63+
/// location.
64+
static std::unique_ptr<InferenceTrace> LoadFromSharedMemory(
65+
std::unique_ptr<SharedMemoryManager>& shm_pool,
66+
bi::managed_external_buffer::handle_t handle);
67+
68+
void* TritonTrace() { return triton_trace_; }
69+
const std::string& Context() const { return trace_context_; }
70+
71+
bi::managed_external_buffer::handle_t ShmHandle() { return shm_handle_; }
72+
73+
private:
74+
// The private constructor for creating a InferenceTrace object from shared
75+
// memory.
76+
InferenceTrace(
77+
AllocatedSharedMemory<InferenceTraceShm>& infer_trace_shm,
78+
std::unique_ptr<PbString>& trace_context_shm);
79+
80+
void* triton_trace_;
81+
std::string trace_context_;
82+
83+
// Shared Memory Data Structures
84+
AllocatedSharedMemory<InferenceTraceShm> infer_trace_shm_;
85+
InferenceTraceShm* infer_trace_shm_ptr_;
86+
bi::managed_external_buffer::handle_t shm_handle_;
87+
std::unique_ptr<PbString> trace_context_shm_;
88+
};
89+
90+
}}}; // namespace triton::backend::python

src/pb_stub.cc

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1611,7 +1611,14 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
16111611
.export_values();
16121612

16131613
py::class_<InferenceTrace, std::shared_ptr<InferenceTrace>>(
1614-
module, "InferenceTrace");
1614+
module, "InferenceTrace")
1615+
.def("get_context", [](InferenceTrace& self) -> py::object {
1616+
auto context = self.Context();
1617+
if (context != "") {
1618+
return py::str(context);
1619+
}
1620+
return py::none();
1621+
});
16151622

16161623
py::class_<InferRequest, std::shared_ptr<InferRequest>>(
16171624
module, "InferenceRequest")
@@ -1698,7 +1705,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
16981705
.def("set_flags", &InferRequest::SetFlags)
16991706
.def("timeout", &InferRequest::Timeout)
17001707
.def("parameters", &InferRequest::Parameters)
1701-
.def("trace", &InferRequest::Trace)
1708+
.def("trace", &InferRequest::GetTrace)
17021709
.def(
17031710
"exec",
17041711
[](std::shared_ptr<InferRequest>& infer_request,

src/python_be.cc

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -383,14 +383,25 @@ ModelInstanceState::SaveRequestsToSharedMemory(
383383

384384
// Do not return if error in this case, because Triton core
385385
// will return an error if tracing is disabled (see PYBE PR#295).
386+
// For the same reason, we do not log the error message, otherwise
387+
// when Triton is compiled without tracing, it'll constantly log
388+
// this error.
386389
TRITONSERVER_InferenceTrace* triton_trace;
387390
auto err = TRITONBACKEND_RequestTrace(request, &triton_trace);
388391
if (err != nullptr) {
389392
triton_trace = nullptr;
390393
TRITONSERVER_ErrorDelete(err);
391394
}
395+
const char* val = nullptr;
396+
if (triton_trace != nullptr) {
397+
LOG_IF_ERROR(
398+
TRITONSERVER_InferenceTraceContext(triton_trace, &val),
399+
"failed to retrieve trace context");
400+
}
401+
std::string context = (val != nullptr) ? std::string(val) : "";
392402

393-
InferenceTrace trace = InferenceTrace(triton_trace);
403+
InferenceTrace trace =
404+
InferenceTrace(reinterpret_cast<void*>(triton_trace), context);
394405

395406
uint64_t request_timeout;
396407
RETURN_IF_ERROR(TRITONBACKEND_InferenceRequestTimeoutMicroseconds(
@@ -415,7 +426,6 @@ ModelInstanceState::SaveRequestsToSharedMemory(
415426
reinterpret_cast<intptr_t>(request),
416427
PreferredMemory(PreferredMemory::kDefault, 0), trace);
417428
}
418-
419429
RETURN_IF_EXCEPTION(infer_request->SaveToSharedMemory(Stub()->ShmPool()));
420430
requests_shm[r] = infer_request->ShmHandle();
421431
pb_infer_requests.emplace_back(std::move(infer_request));

0 commit comments

Comments
 (0)