Skip to content

Commit 73fdcda

Browse files
authored
Add request statistic reporting for decoupled mode (triton-inference-server#163)
1 parent 8ae1782 commit 73fdcda

File tree

4 files changed: 54 additions (+54) and 12 deletions (-12)

src/pb_metric_reporter.cc

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,8 @@ PbMetricReporter::PbMetricReporter(
3636
std::shared_ptr<std::vector<TRITONBACKEND_Response*>> responses)
3737
: instance_(instance), requests_(requests), request_count_(request_count),
3838
responses_(responses), total_batch_size_(0), exec_start_ns_(0),
39-
compute_start_ns_(0), compute_end_ns_(0), exec_end_ns_(0)
39+
compute_start_ns_(0), compute_end_ns_(0), exec_end_ns_(0),
40+
success_status_(true)
4041
{
4142
}
4243

@@ -51,21 +52,28 @@ PbMetricReporter::~PbMetricReporter()
5152
// request object. We use the execution start/end time for
5253
// compute also so that the entire execution time is associated
5354
// with the inference computation.
54-
LOG_IF_ERROR(
55-
TRITONBACKEND_ModelInstanceReportStatistics(
56-
instance_, request,
57-
((*responses_)[r] != nullptr) /* success */, exec_start_ns_,
58-
compute_start_ns_, compute_end_ns_, exec_end_ns_),
59-
"failed reporting request statistics");
55+
if (responses_) {
56+
LOG_IF_ERROR(
57+
TRITONBACKEND_ModelInstanceReportStatistics(
58+
instance_, request, ((*responses_)[r] != nullptr) /* success */,
59+
exec_start_ns_, compute_start_ns_, compute_end_ns_, exec_end_ns_),
60+
"failed reporting request statistics");
61+
} else {
62+
LOG_IF_ERROR(
63+
TRITONBACKEND_ModelInstanceReportStatistics(
64+
instance_, request, success_status_, exec_start_ns_,
65+
compute_start_ns_, compute_end_ns_, exec_end_ns_),
66+
"failed reporting request statistics");
67+
}
6068
}
6169

6270
// Report the entire batch statistics. This backend does not support
6371
// batching so the total batch size is always 1.
6472
if (total_batch_size_ != 0) {
6573
LOG_IF_ERROR(
6674
TRITONBACKEND_ModelInstanceReportBatchStatistics(
67-
instance_, total_batch_size_, exec_start_ns_,
68-
compute_start_ns_, compute_end_ns_, exec_end_ns_),
75+
instance_, total_batch_size_, exec_start_ns_, compute_start_ns_,
76+
compute_end_ns_, exec_end_ns_),
6977
"failed reporting batch request statistics");
7078
}
7179
}
@@ -100,4 +108,10 @@ PbMetricReporter::SetExecEndNs(const uint64_t exec_end_ns)
100108
exec_end_ns_ = exec_end_ns;
101109
}
102110

111+
void
112+
PbMetricReporter::SetSuccessStatus(const bool success_status)
113+
{
114+
success_status_ = success_status;
115+
}
116+
103117
}}} // namespace triton::backend::python

src/pb_metric_reporter.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ class PbMetricReporter {
4242
uint64_t compute_start_ns_;
4343
uint64_t compute_end_ns_;
4444
uint64_t exec_end_ns_;
45+
bool success_status_;
4546

4647
public:
4748
PbMetricReporter(
@@ -54,5 +55,6 @@ class PbMetricReporter {
5455
void SetComputeStartNs(const uint64_t compute_start_ns);
5556
void SetComputeEndNs(const uint64_t compute_end_ns);
5657
void SetExecEndNs(const uint64_t exec_end_ns);
58+
void SetSuccessStatus(const bool success_status);
5759
};
5860
}}}; // namespace triton::backend::python

src/python_be.cc

Lines changed: 27 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -926,7 +926,8 @@ ModelInstanceState::ResponseSendDecoupled(
926926
TRITONSERVER_Error*
927927
ModelInstanceState::ProcessRequestsDecoupled(
928928
TRITONBACKEND_Request** requests, const uint32_t request_count,
929-
std::vector<std::unique_ptr<InferRequest>>& pb_inference_requests)
929+
std::vector<std::unique_ptr<InferRequest>>& pb_inference_requests,
930+
PbMetricReporter& reporter)
930931
{
931932
NVTX_RANGE(nvtx_, "ProcessRequests " + Name());
932933
closed_requests_ = {};
@@ -954,6 +955,10 @@ ModelInstanceState::ProcessRequestsDecoupled(
954955
requests, request_count, pb_inference_requests, request_batch,
955956
responses));
956957

958+
uint64_t compute_start_ns = 0;
959+
SET_TIMESTAMP(compute_start_ns);
960+
reporter.SetComputeStartNs(compute_start_ns);
961+
957962
std::unique_ptr<IPCMessage> ipc_message;
958963
RETURN_IF_EXCEPTION(
959964
ipc_message =
@@ -971,6 +976,12 @@ ModelInstanceState::ProcessRequestsDecoupled(
971976

972977
AllocatedSharedMemory<ResponseBatch> response_batch =
973978
Stub()->ShmPool()->Load<ResponseBatch>(received_message_->Args());
979+
980+
uint64_t compute_end_ns = 0;
981+
SET_TIMESTAMP(compute_end_ns);
982+
reporter.SetComputeEndNs(compute_end_ns);
983+
reporter.SetBatchStatistics(request_count);
984+
974985
if (response_batch.data_->has_error) {
975986
if (response_batch.data_->is_error_set) {
976987
auto error = PbString::LoadFromSharedMemory(
@@ -1819,10 +1830,24 @@ TRITONBACKEND_ModelInstanceExecute(
18191830
}
18201831
} else {
18211832
std::vector<std::unique_ptr<InferRequest>> infer_requests;
1833+
1834+
uint64_t exec_start_ns = 0;
1835+
SET_TIMESTAMP(exec_start_ns);
1836+
1837+
PbMetricReporter reporter(
1838+
instance_state->TritonModelInstance(), requests, request_count,
1839+
nullptr);
1840+
reporter.SetExecStartNs(exec_start_ns);
1841+
18221842
error = instance_state->ProcessRequestsDecoupled(
1823-
requests, request_count, infer_requests);
1843+
requests, request_count, infer_requests, reporter);
1844+
1845+
uint64_t exec_end_ns = 0;
1846+
SET_TIMESTAMP(exec_end_ns);
1847+
reporter.SetExecEndNs(exec_end_ns);
18241848

18251849
if (error != nullptr) {
1850+
reporter.SetSuccessStatus(false);
18261851
for (uint32_t r = 0; r < request_count; ++r) {
18271852
TRITONBACKEND_Request* request = requests[r];
18281853
if (!instance_state->ExistsInClosedRequests(

src/python_be.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -320,7 +320,8 @@ class ModelInstanceState : public BackendModelInstance {
320320
// Process all the requests in the decoupled mode.
321321
TRITONSERVER_Error* ProcessRequestsDecoupled(
322322
TRITONBACKEND_Request** requests, const uint32_t request_count,
323-
std::vector<std::unique_ptr<InferRequest>>& pb_infer_requests);
323+
std::vector<std::unique_ptr<InferRequest>>& pb_infer_requests,
324+
PbMetricReporter& pb_metric_reporter);
324325

325326
bool ExistsInClosedRequests(intptr_t closed_request);
326327

0 commit comments

Comments
 (0)