Skip to content

Commit b106301

Browse files
authored
Fix deferred callback when there is an error (triton-inference-server#164)
1 parent 73fdcda commit b106301

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

src/python_be.cc

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1178,6 +1178,7 @@ ModelInstanceState::ProcessRequests(
11781178
TRITONBACKEND_Response* response = (*responses)[r];
11791179
TRITONBACKEND_Request* request = requests[r];
11801180
uint32_t requested_output_count = 0;
1181+
requires_deferred_callback.push_back(false);
11811182

11821183
shm_responses.emplace_back(nullptr);
11831184
std::unique_ptr<InferResponse>& infer_response = shm_responses.back();
@@ -1236,13 +1237,14 @@ ModelInstanceState::ProcessRequests(
12361237
gpu_output_buffers[r], requested_output_names, response);
12371238
GUARDED_RESPOND_IF_ERROR(responses, r, *error);
12381239

1240+
requires_deferred_callback[r] = require_deferred_callback;
1241+
12391242
// Error object will be deleted by the GUARDED_RESPOND macro
12401243
*error = nullptr;
12411244
error.reset();
1242-
if (require_deferred_callback) {
1245+
if (requires_deferred_callback[r]) {
12431246
has_gpu_output = true;
12441247
}
1245-
requires_deferred_callback.push_back(require_deferred_callback);
12461248
}
12471249

12481250
// Finalize the execute.

0 commit comments

Comments (0)