Skip to content

Commit 63cc437

Browse files
authored
Fix L0_backend_python timeout issue (triton-inference-server#218)
* Fix up missing mutex
* Rename variable
* Fix for the case where returning the first decoupled response is slower than the following responses
* Address comment
1 parent bdf75da commit 63cc437

File tree

5 files changed

+59
-26
lines changed

5 files changed

+59
-26
lines changed

src/infer_request.cc

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -553,11 +553,11 @@ InferRequest::Exec(const bool is_decoupled)
553553

554554
if (responses_is_set) {
555555
auto& memory_manager_message_queue = stub->MemoryManagerQueue();
556-
std::unique_ptr<InferResponse> error_response =
556+
std::unique_ptr<InferResponse> return_response =
557557
InferResponse::LoadFromSharedMemory(
558558
shm_pool, *response_handle, true /* open cuda handle */);
559559

560-
for (auto& output_tensor : error_response->OutputTensors()) {
560+
for (auto& output_tensor : return_response->OutputTensors()) {
561561
if (!output_tensor->IsCPU()) {
562562
uint64_t memory_release_id = output_tensor->Memory()->MemoryReleaseId();
563563
output_tensor->Memory()->SetMemoryReleaseCallback(
@@ -567,7 +567,7 @@ InferRequest::Exec(const bool is_decoupled)
567567
}
568568
}
569569

570-
return error_response;
570+
return return_response;
571571
} else {
572572
auto error_response = std::make_unique<InferResponse>(
573573
std::vector<std::shared_ptr<PbTensor>>{},

src/pb_response_iterator.cc

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -114,11 +114,11 @@ ResponseIterator::Iter()
114114
}
115115

116116
void
117-
ResponseIterator::EnqueueResponse(std::unique_ptr<InferResponse> infer_response)
117+
ResponseIterator::EnqueueResponse(std::shared_ptr<InferResponse> infer_response)
118118
{
119119
{
120120
std::lock_guard<std::mutex> lock{mu_};
121-
response_buffer_.push(std::move(infer_response));
121+
response_buffer_.push(infer_response);
122122
}
123123
cv_.notify_one();
124124
}
@@ -144,4 +144,19 @@ ResponseIterator::Clear()
144144
is_cleared_ = true;
145145
}
146146

147+
std::vector<std::shared_ptr<InferResponse>>
148+
ResponseIterator::GetExistingResponses()
149+
{
150+
std::vector<std::shared_ptr<InferResponse>> responses;
151+
std::unique_lock<std::mutex> lock{mu_};
152+
while (!response_buffer_.empty()) {
153+
responses.push_back(response_buffer_.front());
154+
response_buffer_.pop();
155+
}
156+
is_finished_ = true;
157+
is_cleared_ = true;
158+
159+
return responses;
160+
}
161+
147162
}}} // namespace triton::backend::python

src/pb_response_iterator.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,9 +38,10 @@ class ResponseIterator {
3838

3939
std::shared_ptr<InferResponse> Next();
4040
py::iterator Iter();
41-
void EnqueueResponse(std::unique_ptr<InferResponse> infer_response);
41+
void EnqueueResponse(std::shared_ptr<InferResponse> infer_response);
4242
void* Id();
4343
void Clear();
44+
std::vector<std::shared_ptr<InferResponse>> GetExistingResponses();
4445

4546
private:
4647
std::vector<std::shared_ptr<InferResponse>> responses_;

src/pb_stub.cc

Lines changed: 34 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -918,8 +918,11 @@ Stub::ServiceStubToParentRequests()
918918
break;
919919
} else {
920920
bls_response_cleanup_buffer_.pop();
921+
{
922+
std::lock_guard<std::mutex> lock(response_iterator_map_mu_);
923+
response_iterator_map_.erase(id);
924+
}
921925
SendCleanupId(id);
922-
response_iterator_map_.erase(id);
923926
}
924927
}
925928
}
@@ -1093,7 +1096,11 @@ Stub::ParentToStubMQMonitor()
10931096
response_iterator_map_[infer_response->Id()]->EnqueueResponse(
10941097
std::move(infer_response));
10951098
} else {
1096-
LOG_INFO << "Failed to enqueue the response to its response iterator.";
1099+
auto response_iterator =
1100+
std::make_shared<ResponseIterator>(std::move(infer_response));
1101+
response_iterator_map_.insert(
1102+
std::pair<void*, std::shared_ptr<ResponseIterator>>(
1103+
response_iterator->Id(), response_iterator));
10971104
}
10981105
}
10991106

@@ -1115,13 +1122,31 @@ Stub::ParentToStubServiceActive()
11151122
return parent_to_stub_thread_;
11161123
}
11171124

1118-
void
1119-
Stub::SaveResponseIterator(std::shared_ptr<ResponseIterator> response_iterator)
1125+
std::shared_ptr<ResponseIterator>
1126+
Stub::GetResponseIterator(std::shared_ptr<InferResponse> infer_response)
11201127
{
11211128
std::lock_guard<std::mutex> lock(response_iterator_map_mu_);
1122-
response_iterator_map_.insert(
1123-
std::pair<void*, std::shared_ptr<ResponseIterator>>(
1124-
response_iterator->Id(), response_iterator));
1129+
if (response_iterator_map_.find(infer_response->Id()) !=
1130+
response_iterator_map_.end()) {
1131+
// Need to re-construct the 'ResponseIterator' and update the
1132+
// 'response_iterator_map_' to make sure the 'ResponseIterator' object has
1133+
// the correct first response.
1134+
auto response_iterator = std::make_shared<ResponseIterator>(infer_response);
1135+
std::vector<std::shared_ptr<InferResponse>> existing_responses =
1136+
response_iterator_map_[infer_response->Id()]->GetExistingResponses();
1137+
for (auto& response : existing_responses) {
1138+
response_iterator->EnqueueResponse(response);
1139+
}
1140+
1141+
response_iterator_map_[infer_response->Id()] = response_iterator;
1142+
} else {
1143+
auto response_iterator = std::make_shared<ResponseIterator>(infer_response);
1144+
response_iterator_map_.insert(
1145+
std::pair<void*, std::shared_ptr<ResponseIterator>>(
1146+
response_iterator->Id(), response_iterator));
1147+
}
1148+
1149+
return response_iterator_map_[infer_response->Id()];
11251150
}
11261151

11271152
bool
@@ -1304,12 +1329,8 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
13041329
infer_request->Exec(decoupled);
13051330
py::object response_object;
13061331
if (decoupled) {
1307-
auto response_iterator =
1308-
std::make_shared<ResponseIterator>(response);
1332+
auto response_iterator = stub->GetResponseIterator(response);
13091333
response_object = py::cast(response_iterator);
1310-
if (response_iterator->Id() != nullptr) {
1311-
stub->SaveResponseIterator(response_iterator);
1312-
}
13131334
} else {
13141335
response_object = py::cast(response);
13151336
}
@@ -1334,12 +1355,8 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
13341355
infer_request->Exec(decoupled);
13351356
py::object response_object;
13361357
if (decoupled) {
1337-
auto response_iterator =
1338-
std::make_shared<ResponseIterator>(response);
1358+
auto response_iterator = stub->GetResponseIterator(response);
13391359
response_object = py::cast(response_iterator);
1340-
if (response_iterator->Id() != nullptr) {
1341-
stub->SaveResponseIterator(response_iterator);
1342-
}
13431360
} else {
13441361
response_object = py::cast(response);
13451362
}

src/pb_stub.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -246,9 +246,9 @@ class Stub {
246246
/// Thread process
247247
void ParentToStubMQMonitor();
248248

249-
/// Keep track of the ResponseIterator object
250-
void SaveResponseIterator(
251-
std::shared_ptr<ResponseIterator> response_iterator);
249+
/// Get the ResponseIterator object associated with the infer response
250+
std::shared_ptr<ResponseIterator> GetResponseIterator(
251+
std::shared_ptr<InferResponse> infer_response);
252252

253253
/// Send the id to the python backend for object cleanup
254254
void SendCleanupId(void* id);

0 commit comments

Comments
 (0)