@@ -379,21 +379,7 @@ ModelInstanceState::SaveRequestsToSharedMemory(
     std::unique_ptr<InferRequest> infer_request;
     if (model_state->IsDecoupled()) {
       TRITONBACKEND_ResponseFactory* factory_ptr;
-      // Reuse the response factory if there is already a response factory
-      // associated with the request
-      std::lock_guard<std::mutex> guard{response_factory_map_mutex_};
-      {
-        if (response_factory_map_.find(reinterpret_cast<intptr_t>(request)) !=
-            response_factory_map_.end()) {
-          factory_ptr =
-              response_factory_map_[reinterpret_cast<intptr_t>(request)];
-        } else {
-          RETURN_IF_ERROR(
-              TRITONBACKEND_ResponseFactoryNew(&factory_ptr, request));
-          response_factory_map_[reinterpret_cast<intptr_t>(request)] =
-              factory_ptr;
-        }
-      }
+      RETURN_IF_ERROR(TRITONBACKEND_ResponseFactoryNew(&factory_ptr, request));

       infer_request = std::make_unique<InferRequest>(
           id, correlation_id, pb_input_tensors, requested_output_names,
@@ -843,7 +829,8 @@ ModelInstanceState::StubToParentMQMonitor()
         ProcessLogRequest(message);
         break;
       }
-      case PYTHONSTUB_CleanupRequest: {
+      case PYTHONSTUB_BLSDecoupledInferPayloadCleanup:
+      case PYTHONSTUB_BLSDecoupledResponseFactoryCleanup: {
         ProcessBLSCleanupRequest(message);
         break;
       }
@@ -941,9 +928,17 @@ ModelInstanceState::ProcessBLSCleanupRequest(
       Stub()->ShmPool()->Load<char>(message->Args());
   CleanupMessage* cleanup_message_ptr =
       reinterpret_cast<CleanupMessage*>(cleanup_request_message.data_.get());
-
-  void* id = cleanup_message_ptr->id;
-  infer_payload_.erase(reinterpret_cast<intptr_t>(id));
+  intptr_t id = reinterpret_cast<intptr_t>(cleanup_message_ptr->id);
+  if (message->Command() == PYTHONSTUB_BLSDecoupledInferPayloadCleanup) {
+    // Remove the InferPayload object from the map.
+    infer_payload_.erase(id);
+  } else if (
+      message->Command() == PYTHONSTUB_BLSDecoupledResponseFactoryCleanup) {
+    // Delete response factory
+    std::unique_ptr<
+        TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
+        response_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(id));
+  }

   {
     bi::scoped_lock<bi::interprocess_mutex> lock{*(message->ResponseMutex())};
@@ -1172,12 +1167,6 @@ ModelInstanceState::ResponseSendDecoupled(
       std::lock_guard<std::mutex> guard{closed_requests_mutex_};
       closed_requests_.push_back(send_message_payload->request_address);
     }
-
-    // Clean up the response factory map.
-    {
-      std::lock_guard<std::mutex> guard{response_factory_map_mutex_};
-      response_factory_map_.erase(send_message_payload->request_address);
-    }
   }

   if (send_message_payload->response != 0) {
@@ -1195,14 +1184,7 @@ ModelInstanceState::ResponseSendDecoupled(
         error_message);

     std::vector<std::pair<std::unique_ptr<PbMemory>, void*>> gpu_output_buffers;
-    std::unique_ptr<
-        TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
-        response_factory_ptr;
     GPUBuffersHelper gpu_buffer_helper;
-    if (send_message_payload->flags == TRITONSERVER_RESPONSE_COMPLETE_FINAL) {
-      response_factory_ptr.reset(
-          reinterpret_cast<TRITONBACKEND_ResponseFactory*>(response_factory));
-    }

 #ifdef TRITON_ENABLE_GPU
     for (auto& output_tensor : infer_response->OutputTensors()) {
@@ -1289,13 +1271,6 @@ ModelInstanceState::ResponseSendDecoupled(
         response_factory, send_message_payload->flags);
     SetErrorForResponseSendMessage(
         send_message_payload, WrapTritonErrorInSharedPtr(error), error_message);
-
-    if (send_message_payload->flags == TRITONSERVER_RESPONSE_COMPLETE_FINAL) {
-      std::unique_ptr<
-          TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
-          response_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
-              send_message_payload->response_factory_address));
-    }
   }
 }

@@ -1368,11 +1343,6 @@ ModelInstanceState::ProcessRequestsDecoupled(
           TRITONSERVER_ERROR_INTERNAL, error->String().c_str());
     }

-    // Reset the release flags for all the requests.
-    for (auto& infer_request : pb_infer_requests) {
-      infer_request->SetReleaseFlags(TRITONSERVER_REQUEST_RELEASE_ALL);
-    }
-
     return TRITONSERVER_ErrorNew(
         TRITONSERVER_ERROR_INTERNAL, "Failed to process the requests.");
   }
@@ -2499,15 +2469,9 @@ TRITONBACKEND_ModelInstanceExecute(
         }
       }

-      // We should only delete the response factory for the requests that have
-      // not been closed.
       for (auto& infer_request : infer_requests) {
-        if (!instance_state->ExistsInClosedRequests(
-                infer_request->RequestAddress())) {
-          LOG_IF_ERROR(
-              infer_request->DeleteResponseFactory(),
-              "Failed to delete the response factory.");
-        }
+        // Reset the release flags for all the requests.
+        infer_request->SetReleaseFlags(TRITONSERVER_REQUEST_RELEASE_ALL);
       }
     }
   }