Skip to content

Commit fbbfa65

Browse files
authored
Add support for DLPack Tensors (triton-inference-server#62)
* Move triton_pb.Tensor to C++
* Add support for DLPack GPU tensor
* Add support for conversion between dlpack datatypes and triton
* Add support for output tensors in GPU
* Fix bug when the output tensors contain one of the input tensors
* Add clean up for DLPack tensors
* Fix deadlock when init takes long
* Fix DLPack for CPU tensors
* Clean up
* Fix up
* Add FORCE_CPU_ONLY_INPUT_TENSORS
* Minor fixes for TYPE_STRING and contiguous tensors
* Review edits
* Manually adjust the offset for cudaIpcMemHandle
* Print appropriate error when the tensor is not contiguous
* Refactor communication between the main process and the stub process (triton-inference-server#65)
* Fix CPU_PINNED_MEMORY
* Delete shared memory region if there is an exception
* Minor improvements
* Fix shutdown and identity tensor return
* Add more comment
1 parent 1f03aaf commit fbbfa65

File tree

12 files changed

+1986
-642
lines changed

12 files changed

+1986
-642
lines changed

CMakeLists.txt

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,17 @@ FetchContent_Declare(
8484
)
8585
FetchContent_MakeAvailable(pybind11)
8686

87+
#
88+
# DLPack
89+
#
90+
FetchContent_Declare(
91+
dlpack
92+
GIT_REPOSITORY "https://github.com/dmlc/dlpack"
93+
GIT_TAG "v0.5"
94+
GIT_SHALLOW ON
95+
)
96+
FetchContent_MakeAvailable(dlpack)
97+
8798
#
8899
# Boost
89100
#
@@ -99,6 +110,15 @@ ExternalProject_Add(
99110
)
100111
set(boostorg_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/boost/")
101112

113+
#
114+
# CUDA
115+
#
116+
# Locate the CUDA toolkit only for GPU-enabled builds. The option is tested by
# name so that if() dereferences it exactly once.
if(TRITON_ENABLE_GPU)
117+
find_package(CUDAToolkit REQUIRED)
118+
# find_package(CUDAToolkit) reports the toolkit version in CUDAToolkit_VERSION;
# CUDA_VERSION is only populated by the legacy FindCUDA module.
message(STATUS "Using CUDA ${CUDAToolkit_VERSION}")
119+
# NOTE(review): CUDA_NVCC_FLAGS is a variable of the legacy FindCUDA module;
# the CUDAToolkit package located above is not documented to read it.
# Presumably a leftover -- confirm whether anything still consumes it.
set(CUDA_NVCC_FLAGS -std=c++11)
120+
endif() # TRITON_ENABLE_GPU
121+
102122
find_package(ZLIB REQUIRED)
103123
find_package(Threads REQUIRED)
104124

@@ -118,9 +138,13 @@ add_library(
118138

119139
add_executable(
120140
triton-python-backend-stub
141+
src/pb_stub_utils.cc
142+
src/pb_stub_utils.h
121143
src/pb_stub.cc
122144
src/pb_utils.cc
123145
src/pb_utils.h
146+
src/pb_tensor.cc
147+
src/pb_tensor.h
124148
src/shm_manager.cc
125149
src/shm_manager.h
126150
)
@@ -162,11 +186,26 @@ target_link_libraries(
162186
PRIVATE
163187
Threads::Threads
164188
pybind11::embed
189+
dlpack
165190
triton-backend-utils # from repo-backend
166191
-larchive # libarchive
167192
-lrt # shared memory
168193
)
169194

195+
# Link the CUDA driver library only for GPU-enabled builds. The option is
# tested by name so that if() dereferences it exactly once.
if(TRITON_ENABLE_GPU)
196+
target_link_libraries(
197+
triton-python-backend-stub
198+
PUBLIC
199+
CUDA::cuda_driver
200+
)
201+
202+
target_link_libraries(
203+
triton-python-backend
204+
PUBLIC
205+
CUDA::cuda_driver
206+
)
207+
endif() # TRITON_ENABLE_GPU
208+
170209
set_target_properties(
171210
triton-python-backend PROPERTIES
172211
POSITION_INDEPENDENT_CODE ON

0 commit comments

Comments
 (0)