1- ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.08-py3-min
2- ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:25.08-py3
3- ARG NVRTC_VER=13.0.48-1
4- ARG TRT_VER=10.13.2.6
5- ARG NCCL_VER=2.27.7-1+cuda13.0
6- ARG RELEASE_URL_TRT_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TRT_VER}.Linux.x86_64-gnu.cuda-13.0.tar.gz
7- ARG RELEASE_URL_TRT_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TRT_VER}.Linux.aarch64-gnu.cuda-13.0.tar.gz
8- 
9- # Versions of packages to copy from pytorch image
10- ARG TORCH_VER=2.8.0a0+34c6371d24.nv25.8
11- ARG TORCHVISION_VER=0.23.0a0+428a54c9
12- ARG SETUPTOOLS_VER=79.0.1
13- ARG PYTORCH_TRITON_VER=3.3.1+gitc8757738
14- ARG NETWORKX_VER=3.5
15- ARG SYMPY_VER=1.14.0
16- ARG PACKAGING_VER=23.2
17- ARG FLASH_ATTN_VER=2.7.4.post1
18- 
1+ # syntax=docker/dockerfile:1
192ARG TENSORRTLLM_REPO=https://github.com/NVIDIA/TensorRT-LLM.git
20- ARG TENSORRTLLM_REPO_TAG=v1.2.0rc0
21- ARG TENSORRTLLM_VER=1.2.0rc0
3+ ARG TENSORRTLLM_REPO_TAG=v1.2.0rc1
4+ ARG TENSORRTLLM_VER=1.2.0rc1
5+ ARG TENSORRTLLM_WHEEL
6+ ARG TENSORRTLLM_TRT_VER=10.13.2.6
227
23- FROM ${PYTORCH_IMAGE} AS pytorch_image
24- FROM ${BASE_IMAGE} AS install_dependencies
258
26- WORKDIR /workspace
9+ ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.08-py3-min
10+ ARG SOURCE_IMAGE=nvcr.io/nvidia/pytorch:25.08-py3
2711
28- # Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
29- COPY --from=pytorch_image /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
3012
31- ENV PIP_BREAK_SYSTEM_PACKAGES=1
32- RUN apt-get update -q=2 \
33-     && apt-get install -y --no-install-recommends \
34-         python3-dev \
35-         python3-pip \
36-         git-lfs \
37-         # Remove previous TRT installation
38-     && apt-get purge -y "libnvinfer*" \
39-     && pip3 uninstall -y tensorrt \
40-     && rm -rf /var/lib/apt/lists/*
13+ # Versions of packages to copy from source image
14+ ARG FLASH_ATTN_VER=2.7.4.post1
15+ ARG NCCL_VER=2.27.7-1+cuda13.0
16+ ARG NETWORKX_VER=3.5
17+ ARG NVRTC_VER=13.0.48-1
18+ ARG PACKAGING_VER=23.2
19+ ARG PYTORCH_TRITON_VER=3.3.1+gitc8757738
20+ ARG SETUPTOOLS_VER=79.0.1
21+ ARG SYMPY_VER=1.14.0
22+ ARG TORCH_VER=2.8.0a0+34c6371d24.nv25.8
23+ ARG TORCHVISION_VER=0.23.0a0+428a54c9
24+ ARG TRT_URL_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TENSORRTLLM_TRT_VER}.Linux.aarch64-gnu.cuda-13.0.tar.gz
25+ ARG TRT_URL_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TENSORRTLLM_TRT_VER}.Linux.x86_64-gnu.cuda-13.0.tar.gz
4126
42- ARG TRT_VER
43- ARG NVRTC_VER
44- ARG NCCL_VER
4527
46- ENV TRT_VERSION=$TRT_VER \
47-     TRT_VER=$TRT_VER \
48-     CUDA_VER=$CUDA_VERSION \
49-     CUDNN_VER=$CUDNN_VERSION \
50-     NCCL_VER=$NCCL_VER \
51-     CUBLAS_VER=$CUBLAS_VERSION \
52-     NVRTC_VER="${NVRTC_VER}"
28+ FROM ${SOURCE_IMAGE} AS source_image
29+ FROM ${BASE_IMAGE} AS requirements
5330
54- LABEL TRT_VERSION=$TRT_VER
55- LABEL NCCL_VER=$NCCL_VER
31+ # Download & install TRT release
32+ ARG TRT_URL_x86
33+ ARG TRT_URL_ARM
5634
57- # Install NVRTC
58- RUN [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" \
59-     && curl -o /tmp/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/$arch/cuda-keyring_1.1-1_all.deb \
60-     && apt install /tmp/cuda-keyring.deb \
61-     && rm /tmp/cuda-keyring.deb \
62-     && apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev* \
63-     && CUDA_VER_SHORT=${CUDA_VER: 0:4} \
64-     && NVRTC_CUDA_VERSION=${CUDA_VER_SHORT/./-} \
65-     && apt-get update -qq \
66-     && apt-get install -y --no-install-recommends \
67-         cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER} \
68-         libnccl2=${NCCL_VER} \
69-         libnccl-dev=${NCCL_VER} \
70-     && rm -rf /var/lib/apt/lists/*
35+ WORKDIR /requirements/
7136
72- # Download & install TRT release
73- ARG RELEASE_URL_TRT_x86
74- ARG RELEASE_URL_TRT_ARM
7537
76- RUN [ "$(uname -m)" != "x86_64" ] && RELEASE_URL_TRT=${RELEASE_URL_TRT_ARM} || RELEASE_URL_TRT=${RELEASE_URL_TRT_x86} \
77-     && curl -fSL -o /tmp/tensorrt.tar.gz ${RELEASE_URL_TRT} \
38+ ARG TENSORRTLLM_TRT_VER
39+ RUN [ "$(uname -m)" != "x86_64" ] && TRT_URL=${TRT_URL_ARM} || TRT_URL=${TRT_URL_x86} \
40+     && curl -fSL -o /tmp/tensorrt.tar.gz ${TRT_URL} \
7841    # Extract the tarball, excluding Windows libraries and static libraries as
7942    # they are not needed for Linux build
8043    && tar xzvf /tmp/tensorrt.tar.gz --exclude="lib*win.so*" --exclude="*.a" -C /usr/local \
8144    && rm /tmp/tensorrt.tar.gz \
8245    && find /usr/local -maxdepth 1 -name Tens* -type d -exec ln -s {} /usr/local/tensorrt \;
8346
84- RUN pip3 install /usr/local/tensorrt/python/tensorrt-*-cp$( python3 -c "import sys; print(str(sys.version_info.major) + str(sys.version_info.minor))" )* 
47+ WORKDIR /wheels/ 
8548
86- ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib:${LD_LIBRARY_PATH}
87- ENV TRT_ROOT=/usr/local/tensorrt
88- 
89- FROM install_dependencies AS tensorrt_llm_code
49+ ARG TENSORRTLLM_WHEEL
50+ ADD ${TENSORRTLLM_WHEEL} /wheels/
9051
91- WORKDIR /workspace
52+ RUN apt-get update -q=2 \
53+     && apt-get install -y --no-install-recommends \
54+         git-lfs \
55+     && git-lfs install  \
56+     && rm -rf /var/lib/apt/lists/*
9257
9358ARG TENSORRTLLM_REPO
9459ARG TENSORRTLLM_REPO_TAG
95- RUN git-lfs install \
96-     && git clone --single-branch --recurse-submodules --depth=1 -b ${TENSORRTLLM_REPO_TAG} ${TENSORRTLLM_REPO} tensorrt_llm
60+ WORKDIR /workspace/
61+ RUN git clone --single-branch --recurse-submodules --depth=1 -b ${TENSORRTLLM_REPO_TAG} ${TENSORRTLLM_REPO} tensorrt_llm
62+ 
9763
9864# Final stage to build the TRT-LLM container
9965FROM ${BASE_IMAGE} AS final_stage
@@ -107,86 +73,78 @@ ARG SYMPY_VER
10773ARG PACKAGING_VER
10874ARG FLASH_ATTN_VER
10975# Copy necessary files from the source image stage
110- COPY --from=pytorch_image /usr/local/lib/lib* /usr/local/lib/
111- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torch /usr/local/lib/python3.12/dist-packages/torch
112- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info
113- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchgen /usr/local/lib/python3.12/dist-packages/torchgen
114- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision /usr/local/lib/python3.12/dist-packages/torchvision
115- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info
116- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision.libs /usr/local/lib/python3.12/dist-packages/torchvision.libs
117- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/setuptools /usr/local/lib/python3.12/dist-packages/setuptools
118- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info
119- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/functorch /usr/local/lib/python3.12/dist-packages/functorch
120- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
121- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/triton /usr/local/lib/python3.12/dist-packages/triton
122- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/networkx /usr/local/lib/python3.12/dist-packages/networkx
123- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info
124- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/sympy /usr/local/lib/python3.12/dist-packages/sympy
125- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info
126- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/packaging /usr/local/lib/python3.12/dist-packages/packaging
127- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info
128- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/lib/python3.12/dist-packages/flash_attn
129- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
130- COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/
131- 
132- # Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
133- COPY --from=pytorch_image /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
76+ COPY --from=source_image /usr/local/lib/lib* /usr/local/lib/
77+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torch /usr/local/lib/python3.12/dist-packages/torch
78+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info
79+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torchgen /usr/local/lib/python3.12/dist-packages/torchgen
80+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torchvision /usr/local/lib/python3.12/dist-packages/torchvision
81+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info
82+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torchvision.libs /usr/local/lib/python3.12/dist-packages/torchvision.libs
83+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/setuptools /usr/local/lib/python3.12/dist-packages/setuptools
84+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info
85+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/functorch /usr/local/lib/python3.12/dist-packages/functorch
86+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
87+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/triton /usr/local/lib/python3.12/dist-packages/triton
88+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/networkx /usr/local/lib/python3.12/dist-packages/networkx
89+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info
90+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/sympy /usr/local/lib/python3.12/dist-packages/sympy
91+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info
92+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/packaging /usr/local/lib/python3.12/dist-packages/packaging
93+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info
94+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/lib/python3.12/dist-packages/flash_attn
95+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
96+ COPY --from=source_image /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/
13497
13598ARG NVRTC_VER
13699ARG NCCL_VER
137100ENV CUDA_VER=$CUDA_VERSION \
138101    NVRTC_VER="${NVRTC_VER}" \
139102    NCCL_VER="${NCCL_VER}"
140103
141- # Install the necessary dependencies and remove previous TRT installation in the
142- # final image
143104ENV PIP_BREAK_SYSTEM_PACKAGES=1
144- RUN apt-get update -q=2 \
145-     && apt-get install -y --no-install-recommends \
146-         python3-dev \
147-         python3-pip \
148-         git-lfs \
149-         perl \
150-         # Remove previous TRT installation
151-     && apt-get purge -y "libnvinfer*" \
152-     && pip3 uninstall -y tensorrt \
153-     && rm -rf /var/lib/apt/lists/* \
154-     && pip3 install --no-cache-dir polygraphy==0.49.9 mpi4py==3.1.5
155105
156- # Install NVRTC
157106RUN [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" \
158107    && curl -o /tmp/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/$arch/cuda-keyring_1.1-1_all.deb \
159-     && apt install /tmp/cuda-keyring.deb \
108+     && apt-get install -y /tmp/cuda-keyring.deb \
160109    && rm /tmp/cuda-keyring.deb \
161-     && apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev* \
110+     && apt-get remove --purge -y --allow-change-held-packages \
111+         "cuda-nvrtc-dev*" \
112+         "libnvinfer*" \
113+         "tensorrt*" \
162114    && CUDA_VER_SHORT=${CUDA_VER: 0:4} \
163115    && NVRTC_CUDA_VERSION=${CUDA_VER_SHORT/./-} \
164116    && apt-get update -qq \
165117    && apt-get install -y --no-install-recommends \
166118        cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER} \
167-         libnccl2=${NCCL_VER}  \
119+         git-lfs \
168120        libnccl-dev=${NCCL_VER} \
169-     && rm -rf /var/lib/apt/lists/*
121+         libnccl2=${NCCL_VER} \
122+         perl \
123+         python3-dev \
124+         python3-pip \
125+     && rm -rf /var/lib/apt/lists/* \
126+     && pip3 uninstall -y tensorrt \
127+     && pip3 install --no-cache-dir polygraphy==0.49.9 mpi4py==3.1.5
128+ 
170129
171130# Install TRT
172- COPY --from=install_dependencies  /usr/local/tensorrt /usr/local/tensorrt
131+ COPY --from=requirements  /usr/local/tensorrt /usr/local/tensorrt
173132RUN pip3 install /usr/local/tensorrt/python/tensorrt-*-cp$( python3 -c "import sys; print(str(sys.version_info.major) + str(sys.version_info.minor))" )*
174133
175- # Set environment  variables
176- ARG TRT_VER 
177- ENV TRT_VERSION=$TRT_VER 
134+ # Set environment variables; TENSORRTLLM_TRT_VER is a global ARG and must be redeclared in this stage before use
135+ ARG TENSORRTLLM_TRT_VER
136+ ENV TRT_VERSION=$TENSORRTLLM_TRT_VER
178137ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib:${LD_LIBRARY_PATH}
179138ENV TRT_ROOT=/usr/local/tensorrt
180139
181140# Install the TRT-LLM wheel (bind-mounted from the requirements stage) after all the dependencies are installed
182- ARG TENSORRTLLM_VER
183- RUN --mount=type=secret,id=pypi_extra_values,env=PYPI_EXTRA_VALUES \
184-     pip install --no-cache-dir ${PYPI_EXTRA_VALUES} tensorrt_llm==${TENSORRTLLM_VER}
141+ RUN --mount=type=bind,from=requirements,source=/wheels/,target=/wheels/ \
142+       pip3 install --no-cache-dir /wheels/tensorrt_llm*.whl
185143
186144# Copying the Tensorrt LLM scripts and applications
187145WORKDIR /app
188- COPY --from=tensorrt_llm_code  /workspace/tensorrt_llm/triton_backend/scripts scripts
189- COPY --from=tensorrt_llm_code  /workspace/tensorrt_llm/triton_backend/all_models all_models
190- COPY --from=tensorrt_llm_code  /workspace/tensorrt_llm/triton_backend/inflight_batcher_llm/client client
191- COPY --from=tensorrt_llm_code  /workspace/tensorrt_llm/triton_backend/tools tools
192- COPY --from=tensorrt_llm_code  /workspace/tensorrt_llm/examples examples
146+ COPY --from=requirements  /workspace/tensorrt_llm/triton_backend/scripts scripts
147+ COPY --from=requirements  /workspace/tensorrt_llm/triton_backend/all_models all_models
148+ COPY --from=requirements  /workspace/tensorrt_llm/triton_backend/inflight_batcher_llm/client client
149+ COPY --from=requirements  /workspace/tensorrt_llm/triton_backend/tools tools
150+ COPY --from=requirements  /workspace/tensorrt_llm/examples examples
0 commit comments