From 7f0a603e2c684221b13fd71b14e62b19347fed44 Mon Sep 17 00:00:00 2001
From: Misha Chornyi <99709299+mc-nv@users.noreply.github.com>
Date: Thu, 16 Oct 2025 14:42:19 -0700
Subject: [PATCH 1/3] Update Dockerfile versions. (#811)

---
 dockerfile/Dockerfile.triton.trt_llm_backend | 30 +++++++++-----------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/dockerfile/Dockerfile.triton.trt_llm_backend b/dockerfile/Dockerfile.triton.trt_llm_backend
index e36da3e..b9f5758 100644
--- a/dockerfile/Dockerfile.triton.trt_llm_backend
+++ b/dockerfile/Dockerfile.triton.trt_llm_backend
@@ -1,25 +1,24 @@
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.06-py3-min
-ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:25.06-py3
-ARG NVRTC_VER=12.9.86-1
-ARG TRT_VER=10.11.0.33
-ARG NCCL_VER=2.27.5-1+cuda12.9
-ARG RELEASE_URL_TRT_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-${TRT_VER}.Linux.x86_64-gnu.cuda-12.9.tar.gz
-ARG RELEASE_URL_TRT_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-${TRT_VER}.Linux.aarch64-gnu.cuda-12.9.tar.gz
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.08-py3-min
+ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:25.08-py3
+ARG NVRTC_VER=13.0.48-1
+ARG TRT_VER=10.13.2.6
+ARG NCCL_VER=2.27.7-1+cuda13.0
+ARG RELEASE_URL_TRT_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TRT_VER}.Linux.x86_64-gnu.cuda-13.0.tar.gz
+ARG RELEASE_URL_TRT_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TRT_VER}.Linux.aarch64-gnu.cuda-13.0.tar.gz
 
 # Versions of packages to copy from pytorch image
-ARG TORCH_VER=2.8.0a0+5228986c39.nv25.6
-ARG TORCHVISION_VER=0.22.0a0+95f10a4e
-ARG SETUPTOOLS_VER=78.1.1
-ARG PYTORCH_TRITON_VER=3.3.0+git96316ce52.nvinternal
-ARG JINJA2_VER=3.1.6
+ARG TORCH_VER=2.8.0a0+34c6371d24.nv25.8
+ARG TORCHVISION_VER=0.23.0a0+428a54c9
+ARG SETUPTOOLS_VER=79.0.1
+ARG PYTORCH_TRITON_VER=3.3.1+gitc8757738
 ARG NETWORKX_VER=3.5
 ARG SYMPY_VER=1.14.0
 ARG PACKAGING_VER=23.2
 ARG FLASH_ATTN_VER=2.7.4.post1
 
 ARG TENSORRTLLM_REPO=https://github.com/NVIDIA/TensorRT-LLM.git
-ARG TENSORRTLLM_REPO_TAG=release/1.0
-ARG TENSORRTLLM_VER=1.0.0
+ARG TENSORRTLLM_REPO_TAG=v1.2.0rc0
+ARG TENSORRTLLM_VER=1.2.0rc0
 
 FROM ${PYTORCH_IMAGE} AS pytorch_image
 FROM ${BASE_IMAGE} AS install_dependencies
@@ -103,7 +102,6 @@ ARG TORCH_VER
 ARG TORCHVISION_VER
 ARG SETUPTOOLS_VER
 ARG PYTORCH_TRITON_VER
-ARG JINJA2_VER
 ARG NETWORKX_VER
 ARG SYMPY_VER
 ARG PACKAGING_VER
@@ -121,8 +119,6 @@ COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/setuptools-${S
 COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/functorch /usr/local/lib/python3.12/dist-packages/functorch
 COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
 COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/triton /usr/local/lib/python3.12/dist-packages/triton
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/jinja2 /usr/local/lib/python3.12/dist-packages/jinja2
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info
 COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/networkx /usr/local/lib/python3.12/dist-packages/networkx
 COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info
 COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/sympy /usr/local/lib/python3.12/dist-packages/sympy

From 8bcf50b108af46f92c29954beecaae188c62ad7f Mon Sep 17 00:00:00 2001
From: Misha Chornyi <99709299+mc-nv@users.noreply.github.com>
Date: Mon, 27 Oct 2025 10:23:34 -0700
Subject: [PATCH 2/3] Update Dockerfile instructions in 25.10 release (#815)

* Restoring old way for container provisioning

* revert to old values

* Trying to bypass arch issue

* remove symbol

* Update version

* switching to devel

* Using Conan home directory

* Renaming image

* remove old values

* Restructuring Dockerfile.

* fix: Update argument value
---
 dockerfile/Dockerfile.triton.trt_llm_backend | 214 ++++++++-----------
 1 file changed, 86 insertions(+), 128 deletions(-)

diff --git a/dockerfile/Dockerfile.triton.trt_llm_backend b/dockerfile/Dockerfile.triton.trt_llm_backend
index b9f5758..d9b8484 100644
--- a/dockerfile/Dockerfile.triton.trt_llm_backend
+++ b/dockerfile/Dockerfile.triton.trt_llm_backend
@@ -1,99 +1,65 @@
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.08-py3-min
-ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:25.08-py3
-ARG NVRTC_VER=13.0.48-1
-ARG TRT_VER=10.13.2.6
-ARG NCCL_VER=2.27.7-1+cuda13.0
-ARG RELEASE_URL_TRT_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TRT_VER}.Linux.x86_64-gnu.cuda-13.0.tar.gz
-ARG RELEASE_URL_TRT_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TRT_VER}.Linux.aarch64-gnu.cuda-13.0.tar.gz
-
-# Versions of packages to copy from pytorch image
-ARG TORCH_VER=2.8.0a0+34c6371d24.nv25.8
-ARG TORCHVISION_VER=0.23.0a0+428a54c9
-ARG SETUPTOOLS_VER=79.0.1
-ARG PYTORCH_TRITON_VER=3.3.1+gitc8757738
-ARG NETWORKX_VER=3.5
-ARG SYMPY_VER=1.14.0
-ARG PACKAGING_VER=23.2
-ARG FLASH_ATTN_VER=2.7.4.post1
-
+# syntax=docker/dockerfile:1
 ARG TENSORRTLLM_REPO=https://github.com/NVIDIA/TensorRT-LLM.git
-ARG TENSORRTLLM_REPO_TAG=v1.2.0rc0
-ARG TENSORRTLLM_VER=1.2.0rc0
+ARG TENSORRTLLM_REPO_TAG=v1.2.0rc1
+ARG TENSORRTLLM_VER=1.2.0rc1
+ARG TENSORRTLLM_WHEEL
+ARG TENSORRTLLM_TRT_VER=10.13.2.6
 
-FROM ${PYTORCH_IMAGE} AS pytorch_image
-FROM ${BASE_IMAGE} AS install_dependencies
 
-WORKDIR /workspace
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.08-py3-min
+ARG SOURCE_IMAGE=nvcr.io/nvidia/pytorch:25.08-py3
 
-# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
-COPY --from=pytorch_image /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
 
-ENV PIP_BREAK_SYSTEM_PACKAGES=1
-RUN apt-get update -q=2 \
-    && apt-get install -y --no-install-recommends \
-        python3-dev \
-        python3-pip \
-        git-lfs \
-        # Remove previous TRT installation
-    && apt-get purge -y "libnvinfer*" \
-    && pip3 uninstall -y tensorrt \
-    && rm -rf /var/lib/apt/lists/*
+# Versions of packages to copy from source image
+ARG FLASH_ATTN_VER=2.7.4.post1
+ARG NCCL_VER=2.27.7-1+cuda13.0
+ARG NETWORKX_VER=3.5
+ARG NVRTC_VER=13.0.48-1
+ARG PACKAGING_VER=23.2
+ARG PYTORCH_TRITON_VER=3.3.1+gitc8757738
+ARG SETUPTOOLS_VER=79.0.1
+ARG SYMPY_VER=1.14.0
+ARG TORCH_VER=2.8.0a0+34c6371d24.nv25.8
+ARG TORCHVISION_VER=0.23.0a0+428a54c9
+ARG TRT_URL_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TENSORRTLLM_TRT_VER}.Linux.aarch64-gnu.cuda-13.0.tar.gz
+ARG TRT_URL_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TENSORRTLLM_TRT_VER}.Linux.x86_64-gnu.cuda-13.0.tar.gz
 
-ARG TRT_VER
-ARG NVRTC_VER
-ARG NCCL_VER
 
-ENV TRT_VERSION=$TRT_VER \
-    TRT_VER=$TRT_VER \
-    CUDA_VER=$CUDA_VERSION \
-    CUDNN_VER=$CUDNN_VERSION \
-    NCCL_VER=$NCCL_VER \
-    CUBLAS_VER=$CUBLAS_VERSION \
-    NVRTC_VER="${NVRTC_VER}"
+FROM ${SOURCE_IMAGE} AS source_image
+FROM ${BASE_IMAGE} AS requirements
 
-LABEL TRT_VERSION=$TRT_VER
-LABEL NCCL_VER=$NCCL_VER
+# Download & install TRT release
+ARG TRT_URL_x86
+ARG TRT_URL_ARM
 
-# Install NVRTC
-RUN [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" \
-    && curl -o /tmp/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/$arch/cuda-keyring_1.1-1_all.deb \
-    && apt install /tmp/cuda-keyring.deb \
-    && rm /tmp/cuda-keyring.deb \
-    && apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev* \
-    && CUDA_VER_SHORT=${CUDA_VER: 0:4} \
-    && NVRTC_CUDA_VERSION=${CUDA_VER_SHORT/./-} \
-    && apt-get update -qq \
-    && apt-get install -y --no-install-recommends \
-        cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER} \
-        libnccl2=${NCCL_VER} \
-        libnccl-dev=${NCCL_VER} \
-    && rm -rf /var/lib/apt/lists/*
+WORKDIR /requirements/
 
-# Download & install TRT release
-ARG RELEASE_URL_TRT_x86
-ARG RELEASE_URL_TRT_ARM
 
-RUN [ "$(uname -m)" != "x86_64" ] && RELEASE_URL_TRT=${RELEASE_URL_TRT_ARM} || RELEASE_URL_TRT=${RELEASE_URL_TRT_x86} \
-    && curl -fSL -o /tmp/tensorrt.tar.gz ${RELEASE_URL_TRT} \
+ARG TENSORRTLLM_TRT_VER
+RUN [ "$(uname -m)" != "x86_64" ] && TRT_URL=${TRT_URL_ARM} || TRT_URL=${TRT_URL_x86} \
+    && curl -fSL -o /tmp/tensorrt.tar.gz ${TRT_URL} \
     # Extract the tarball, excluding Windows libraries and static libraries as
     # they are not needed for Linux build
     && tar xzvf /tmp/tensorrt.tar.gz --exclude="lib*win.so*" --exclude="*.a" -C /usr/local \
     && rm /tmp/tensorrt.tar.gz \
     && find /usr/local -maxdepth 1 -name Tens* -type d -exec ln -s {} /usr/local/tensorrt \;
 
-RUN pip3 install /usr/local/tensorrt/python/tensorrt-*-cp$( python3 -c "import sys; print(str(sys.version_info.major) + str(sys.version_info.minor))" )*
+WORKDIR /wheels/
 
-ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib:${LD_LIBRARY_PATH}
-ENV TRT_ROOT=/usr/local/tensorrt
-
-FROM install_dependencies AS tensorrt_llm_code
+ARG TENSORRTLLM_WHEEL
+ADD ${TENSORRTLLM_WHEEL} /wheels/
 
-WORKDIR /workspace
+RUN apt-get update -q=2 \
+    && apt-get install -y --no-install-recommends \
+        git-lfs \
+    && git-lfs install  \
+    && rm -rf /var/lib/apt/lists/*
 
 ARG TENSORRTLLM_REPO
 ARG TENSORRTLLM_REPO_TAG
-RUN git-lfs install \
-    && git clone --single-branch --recurse-submodules --depth=1 -b ${TENSORRTLLM_REPO_TAG} ${TENSORRTLLM_REPO} tensorrt_llm
+WORKDIR /workspace/
+RUN git clone --single-branch --recurse-submodules --depth=1 -b ${TENSORRTLLM_REPO_TAG} ${TENSORRTLLM_REPO} tensorrt_llm
+
 
 # Final stage to build the TRT-LLM container
 FROM ${BASE_IMAGE} AS final_stage
@@ -107,30 +73,27 @@ ARG SYMPY_VER
 ARG PACKAGING_VER
 ARG FLASH_ATTN_VER
 # Copy necessary files from the base stage
-COPY --from=pytorch_image /usr/local/lib/lib* /usr/local/lib/
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torch /usr/local/lib/python3.12/dist-packages/torch
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchgen /usr/local/lib/python3.12/dist-packages/torchgen
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision /usr/local/lib/python3.12/dist-packages/torchvision
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision.libs /usr/local/lib/python3.12/dist-packages/torchvision.libs
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/setuptools /usr/local/lib/python3.12/dist-packages/setuptools
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/functorch /usr/local/lib/python3.12/dist-packages/functorch
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/triton /usr/local/lib/python3.12/dist-packages/triton
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/networkx /usr/local/lib/python3.12/dist-packages/networkx
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/sympy /usr/local/lib/python3.12/dist-packages/sympy
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/packaging /usr/local/lib/python3.12/dist-packages/packaging
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/lib/python3.12/dist-packages/flash_attn
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/
-
-# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
-COPY --from=pytorch_image /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
+COPY --from=source_image /usr/local/lib/lib* /usr/local/lib/
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torch /usr/local/lib/python3.12/dist-packages/torch
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torchgen /usr/local/lib/python3.12/dist-packages/torchgen
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torchvision /usr/local/lib/python3.12/dist-packages/torchvision
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torchvision.libs /usr/local/lib/python3.12/dist-packages/torchvision.libs
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/setuptools /usr/local/lib/python3.12/dist-packages/setuptools
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/functorch /usr/local/lib/python3.12/dist-packages/functorch
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/triton /usr/local/lib/python3.12/dist-packages/triton
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/networkx /usr/local/lib/python3.12/dist-packages/networkx
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/sympy /usr/local/lib/python3.12/dist-packages/sympy
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/packaging /usr/local/lib/python3.12/dist-packages/packaging
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/lib/python3.12/dist-packages/flash_attn
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
+COPY --from=source_image /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/
 
 ARG NVRTC_VER
 ARG NCCL_VER
@@ -138,55 +101,50 @@ ENV CUDA_VER=$CUDA_VERSION \
     NVRTC_VER="${NVRTC_VER}" \
     NCCL_VER="${NCCL_VER}"
 
-# Install the necessary dependencies and remove previous TRT installation in the
-# final image
 ENV PIP_BREAK_SYSTEM_PACKAGES=1
-RUN apt-get update -q=2 \
-    && apt-get install -y --no-install-recommends \
-        python3-dev \
-        python3-pip \
-        git-lfs \
-        perl \
-        # Remove previous TRT installation
-    && apt-get purge -y "libnvinfer*" \
-    && pip3 uninstall -y tensorrt \
-    && rm -rf /var/lib/apt/lists/* \
-    && pip3 install --no-cache-dir polygraphy==0.49.9 mpi4py==3.1.5
 
-# Install NVRTC
 RUN [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" \
     && curl -o /tmp/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/$arch/cuda-keyring_1.1-1_all.deb \
-    && apt install /tmp/cuda-keyring.deb \
+    && apt-get install /tmp/cuda-keyring.deb \
     && rm /tmp/cuda-keyring.deb \
-    && apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev* \
+    && apt-get remove --purge -y --allow-change-held-packages \
+        cuda-nvrtc-dev* \
+        libnvinfer* \
+        tensorrt* \
     && CUDA_VER_SHORT=${CUDA_VER: 0:4} \
     && NVRTC_CUDA_VERSION=${CUDA_VER_SHORT/./-} \
     && apt-get update -qq \
     && apt-get install -y --no-install-recommends \
         cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER} \
-        libnccl2=${NCCL_VER} \
+        git-lfs \
         libnccl-dev=${NCCL_VER} \
-    && rm -rf /var/lib/apt/lists/*
+        libnccl2=${NCCL_VER} \
+        perl \
+        python3-dev \
+        python3-pip \
+    && rm -rf /var/lib/apt/lists/* \
+    && pip3 uninstall -y tensorrt \
+    && pip3 install --no-cache-dir polygraphy==0.49.9 mpi4py==3.1.5
+
 
 # Install TRT
-COPY --from=install_dependencies /usr/local/tensorrt /usr/local/tensorrt
+COPY --from=requirements /usr/local/tensorrt /usr/local/tensorrt
 RUN pip3 install /usr/local/tensorrt/python/tensorrt-*-cp$( python3 -c "import sys; print(str(sys.version_info.major) + str(sys.version_info.minor))" )*
 
-# Set environment variables
-ARG TRT_VER
-ENV TRT_VERSION=$TRT_VER
+# Set enviroment variables
+ARG TENSORRTLM_TRT_VER
+ENV TRT_VERSION=$TENSORRTLLM_TRT_VER
 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib:${LD_LIBRARY_PATH}
 ENV TRT_ROOT=/usr/local/tensorrt
 
 # Install TRT-LLM wheel after all the dependencies are installed
-ARG TENSORRTLLM_VER
-RUN --mount=type=secret,id=pypi_extra_values,env=PYPI_EXTRA_VALUES \
-    pip install --no-cache-dir ${PYPI_EXTRA_VALUES} tensorrt_llm==${TENSORRTLLM_VER}
+RUN --mount=type=bind,from=requirements,source=/wheels/,target=/wheels/ \
+      pip3 install /wheels/tensorrt_llm*.whl
 
 # Copying the Tensorrt LLM scripts and applications
 WORKDIR /app
-COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/scripts scripts
-COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/all_models all_models
-COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/inflight_batcher_llm/client client
-COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/tools tools
-COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/examples examples
+COPY --from=requirements /workspace/tensorrt_llm/triton_backend/scripts scripts
+COPY --from=requirements /workspace/tensorrt_llm/triton_backend/all_models all_models
+COPY --from=requirements /workspace/tensorrt_llm/triton_backend/inflight_batcher_llm/client client
+COPY --from=requirements /workspace/tensorrt_llm/triton_backend/tools tools
+COPY --from=requirements /workspace/tensorrt_llm/examples examples

From e2e72dc83f7255a36738b080446570f32b466781 Mon Sep 17 00:00:00 2001
From: Misha Chornyi <99709299+mc-nv@users.noreply.github.com>
Date: Tue, 28 Oct 2025 14:22:55 -0700
Subject: [PATCH 3/3] rollback: Reverting TensorRT-LLM Backend changes. (#816)

---
 dockerfile/Dockerfile.triton.trt_llm_backend | 218 +++++++++++--------
 tensorrt_llm                                 |   2 +-
 2 files changed, 133 insertions(+), 87 deletions(-)

diff --git a/dockerfile/Dockerfile.triton.trt_llm_backend b/dockerfile/Dockerfile.triton.trt_llm_backend
index d9b8484..e36da3e 100644
--- a/dockerfile/Dockerfile.triton.trt_llm_backend
+++ b/dockerfile/Dockerfile.triton.trt_llm_backend
@@ -1,65 +1,100 @@
-# syntax=docker/dockerfile:1
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.06-py3-min
+ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:25.06-py3
+ARG NVRTC_VER=12.9.86-1
+ARG TRT_VER=10.11.0.33
+ARG NCCL_VER=2.27.5-1+cuda12.9
+ARG RELEASE_URL_TRT_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-${TRT_VER}.Linux.x86_64-gnu.cuda-12.9.tar.gz
+ARG RELEASE_URL_TRT_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-${TRT_VER}.Linux.aarch64-gnu.cuda-12.9.tar.gz
+
+# Versions of packages to copy from pytorch image
+ARG TORCH_VER=2.8.0a0+5228986c39.nv25.6
+ARG TORCHVISION_VER=0.22.0a0+95f10a4e
+ARG SETUPTOOLS_VER=78.1.1
+ARG PYTORCH_TRITON_VER=3.3.0+git96316ce52.nvinternal
+ARG JINJA2_VER=3.1.6
+ARG NETWORKX_VER=3.5
+ARG SYMPY_VER=1.14.0
+ARG PACKAGING_VER=23.2
+ARG FLASH_ATTN_VER=2.7.4.post1
+
 ARG TENSORRTLLM_REPO=https://github.com/NVIDIA/TensorRT-LLM.git
-ARG TENSORRTLLM_REPO_TAG=v1.2.0rc1
-ARG TENSORRTLLM_VER=1.2.0rc1
-ARG TENSORRTLLM_WHEEL
-ARG TENSORRTLLM_TRT_VER=10.13.2.6
+ARG TENSORRTLLM_REPO_TAG=release/1.0
+ARG TENSORRTLLM_VER=1.0.0
 
+FROM ${PYTORCH_IMAGE} AS pytorch_image
+FROM ${BASE_IMAGE} AS install_dependencies
 
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.08-py3-min
-ARG SOURCE_IMAGE=nvcr.io/nvidia/pytorch:25.08-py3
+WORKDIR /workspace
 
+# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
+COPY --from=pytorch_image /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
 
-# Versions of packages to copy from source image
-ARG FLASH_ATTN_VER=2.7.4.post1
-ARG NCCL_VER=2.27.7-1+cuda13.0
-ARG NETWORKX_VER=3.5
-ARG NVRTC_VER=13.0.48-1
-ARG PACKAGING_VER=23.2
-ARG PYTORCH_TRITON_VER=3.3.1+gitc8757738
-ARG SETUPTOOLS_VER=79.0.1
-ARG SYMPY_VER=1.14.0
-ARG TORCH_VER=2.8.0a0+34c6371d24.nv25.8
-ARG TORCHVISION_VER=0.23.0a0+428a54c9
-ARG TRT_URL_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TENSORRTLLM_TRT_VER}.Linux.aarch64-gnu.cuda-13.0.tar.gz
-ARG TRT_URL_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TENSORRTLLM_TRT_VER}.Linux.x86_64-gnu.cuda-13.0.tar.gz
+ENV PIP_BREAK_SYSTEM_PACKAGES=1
+RUN apt-get update -q=2 \
+    && apt-get install -y --no-install-recommends \
+        python3-dev \
+        python3-pip \
+        git-lfs \
+        # Remove previous TRT installation
+    && apt-get purge -y "libnvinfer*" \
+    && pip3 uninstall -y tensorrt \
+    && rm -rf /var/lib/apt/lists/*
 
+ARG TRT_VER
+ARG NVRTC_VER
+ARG NCCL_VER
 
-FROM ${SOURCE_IMAGE} AS source_image
-FROM ${BASE_IMAGE} AS requirements
+ENV TRT_VERSION=$TRT_VER \
+    TRT_VER=$TRT_VER \
+    CUDA_VER=$CUDA_VERSION \
+    CUDNN_VER=$CUDNN_VERSION \
+    NCCL_VER=$NCCL_VER \
+    CUBLAS_VER=$CUBLAS_VERSION \
+    NVRTC_VER="${NVRTC_VER}"
 
-# Download & install TRT release
-ARG TRT_URL_x86
-ARG TRT_URL_ARM
+LABEL TRT_VERSION=$TRT_VER
+LABEL NCCL_VER=$NCCL_VER
 
-WORKDIR /requirements/
+# Install NVRTC
+RUN [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" \
+    && curl -o /tmp/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/$arch/cuda-keyring_1.1-1_all.deb \
+    && apt install /tmp/cuda-keyring.deb \
+    && rm /tmp/cuda-keyring.deb \
+    && apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev* \
+    && CUDA_VER_SHORT=${CUDA_VER: 0:4} \
+    && NVRTC_CUDA_VERSION=${CUDA_VER_SHORT/./-} \
+    && apt-get update -qq \
+    && apt-get install -y --no-install-recommends \
+        cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER} \
+        libnccl2=${NCCL_VER} \
+        libnccl-dev=${NCCL_VER} \
+    && rm -rf /var/lib/apt/lists/*
 
+# Download & install TRT release
+ARG RELEASE_URL_TRT_x86
+ARG RELEASE_URL_TRT_ARM
 
-ARG TENSORRTLLM_TRT_VER
-RUN [ "$(uname -m)" != "x86_64" ] && TRT_URL=${TRT_URL_ARM} || TRT_URL=${TRT_URL_x86} \
-    && curl -fSL -o /tmp/tensorrt.tar.gz ${TRT_URL} \
+RUN [ "$(uname -m)" != "x86_64" ] && RELEASE_URL_TRT=${RELEASE_URL_TRT_ARM} || RELEASE_URL_TRT=${RELEASE_URL_TRT_x86} \
+    && curl -fSL -o /tmp/tensorrt.tar.gz ${RELEASE_URL_TRT} \
     # Extract the tarball, excluding Windows libraries and static libraries as
     # they are not needed for Linux build
     && tar xzvf /tmp/tensorrt.tar.gz --exclude="lib*win.so*" --exclude="*.a" -C /usr/local \
     && rm /tmp/tensorrt.tar.gz \
     && find /usr/local -maxdepth 1 -name Tens* -type d -exec ln -s {} /usr/local/tensorrt \;
 
-WORKDIR /wheels/
+RUN pip3 install /usr/local/tensorrt/python/tensorrt-*-cp$( python3 -c "import sys; print(str(sys.version_info.major) + str(sys.version_info.minor))" )*
 
-ARG TENSORRTLLM_WHEEL
-ADD ${TENSORRTLLM_WHEEL} /wheels/
+ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib:${LD_LIBRARY_PATH}
+ENV TRT_ROOT=/usr/local/tensorrt
 
-RUN apt-get update -q=2 \
-    && apt-get install -y --no-install-recommends \
-        git-lfs \
-    && git-lfs install  \
-    && rm -rf /var/lib/apt/lists/*
+FROM install_dependencies AS tensorrt_llm_code
+
+WORKDIR /workspace
 
 ARG TENSORRTLLM_REPO
 ARG TENSORRTLLM_REPO_TAG
-WORKDIR /workspace/
-RUN git clone --single-branch --recurse-submodules --depth=1 -b ${TENSORRTLLM_REPO_TAG} ${TENSORRTLLM_REPO} tensorrt_llm
-
+RUN git-lfs install \
+    && git clone --single-branch --recurse-submodules --depth=1 -b ${TENSORRTLLM_REPO_TAG} ${TENSORRTLLM_REPO} tensorrt_llm
 
 # Final stage to build the TRT-LLM container
 FROM ${BASE_IMAGE} AS final_stage
@@ -68,32 +103,38 @@ ARG TORCH_VER
 ARG TORCHVISION_VER
 ARG SETUPTOOLS_VER
 ARG PYTORCH_TRITON_VER
+ARG JINJA2_VER
 ARG NETWORKX_VER
 ARG SYMPY_VER
 ARG PACKAGING_VER
 ARG FLASH_ATTN_VER
 # Copy necessary files from the base stage
-COPY --from=source_image /usr/local/lib/lib* /usr/local/lib/
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torch /usr/local/lib/python3.12/dist-packages/torch
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torchgen /usr/local/lib/python3.12/dist-packages/torchgen
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torchvision /usr/local/lib/python3.12/dist-packages/torchvision
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/torchvision.libs /usr/local/lib/python3.12/dist-packages/torchvision.libs
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/setuptools /usr/local/lib/python3.12/dist-packages/setuptools
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/functorch /usr/local/lib/python3.12/dist-packages/functorch
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/triton /usr/local/lib/python3.12/dist-packages/triton
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/networkx /usr/local/lib/python3.12/dist-packages/networkx
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/sympy /usr/local/lib/python3.12/dist-packages/sympy
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/packaging /usr/local/lib/python3.12/dist-packages/packaging
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/lib/python3.12/dist-packages/flash_attn
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
-COPY --from=source_image /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/
+COPY --from=pytorch_image /usr/local/lib/lib* /usr/local/lib/
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torch /usr/local/lib/python3.12/dist-packages/torch
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchgen /usr/local/lib/python3.12/dist-packages/torchgen
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision /usr/local/lib/python3.12/dist-packages/torchvision
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision.libs /usr/local/lib/python3.12/dist-packages/torchvision.libs
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/setuptools /usr/local/lib/python3.12/dist-packages/setuptools
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/functorch /usr/local/lib/python3.12/dist-packages/functorch
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/triton /usr/local/lib/python3.12/dist-packages/triton
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/jinja2 /usr/local/lib/python3.12/dist-packages/jinja2
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/networkx /usr/local/lib/python3.12/dist-packages/networkx
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/sympy /usr/local/lib/python3.12/dist-packages/sympy
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/packaging /usr/local/lib/python3.12/dist-packages/packaging
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/lib/python3.12/dist-packages/flash_attn
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
+COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/
+
+# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
+COPY --from=pytorch_image /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
 
 ARG NVRTC_VER
 ARG NCCL_VER
@@ -101,50 +142,55 @@ ENV CUDA_VER=$CUDA_VERSION \
     NVRTC_VER="${NVRTC_VER}" \
     NCCL_VER="${NCCL_VER}"
 
+# Install the required system packages, then remove the previous TRT
+# installation from the final image
 ENV PIP_BREAK_SYSTEM_PACKAGES=1
+RUN apt-get update -qq \
+    && apt-get install -y --no-install-recommends \
+        git-lfs \
+        perl \
+        python3-dev \
+        python3-pip \
+    # Remove the previous TRT installation (apt packages and pip wheel)
+    && apt-get purge -y 'libnvinfer*' \
+    && pip3 uninstall -y tensorrt \
+    && rm -rf /var/lib/apt/lists/* \
+    && pip3 install --no-cache-dir polygraphy==0.49.9 mpi4py==3.1.5
 
+# Install the CUDA keyring, then the pinned NVRTC and NCCL packages
 RUN [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" \
     && curl -o /tmp/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/$arch/cuda-keyring_1.1-1_all.deb \
-    && apt-get install /tmp/cuda-keyring.deb \
+    && apt-get install -y /tmp/cuda-keyring.deb \
     && rm /tmp/cuda-keyring.deb \
-    && apt-get remove --purge -y --allow-change-held-packages \
-        cuda-nvrtc-dev* \
-        libnvinfer* \
-        tensorrt* \
+    && apt-get remove --purge -y --allow-change-held-packages cuda-nvrtc-dev* \
     && CUDA_VER_SHORT=${CUDA_VER: 0:4} \
     && NVRTC_CUDA_VERSION=${CUDA_VER_SHORT/./-} \
     && apt-get update -qq \
     && apt-get install -y --no-install-recommends \
         cuda-nvrtc-dev-${NVRTC_CUDA_VERSION}=${NVRTC_VER} \
-        git-lfs \
-        libnccl-dev=${NCCL_VER} \
         libnccl2=${NCCL_VER} \
-        perl \
-        python3-dev \
-        python3-pip \
-    && rm -rf /var/lib/apt/lists/* \
-    && pip3 uninstall -y tensorrt \
-    && pip3 install --no-cache-dir polygraphy==0.49.9 mpi4py==3.1.5
-
+        libnccl-dev=${NCCL_VER} \
+    && rm -rf /var/lib/apt/lists/*
 
 # Install TRT
-COPY --from=requirements /usr/local/tensorrt /usr/local/tensorrt
+COPY --from=install_dependencies /usr/local/tensorrt /usr/local/tensorrt
 RUN pip3 install /usr/local/tensorrt/python/tensorrt-*-cp$( python3 -c "import sys; print(str(sys.version_info.major) + str(sys.version_info.minor))" )*
 
-# Set enviroment variables
-ARG TENSORRTLM_TRT_VER
-ENV TRT_VERSION=$TENSORRTLLM_TRT_VER
+# Expose the TensorRT version chosen at build time to the runtime environment
+ARG TRT_VER
+ENV TRT_VERSION="${TRT_VER}"
 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib:${LD_LIBRARY_PATH}
 ENV TRT_ROOT=/usr/local/tensorrt
 
 # Install TRT-LLM wheel after all the dependencies are installed
-RUN --mount=type=bind,from=requirements,source=/wheels/,target=/wheels/ \
-      pip3 install /wheels/tensorrt_llm*.whl
+ARG TENSORRTLLM_VER
+# PYPI_EXTRA_VALUES is injected from the pypi_extra_values build secret (presumably extra pip options, hence unquoted - confirm)
+RUN --mount=type=secret,id=pypi_extra_values,env=PYPI_EXTRA_VALUES pip3 install --no-cache-dir ${PYPI_EXTRA_VALUES} tensorrt_llm==${TENSORRTLLM_VER}
 
 # Copying the Tensorrt LLM scripts and applications
 WORKDIR /app
-COPY --from=requirements /workspace/tensorrt_llm/triton_backend/scripts scripts
-COPY --from=requirements /workspace/tensorrt_llm/triton_backend/all_models all_models
-COPY --from=requirements /workspace/tensorrt_llm/triton_backend/inflight_batcher_llm/client client
-COPY --from=requirements /workspace/tensorrt_llm/triton_backend/tools tools
-COPY --from=requirements /workspace/tensorrt_llm/examples examples
+COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/scripts scripts
+COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/all_models all_models
+COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/inflight_batcher_llm/client client
+COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/tools tools
+COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/examples examples
diff --git a/tensorrt_llm b/tensorrt_llm
index 560ded5..ae8270b 160000
--- a/tensorrt_llm
+++ b/tensorrt_llm
@@ -1 +1 @@
-Subproject commit 560ded5450b79efde720162fc397d7efa59aae6d
+Subproject commit ae8270b713446948246f16fadf4e2a32e35d0f62