From 70793f6300cbddaa79d76e196d6b454e2b846480 Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Fri, 26 Sep 2025 12:11:12 +0530
Subject: [PATCH 1/4] Update run-nvidia.sh

---
 script/get-ml-model-llama2/run-nvidia.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/get-ml-model-llama2/run-nvidia.sh b/script/get-ml-model-llama2/run-nvidia.sh
index 2895a44bf..8b0acc67f 100644
--- a/script/get-ml-model-llama2/run-nvidia.sh
+++ b/script/get-ml-model-llama2/run-nvidia.sh
@@ -19,7 +19,7 @@ RUN_CMD="bash -c 'git lfs install && git lfs pull && python3 scripts/build_wheel
 echo "$RUN_CMD"
 
 # TODO: check whether --device nvidia.com/gpu=all would work for docker
-DOCKER_RUN_ARGS=" -v ${MLC_NVIDIA_MLPERF_SCRATCH_PATH}:/mnt -v ${MLC_NVIDIA_PREPROCESSED_CALIBRATION_DATASET_PATH}:/calib_dataset -u $(id -u):$(id -g) --userns=keep-id --device nvidia.com/gpu=all -e NVIDIA_VISIBLE_DEVICES=all"
+DOCKER_RUN_ARGS=" -v ${MLC_NVIDIA_MLPERF_SCRATCH_PATH}:/mnt -v ${MLC_NVIDIA_PREPROCESSED_CALIBRATION_DATASET_PATH}:/calib_dataset -u $(id -u):$(id -g) --device nvidia.com/gpu=all -e NVIDIA_VISIBLE_DEVICES=all"
 export DOCKER_RUN_ARGS="$DOCKER_RUN_ARGS"
 export RUN_CMD="$RUN_CMD"
 make -C docker run LOCAL_USER=1

From b661da12184b4b7f2bf03a95afb283eb8d848d37 Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Sun, 28 Sep 2025 20:30:42 +0530
Subject: [PATCH 2/4] Install git-lfs before initializing

---
 script/get-ml-model-llama2/run-nvidia.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/get-ml-model-llama2/run-nvidia.sh b/script/get-ml-model-llama2/run-nvidia.sh
index 8b0acc67f..c02506a91 100644
--- a/script/get-ml-model-llama2/run-nvidia.sh
+++ b/script/get-ml-model-llama2/run-nvidia.sh
@@ -15,7 +15,7 @@ cd ${MLC_TENSORRT_LLM_CHECKOUT_PATH}
 make -C docker build
 test $? -eq 0 || exit $?
 
-RUN_CMD="bash -c 'git lfs install && git lfs pull && python3 scripts/build_wheel.py -a=${MLC_GPU_ARCH} --clean --install --use_ccache --benchmarks --trt_root /usr/local/tensorrt/ && python examples/quantization/quantize.py --dtype=float16 --output_dir=/mnt/models/Llama2/fp8-quantized-ammo/llama-2-70b-chat-hf-tp${MLC_NVIDIA_TP_SIZE}pp${MLC_NVIDIA_PP_SIZE}-fp8 --model_dir=/mnt/models/Llama2/Llama-2-70b-chat-hf --qformat=fp8 --kv_cache_dtype=fp8 --tp_size ${MLC_NVIDIA_TP_SIZE} --pp_size ${MLC_NVIDIA_PP_SIZE} --calib_dataset=/calib_dataset'"
+RUN_CMD="bash -c 'sudo apt-get install git-lfs && git lfs install && git lfs pull && python3 scripts/build_wheel.py -a=${MLC_GPU_ARCH} --clean --install --use_ccache --benchmarks --trt_root /usr/local/tensorrt/ && python examples/quantization/quantize.py --dtype=float16 --output_dir=/mnt/models/Llama2/fp8-quantized-ammo/llama-2-70b-chat-hf-tp${MLC_NVIDIA_TP_SIZE}pp${MLC_NVIDIA_PP_SIZE}-fp8 --model_dir=/mnt/models/Llama2/Llama-2-70b-chat-hf --qformat=fp8 --kv_cache_dtype=fp8 --tp_size ${MLC_NVIDIA_TP_SIZE} --pp_size ${MLC_NVIDIA_PP_SIZE} --calib_dataset=/calib_dataset'"
 echo "$RUN_CMD"
 
 # TODO: check whether --device nvidia.com/gpu=all would work for docker

From b703492d308b64f1ac9e07aaa0feebe7810ca439 Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Fri, 3 Oct 2025 02:49:18 +0530
Subject: [PATCH 3/4] set memlock to unlimited

---
 script/app-mlperf-inference/meta.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml
index 7d5dd3fbf..ee7f84533 100644
--- a/script/app-mlperf-inference/meta.yaml
+++ b/script/app-mlperf-inference/meta.yaml
@@ -629,7 +629,7 @@ variations:
   nvidia-original:
     docker:
       interactive: True
-      extra_run_args: ' --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined'
+      extra_run_args: ' --cap-add SYS_ADMIN --cap-add SYS_TIME --ulimit memlock=-1 --security-opt apparmor=unconfined --security-opt seccomp=unconfined'
       base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public
       os: "ubuntu"
       os_version: "20.04"

From f63bed9adfd2b83b26288ca120bd4efd4a9f0415 Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Fri, 3 Oct 2025 23:37:42 +0530
Subject: [PATCH 4/4] Update meta.yaml

---
 script/app-mlperf-inference/meta.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml
index ee7f84533..7d5dd3fbf 100644
--- a/script/app-mlperf-inference/meta.yaml
+++ b/script/app-mlperf-inference/meta.yaml
@@ -629,7 +629,7 @@ variations:
   nvidia-original:
     docker:
       interactive: True
-      extra_run_args: ' --cap-add SYS_ADMIN --cap-add SYS_TIME --ulimit memlock=-1 --security-opt apparmor=unconfined --security-opt seccomp=unconfined'
+      extra_run_args: ' --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined'
       base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public
       os: "ubuntu"
       os_version: "20.04"
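
Net effect of the series: patch 4/4 reverts patch 3/4, so only patches 1 and 2 change behavior. Patch 1 drops --userns=keep-id, a Podman-specific flag that plain docker run rejects, and patch 2 installs git-lfs inside the container before git lfs install runs, since the TensorRT-LLM build image may not ship it. The container launch itself is delegated to TensorRT-LLM's docker Makefile, which is outside this diff; the sketch below is only an assumed expansion of make -C docker run LOCAL_USER=1, and the image tag in it is hypothetical:

    # Assumed expansion (textual substitution, as in a Makefile recipe); the
    # real recipe lives in TensorRT-LLM's docker/Makefile, and
    # "tensorrt_llm/devel:latest" is a hypothetical tag for the image produced
    # by `make -C docker build`. DOCKER_RUN_ARGS carries the scratch and
    # calibration-dataset mounts, the host UID:GID mapping, and the GPU flags
    # exported by run-nvidia.sh; RUN_CMD carries the wheel build and the FP8
    # quantization step.
    docker run --rm -it ${DOCKER_RUN_ARGS} tensorrt_llm/devel:latest ${RUN_CMD}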