From 70793f6300cbddaa79d76e196d6b454e2b846480 Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Fri, 26 Sep 2025 12:11:12 +0530
Subject: [PATCH 1/4] Update run-nvidia.sh

---
 script/get-ml-model-llama2/run-nvidia.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/get-ml-model-llama2/run-nvidia.sh b/script/get-ml-model-llama2/run-nvidia.sh
index 2895a44bf..8b0acc67f 100644
--- a/script/get-ml-model-llama2/run-nvidia.sh
+++ b/script/get-ml-model-llama2/run-nvidia.sh
@@ -19,7 +19,7 @@ RUN_CMD="bash -c 'git lfs install && git lfs pull && python3 scripts/build_wheel
 echo "$RUN_CMD"
 
 # TODO: check whether --device nvidia.com/gpu=all would work for docker
-DOCKER_RUN_ARGS=" -v ${MLC_NVIDIA_MLPERF_SCRATCH_PATH}:/mnt -v ${MLC_NVIDIA_PREPROCESSED_CALIBRATION_DATASET_PATH}:/calib_dataset -u $(id -u):$(id -g) --userns=keep-id --device nvidia.com/gpu=all -e NVIDIA_VISIBLE_DEVICES=all"
+DOCKER_RUN_ARGS=" -v ${MLC_NVIDIA_MLPERF_SCRATCH_PATH}:/mnt -v ${MLC_NVIDIA_PREPROCESSED_CALIBRATION_DATASET_PATH}:/calib_dataset -u $(id -u):$(id -g) --device nvidia.com/gpu=all -e NVIDIA_VISIBLE_DEVICES=all"
 export DOCKER_RUN_ARGS="$DOCKER_RUN_ARGS"
 export RUN_CMD="$RUN_CMD"
 make -C docker run LOCAL_USER=1

From b661da12184b4b7f2bf03a95afb283eb8d848d37 Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Sun, 28 Sep 2025 20:30:42 +0530
Subject: [PATCH 2/4] Install git-lfs before initializing

---
 script/get-ml-model-llama2/run-nvidia.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/get-ml-model-llama2/run-nvidia.sh b/script/get-ml-model-llama2/run-nvidia.sh
index 8b0acc67f..c02506a91 100644
--- a/script/get-ml-model-llama2/run-nvidia.sh
+++ b/script/get-ml-model-llama2/run-nvidia.sh
@@ -15,7 +15,7 @@ cd ${MLC_TENSORRT_LLM_CHECKOUT_PATH}
 make -C docker build
 test $? -eq 0 || exit $?
 
-RUN_CMD="bash -c 'git lfs install && git lfs pull && python3 scripts/build_wheel.py -a=${MLC_GPU_ARCH} --clean --install --use_ccache --benchmarks --trt_root /usr/local/tensorrt/ && python examples/quantization/quantize.py --dtype=float16 --output_dir=/mnt/models/Llama2/fp8-quantized-ammo/llama-2-70b-chat-hf-tp${MLC_NVIDIA_TP_SIZE}pp${MLC_NVIDIA_PP_SIZE}-fp8 --model_dir=/mnt/models/Llama2/Llama-2-70b-chat-hf --qformat=fp8 --kv_cache_dtype=fp8 --tp_size ${MLC_NVIDIA_TP_SIZE} --pp_size ${MLC_NVIDIA_PP_SIZE} --calib_dataset=/calib_dataset'"
+RUN_CMD="bash -c 'sudo apt-get install git-lfs && git lfs install && git lfs pull && python3 scripts/build_wheel.py -a=${MLC_GPU_ARCH} --clean --install --use_ccache --benchmarks --trt_root /usr/local/tensorrt/ && python examples/quantization/quantize.py --dtype=float16 --output_dir=/mnt/models/Llama2/fp8-quantized-ammo/llama-2-70b-chat-hf-tp${MLC_NVIDIA_TP_SIZE}pp${MLC_NVIDIA_PP_SIZE}-fp8 --model_dir=/mnt/models/Llama2/Llama-2-70b-chat-hf --qformat=fp8 --kv_cache_dtype=fp8 --tp_size ${MLC_NVIDIA_TP_SIZE} --pp_size ${MLC_NVIDIA_PP_SIZE} --calib_dataset=/calib_dataset'"
 echo "$RUN_CMD"
 
 # TODO: check whether --device nvidia.com/gpu=all would work for docker

From b703492d308b64f1ac9e07aaa0feebe7810ca439 Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Fri, 3 Oct 2025 02:49:18 +0530
Subject: [PATCH 3/4] set memlock to unlimited

---
 script/app-mlperf-inference/meta.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml
index 7d5dd3fbf..ee7f84533 100644
--- a/script/app-mlperf-inference/meta.yaml
+++ b/script/app-mlperf-inference/meta.yaml
@@ -629,7 +629,7 @@ variations:
   nvidia-original:
     docker:
       interactive: True
-      extra_run_args: ' --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined'
+      extra_run_args: ' --cap-add SYS_ADMIN --cap-add SYS_TIME --ulimit memlock=-1 --security-opt apparmor=unconfined --security-opt seccomp=unconfined'
       base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public
       os: "ubuntu"
       os_version: "20.04"

From f63bed9adfd2b83b26288ca120bd4efd4a9f0415 Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Fri, 3 Oct 2025 23:37:42 +0530
Subject: [PATCH 4/4] Update meta.yaml

---
 script/app-mlperf-inference/meta.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml
index ee7f84533..7d5dd3fbf 100644
--- a/script/app-mlperf-inference/meta.yaml
+++ b/script/app-mlperf-inference/meta.yaml
@@ -629,7 +629,7 @@ variations:
   nvidia-original:
     docker:
       interactive: True
-      extra_run_args: ' --cap-add SYS_ADMIN --cap-add SYS_TIME --ulimit memlock=-1 --security-opt apparmor=unconfined --security-opt seccomp=unconfined'
+      extra_run_args: ' --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined'
       base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public
       os: "ubuntu"
       os_version: "20.04"
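
Net effect of the series: patch 4/4 reverts patch 3/4, so only patches 1 and 2 change behavior. Patch 1 drops --userns=keep-id, a Podman-specific flag that plain docker run rejects, and patch 2 installs git-lfs inside the container before git lfs install runs, since the TensorRT-LLM build image may not ship it. The container launch itself is delegated to TensorRT-LLM's docker Makefile, which is outside this diff; the sketch below is only an assumed expansion of make -C docker run LOCAL_USER=1, and the image tag in it is hypothetical:

    # Assumed expansion (textual substitution, as in a Makefile recipe); the
    # real recipe lives in TensorRT-LLM's docker/Makefile, and
    # "tensorrt_llm/devel:latest" is a hypothetical tag for the image produced
    # by `make -C docker build`. DOCKER_RUN_ARGS carries the scratch and
    # calibration-dataset mounts, the host UID:GID mapping, and the GPU flags
    # exported by run-nvidia.sh; RUN_CMD carries the wheel build and the FP8
    # quantization step.
    docker run --rm -it ${DOCKER_RUN_ARGS} tensorrt_llm/devel:latest ${RUN_CMD}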