From 2d0f250ac295d075a8776071e3923e5b0b7c6ecb Mon Sep 17 00:00:00 2001
From: Yingge He <157551214+yinggeh@users.noreply.github.com>
Date: Wed, 24 Sep 2025 15:07:44 -0700
Subject: [PATCH 1/2] ci: Remove deprecated vLLM config "disable_log_requests" (#96)

---
 ci/L0_backend_vllm/vllm_backend/test.sh          | 4 ++--
 ci/L0_multi_gpu_vllm/multi_lora/test.sh          | 4 ----
 docs/llama_multi_lora_tutorial.md                | 4 +---
 samples/model_repository/vllm_model/1/model.json | 1 -
 4 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/ci/L0_backend_vllm/vllm_backend/test.sh b/ci/L0_backend_vllm/vllm_backend/test.sh
index 87e04b21..690599b5 100755
--- a/ci/L0_backend_vllm/vllm_backend/test.sh
+++ b/ci/L0_backend_vllm/vllm_backend/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -64,7 +64,7 @@ wget -P models/add_sub https://raw.githubusercontent.com/triton-inference-server

 # Invalid model attribute
 cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_1/
-sed -i 's/"disable_log_requests"/"invalid_attribute"/' models/vllm_invalid_1/1/model.json
+sed -i 's/"enforce_eager"/"invalid_attribute"/' models/vllm_invalid_1/1/model.json

 # Invalid model name
 cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_2/
diff --git a/ci/L0_multi_gpu_vllm/multi_lora/test.sh b/ci/L0_multi_gpu_vllm/multi_lora/test.sh
index bcc52770..8cf2c3fa 100755
--- a/ci/L0_multi_gpu_vllm/multi_lora/test.sh
+++ b/ci/L0_multi_gpu_vllm/multi_lora/test.sh
@@ -110,7 +110,6 @@ export SERVER_ENABLE_LORA=true
 model_json=$(cat <
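Patch 1/2 drops the deprecated vLLM engine option "disable_log_requests" from the sample model.json and from the CI scripts that referenced it. As a minimal, illustrative shell sketch (not part of the patch; the repository path and layout are assumptions), an existing deployment could scan its own model repository for the deprecated key before picking up this change:

# Illustrative check only, assuming model.json files live under ./samples/model_repository.
MODEL_REPO=${MODEL_REPO:-./samples/model_repository}
if grep -rn --include=model.json '"disable_log_requests"' "${MODEL_REPO}"; then
    echo "Deprecated \"disable_log_requests\" entries found; remove them before upgrading vLLM."
else
    echo "No deprecated \"disable_log_requests\" entries found."
fi
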
Date: Mon, 6 Oct 2025 16:45:58 -0700
Subject: [PATCH 2/2] TPRD-1710: Update default branches post-25.09 (#99)

Co-authored-by: Yingge He <157551214+yinggeh@users.noreply.github.com>
---
 ci/L0_additional_outputs_vllm/test.sh     | 1 -
 ci/L0_backend_vllm/test.sh                | 1 -
 ci/L0_check_health_vllm/test.sh           | 1 -
 ci/L0_multi_gpu_vllm/multi_lora/test.sh   | 3 +++
 ci/L0_multi_gpu_vllm/test.sh              | 1 -
 ci/L0_multi_gpu_vllm/vllm_backend/test.sh | 3 +++
 6 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/ci/L0_additional_outputs_vllm/test.sh b/ci/L0_additional_outputs_vllm/test.sh
index a13fdae2..967e771f 100755
--- a/ci/L0_additional_outputs_vllm/test.sh
+++ b/ci/L0_additional_outputs_vllm/test.sh
@@ -30,7 +30,6 @@ source ../common/util.sh

 pip3 install pytest==8.1.1
 pip3 install tritonclient[grpc]
-pip3 install "transformers<=4.53.3" # TODO:DLIS-8441 remove this dependency

 # Prepare Model
 rm -rf models vllm_baseline_output.pkl && mkdir -p models
diff --git a/ci/L0_backend_vllm/test.sh b/ci/L0_backend_vllm/test.sh
index 674b0fa9..b4d27357 100755
--- a/ci/L0_backend_vllm/test.sh
+++ b/ci/L0_backend_vllm/test.sh
@@ -29,7 +29,6 @@ RET=0
 SUBTESTS="accuracy_test request_cancellation enabled_stream vllm_backend metrics_test"

 python3 -m pip install tritonclient[grpc]
-python3 -m pip install "transformers<=4.53.3" # TODO:DLIS-8441 remove this dependency

 for TEST in ${SUBTESTS}; do
     (cd ${TEST} && bash -ex test.sh && cd ..)
diff --git a/ci/L0_check_health_vllm/test.sh b/ci/L0_check_health_vllm/test.sh
index 655b043f..80668bcb 100755
--- a/ci/L0_check_health_vllm/test.sh
+++ b/ci/L0_check_health_vllm/test.sh
@@ -30,7 +30,6 @@ source ../common/util.sh

 pip3 install pytest==8.1.1
 pip3 install tritonclient[grpc]
-pip3 install "transformers<=4.53.3" # TODO:DLIS-8441 remove this dependency

 RET=0

diff --git a/ci/L0_multi_gpu_vllm/multi_lora/test.sh b/ci/L0_multi_gpu_vllm/multi_lora/test.sh
index 8cf2c3fa..c045c4fc 100755
--- a/ci/L0_multi_gpu_vllm/multi_lora/test.sh
+++ b/ci/L0_multi_gpu_vllm/multi_lora/test.sh
@@ -41,6 +41,9 @@ EXPECTED_NUM_TESTS=2
 GENERATE_ENDPOINT="localhost:8000/v2/models/vllm_llama_multi_lora/generate"
 CHECK_FOR_ERROR=true

+export C_INCLUDE_PATH=/usr/local/cuda/include:$C_INCLUDE_PATH
+export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
+
 make_api_call() {
     local endpoint="$1"
     local data="$2"
diff --git a/ci/L0_multi_gpu_vllm/test.sh b/ci/L0_multi_gpu_vllm/test.sh
index 38caa77a..34843139 100755
--- a/ci/L0_multi_gpu_vllm/test.sh
+++ b/ci/L0_multi_gpu_vllm/test.sh
@@ -29,7 +29,6 @@ RET=0
 SUBTESTS="vllm_backend multi_lora"

 python3 -m pip install tritonclient[grpc]
-python3 -m pip install "transformers<=4.53.3" # TODO:DLIS-8441 remove this dependency

 for TEST in ${SUBTESTS}; do
     (cd ${TEST} && bash -ex test.sh && cd ..)
diff --git a/ci/L0_multi_gpu_vllm/vllm_backend/test.sh b/ci/L0_multi_gpu_vllm/vllm_backend/test.sh
index 0609bebf..e4de2ad2 100755
--- a/ci/L0_multi_gpu_vllm/vllm_backend/test.sh
+++ b/ci/L0_multi_gpu_vllm/vllm_backend/test.sh
@@ -36,6 +36,9 @@ CLIENT_PY="./vllm_multi_gpu_test.py"
 SAMPLE_MODELS_REPO="../../../samples/model_repository"
 EXPECTED_NUM_TESTS=1

+export C_INCLUDE_PATH=/usr/local/cuda/include:$C_INCLUDE_PATH
+export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
+
 ### Helpers
 function validate_file_contains() {
     local KEY="${1}"
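
Patch 2/2 removes the temporary "transformers<=4.53.3" pin from the CI installs and exports C_INCLUDE_PATH and TRITON_PTXAS_PATH in the multi-GPU tests, presumably so kernel compilation can locate the CUDA headers and ptxas under /usr/local/cuda. A minimal sketch of a guarded variant of those exports (the existence checks are an addition, not part of the patch):

# Guarded variant of the exports added in patch 2/2: only point the build at
# CUDA paths that actually exist on the runner (the guards are an assumption).
if [ -d /usr/local/cuda/include ]; then
    export C_INCLUDE_PATH=/usr/local/cuda/include:${C_INCLUDE_PATH}
fi
if [ -x /usr/local/cuda/bin/ptxas ]; then
    export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
fi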