diff --git a/ci/L0_additional_outputs_vllm/test.sh b/ci/L0_additional_outputs_vllm/test.sh
index a13fdae2..967e771f 100755
--- a/ci/L0_additional_outputs_vllm/test.sh
+++ b/ci/L0_additional_outputs_vllm/test.sh
@@ -30,7 +30,6 @@ source ../common/util.sh
 
 pip3 install pytest==8.1.1
 pip3 install tritonclient[grpc]
-pip3 install "transformers<=4.53.3" # TODO:DLIS-8441 remove this dependency
 
 # Prepare Model
 rm -rf models vllm_baseline_output.pkl && mkdir -p models
diff --git a/ci/L0_backend_vllm/test.sh b/ci/L0_backend_vllm/test.sh
index 674b0fa9..b4d27357 100755
--- a/ci/L0_backend_vllm/test.sh
+++ b/ci/L0_backend_vllm/test.sh
@@ -29,7 +29,6 @@ RET=0
 SUBTESTS="accuracy_test request_cancellation enabled_stream vllm_backend metrics_test"
 
 python3 -m pip install tritonclient[grpc]
-python3 -m pip install "transformers<=4.53.3" # TODO:DLIS-8441 remove this dependency
 
 for TEST in ${SUBTESTS}; do
     (cd ${TEST} && bash -ex test.sh && cd ..)
diff --git a/ci/L0_backend_vllm/vllm_backend/test.sh b/ci/L0_backend_vllm/vllm_backend/test.sh
index 87e04b21..690599b5 100755
--- a/ci/L0_backend_vllm/vllm_backend/test.sh
+++ b/ci/L0_backend_vllm/vllm_backend/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -64,7 +64,7 @@ wget -P models/add_sub https://raw.githubusercontent.com/triton-inference-server
 
 # Invalid model attribute
 cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_1/
-sed -i 's/"disable_log_requests"/"invalid_attribute"/' models/vllm_invalid_1/1/model.json
+sed -i 's/"enforce_eager"/"invalid_attribute"/' models/vllm_invalid_1/1/model.json
 
 # Invalid model name
 cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_2/
diff --git a/ci/L0_check_health_vllm/test.sh b/ci/L0_check_health_vllm/test.sh
index 655b043f..80668bcb 100755
--- a/ci/L0_check_health_vllm/test.sh
+++ b/ci/L0_check_health_vllm/test.sh
@@ -30,7 +30,6 @@ source ../common/util.sh
 
 pip3 install pytest==8.1.1
 pip3 install tritonclient[grpc]
-pip3 install "transformers<=4.53.3" # TODO:DLIS-8441 remove this dependency
 
 RET=0
 
diff --git a/ci/L0_multi_gpu_vllm/multi_lora/test.sh b/ci/L0_multi_gpu_vllm/multi_lora/test.sh
index bcc52770..c045c4fc 100755
--- a/ci/L0_multi_gpu_vllm/multi_lora/test.sh
+++ b/ci/L0_multi_gpu_vllm/multi_lora/test.sh
@@ -41,6 +41,9 @@ EXPECTED_NUM_TESTS=2
 GENERATE_ENDPOINT="localhost:8000/v2/models/vllm_llama_multi_lora/generate"
 CHECK_FOR_ERROR=true
 
+export C_INCLUDE_PATH=/usr/local/cuda/include${C_INCLUDE_PATH:+:${C_INCLUDE_PATH}} # no trailing ':' when previously unset (empty entry = CWD for gcc)
+export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
+
 make_api_call() {
     local endpoint="$1"
     local data="$2"
@@ -110,7 +113,6 @@ export SERVER_ENABLE_LORA=true
 model_json=$(cat <<EOF
 {
     "model":"./weights/backbone/gemma-2b",
-    "disable_log_requests": true,
     "gpu_memory_utilization": 0.7,
     "tensor_parallel_size": 2,
     "block_size": 16,
@@ -202,7 +204,6 @@ wait $SERVER_PID
 model_json=$(cat <<EOF
 {
     "model":"./weights/backbone/gemma-2b",
-    "disable_log_requests": true,
     "gpu_memory_utilization": 0.7,
     "tensor_parallel_size": 2,
     "block_size": 16,
@@ -282,7 +283,6 @@ export SERVER_ENABLE_LORA=false
 model_json=$(cat <<EOF
 {
     "model":"./weights/backbone/gemma-2b",
-    "disable_log_requests": true,
     "gpu_memory_utilization": 0.8,
     "tensor_parallel_size": 2,
     "block_size": 16,
@@ -344,7 +344,6 @@ export SERVER_ENABLE_LORA=false
 model_json=$(cat <<EOF
 {
     "model":"./weights/backbone/gemma-2b",
-    "disable_log_requests": true,
     "gpu_memory_utilization": 0.8,
     "tensor_parallel_size": 2,
     "block_size": 16,
diff --git a/ci/L0_multi_gpu_vllm/test.sh b/ci/L0_multi_gpu_vllm/test.sh
index 38caa77a..34843139 100755
--- a/ci/L0_multi_gpu_vllm/test.sh
+++ b/ci/L0_multi_gpu_vllm/test.sh
@@ -29,7 +29,6 @@ RET=0
 SUBTESTS="vllm_backend multi_lora"
 
 python3 -m pip install tritonclient[grpc]
-python3 -m pip install "transformers<=4.53.3" # TODO:DLIS-8441 remove this dependency
 
 for TEST in ${SUBTESTS}; do
     (cd ${TEST} && bash -ex test.sh && cd ..)
diff --git a/ci/L0_multi_gpu_vllm/vllm_backend/test.sh b/ci/L0_multi_gpu_vllm/vllm_backend/test.sh
index 0609bebf..e4de2ad2 100755
--- a/ci/L0_multi_gpu_vllm/vllm_backend/test.sh
+++ b/ci/L0_multi_gpu_vllm/vllm_backend/test.sh
@@ -36,6 +36,9 @@ CLIENT_PY="./vllm_multi_gpu_test.py"
 SAMPLE_MODELS_REPO="../../../samples/model_repository"
 EXPECTED_NUM_TESTS=1
 
+export C_INCLUDE_PATH=/usr/local/cuda/include${C_INCLUDE_PATH:+:${C_INCLUDE_PATH}} # no trailing ':' when previously unset (empty entry = CWD for gcc)
+export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
+
 ### Helpers
 function validate_file_contains() {
     local KEY="${1}"
diff --git a/docs/llama_multi_lora_tutorial.md b/docs/llama_multi_lora_tutorial.md
index c12910e6..5159ae4e 100644
--- a/docs/llama_multi_lora_tutorial.md
+++ b/docs/llama_multi_lora_tutorial.md
@@ -1,5 +1,5 @@
 <!--
-# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -146,7 +146,6 @@ For this tutorial we will use the following set of parameters, specified in the
 ```json
 {
     "model":"/vllm_workspace/weights/backbone/llama-7b-hf",
-    "disable_log_requests": "true",
     "gpu_memory_utilization": 0.8,
     "tensor_parallel_size": 2,
     "block_size": 16,
@@ -157,7 +156,6 @@ For this tutorial we will use the following set of parameters, specified in the
 ```
 
 + `model`: The path to your model repository
-+ `disable_log_requests`: To show logs when launch vllm or not.
 + `gpu_memory_utilization`: The gpu memory allocated for the model weights and vllm *PagedAttention* kv cache manager.
 + `tensor_parallel_size`: The vllm now support the tensor paralism, so you can decide how many gpus you want to use for serving.
 + `block_size`: vLLM kv cache block size.
diff --git a/samples/model_repository/vllm_model/1/model.json b/samples/model_repository/vllm_model/1/model.json
index 8a32050d..50ed9637 100644
--- a/samples/model_repository/vllm_model/1/model.json
+++ b/samples/model_repository/vllm_model/1/model.json
@@ -1,6 +1,5 @@
 {
     "model":"facebook/opt-125m",
-    "disable_log_requests": true,
     "gpu_memory_utilization": 0.5,
     "enforce_eager": true
 }