NVIDIA · syuoni · Jul 8, 2025 · Jul 7, 2025 · Jul 8, 2025
diff --git a/tests/unittest/llmapi/test_llm.py b/tests/unittest/llmapi/test_llm.py
@@ -1402,12 +1402,19 @@ def llama_v2_7b_prompt_adapter_test_harness(**llm_kwargs):
         "Tweet text: I have no problems Label: ",
     ]
     references = [
-        "painter at the École des Beaux-Arts in Paris. He was a member of the",
-        "chef and has worked in the restaurant industry for 15 years.Ћ\nBorn in north",
-        "1999.\nTweet text: I have complaints! Label: 19",
-        "no complaint",
-        "100%\nI have no problems Label: 100%\nI have no",
-        "no complaint",
+        [
+            "painter at the École des Beaux-Arts in Paris. He was a member of the"
+        ],
+        [
+            "chef and has worked in the restaurant industry for 15 years.Ћ\nBorn in north"
+        ],
+        ["1999.\nTweet text: I have complaints! Label: 19"],
+        ["no complaint"],
+        [
+            "100%\nI have no problems Label: 100%\nI have no",
+            "1999\nLabel: 1999 (1999)\nT"
+        ],
+        ["no complaint"],
     ]
     pa_req = PromptAdapterRequest('tweet', 1, hf_prompt_adapter_dir)
     sampling_params = SamplingParams(max_tokens=20)
@@ -1416,7 +1423,9 @@ def llama_v2_7b_prompt_adapter_test_harness(**llm_kwargs):
         sampling_params,
         prompt_adapter_request=[None, pa_req, None, pa_req, None, pa_req])
     for output, ref in zip(outputs, references):
-        assert similar(output.outputs[0].text, ref)
+        # Currently, the 5th request may have non-deterministic outputs.
+        # Let the test pass if the generation output matches any of the candidate references.
+        assert any(similar(output.outputs[0].text, r) for r in ref)
 
 
 @skip_gpu_memory_less_than_40gb

diff --git a/tests/unittest/llmapi/test_llm_multi_gpu.py b/tests/unittest/llmapi/test_llm_multi_gpu.py
@@ -268,7 +268,6 @@ def test_llama_7b_multi_lora_tp2():
         kv_cache_config=global_kv_cache_config)
 
 
-@pytest.mark.skip(reason="https://nvbugs/5362426")
 @pytest.mark.gpu2
 @pytest.mark.part3
 def test_llama_v2_7b_prompt_adapter_tp2():